PART 2 - VISUALIZATIONS
We used a long short-term memory (LSTM) neural network as our machine learning algorithm to predict future index values, and ran the following code in Google Colab.

In [None]:
# Install necessary libraries (if not already installed)
!pip install tensorflow
!pip install plotly
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import tensorflow as tf
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.preprocessing import MinMaxScaler
import os
# Explicitly import the 'mse' loss function from Keras
from tensorflow.keras.losses import mse
from pandas.tseries.offsets import DateOffset  # Import for date offset
# Ask the user to upload the CSV through Colab's file picker
from google.colab import files
uploaded = files.upload()
# Read the housing index table, convert the "Date" column to timestamps,
# and order the rows chronologically
df = pd.read_csv("housing_index_final.csv")
df = df.assign(Date=pd.to_datetime(df["Date"])).sort_values("Date")
# Regions that receive a forecast and an entry in the chart's dropdown
selected_regions = [
    "Halifax, Nova Scotia",
    "Montréal, Québec",
    "Ottawa-Gatineau, Quebec part, Ontario/Quebec",
    "London, Ontario",
    "Hamilton, Ontario",
    "Toronto, Ontario",
    "Calgary, Alberta",
    "Vancouver, British Columbia",
    "Victoria, British Columbia",
    "Saskatoon, Saskatchewan",
    "Winnipeg, Manitoba",
    "Edmonton, Alberta",
]
# Every distinct region label found in the dataset's "Geography" column
regions = df["Geography"].unique()
# Folder in which trained models are cached; created on first run
model_dir = "saved_models"
os.makedirs(model_dir, exist_ok=True)
def create_sequences(data, seq_length=12):
    """Split a series into sliding input windows and their next-step targets.

    Sample ``i`` pairs the window ``data[i:i + seq_length]`` with the value
    that immediately follows it, ``data[i + seq_length]``.

    Args:
        data: Array-like series (first axis is time).
        seq_length: Number of consecutive values in each input window.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays. ``X`` has shape
        ``(len(data) - seq_length, seq_length, ...)`` and ``y`` has shape
        ``(len(data) - seq_length, ...)``. When ``data`` is not longer than
        ``seq_length`` both arrays are empty but keep those trailing
        dimensions, so callers can reshape them without special-casing.
    """
    data = np.asarray(data)
    n_samples = len(data) - seq_length
    if n_samples <= 0:
        # Not enough history for even one (window, target) pair.
        trailing = data.shape[1:]
        return (np.empty((0, seq_length) + trailing, dtype=data.dtype),
                np.empty((0,) + trailing, dtype=data.dtype))
    # Row i of the index grid is [i, i + 1, ..., i + seq_length - 1], so one
    # fancy-indexing call gathers every window without a Python-level loop.
    window_idx = np.arange(n_samples)[:, None] + np.arange(seq_length)[None, :]
    # Copy the targets so the result never aliases the caller's array.
    return data[window_idx], data[seq_length:].copy()
# Window length: each sample covers the previous 12 monthly values
seq_length = 12
# Forecast horizon in months: 7 years (2024-2030)
future_steps = 7 * 12
# Maps region name -> (forecast dates, forecast values); filled by the per-region loop
predictions = {}
# Train (or load) one LSTM per selected region and roll it forward
# `future_steps` months, storing the result in `predictions`.
for region in regions:
    if region not in selected_regions:
        continue  # Skip regions that are not in the selected list
    # Copy the filtered rows so the new column is written to an independent
    # frame rather than to a slice of df (avoids the chained-assignment warning).
    df_region = df[df["Geography"] == region].sort_values("Date").copy()
    # Prepare data: scale the index with MinMaxScaler.
    # NOTE: the scaler is fit on the full series, so the validation split
    # below is scaled with statistics that include its own range.
    scaler = MinMaxScaler()
    df_region["Scaled_Index"] = scaler.fit_transform(df_region[["Index Value"]]).ravel()
    scaled = df_region["Scaled_Index"].values
    X, y = create_sequences(scaled, seq_length)
    train_size = int(len(X) * 0.8)
    if train_size == 0:
        # Too little history to form a training window; skip instead of crashing.
        print(f"Skipping {region}: only {len(scaled)} observations, "
              f"not enough for {seq_length}-month windows.")
        continue
    # Make the (samples, timesteps, features) layout declared by input_shape
    # explicit instead of relying on Keras to add the feature axis.
    X = X.reshape(-1, seq_length, 1)
    X_train, X_test = X[:train_size], X[train_size:]
    y_train, y_test = y[:train_size], y[train_size:]
    # Build the file name: commas and spaces become underscores as before.
    # Path separators are replaced as well, because a label such as
    # "... Ontario/Quebec" would otherwise point into a sub-directory of
    # model_dir that does not exist and the save would fail.
    safe_name = (region.replace(', ', '_').replace(' ', '_')
                 .replace('/', '_').replace('\\', '_'))
    model_path = os.path.join(model_dir, f"lstm_{safe_name}.h5")
    if not os.path.exists(model_path):
        # Build LSTM Model
        model = Sequential([
            LSTM(50, return_sequences=True, input_shape=(seq_length, 1)),
            Dropout(0.2),
            LSTM(50, return_sequences=False),
            Dropout(0.2),
            Dense(25),
            Dense(1)
        ])
        model.compile(optimizer='adam', loss=mse)  # Use imported mse
        model.fit(X_train, y_train, epochs=50, batch_size=16, validation_data=(X_test, y_test), verbose=1)
        model.save(model_path)  # Save model
    else:
        # Only inference follows, so the optimizer/loss state is not needed;
        # compile=False also avoids having to deserialize the loss function.
        model = load_model(model_path, compile=False)
    # Predict future values by feeding each prediction back in as the newest input
    future_preds = []
    # Seed with the most recent seq_length observations. X[-1] stops one step
    # before the final observation (that value is its target, y[-1]), so using
    # it would shift the whole forecast one month into the past.
    input_seq = scaled[-seq_length:].reshape(1, seq_length, 1)
    for _ in range(future_steps):
        pred = model.predict(input_seq, verbose=0)[0][0]
        future_preds.append(pred)
        # Drop the oldest value and append the new prediction
        input_seq = np.append(input_seq[:, 1:, :], [[[pred]]], axis=1)
    # Rescale predictions back to index units
    future_preds = scaler.inverse_transform(np.array(future_preds).reshape(-1, 1))
    # One date per forecast step, each computed as an offset from the last
    # observed date so the spacing and day-of-month follow the data
    # (freq='M' would snap every date to month-end and is a deprecated alias).
    last_date = df_region["Date"].max()
    date_range = pd.DatetimeIndex(
        [last_date + pd.DateOffset(months=step) for step in range(1, future_steps + 1)]
    )
    predictions[region] = (date_range, future_preds)
# Create Plotly figure with a dropdown that switches between regions.
# Each region contributes two consecutive traces: actual data, then forecast.
fig = go.Figure()
region_names = list(predictions)
if not region_names:
    # Nothing to plot; say so instead of failing on fig.data[0] below.
    print("No forecasts available: none of the selected regions produced predictions.")
for position, region in enumerate(region_names):
    dates, preds = predictions[region]
    forecast = preds.flatten()
    history = df[df["Geography"] == region]
    # Extend the actual line with the first forecast point so the actual and
    # predicted lines connect on the chart.
    actual_x = history["Date"].tolist() + [dates[0]]
    actual_y = history["Index Value"].tolist() + [forecast[0]]
    # Only the first region is shown initially; the dropdown toggles the rest.
    show = position == 0
    # Add actual data trace (including the added prediction point)
    fig.add_trace(go.Scatter(x=actual_x, y=actual_y,
                             mode='lines', name=f"Actual Data ({region})", visible=show))
    # Add predicted data trace
    fig.add_trace(go.Scatter(x=dates, y=forecast,
                             mode='lines', name=f"Predicted (2024-2030) ({region})", visible=show))
# Create dropdown buttons: each one shows exactly its region's two traces
trace_count = len(fig.data)
dropdown_buttons = []
for position, region in enumerate(region_names):
    visibility = [False] * trace_count
    visibility[2 * position] = True  # Actual data
    visibility[2 * position + 1] = True  # Predicted data
    dropdown_buttons.append(
        dict(label=region,
             method='update',
             args=[{'visible': visibility}])
    )
# Update layout with dropdown
fig.update_layout(
    title="LSTM Prediction of Housing Price Index for Selected Regions (2024-2030)",
    xaxis_title="Year",
    yaxis_title="Housing Price Index",
    updatemenus=[{
        'buttons': dropdown_buttons,
        'direction': 'down',
        'showactive': True,
    }]
)
# Show the figure
fig.show()