# Demand Forecast: LSTM Model

In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Input
import matplotlib.pyplot as plt
from keras_tuner.tuners import RandomSearch

# Data Preparation Function
def prepare_data(visitor_data_path, weather_data_path, holiday_data_path):
    """Load visitor, weather and holiday CSVs and build one feature table.

    Args:
        visitor_data_path: CSV with a ``visit_date`` column. A
            ``day_of_week`` column must be present after merging; since the
            holiday file's copy is dropped, it presumably comes from this
            file (verify against the data).
        weather_data_path: CSV with ``Date``, ``Daily Rainfall Total (mm)``
            and ``Mean Temperature (°C)``. Its first three columns are
            discarded, so ``Date`` must not be among them.
        holiday_data_path: CSV with ``calendar_date``, ``day_of_week`` and
            the ``holiday_flg_sg`` / ``holiday_flg_cn`` / ``holiday_flg_in``
            flags.

    Returns:
        DataFrame with a fresh 0..n-1 index. The raw date, rainfall,
        temperature and per-country holiday columns are replaced by derived
        features; ``day_of_week`` (numeric) and ``month`` are the last two
        columns.
    """
    # --- Load each source and parse its date key ---
    visits = pd.read_csv(visitor_data_path)
    visits['visit_date'] = pd.to_datetime(visits['visit_date'])

    weather = pd.read_csv(weather_data_path)
    weather['Date'] = pd.to_datetime(weather['Date'])
    weather = weather.drop(columns=weather.columns[:3])

    holidays = pd.read_csv(holiday_data_path)
    holidays['calendar_date'] = pd.to_datetime(holidays['calendar_date'])
    # Dropped so it cannot collide with the merged table's day_of_week.
    holidays = holidays.drop(columns="day_of_week")

    # --- Left-join weather and holidays onto the visits by date ---
    frame = visits.merge(weather, left_on='visit_date', right_on='Date', how='left')
    frame = frame.merge(holidays, left_on='visit_date', right_on='calendar_date', how='left')
    frame = frame.drop(columns=["Date", "calendar_date"])

    # --- Derived features ---
    frame = pd.get_dummies(frame, columns=['day_of_week'])

    rainfall = frame['Daily Rainfall Total (mm)']
    frame['heavy_rainfall_flag'] = (rainfall > 20).astype(int)

    # Bins are right-inclusive: cold <= 15 < mild <= 25 < hot.
    frame['temperature_category'] = pd.cut(
        frame['Mean Temperature (°C)'],
        bins=[-np.inf, 15, 25, np.inf],
        labels=['cold', 'mild', 'hot'],
    )

    holiday_flags = ['holiday_flg_sg', 'holiday_flg_cn', 'holiday_flg_in']
    # A date counts as a holiday if any of the three flags is set.
    frame['is_holiday'] = frame[holiday_flags].max(axis=1)
    frame['week_of_year'] = frame['visit_date'].dt.isocalendar().week

    frame = pd.get_dummies(frame, columns=['temperature_category'])
    frame = frame.drop(
        columns=["Daily Rainfall Total (mm)", "Mean Temperature (°C)", *holiday_flags]
    )

    # Numeric calendar features (Monday == 0) alongside the one-hot weekday.
    frame['day_of_week'] = frame['visit_date'].dt.dayofweek
    frame['month'] = frame['visit_date'].dt.month

    return frame.drop(columns=['visit_date']).reset_index(drop=True)

# Feature Engineering Function
def create_features(df, look_back=1):
    """Turn ``visitors`` into a look-ahead target stored in the last column.

    Each row's ``visitors`` value is replaced by the value ``look_back`` rows
    later, so the row's features are paired with a future visitor count.
    The column is also moved to the last position, because ``create_dataset``
    treats the last column of the scaled matrix as the target and every
    other column as an input feature.

    Args:
        df: DataFrame containing a ``visitors`` column. It is modified in
            place.
        look_back: Number of rows to shift the target forward by.

    Returns:
        The same DataFrame object, with ``visitors`` shifted and last, and
        with every row containing a missing value removed. This includes the
        final ``look_back`` rows, which have no future target. The index is
        not reset.

    Raises:
        KeyError: If ``df`` has no ``visitors`` column.
    """
    # pop + re-assign moves the column to the end without copying the frame.
    target = df.pop('visitors')
    df['visitors'] = target.shift(-look_back)
    df.dropna(inplace=True)
    return df

# Data Scaling and Splitting Function
def scale_split_data(df):
    """Split chronologically 80/20, then min-max scale using train statistics.

    The scaler is fitted on the training rows only and then applied to the
    test rows, so no information from the hold-out period leaks into the
    scaling. As a consequence, scaled test values may fall slightly outside
    the [0, 1] range.

    Args:
        df: DataFrame whose values can all be cast to ``float32``.

    Returns:
        Tuple ``(train, test, scaler)``: two scaled 2-D ``float32`` arrays
        (first 80% and last 20% of the rows, in order) and the fitted
        ``MinMaxScaler``.

    Raises:
        ValueError: If ``df`` has too few rows to leave any training rows.
    """
    values = df.values.astype('float32')

    train_size = int(len(values) * 0.8)
    if train_size == 0:
        raise ValueError(
            "Not enough rows to build a training split: "
            f"got {len(values)} row(s)."
        )
    train_raw, test_raw = values[:train_size, :], values[train_size:, :]

    scaler = MinMaxScaler(feature_range=(0, 1))
    train = scaler.fit_transform(train_raw)
    test = scaler.transform(test_raw)
    return train, test, scaler

# Dataset Preparation Function for LSTM
def create_dataset(dataset, look_back=1):
    """Slice a 2-D array into sliding input windows and next-step targets.

    Args:
        dataset: 2-D array whose last column is the target and whose other
            columns are input features.
        look_back: Number of consecutive rows in each input window.

    Returns:
        Tuple ``(X, Y)``. ``X`` stacks the feature windows
        (``len(dataset) - look_back`` windows of ``look_back`` rows each,
        target column excluded). ``Y`` holds the target value of the row
        immediately after each window.
    """
    window_starts = range(len(dataset) - look_back)
    # Feature columns only (everything but the last) for each window.
    windows = [dataset[start:start + look_back, :-1] for start in window_starts]
    # Target is read from the row that follows the window.
    targets = [dataset[start + look_back, -1] for start in window_starts]
    return np.array(windows), np.array(targets)

# LSTM model-building function; the input shape is (look_back, num_features)

def build_lstm_model(hp, look_back, num_features):
    """Build and compile a single-layer LSTM regressor for one tuner trial.

    Args:
        hp: Hyperparameter container supplied by the tuner. Only ``units``
            is sampled, from 50 to 200 in steps of 50.
        look_back: Number of time steps in each input sequence.
        num_features: Number of features per time step.

    Returns:
        A compiled model: Input -> LSTM(units) -> Dense(1), trained with the
        Adam optimizer on mean squared error.
    """
    lstm_units = hp.Int('units', min_value=50, max_value=200, step=50)

    network = Sequential()
    # Declare the input shape explicitly via an Input layer.
    network.add(Input(shape=(look_back, num_features)))
    network.add(LSTM(lstm_units))
    network.add(Dense(1))

    network.compile(optimizer='adam', loss='mean_squared_error')
    return network



# LSTM model-tuning function; look_back and num_features are forwarded to the model builder
def tune_lstm_model(trainX, trainY, look_back, num_features):
    """Random-search the LSTM size, then train a fresh model with the winner.

    Args:
        trainX: Training input sequences.
        trainY: Training targets.
        look_back: Number of time steps per sequence, forwarded to
            ``build_lstm_model``.
        num_features: Number of features per time step, forwarded to
            ``build_lstm_model``.

    Returns:
        A model rebuilt from the best hyperparameters and fitted on the full
        training data for 10 epochs with a batch size of 1.
    """
    def _model_for_trial(hp):
        # Bind the fixed input dimensions so the tuner only needs to pass hp.
        return build_lstm_model(hp, look_back=look_back, num_features=num_features)

    search = RandomSearch(
        _model_for_trial,
        objective='val_loss',
        max_trials=5,
        executions_per_trial=3,
        directory='lstm_tuning',
        project_name='visitor_forecast',
    )

    # The last 20% of the training data is held out to score each trial.
    search.search(trainX, trainY, epochs=5, validation_split=0.2, verbose=2)
    top_hps = search.get_best_hyperparameters(num_trials=1)[0]

    final_model = search.hypermodel.build(top_hps)
    final_model.fit(trainX, trainY, epochs=10, batch_size=1, verbose=2)
    return final_model


# Forecasting Function
def forecast_lstm(model, scaler, look_back, last_data, horizon=7):
    """Roll the model forward ``horizon`` steps and return unscaled forecasts.

    Args:
        model: Fitted model exposing ``predict`` on an array of shape
            ``(1, look_back, num_features)``.
        scaler: Fitted scaler exposing ``inverse_transform`` and, when it was
            fitted on more than one column, ``n_features_in_``. The target
            is assumed to be the LAST column the scaler was fitted on,
            matching ``create_dataset``.
        look_back: Number of time steps in the input window.
        last_data: The most recent ``look_back`` rows of scaled input
            features (target column excluded).
        horizon: Number of future steps to forecast.

    Returns:
        Array of shape ``(horizon, 1)`` with forecasts in original units.

    Notes:
        Future feature values are unknown here. With a single input feature
        the prediction is fed back as the newest time step. With several
        features the most recent feature row is repeated instead, so for
        ``look_back == 1`` every step yields the same forecast.
    """
    window = np.asarray(last_data).reshape((1, look_back, -1))
    num_features = window.shape[2]

    scaled_preds = []
    for _ in range(horizon):
        step_pred = np.ravel(model.predict(window))[0]
        scaled_preds.append(step_pred)

        if num_features == 1:
            # Univariate window: the prediction becomes the newest time step.
            newest_step = np.reshape(step_pred, (1, 1, 1))
        else:
            # A scalar prediction cannot fill a multi-feature row; carry the
            # latest known feature row forward.
            newest_step = window[:, -1:, :]
        window = np.concatenate([window[:, 1:, :], newest_step], axis=1)

    # The scaler expects as many columns as it was fitted on. Place the
    # predictions in the target (last) column and pad the rest with zeros;
    # min-max scaling is per column, so the padding does not affect the
    # inverted target values.
    scaled_preds = np.asarray(scaled_preds, dtype='float64')
    n_columns = getattr(scaler, 'n_features_in_', 1)
    padded = np.zeros((len(scaled_preds), n_columns), dtype='float64')
    padded[:, -1] = scaled_preds
    return scaler.inverse_transform(padded)[:, -1:]

# Main Execution Block
# Script entry point: prepare the data, tune and train the LSTM, then plot
# and print a 7-day visitor forecast.
if __name__ == "__main__":
    # Adjust these paths to your datasets
    visitor_data_path = "../data/raw/synthetic_visit_data.csv"
    weather_data_path = "../data/processed/weather_data_cleaned.csv"
    holiday_data_path = "../data/raw/date_info_2324.csv"

    # Data preparation and feature engineering
    df = prepare_data(visitor_data_path, weather_data_path, holiday_data_path)
    look_back = 1
    df = create_features(df, look_back)
    train, test, scaler = scale_split_data(df)
    trainX, trainY = create_dataset(train, look_back)
    testX, testY = create_dataset(test, look_back)

    # Model building, tuning, and forecasting
    # Derive the input width from the prepared data instead of hard-coding
    # it: every column except the target is an input feature, and the count
    # depends on how many dummy columns the data produced.
    num_features = train.shape[1] - 1
    model = tune_lstm_model(trainX, trainY, look_back, num_features)
    last_data = test[-look_back:, :-1]  # Exclude the target variable
    predictions = forecast_lstm(model, scaler, look_back, last_data)

    # Plotting the forecast
    plt.figure(figsize=(10,6))
    plt.plot(predictions, label='Forecasted Visitors')
    plt.title('Visitor Forecast with LSTM')
    plt.xlabel('Day')
    plt.ylabel('Visitors')
    plt.legend()
    plt.show()

    # Print the forecasted visitor numbers
    print("Forecasted Visitors for the next 7 days:")
    for i, prediction in enumerate(predictions, 1):
        print(f"Day {i}: {prediction[0]:.2f}")
