In [26]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [27]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error

# load data
data_path = '/content/drive/MyDrive/Team7/ForwardKeys_data.csv'
df = pd.read_csv(data_path)

In [None]:
import pandas as pd
import numpy as np
import torch
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# Load data
df = pd.read_csv('/content/drive/MyDrive/Team7/ForwardKeys_data.csv')

# Convert date and datetime
df['Date'] = pd.to_datetime(df['Date'], format='%Y-%m-%d')  # Adjust format if necessary
df['DayOfWeek'] = df['Date'].dt.dayofweek  # Monday=0, Sunday=6

# Create one-hot encodings for day of the week
dayofweek_onehot = pd.get_dummies(df['DayOfWeek'], prefix='Day')
df = df.join(dayofweek_onehot)

# all location
locations = ['Visitors in Blue Lagoon', 'Visitors in Machu Picchu', 'Visitors in Taj Mahal', 'Visitors in Doge\'s Palace', 'Visitors in Louvre Museum']

# hold sequence data for all location and time slot combinations
sequence_data_all = {}

# normalize
scaler = MinMaxScaler(feature_range=(0, 1))
for location in locations:
    for time_slot in df['Time'].unique():

        # filter data for current location and time slot
        df_filtered = df[df['Time'] == time_slot][['Date', location] + dayofweek_onehot.columns.tolist()].copy()
        df_filtered[location] = scaler.fit_transform(df_filtered[[location]])

        # make data
        sequence_data = df_filtered[[location] + dayofweek_onehot.columns.tolist()].astype(np.float32)

        # create sequences
        def create_sequences(data, seq_length):
            xs, ys = [], []
            for i in range(len(data) - seq_length):
                x = data.iloc[i:(i + seq_length)].values
                y = data.iloc[i + seq_length, 0]
                xs.append(x)
                ys.append(y)
            return np.array(xs), np.array(ys)

        seq_length = 50
        X, y = create_sequences(sequence_data, seq_length)

        # convert PyTorch tensors
        X_tensor = torch.from_numpy(X)
        y_tensor = torch.from_numpy(y).view(-1, 1)
        sequence_data_all[(location, time_slot)] = (X_tensor, y_tensor)


In [None]:
#Taj Mahal
# LSTM model class
class LSTMModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers, output_dim):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        out, (hn, cn) = self.lstm(x)
        out = self.fc(out[:, -1, :])
        return out

# train and eva
def train_evaluate_model(X_train, X_test, y_train, y_test, time_slot):
    lstm_model = LSTMModel(input_dim=X_train.shape[-1], hidden_dim=64, num_layers=3, output_dim=1)
    lstm_criterion = nn.MSELoss()
    lstm_optimizer = torch.optim.Adam(lstm_model.parameters(), lr=0.001)

    lstm_num_epochs = 170
    for epoch in range(lstm_num_epochs):
        lstm_model.train()
        lstm_optimizer.zero_grad()
        lstm_output = lstm_model(X_train)
        lstm_loss = lstm_criterion(lstm_output, y_train)
        lstm_loss.backward()
        lstm_optimizer.step()
        if epoch % 10 == 0:
            print(f'Taj Mahal - Epoch {epoch}, Loss: {lstm_loss.item()} at {time_slot}')

    lstm_model.eval()
    with torch.no_grad():
        lstm_predictions = lstm_model(X_test)
        lstm_r_squared = r2_score(y_test.numpy(), lstm_predictions.numpy())
        lstm_rmse = np.sqrt(mean_squared_error(y_test.numpy(), lstm_predictions.numpy()))

        print(f'Time Slot: {time_slot}, LSTM R-squared: {lstm_r_squared}, RMSE: {lstm_rmse}')

location = 'Visitors in Taj Mahal'
time_slots = ['8:00', '10:00', '12:00', '14:00', '16:00', '18:00']
for time_slot in time_slots:
    X_train, X_test, y_train, y_test = train_test_split(*sequence_data_all[(location, time_slot)], test_size=0.2, random_state=42)
    train_evaluate_model(X_train, X_test, y_train, y_test, time_slot)


Taj Mahal - Epoch 0, Loss: 0.34845834970474243 at 8:00
Taj Mahal - Epoch 10, Loss: 0.1133299320936203 at 8:00
Taj Mahal - Epoch 20, Loss: 0.1090138852596283 at 8:00
Taj Mahal - Epoch 30, Loss: 0.10659698396921158 at 8:00
Taj Mahal - Epoch 40, Loss: 0.10611901432275772 at 8:00
Taj Mahal - Epoch 50, Loss: 0.10499054193496704 at 8:00
Taj Mahal - Epoch 60, Loss: 0.10329072922468185 at 8:00
Taj Mahal - Epoch 70, Loss: 0.10054298490285873 at 8:00
Taj Mahal - Epoch 80, Loss: 0.094386026263237 at 8:00
Taj Mahal - Epoch 90, Loss: 0.0755375400185585 at 8:00
Taj Mahal - Epoch 100, Loss: 0.03833676874637604 at 8:00
Taj Mahal - Epoch 110, Loss: 0.029527096077799797 at 8:00
Taj Mahal - Epoch 120, Loss: 0.023243196308612823 at 8:00
Taj Mahal - Epoch 130, Loss: 0.0177286509424448 at 8:00
Taj Mahal - Epoch 140, Loss: 0.013891847804188728 at 8:00
Taj Mahal - Epoch 150, Loss: 0.012651271186769009 at 8:00
Taj Mahal - Epoch 160, Loss: 0.012071150355041027 at 8:00
Time Slot: 8:00, LSTM R-squared: 0.87791444

In [None]:
#Louvre Museum
#LsTM
df['Date'] = pd.to_datetime(df['Date'], format='%Y-%m-%d')
df['DayOfWeek'] = df['Date'].dt.dayofweek  # Monday=0, Sunday=6

# Create one hot encodings for day of the week
dayofweek_onehot = pd.get_dummies(df['DayOfWeek'], prefix='Day')
df = pd.concat([df, dayofweek_onehot], axis=1)

# normalize
scaler = MinMaxScaler(feature_range=(0, 1))
location = 'Visitors in Louvre Museum'
time_slots = ['8:00', '10:00', '12:00', '14:00', '16:00', '18:00']
sequence_data_all = {}

for time_slot in time_slots:
    df_filtered = df[df['Time'] == time_slot][['Date', location] + dayofweek_onehot.columns.tolist()].copy()
    df_filtered[location] = scaler.fit_transform(df_filtered[[location]])
    sequence_data = df_filtered[[location] + dayofweek_onehot.columns.tolist()].astype(np.float32)

    # Create sequences
    def create_sequences(data, seq_length=50):
        xs, ys = [], []
        for i in range(len(data) - seq_length):
            x = data.iloc[i:(i + seq_length)].values
            y = data.iloc[i + seq_length, 0]
            xs.append(x)
            ys.append(y)
        return np.array(xs), np.array(ys)

    X, y = create_sequences(sequence_data)
    X_tensor = torch.from_numpy(X).float()
    y_tensor = torch.from_numpy(y).float().view(-1, 1)
    sequence_data_all[time_slot] = (X_tensor, y_tensor)

class LSTMModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers, output_dim):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        out, (hn, cn) = self.lstm(x)
        out = self.fc(out[:, -1, :])
        return out

def train_evaluate_model(X_train, X_test, y_train, y_test):
    lstm_model = LSTMModel(input_dim=X_train.shape[-1], hidden_dim=64, num_layers=3, output_dim=1)
    lstm_criterion = nn.MSELoss()
    lstm_optimizer = torch.optim.Adam(lstm_model.parameters(), lr=0.001)

    lstm_num_epochs = 170
    for epoch in range(lstm_num_epochs):
        lstm_model.train()
        lstm_optimizer.zero_grad()
        lstm_output = lstm_model(X_train)
        lstm_loss = lstm_criterion(lstm_output, y_train)
        lstm_loss.backward()
        lstm_optimizer.step()

    lstm_model.eval()
    with torch.no_grad():
        lstm_predictions = lstm_model(X_test)

for time_slot in time_slots:
    X_train, X_test, y_train, y_test = train_test_split(sequence_data_all[time_slot][0], sequence_data_all[time_slot][1], test_size=0.2, random_state=42)
    train_evaluate_model(X_train, X_test, y_train, y_test)


In [None]:
# Blue lagoon
# LSTM
class LSTMModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers, output_dim):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        out, (hn, cn) = self.lstm(x)
        out = self.fc(out[:, -1, :])
        return out

# train and eva
def train_evaluate_model(X_train, X_test, y_train, y_test, time_slot, location):
    lstm_model = LSTMModel(input_dim=X_train.shape[-1], hidden_dim=64, num_layers=3, output_dim=1)
    lstm_criterion = nn.MSELoss()
    lstm_optimizer = torch.optim.Adam(lstm_model.parameters(), lr=0.001)

    lstm_num_epochs = 170
    for epoch in range(lstm_num_epochs):
        lstm_model.train()
        lstm_optimizer.zero_grad()
        lstm_output = lstm_model(X_train)
        lstm_loss = lstm_criterion(lstm_output, y_train)
        lstm_loss.backward()
        lstm_optimizer.step()


    lstm_model.eval()
    with torch.no_grad():
        lstm_predictions = lstm_model(X_test)

location = 'Visitors in Blue Lagoon'
time_slots = ['8:00', '10:00', '12:00', '14:00', '16:00', '18:00']
for time_slot in time_slots:
    X_train, X_test, y_train, y_test = train_test_split(*sequence_data_all[(location, time_slot)], test_size=0.2, random_state=42)
    train_evaluate_model(X_train, X_test, y_train, y_test, time_slot, location)


In [30]:
# Machu Picchu
# LSTM
class LSTMModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers, output_dim):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        out, (hn, cn) = self.lstm(x)
        out = self.fc(out[:, -1, :])
        return out

# train and eva
def train_evaluate_model(X_train, X_test, y_train, y_test, time_slot, location):
    lstm_model = LSTMModel(input_dim=X_train.shape[-1], hidden_dim=64, num_layers=3, output_dim=1)
    lstm_criterion = nn.MSELoss()
    lstm_optimizer = torch.optim.Adam(lstm_model.parameters(), lr=0.001)

    lstm_num_epochs = 170
    for epoch in range(lstm_num_epochs):
        lstm_model.train()
        lstm_optimizer.zero_grad()
        lstm_output = lstm_model(X_train)
        lstm_loss = lstm_criterion(lstm_output, y_train)
        lstm_loss.backward()
        lstm_optimizer.step()


    lstm_model.eval()
    with torch.no_grad():
        lstm_predictions = lstm_model(X_test)
        lstm_r_squared = r2_score(y_test.numpy(), lstm_predictions.numpy())

location = 'Visitors in Machu Picchu'
time_slots = ['8:00', '10:00', '12:00', '14:00', '16:00', '18:00']

for time_slot in time_slots:
    if time_slot in sequence_data_all:
        X_train, X_test, y_train, y_test = train_test_split(*sequence_data_all[time_slot], test_size=0.2, random_state=42)
        train_evaluate_model(X_train, X_test, y_train, y_test, time_slot, location)
    else:
        print(f"Data not found for {location} at {time_slot}")



In [28]:
print(sequence_data_all.keys())


dict_keys(['8:00', '10:00', '12:00', '14:00', '16:00', '18:00'])
