# Environment

In [None]:
# Mount Google Drive at /content/drive; the model and feature paths used by this
# notebook live under /content/drive/MyDrive. `google.colab` is only importable
# inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import os
import time
import copy
import json
import math
import ast

import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

In [None]:
# Output locations. Every run gets its own directory, named after the Unix
# timestamp (in whole seconds) at which this cell was executed.
model_path =  "/content/drive/MyDrive/AITermProject/Models/"
name = str(int(time.time()))
save_path = os.path.join(model_path, name)
# makedirs creates any missing parent directory (e.g. a fresh Drive without
# "Models/") and, with exist_ok=True, tolerates an already existing target, so
# no separate isdir() check is needed.
os.makedirs(save_path, exist_ok=True)
model_save_path = os.path.join(save_path, f"{name}.pt")
print('model_save_path:', model_save_path)
# NOTE: despite the .csv extension, write_losses() dumps JSON into this file.
loss_path = os.path.join(save_path, "loss.csv")

# Input feature table (CSV) produced by the feature-extraction step.
feature_name = "feature_0.2.csv"
feature_save_path = f"/content/drive/MyDrive/AITermProject/features/{feature_name}"

In [None]:
# Hyper-parameters of this run, collected in one mapping so they can be logged
# alongside the saved model.
params = dict(
    BatchSize=64,
    Epochs=6,
    CountSteps=500,
    InputSize=1,
    OutputChannel=128,
    OutputSize=30,
    NumLayer=4,
    KerneiSize=3,
    LearningRate=0.0005,
    StepSize=3,
    Decay=5e-4,
    FcHiddenSize1=128,
    FcHiddenSize2=64,
    Dropout=0.3,
)

# Record the configuration as one "key: value" line per hyper-parameter.
file_name = os.path.join(save_path, "training_parameters.txt")
with open(file_name, 'w') as file:
    file.writelines(f"{key}: {value}\n" for key, value in params.items())

print(f"Parameters saved to {file_name}")

In [None]:
# Training constants, unpacked from `params` in the order listed.
BatchSize, Epochs, CountSteps = (
    params[key] for key in ('BatchSize', 'Epochs', 'CountSteps')
)

# Model / optimizer constants, unpacked from `params` in the order listed.
(InputSize, OutputChannel, OutputSize, NumLayer, KerneiSize, LearningRate,
 StepSize, Decay, FcHiddenSize1, FcHiddenSize2, Dropout) = (
    params[key] for key in (
        'InputSize', 'OutputChannel', 'OutputSize', 'NumLayer', 'KerneiSize',
        'LearningRate', 'StepSize', 'Decay', 'FcHiddenSize1', 'FcHiddenSize2',
        'Dropout',
    )
)

# Placeholder: the real sequence length is calculated from the first batch
# once the DataLoader has been built.
SeqLen = 0

# Input columns. The first entry holds a serialized sequence; the remaining
# ones are scalar columns.
FeatureColumns = [
    'sbi_onehour',
    'sno_value', 'act', 'tot', 'sbi', 'lat', 'lng', 'date_value',
    'time', 'position', 'week', 'popularity', 'rainfall', 'see_rate_value',
]

# Data

In [None]:
# Load the pre-computed feature table.
features = pd.read_csv(feature_save_path)

# Station-number normalization: shift `sno` so the smallest station number maps to 0.
min_sno = features['sno'].min()
features = features.assign(sno_value=features['sno'] - min_sno)

print(f"data size: {len(features)}")
print(features.head())

# Split by date: the three largest date values (after sorting) form the
# validation set, every other row is a training sample.
unique_dates = features['date'].unique()
unique_dates.sort()
print("All unique dates:", unique_dates)
validation_dates = unique_dates[-3:]
print("Validation dates:", validation_dates)

is_validation = features['date'].isin(validation_dates)
validation_data = features[is_validation]
training_data = features[~is_validation]

print(f"training samples: {len(training_data)}")
print(f"validation samples {len(validation_data)}")

In [None]:
class YouBikeDataset(Dataset):
  """Row-wise dataset over a feature table.

  Each item is built from one row of ``data``:

  * the first feature column (``self.features[0]``) holds a Python literal
    (parsed with ``ast.literal_eval``) that is used as-is;
  * the remaining feature columns are scalars that are standardized with
    ``(value - mean) / std``;
  * the ``'sbi_prediction'`` column holds a Python literal used as the target.

  Args:
    data: DataFrame containing the feature columns, the target column and 'sno'.
    mean: Per-column means for ``self.features[1:]``. Computed from ``data``
      when ``mean`` or ``std`` is None.
    std: Per-column standard deviations for ``self.features[1:]``. Computed
      from ``data`` when ``mean`` or ``std`` is None. Zero or non-finite
      entries are replaced by 1.0 so that normalization never yields NaN/inf.
  """

  def __init__(self, data, mean=None, std=None):
    self.data = data
    self.features = FeatureColumns
    self.target = 'sbi_prediction'

    # Calculate mean and std if not provided
    if mean is None or std is None:
      non_time_data = self.data[self.features[1:]]
      mean = non_time_data.mean(axis=0).values
      std = non_time_data.std(axis=0).values
    self.mean = mean
    self.std = self._sanitize_std(std)

  @staticmethod
  def _sanitize_std(std):
    """Return a float64 copy of ``std`` with zero / NaN / inf entries set to 1.0.

    A constant column has std 0 and a single-row table has std NaN; dividing
    by either would poison every input with NaN or inf.
    """
    std = np.array(std, dtype=np.float64)  # np.array copies: caller's data is untouched
    std[~np.isfinite(std) | (std == 0)] = 1.0
    return std

  def __len__(self):
    return len(self.data)

  def __getitem__(self, idx):
    """Return ``(inputs, target)`` tensors of shape (length, 1) for row ``idx``."""
    row = self.data.iloc[idx]
    sbi_onehour = np.array(ast.literal_eval(row[self.features[0]]), dtype=np.float32)
    if sbi_onehour.ndim == 0:
      # The literal was a bare scalar instead of a list: report the station
      # and promote the value to a one-element sequence.
      print("sbi_onehour", row['sno'])
      sbi_onehour = np.expand_dims(sbi_onehour, axis=0)

    # Built from a list, so this array is always at least 1-D.
    non_time_features = np.array([row[feature] for feature in self.features[1:]], dtype=np.float32)

    # Normalize non-time features
    non_time_features = (non_time_features - self.mean) / self.std

    input_features = np.concatenate([sbi_onehour, non_time_features])

    target = np.array(ast.literal_eval(row[self.target]), dtype=np.float32)
    return (torch.tensor(input_features, dtype=torch.float32).unsqueeze(0).transpose(0, 1),
            torch.tensor(target, dtype=torch.float32).unsqueeze(0).transpose(0, 1))

# Normalization statistics are taken from the training split only and shared
# with the validation split.
train_non_time_data = training_data[FeatureColumns[1:]]
mean = train_non_time_data.mean(axis=0).to_numpy()
std = train_non_time_data.std(axis=0).to_numpy()

train_dataset, val_dataset = (
    YouBikeDataset(split, mean, std) for split in (training_data, validation_data)
)

# DataLoaders: only the training batches are shuffled.
train_loader = DataLoader(dataset=train_dataset, batch_size=BatchSize, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=BatchSize, shuffle=False)

# Sanity check on the first training batch; also records the input sequence
# length in SeqLen.
for inputs, targets in train_loader:
  SeqLen = inputs.shape[1]
  print("Inputs:", inputs.shape)  # (batch_size, seq_len, 1)
  print(list(inputs[0, :, 0]))
  print('inputsize', InputSize)
  print("Targets:", targets.shape)  # (batch_size, target_len, 1)
  print(list(targets[0, :, 0]))
  break

# Model

In [None]:
class TemporalCNN(nn.Module):
  """Stack of Conv1d -> BatchNorm1d -> ReLU stages whose width halves per stage.

  The first stage maps ``input_channels`` to ``output_channels``; each later
  stage takes the previous stage's output and emits half as many channels
  (integer division). Convolutions are padded with ``kernel_size // 2``.

  ``forward`` takes a (batch, seq_len, channels) tensor and returns a 2-D
  tensor in which every sample's (seq_len, channels) output is flattened.
  """

  def __init__(self, input_channels, output_channels, num_layers=3, kernel_size=3):
    super().__init__()
    stages = []
    in_width, out_width = input_channels, output_channels
    for _ in range(num_layers):
      stages += [
          nn.Conv1d(in_width, out_width, kernel_size, padding=kernel_size // 2),
          nn.BatchNorm1d(out_width),
          nn.ReLU(),
      ]
      # The next stage consumes this stage's output and halves the width again.
      in_width, out_width = out_width, out_width // 2
    self.conv_layers = nn.Sequential(*stages)

  def forward(self, x):
    # Conv1d works on (batch, channels, seq_len), so swap the last two axes
    # on the way in and swap them back on the way out.
    channels_first = torch.transpose(x, 1, 2)
    convolved = self.conv_layers(channels_first)
    seq_first = torch.transpose(convolved, 1, 2)
    # Collapse everything but the batch axis.
    return torch.flatten(seq_first, start_dim=1)

In [None]:
class TemporalCNNWithFC(nn.Module):
    """TemporalCNN feature extractor followed by a three-layer fully connected head.

    The head is Linear -> ReLU -> Dropout, twice, then a final Linear that
    emits ``output_size`` values. ``sequence_length`` must equal the length of
    the sequences fed to ``forward`` because it fixes the input width of the
    first Linear layer.
    """

    def __init__(self, input_channels, output_channels, num_layers=3, kernel_size=3,
                 fc_hidden_size1=128, fc_hidden_size2=64, dropout_p=0.5, output_size=30,
                 sequence_length=43):
        super().__init__()
        self.temporal_cnn = TemporalCNN(input_channels, output_channels, num_layers, kernel_size)

        # Channel count of the last CNN stage; the CNN flattens its
        # (seq_len, channels) output, hence the product below.
        last_stage_channels = output_channels // (2 ** (num_layers - 1))
        flattened_size = last_stage_channels * sequence_length

        self.fc1 = nn.Linear(flattened_size, fc_hidden_size1)
        self.dropout1 = nn.Dropout(dropout_p)
        self.fc2 = nn.Linear(fc_hidden_size1, fc_hidden_size2)
        self.dropout2 = nn.Dropout(dropout_p)
        self.fc3 = nn.Linear(fc_hidden_size2, output_size)

    def forward(self, x):
        hidden = self.temporal_cnn(x)                        # (batch_size, seq_len * channels)
        hidden = self.dropout1(F.relu(self.fc1(hidden)))     # (batch_size, fc_hidden_size1)
        hidden = self.dropout2(F.relu(self.fc2(hidden)))     # (batch_size, fc_hidden_size2)
        prediction = self.fc3(hidden)                        # (batch_size, output_size)
        return prediction.unsqueeze(1)                       # (batch_size, 1, output_size)

In [None]:
# SeqLen is initialised to 0 and only receives its real value when the
# DataLoader sanity-check loop has run. Building the model with the
# placeholder would create a Linear layer with zero inputs that only fails
# later, during training, so stop here with a clear message instead.
if SeqLen <= 0:
  raise ValueError("SeqLen has not been set; run the DataLoader check cell before building the model")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = TemporalCNNWithFC(InputSize,
                          OutputChannel,
                          num_layers=NumLayer,
                          kernel_size=KerneiSize,
                          output_size=OutputSize,
                          sequence_length=SeqLen,
                          fc_hidden_size1=FcHiddenSize1,
                          fc_hidden_size2=FcHiddenSize2,
                          dropout_p=Dropout
                          )
# Move the model to its device *before* the optimizer is constructed, as the
# torch.optim documentation recommends, so the optimizer is guaranteed to hold
# the parameters that are actually trained.
model.to(device)

criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=LearningRate, weight_decay=Decay)
# Multiply the learning rate by 0.1 every StepSize calls to scheduler.step().
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=StepSize, gamma=0.1)

# Show the architecture as the cell output.
model

# Training

In [None]:
def write_losses(loss_path, train_losses, dev_losses):
  """Write both loss histories to ``loss_path`` as JSON indented by 4 spaces.

  The file is overwritten on every call. ``train_losses`` is stored under the
  key "train" and ``dev_losses`` under the key "test".
  """
  history = {"train": train_losses, "test": dev_losses}
  with open(loss_path, "w") as handle:
    json.dump(history, handle, indent=4)

In [None]:
train_losses = []   # mean training loss of every completed CountSteps-batch window
test_losses = []    # mean validation loss of every epoch
best_loss = float('inf')


def _aligned_loss(outputs, targets):
  """Apply `criterion` after flattening both tensors to (batch, -1).

  The model emits (batch, 1, OutputSize) while the dataset yields targets of
  shape (batch, OutputSize, 1). Passing those straight to MSELoss makes it
  broadcast both to (batch, OutputSize, OutputSize) and compare every
  prediction with every target step, which is not the intended loss.
  Flattening lines the elements up one-to-one.
  """
  if outputs.numel() != targets.numel():
    raise ValueError(
        f"Prediction/target size mismatch: {tuple(outputs.shape)} vs {tuple(targets.shape)}")
  batch_size = targets.shape[0]
  return criterion(outputs.reshape(batch_size, -1), targets.reshape(batch_size, -1))


for epoch in range(Epochs):
  print(f"[Training] Epoch {epoch}/{Epochs - 1}")
  print("-" * 10)

  # Train
  model.train()
  running_loss_train = 0.0
  intermediate_loss = 0.0
  # Steps are counted from 1 so that each logging window holds exactly
  # CountSteps batches. The batch variables are deliberately not called
  # `features`, which is the name of the feature DataFrame.
  for step, (batch_inputs, batch_targets) in enumerate(tqdm(train_loader), start=1):
    batch_inputs, batch_targets = batch_inputs.to(device), batch_targets.to(device)
    optimizer.zero_grad()
    outputs = model(batch_inputs)
    loss = _aligned_loss(outputs, batch_targets)
    loss.backward()
    optimizer.step()
    batch_loss = loss.item()
    running_loss_train += batch_loss
    intermediate_loss += batch_loss

    # Print training losses after CountSteps
    if step % CountSteps == 0:
      average_loss = intermediate_loss / CountSteps
      print(f"Loss in epoch {epoch} - step {step}: {average_loss:.4f}")
      train_losses.append(average_loss)
      intermediate_loss = 0.0

  # The learning-rate schedule advances once per epoch.
  scheduler.step()

  epoch_train_loss = running_loss_train / len(train_loader)
  print(f"Training Loss: {epoch_train_loss:.4f}")

  # Evaluation
  model.eval()
  running_loss_test = 0.0
  with torch.no_grad():
    for batch_inputs, batch_targets in val_loader:
      batch_inputs, batch_targets = batch_inputs.to(device), batch_targets.to(device)
      outputs = model(batch_inputs)
      loss = _aligned_loss(outputs, batch_targets)
      running_loss_test += loss.item()

  epoch_test_loss = running_loss_test / len(val_loader)
  test_losses.append(epoch_test_loss)
  print(f"Validation Loss: {epoch_test_loss:.4f}")

  # Persist the loss history after every epoch, not only after improving
  # ones, so the file always holds the complete curves.
  write_losses(loss_path, train_losses, test_losses)

  # Checkpoint whenever the validation loss improves.
  if epoch_test_loss < best_loss:
    best_loss = epoch_test_loss
    # deepcopy: state_dict() returns references to the live parameters, which
    # keep changing while training continues.
    best_model_wts = copy.deepcopy(model.state_dict())
    torch.save(best_model_wts, model_save_path)
    print(f"Updated best model on dev checkpoint: {model_save_path}")