In [1]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import gc
import torch
import torch.nn as nn
import torch.nn.init as init
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, TensorDataset
import torch.nn.functional as F

In [2]:
# Mount Google Drive (Colab only) so the CSV files under /content/drive are reachable.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
# CSV locations on the mounted Drive: path1 feeds train_df, path2 feeds test_df.
path1 = '/content/drive/MyDrive/VU/ANN/encoded_data.csv'
path2 = '/content/drive/MyDrive/VU/ANN/prepared_test_data.csv'

# Read both files with default pandas settings, in the same order as the paths.
train_df, test_df = (pd.read_csv(csv_path) for csv_path in (path1, path2))

# Rearrange the data into time-windowed series for TCN input

In [4]:
# Set aside the target (last column) and the ids (second-to-last column) for later use.
# Both are read before 'id' is dropped, so the positional indices refer to the full frame.
Y_train, ids = train_df.iloc[:, -1], train_df.iloc[:, -2]

# Only 'id' is removed: the target column stays in train_df and therefore remains
# part of the model's input features.
# NOTE: run this cell once per load; re-running after 'id' is gone raises KeyError.
train_df = train_df.drop(columns='id')

In [5]:
# Materialise the frame as a NumPy array. Rows are expected to be ordered by
# (product, store) pair, each pair contributing a contiguous history of 1713 samples.
encoded_historical_array = train_df.to_numpy()

# float32 tensor copy of that array, used to cut the sliding windows below.
encoded_historical_tensor = torch.tensor(
    encoded_historical_array,
    dtype=torch.float32,
)

In [6]:
N = 32  # sequence length (rows fed to the model per window)
P = 16  # prediction length (target values following each window)

# Each store-item pair contributes a contiguous history of this many rows.
samples_per_combination = 1713

# Fail early with a clear message instead of letting torch.stack trip over
# ragged windows when the data is not a whole number of series.
total_rows = len(encoded_historical_tensor)
if total_rows % samples_per_combination != 0:
    raise ValueError(
        f"Expected a whole number of {samples_per_combination}-row series, got {total_rows} rows"
    )
if len(Y_train) != total_rows:
    raise ValueError("Y_train and encoded_historical_tensor must have the same number of rows")

# Read the target array once instead of once per window.
target_values = Y_train.values
windows_per_series = samples_per_combination - (N + P) + 1

sequences = []       # input windows, each of shape (N, n_features)
targets = []         # the P target values that immediately follow each window
test_sequences = []  # the last N rows of every series

# For each historical series of a (product, store) tuple:
for series_start in range(0, total_rows, samples_per_combination):
    series_end = series_start + samples_per_combination

    # Slide a window over the series; the range keeps window + targets inside it.
    for j in range(series_start, series_start + windows_per_series):
        sequences.append(encoded_historical_tensor[j:j + N])
        targets.append(target_values[j + N:j + N + P])

    # Gather the last N rows of the series for the test set
    # (the same expression covers the first series, so no special case is needed).
    test_sequences.append(encoded_historical_tensor[series_end - N:series_end])

# Stack the windows and move features to axis 1: (num_windows, n_features, N).
sequences_tensor = torch.stack(sequences).transpose(1, 2)
targets_tensor = torch.tensor(np.array(targets)).float()
test_sequences = torch.stack(test_sequences).transpose(1, 2)

# np.array(targets) above made its own copy, so this reference can go.
del target_values

# Dataset / DataLoader for the TCN
tcn_dataset = TensorDataset(sequences_tensor, targets_tensor)

optimal_num_workers = 2
# Same value as the previous literal (1713 - N) * 33, now tied to the constant above.
# NOTE(review): the factor 33 is not explained anywhere in this notebook — confirm its meaning.
batch_size = (samples_per_combination - N) * 33
tcn_dataloader = DataLoader(
    tcn_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=optimal_num_workers,
)

In [7]:
# Clear up RAM: drop the references first, then run the collector. The collector
# cannot reclaim objects that are still referenced, so it has to come after the dels.
# NOTE: this cell is not re-runnable; a second run raises NameError on the first del.
del train_df
del test_df
del encoded_historical_array
del sequences
del targets

# Trailing semicolon keeps the collected-object count out of the cell output.
gc.collect();

# Model & training

In [8]:
class TCNBlock(nn.Module):
    """
    A single causal, dilated 1-D convolution with a residual connection and ReLU.

    The convolution is padded by ``(kernel_size - 1) * dilation`` and the output is
    cut back to the input length, so output step ``t`` only sees input steps ``<= t``.
    """

    def __init__(self, in_channels, out_channels, kernel_size, dilation):
        """
        Initializes the TCN block.

        Parameters:
            in_channels (int): Number of input channels.
            out_channels (int): Number of output channels.
            kernel_size (int): Size of the convolutional kernel.
            dilation (int): Dilation rate for the convolution, providing the ability to capture long-range dependencies.
        """
        super(TCNBlock, self).__init__()
        padding = (kernel_size - 1) * dilation

        # Convolutional layer (Conv1d pads both ends; forward() trims the tail)
        self.conv = nn.Conv1d(in_channels, out_channels, kernel_size, padding=padding, dilation=dilation)

        # Batch normalization layer.
        # NOTE(review): this layer is constructed but never applied in forward().
        # It is kept so the module's state_dict keys stay unchanged.
        self.bn = nn.BatchNorm1d(out_channels)

        # Activation function
        self.relu = nn.ReLU()

        # Skip connection: a 1x1 convolution only when the channel count changes
        self.skip_connection = nn.Conv1d(in_channels, out_channels, 1) if in_channels != out_channels else None

    def forward(self, x):
        """
        Forward pass of the TCN block. Implements causal convolution by removing the excess padding from the right.

        Parameters:
            x (torch.Tensor): Input tensor of shape (batch_size, in_channels, sequence_length).

        Returns:
            torch.Tensor: Output tensor of shape (batch_size, out_channels, sequence_length).
        """
        # Causal convolution: keep the first `sequence_length` outputs.
        # Slicing by length (rather than by `:-padding`) also works when the
        # padding is 0 (kernel_size == 1), where `:-0` would yield an empty tensor.
        out = self.conv(x)
        out = out[:, :, :x.size(2)]

        # Apply skip connection
        identity = x if self.skip_connection is None else self.skip_connection(x)
        out = out + identity  # Element-wise addition

        return self.relu(out)

class TCN(nn.Module):
    """
    Temporal Convolutional Network: a stack of TCN blocks followed by a linear head.

    Attributes:
        network (nn.Sequential): The TCN blocks, applied in order.
        fc (nn.Linear): Maps the features of the last time step to the predictions.
    """

    def __init__(self, input_size, output_size, num_channels, kernel_size=2):
        """
        Builds the block stack and the output layer.

        Parameters:
            input_size (int): Number of input channels.
            output_size (int): Number of values to predict per sample.
            num_channels (list): Output channel count of each TCN block, in order.
            kernel_size (int, optional): Kernel size shared by all blocks. Defaults to 2.
        """
        super().__init__()

        # widths[k] -> widths[k + 1] are the in/out channels of block k;
        # the dilation doubles with every block (1, 2, 4, ...).
        widths = [input_size, *num_channels]
        blocks = [
            TCNBlock(widths[level], widths[level + 1], kernel_size, 2 ** level)
            for level in range(len(num_channels))
        ]

        self.network = nn.Sequential(*blocks)
        self.fc = nn.Linear(num_channels[-1], output_size)

    def forward(self, x):
        """
        Runs the network on a batch.

        Parameters:
            x (torch.Tensor): Input of shape (batch_size, input_size, sequence_length).

        Returns:
            torch.Tensor: Non-negative predictions of shape (batch_size, output_size).
        """
        features = self.network(x)
        # Only the final time step feeds the linear head.
        last_step = features[:, :, -1]
        # ReLU clamps the predictions at zero.
        return F.relu(self.fc(last_step))

In [9]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Seed PyTorch before building the model so weight initialisation is repeatable.
SEED = 42
torch.manual_seed(SEED)

# Model Initialization
# The feature count is the second axis of the (rows, features) tensor; reading the
# shape avoids indexing an arbitrary row, which fails on very short inputs.
input_size = encoded_historical_tensor.shape[1]  # Number of features
# NOTE: this del makes the cell single-use; re-running it raises NameError.
del encoded_historical_tensor
output_size = P  # Number of days to predict
num_channels = [32, 32, 16, 16, 16]  # Adjust for the desired network depth and width
model = TCN(input_size, output_size, num_channels).to(device)

In [None]:
# Loss Function & Optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)

num_epochs = 15
print_every_n_epochs = 1  # Print updates every epoch
# Training stops as soon as the mean epoch loss is at or below this value.
early_stop_loss = 206000.6939

# Training Loop
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0

    for data, target in tcn_dataloader:
        data = data.to(device)
        target = target.to(device)

        optimizer.zero_grad()
        outputs = model(data)
        loss = criterion(outputs, target)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Mean of the per-batch losses for this epoch.
    epoch_loss = running_loss / len(tcn_dataloader)

    # Print loss information every 'print_every_n_epochs' epochs
    if epoch % print_every_n_epochs == 0:
        print(f"Epoch [{epoch+1}/{num_epochs}] Loss: {epoch_loss:.4f}")

    # Checked every epoch, independent of the print frequency.
    if epoch_loss <= early_stop_loss:
        break




Epoch [1/15] Loss: 1114368.2303
Epoch [2/15] Loss: 597196.4549
Epoch [3/15] Loss: 484267.4705
Epoch [4/15] Loss: 476829.8848
Epoch [5/15] Loss: 472080.1933
Epoch [6/15] Loss: 467579.2043
Epoch [7/15] Loss: 416489.0689
Epoch [8/15] Loss: 249598.6004
Epoch [9/15] Loss: 234043.7054


In [None]:
# Save the model's parameters (state_dict) to a file in the working directory.
weights_path = 'tcn_model_weights.pth'
torch.save(model.state_dict(), weights_path)

# Model test

In [None]:
# Drop the training targets; none of the cells shown below read Y_train.
del Y_train
# The training DataLoader is kept for now (its deletion is left disabled):
#del tcn_dataloader

In [None]:
# Load the saved weights onto the device in use, so a checkpoint written on a
# GPU runtime also loads on a CPU-only runtime.
model.load_state_dict(torch.load('tcn_model_weights.pth', map_location=device))

# Set model to evaluation mode
model.eval()

# Batch the test windows; shuffle stays off so row k of the output
# corresponds to test window k.
test_loader = DataLoader(test_sequences, batch_size=100, shuffle=False)

# List to store predictions (one array of model outputs per test window)
predictions = []

# Use no_grad to ensure gradient isn't computed
with torch.no_grad():
    for batch in test_loader:
        batch = batch.to(device)  # Send to device
        prediction = model(batch)
        predictions.extend(prediction.cpu().numpy())

# Convert predictions to a 2-D numpy array: (num_test_windows, model outputs)
predictions_array = np.array(predictions)

# Convert numpy array to DataFrame
predictions_df = pd.DataFrame(predictions_array)


In [None]:
# Sanity check: rows = number of test windows, columns = values predicted per window.
predictions_df.shape

In [None]:
# Id assigned to the first submission row; ids run consecutively from here.
first_test_id = 3000888

# predictions_array is (n_series, horizon): one row per store & product pair,
# one column per predicted day. Both sizes are read from the array rather than
# hard-coded, so a different number of pairs or days cannot misplace values.
n_series, horizon = predictions_array.shape

# The value for (pair idx, day i) goes to position idx + i * n_series, i.e. all
# pairs for day 0, then all pairs for day 1, ... That is the transpose flattened
# row-major. Cast to float64 to match a zero-initialised NumPy buffer.
# NOTE(review): assumes the submission ids follow this day-major order — confirm.
reordered_array = predictions_array.T.astype(np.float64).ravel()

# Build the submission frame directly with its final columns: id, sales.
reordered_predictions_df = pd.DataFrame({
    'id': np.arange(first_test_id, first_test_id + reordered_array.size),
    'sales': reordered_array,
})

# Clamp non-positive values to zero
reordered_predictions_df.loc[reordered_predictions_df['sales'] <= 0, 'sales'] = 0


reordered_predictions_df.head()

# Saving the final predictions in the Kaggle submission format

In [None]:
# Write the unrounded predictions to Drive, without the DataFrame index.
submission_path = '/content/drive/MyDrive/VU/ANN/TCN_submission_second.csv'
reordered_predictions_df.to_csv(submission_path, index=False)

In [None]:
# Second variant: round 'sales' to zero decimals in place, then write it to its own file.
rounded_sales = reordered_predictions_df['sales'].round(0)
reordered_predictions_df['sales'] = rounded_sales

rounded_submission_path = '/content/drive/MyDrive/VU/ANN/TCN_submission_second_round.csv'
reordered_predictions_df.to_csv(rounded_submission_path, index=False)