In [57]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import gc
import torch
import torch.nn as nn
import torch.nn.init as init
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, TensorDataset
import torch.nn.functional as F

In [58]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [59]:
path1 = '/content/drive/MyDrive/VU/ANN/encoded_data.csv'
path2 = '/content/drive/MyDrive/VU/ANN/prepared_test_data.csv'
train_df = pd.read_csv(path1)
test_df = pd.read_csv(path2)

# Rearrange data to time windowed series for TCN input

In [60]:
# Separate TCN series from the rest:

Y_train = train_df['sales']
ids = train_df['id']
onpromotion_plus_16 = train_df['onpromotion+16']

train_df = train_df.drop('id', axis=1)
train_df = train_df.drop('onpromotion+16', axis=1)
train_df = train_df.drop('sales', axis=1)
train_df = train_df.drop('store_nbr', axis=1)



In [61]:
train_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,normalized_sales
0,0.26825,-0.938469,0.634321,0.046035,-0.882299,-0.710946,0.262586,0.072921,-0.574389,1.074625,-0.101923,0.02783,-0.481265,0.377082,-0.468952,-0.307584,-0.324819
1,-0.085381,-0.871874,0.105006,0.111968,-1.210725,-0.05278,0.394698,0.569185,-0.124199,1.012168,0.635995,-0.173337,0.478804,-0.573674,-0.359466,-0.065601,-0.323907
2,0.81637,-0.383265,0.416107,-0.344449,-0.946493,-0.641333,-0.529246,0.648588,0.316719,0.787706,0.00584,-0.22259,1.21227,0.035632,-0.624357,-0.38734,-0.323907
3,-1.277721,0.370842,-0.682122,-0.163278,0.46726,-0.038031,-0.429528,0.456781,0.248105,-0.402676,0.688359,0.431799,0.502667,-0.456453,-0.196897,-0.206376,-0.322083
4,-0.832109,0.526539,-0.698715,0.232769,1.177772,0.081507,0.274694,0.752656,-0.368253,-0.673652,0.264862,0.492198,-0.048708,0.334176,0.403294,-0.81958,-0.324819


In [62]:
# Convert the DataFrame to a numpy array
# encoded_historical_array = train_df.values           # dataframe is order by each product & store pair that has a a historical sequence of 1713 samples
# Convert the numpy array to a PyTorch tensor
encoded_historical_tensor = torch.tensor(train_df.values, dtype=torch.float32)

In [63]:
N = 32  # sequence length
P = 16  # prediction length

sequences = []  # of channels
targets = []
future_onpromotions = []
test_futue_onpromotions = []
test_sequences = []

# Each store-item has 1713 samples
samples_per_combination = 1713

# For each unique historical sequence of (product,store) touple:
for i in range(0, len(encoded_historical_tensor), samples_per_combination):
    # Make a sequence with sliding window & it's corresponding targets
  for j in range(i, i + samples_per_combination - (N + P) + 1):
        sequences.append(encoded_historical_tensor[j:j+N])
        targets.append(Y_train.values[j+N:j+N+P])
        future_onpromotions.append(onpromotion_plus_16.values[j+N:j+N+P])

  # And gather the last N window for test set:
  if i == 0:
    test_sequences.append(encoded_historical_tensor[samples_per_combination-N:samples_per_combination])
    test_futue_onpromotions.append(onpromotion_plus_16[samples_per_combination-P:samples_per_combination])
  else:
    test_sequences.append(encoded_historical_tensor[i+samples_per_combination-N:i+samples_per_combination])
    test_futue_onpromotions.append(onpromotion_plus_16[i+samples_per_combination-P:i+samples_per_combination])


# Convert train sequences, targets and future onpromotions & test sequences to tensors:
sequences_tensor = torch.stack(sequences).transpose(1, 2)
targets_tensor = torch.tensor(np.array(targets)).float()
future_onpromotions = torch.tensor(np.array(future_onpromotions)).float()

test_sequences = torch.stack(test_sequences).transpose(1, 2)
test_futue_onpromotions = torch.tensor(np.array(test_futue_onpromotions)).float()


# Create a new Dataset
tcn_dataset = TensorDataset(sequences_tensor, targets_tensor, future_onpromotions)


# DataLoader for TCN
optimal_num_workers = 2
tcn_dataloader = DataLoader(tcn_dataset, batch_size=(1713-N)*33, shuffle=True, num_workers=optimal_num_workers)

In [64]:
test_futue_onpromotions.size()

torch.Size([1782, 16])

In [65]:
# Clear up RAM:
del train_df
del test_df
# del encoded_historical_array
del sequences
del targets

gc.collect()

# Model Initialization part 1
input_size = len(encoded_historical_tensor[2])  # Number of features
del encoded_historical_tensor

#Model & training:

In [66]:
class TCNBlock(nn.Module):

    def __init__(self, in_channels, out_channels, kernel_size, dilation):
        """
        Initializes the TCN block.

        Parameters:
            in_channels (int): Number of input channels.
            out_channels (int): Number of output channels.
            kernel_size (int): Size of the convolutional kernel.
            dilation (int): Dilation rate for the convolution, providing the ability to capture long-range dependencies.
        """
        super(TCNBlock, self).__init__()
        padding = (kernel_size - 1) * dilation

        # Convolutional layer
        self.conv = nn.Conv1d(in_channels, out_channels, kernel_size, padding=padding, dilation=dilation)

        # Batch normalization layer
        self.bn = nn.BatchNorm1d(out_channels)

        # Activation function
        self.relu = nn.ReLU()

        # Skip connection
        self.skip_connection = nn.Conv1d(in_channels, out_channels, 1) if in_channels != out_channels else None

    def forward(self, x):
        """
        Forward pass of the TCN block. Implements causal convolution by removing the excess padding from the right.

        Parameters:
            x (torch.Tensor): Input tensor of shape (batch_size, in_channels, sequence_length).

        Returns:
            torch.Tensor: Output tensor after applying the block.
        """
        # Causal convolution
        out = self.conv(x)
        out = out[:, :, :-self.conv.padding[0]]

        # Apply skip connection
        identity = x if self.skip_connection is None else self.skip_connection(x)
        out += identity  # Element-wise addition

        return self.relu(out)


class TCN(nn.Module):
    """
    Temporal Convolutional Network (TCN) architecture.

    Attributes:
        network (nn.Sequential): Sequence of TCN blocks.
        fc1 (nn.Linear): First fully connected layer.
        fc2 (nn.Linear): Second fully connected layer.
    """

    def __init__(self, input_size, output_size, num_channels, kernel_size=2):
        """
        Initializes the TCN.

        Parameters:
            input_size (int): Number of input channels.
            output_size (int): Number of output values to predict (in our case, 16).
            num_channels (list): List of output channels for each TCN block.
            kernel_size (int, optional): Size of the convolutional kernel. Defaults to 2.
        """
        super(TCN, self).__init__()
        layers = []
        num_levels = len(num_channels)
        # Set up convolutional layers
        for i in range(num_levels):
            dilation = 2**i
            in_channels = input_size if i == 0 else num_channels[i-1]
            out_channels = num_channels[i]
            layers.append(TCNBlock(in_channels, out_channels, kernel_size, dilation))

        self.network = nn.Sequential(*layers)
        self.fc1 = nn.Linear(num_channels[-1] + 16, 64)  # The first fully connected layer
        self.fc2 = nn.Linear(64, output_size)  # The second fully connected layer

    def forward(self, x, add_vector):
        """
        Forward pass of the TCN.

        Parameters:
            x (torch.Tensor): Input tensor of shape (batch_size, input_size, sequence_length).
            add_vector (torch.Tensor): Additional vector of shape (batch_size, 16).

        Returns:
            torch.Tensor: Predictions of shape (batch_size, output_size).
        """
        x = self.network(x)
        x = x[:, :, -1]  # Shape: (batch_size, num_channels[-1])
        x = torch.cat((x, add_vector), dim=1)  # Concatenate along the channel dimension
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [67]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Model Initialization part 1
output_size = P  # Number of days to predict
num_channels = [32,32,16,16,16]  # Adjust for the desired network depth and width
model = TCN(input_size, output_size, num_channels).to(device)

In [68]:
del Y_train
del sequences_tensor
del targets_tensor
del tcn_dataset
gc.collect()

0

In [69]:
# Loss Function & Optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)


# Training Loop
num_epochs = 10
print_every_n_epochs = 1  # Print updates every epoch

# Training Loop
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0

    for batch_idx, (data, target, future_onpromotions) in enumerate(tcn_dataloader):
        data = data.to(device)
        target = target.to(device)
        future_onpromotions = future_onpromotions.to(device)

        optimizer.zero_grad()
        outputs = model(data, future_onpromotions)
        loss = criterion(outputs, target)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Print loss information every 'print_every_n_epochs' epochs
    if epoch % print_every_n_epochs == 0:
        epoch_loss = running_loss / len(tcn_dataloader)
        print(f"Epoch [{epoch+1}/{num_epochs}] Loss: {epoch_loss:.4f}")


Epoch [1/10] Loss: 1348233.8009
Epoch [2/10] Loss: 1302773.0463
Epoch [3/10] Loss: 946424.4329
Epoch [4/10] Loss: 387815.8950


KeyboardInterrupt: ignored

In [70]:
# Save model
torch.save(model.state_dict(), 'tcn_model_weights.pth')

#Model Test:

In [71]:
model.load_state_dict(torch.load('tcn_model_weights.pth'))


# Set model to evaluation mode
model.eval()

# List to store predictions
predictions = []

# Use no_grad to ensure gradient isn't computed
with torch.no_grad():

  # Combine the inputs into a TensorDataset
  test_dataset = TensorDataset(test_sequences, test_futue_onpromotions)

  # Use the TensorDataset in the DataLoader
  test_loader = DataLoader(test_dataset, batch_size=100, shuffle=False)

  for data in test_loader:
      sequence, test_futue_onpromotions_bached = data[0].to(device), data[1].to(device)
      prediction = model(sequence, test_futue_onpromotions_bached)
      predictions.extend(prediction.cpu().numpy())

# Convert predictions to a numpy array
predictions_array = np.array(predictions)

# Convert numpy array to DataFrame
predictions_df = pd.DataFrame(predictions_array)
predictions_df.shape


(1782, 16)

In [72]:
# Create an empty array for reordered predictions
reordered_array = np.zeros((predictions_array.shape[0] * 16,))

# Distribute the predictions based on the described pattern
for idx in range(predictions_array.shape[0]):     # iterating over each store & product pair
    for i in range(16):
        position = idx + i * 1782    # shifting and repeating based on day
        reordered_array[position] = predictions_array[idx, i]

# Convert reordered array to a DataFrame
reordered_predictions_df = pd.DataFrame(reordered_array, columns=['sales'])

# Set index to start from 3000888
reordered_predictions_df.index = np.arange(3000888, 3000888 + len(reordered_predictions_df))

# Rename the index
reordered_predictions_df.index.name = 'id'
reordered_predictions_df.reset_index(inplace=True)

# Ceil negative values
reordered_predictions_df.loc[reordered_predictions_df['sales'] <= 0, 'sales'] = 0


reordered_predictions_df.head(10)

Unnamed: 0,id,sales
0,3000888,211.414444
1,3000889,136.7276
2,3000890,130.96817
3,3000891,1763.272705
4,3000892,143.402954
5,3000893,296.3526
6,3000894,158.358521
7,3000895,471.417786
8,3000896,523.296326
9,3000897,192.70079


# Downloading the final predictions form for Kaggle

In [None]:
reordered_predictions_df.to_csv('/content/drive/MyDrive/VU/ANN/TCN_submission_third.csv', index=False)

In [None]:
reordered_predictions_df['sales'] = reordered_predictions_df['sales'].round(0)
reordered_predictions_df.to_csv('/content/drive/MyDrive/VU/ANN/TCN_submission_third_round.csv', index=False)