In [None]:
#%pip install torchmetrics

**Import libraries**

In [None]:
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import torchmetrics

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

import seaborn as sns
import scipy.stats as stats

In [None]:
#%pip install mlflow

In [None]:
import mlflow
import mlflow.pytorch

**Feature Engineering**

In [None]:
# Select the MLflow experiment under which the runs of this notebook are tracked
mlflow.set_experiment('TCN Experiment 1')

# Load the raw dataset
data = pd.read_csv('../Dataset/superstore-orders.csv')

# Define the target variable (a copy of the raw column, which is dropped further down)
data['Sales_per_Customer'] = data['Sales per Customer']

# Lag features: 'Sales per Customer' from 1 to 10 rows ago
for lag in range(1, 11):
    data[f'sales_per_customer_lag_{lag}'] = data['Sales per Customer'].shift(lag)

# Rolling statistics are computed on the series shifted by one row, so every
# window ends at the previous row. A window that includes the current row would
# contain the target itself: together with lags 1..6 the 7-row mean would let
# the model reconstruct the target exactly (target leakage).
past_sales = data['Sales per Customer'].shift(1)
data['sales_per_customer_rolling_mean_7'] = past_sales.rolling(window=7).mean()
data['sales_per_customer_rolling_std_7'] = past_sales.rolling(window=7).std()
data['sales_per_customer_rolling_mean_30'] = past_sales.rolling(window=30).mean()
data['sales_per_customer_rolling_std_30'] = past_sales.rolling(window=30).std()

# One-hot encode categorical columns
data = pd.get_dummies(data, columns=['Category', 'Sub-Category'], drop_first=True)

# Fill the leading NaNs created by the lag/rolling features with the next valid
# value. `DataFrame.bfill()` replaces the deprecated `fillna(method='bfill')`.
data = data.bfill()

# Drop the original 'Sales per Customer' column; 'Sales_per_Customer' carries the target
data = data.drop(columns=['Sales per Customer'])

# Display first few rows after feature engineering
print(data.head())

# Save to CSV after feature engineering
data.to_csv('../Dataset/engineered_superstore_orders.csv', index=False)

**Data Preparation**

In [None]:
# Reload the engineered dataset written by the feature-engineering step
data = pd.read_csv("../Dataset/engineered_superstore_orders.csv")

print(data.describe())

feature_frame = data.drop(columns=['Sales_per_Customer'])
target_series = data['Sales_per_Customer']  # target

# Coerce everything to numeric (unparseable values become NaN) and force a
# float64 array. Without the explicit dtype, a frame that mixes bool dummy
# columns with numeric columns yields an object array, and np.nan_to_num leaves
# object arrays untouched, so the NaNs would survive into the scaler.
X = feature_frame.apply(pd.to_numeric, errors='coerce').to_numpy(dtype=np.float64)
y = pd.to_numeric(target_series, errors='coerce').to_numpy(dtype=np.float64)

# Replace any remaining missing values with 0
X = np.nan_to_num(X, nan=0.0)
y = np.nan_to_num(y, nan=0.0)

# visualizing distributions of target variables and features
sns.histplot(y, kde=True)
plt.show()

# correlation matrix
columns = ['Profit per Order','Quantity','Sales','Sales_per_Customer']
corr_matrix = data[columns].corr()
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm')
plt.show()

# box plot for outliers
data[columns].plot(kind='box', subplots=True, layout=(1, len(columns)), sharex=False, sharey=False, figsize=(15, 5))
plt.show()

# Ordered split (no shuffling): the last 20% of the rows form the test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Fit the scaler on the training rows only, then apply the same transform to the test rows
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# convert data into tensor format
X_train = torch.tensor(X_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32)

# channel dimension for conv1d: (rows, features) -> (rows, 1, features)
X_train = X_train.unsqueeze(1)
X_test = X_test.unsqueeze(1)

**Model**

In [None]:
class TCN(nn.Module):
    """Two stacked 1-D convolutions followed by a linear regression head.

    Both convolutions use ``padding=kernel_size - 1``. ``nn.Conv1d`` pads both
    ends of the sequence, so each layer lengthens it by ``kernel_size - 1``.
    NOTE(review): because the padding is symmetric, the convolutions are not
    causal despite the class name -- confirm this is intended.

    Args:
        num_inputs: Number of input channels.
        num_channels: Number of output channels of each convolution.
        kernel_size: Kernel width of both convolutions.
        dropout: Dropout probability applied after each activation.
        input_length: Length of the last input dimension. It fixes the size of
            the linear layer. When omitted, the length is read from the
            notebook-level ``X_train`` tensor, as the original code did.
    """

    def __init__(self, num_inputs, num_channels, kernel_size, dropout, input_length=None):
        super().__init__()
        padding = kernel_size - 1
        self.conv1 = nn.Conv1d(num_inputs, num_channels, kernel_size, padding=padding)
        self.conv2 = nn.Conv1d(num_channels, num_channels, kernel_size, padding=padding)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout)

        if input_length is None:
            # Backward-compatible fallback: rely on the training tensor defined
            # earlier in the notebook. Pass `input_length` to avoid this hidden
            # dependency on global state.
            input_length = X_train.size(2)
        self.input_length = int(input_length)

        # Calculate the flattened size after the convolutions
        self._calculate_conv_output_shape(num_inputs, kernel_size, padding=padding)
        self.fc = nn.Linear(self.conv_output_size, 1)

    def _calculate_conv_output_shape(self, num_inputs, kernel_size, padding):
        """Store in ``self.conv_output_size`` the flattened per-sample size after both convolutions.

        ``kernel_size`` and ``padding`` are accepted for signature
        compatibility only; the size is measured by pushing a single all-zero
        sample through ``conv1`` and ``conv2``.
        """
        # No gradients are needed for a shape probe.
        with torch.no_grad():
            dummy_input = torch.zeros(1, num_inputs, self.input_length)
            dummy_output = self.conv2(self.conv1(dummy_input))
        self.conv_output_size = dummy_output.numel()

    def forward(self, x):
        """Map a ``(batch, num_inputs, input_length)`` tensor to ``(batch, 1)`` predictions."""
        x = self.relu(self.conv1(x))
        x = self.dropout(x)
        x = self.relu(self.conv2(x))
        x = self.dropout(x)
        x = x.view(x.size(0), -1)  # Flatten channels and positions per sample
        x = self.fc(x)
        return x

# Seed PyTorch's RNG so that weight initialisation and dropout masks are
# reproducible across "Restart & Run All"
torch.manual_seed(42)

# Hyperparameters
num_channels = 64
kernel_size = 2
dropout = 0.2
learning_rate = 0.1

# Initialize the model
model = TCN(num_inputs=1, num_channels=num_channels, kernel_size=kernel_size, dropout=dropout)

# Define the loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(
    model.parameters(),
    lr=learning_rate,     # Learning rate
    betas=(0.9, 0.999),   # Coefficients for running averages
    eps=1e-8,             # Small constant for numerical stability
    weight_decay=1e-5,    # Weight decay (L2 penalty)
    amsgrad=True          # Use AMSGrad variant
)

**Training using MLflow**

In [None]:
# MLflow run: full-batch training of `model`, with hyperparameters, periodic
# loss values and the final model logged to the run

with mlflow.start_run(run_name='Experiment 1 - Test Run 5') as run:
    num_epochs = 100

    mlflow.log_param("num_channels", num_channels)
    mlflow.log_param("kernel_size", kernel_size)
    mlflow.log_param("dropout", dropout)
    # Read the rate from the optimizer so the logged value cannot drift from
    # the one actually used for training.
    mlflow.log_param("learning_rate", optimizer.param_groups[0]['lr'])
    mlflow.log_param("num_epochs", num_epochs)

    # Training loop (the whole training set is one batch)
    model.train()
    for epoch in range(num_epochs):
        optimizer.zero_grad()
        outputs = model(X_train)
        # squeeze(-1) removes only the trailing size-1 output dimension, so the
        # batch dimension survives even for a single-sample batch.
        loss = criterion(outputs.squeeze(-1), y_train)
        loss.backward()
        optimizer.step()

        # Report every 10th epoch
        if (epoch+1) % 10 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')
            mlflow.log_metric('loss', loss.item(), step=epoch+1)

    # saving the model
    mlflow.pytorch.log_model(model,'model')

In [None]:
# Directory that receives the diagnostic figures. It is created up front so
# that saving does not fail when the folder does not exist yet.
image_dir = Path('../results/images')
image_dir.mkdir(parents=True, exist_ok=True)


def _save_and_log_figure(fig, filename):
    """Save `fig` as `filename` inside `image_dir` and log the file to the active MLflow run."""
    fig.tight_layout()
    target_path = str(image_dir / filename)
    fig.savefig(target_path)
    mlflow.log_artifact(target_path)


# Resume the training run so evaluation metrics and figures are stored with it
with mlflow.start_run(run_id=run.info.run_id):
    model.eval()
    # Disable gradient calculation for evaluation
    with torch.no_grad():
        # squeeze(-1) drops only the trailing size-1 output dimension
        predictions = model(X_test).squeeze(-1)

    # Initialize the metrics
    mse_metric = torchmetrics.MeanSquaredError()
    mae_metric = torchmetrics.MeanAbsoluteError()
    rmse_metric = torchmetrics.MeanSquaredError(squared=False)  # RMSE is the square root of MSE
    r2_metric = torchmetrics.R2Score()
    evs_metric = torchmetrics.ExplainedVariance()

    # Compute metrics
    mse = mse_metric(predictions, y_test).item()
    mae = mae_metric(predictions, y_test).item()
    rmse = rmse_metric(predictions, y_test).item()
    r2 = r2_metric(predictions, y_test).item()
    evs = evs_metric(predictions, y_test).item()

    mlflow.log_metric('MSE', mse)
    mlflow.log_metric('MAE', mae)
    mlflow.log_metric('RMSE', rmse)
    mlflow.log_metric('r2', r2)
    mlflow.log_metric('Explained_Variance_Score', evs)

    # Print metrics
    print(f'MSE: {mse:.4f}')
    print(f'MAE: {mae:.4f}')
    print(f'RMSE: {rmse:.4f}')
    print(f'R-squared: {r2:.4f}')
    print(f'Explained Variance Score: {evs:.4f}')

    actual = y_test.numpy()
    predicted = predictions.numpy()
    residuals = actual - predicted

    # Each artifact gets its own figure. Previously the two side-by-side panels
    # shared one figure that was saved twice, the first time before the second
    # panel had been drawn.

    # Plot actual vs predicted values with different colors
    fig, ax = plt.subplots(figsize=(7, 5))
    ax.plot(actual, label='Actual', color='blue', alpha=0.6, marker='o', linestyle='None')
    ax.plot(predicted, label='Predicted', color='orange', alpha=0.6, marker='x', linestyle='None')
    ax.legend()
    ax.set_title('Actual vs Predicted')
    ax.set_xlabel('Sample')
    ax.set_ylabel('Sales')
    ax.grid(True)
    _save_and_log_figure(fig, 'actual_vs_predicted.png')
    plt.show()

    # Residual plot
    fig, ax = plt.subplots(figsize=(7, 5))
    ax.scatter(predicted, residuals, alpha=0.5, color='purple')
    ax.hlines(y=0, xmin=predicted.min(), xmax=predicted.max(), colors='r')
    ax.set_title('Residuals Plot')
    ax.set_xlabel('Predicted')
    ax.set_ylabel('Residuals')
    ax.grid(True)
    _save_and_log_figure(fig, 'residuals_plot.png')
    plt.show()

    # Distribution of residuals
    fig, ax = plt.subplots(figsize=(7, 5))
    sns.histplot(residuals, kde=True, color='green', ax=ax)
    ax.set_title('Distribution of Residuals')
    ax.set_xlabel('Residuals')
    ax.set_ylabel('Frequency')
    ax.grid(True)
    _save_and_log_figure(fig, 'residuals_distribution.png')
    plt.show()

    # Scatter plot of actual vs predicted, with the identity line as reference
    fig, ax = plt.subplots(figsize=(7, 5))
    ax.scatter(actual, predicted, alpha=0.5)
    identity = [actual.min(), actual.max()]
    ax.plot(identity, identity, color='red', linestyle='--')
    ax.set_title('Actual vs Predicted Scatter Plot')
    ax.set_xlabel('Actual Values')
    ax.set_ylabel('Predicted Values')
    ax.grid(True)
    _save_and_log_figure(fig, 'actual_vs_predicted_scatter.png')
    plt.show()

    # Q-Q Plot of the residuals against a normal distribution
    fig, ax = plt.subplots(figsize=(7, 5))
    stats.probplot(residuals, dist="norm", plot=ax)
    ax.set_title('Q-Q Plot')
    ax.grid(True)
    _save_and_log_figure(fig, 'qq_plot.png')
    plt.show()

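**Summary of Interpretation**

The figures below were recorded from one run of this notebook; re-running it may produce different values.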

**MSE**: 108.9332
The average squared difference between the predicted and actual values is about 108.93. Because the errors are squared, this value is dominated by the largest misses; the gap between RMSE (10.44) and MAE (8.14) suggests that some predictions have large errors.

**MAE**: 8.1420
Indicates that, on average, the model's predictions are off by about 8.14 units. This is easier to interpret and understand compared to MSE.

**RMSE**: 10.4371
This value is in the same units as the target variable (sales per customer), indicating that the typical prediction error is about 10.44 units. RMSE penalizes larger errors more than MAE.

**R²**: 0.8067
Suggests that the model explains about 80.67% of the variance in the sales data, which is quite good. This indicates a strong relationship between the features and the target variable.

In [None]:
# Keep the MLflow run ID in a variable and echo it for reference
run_id = run.info.run_id
print('Run ID: ' + str(run_id))

In [None]:
# End the currently active MLflow run, if any.
# NOTE(review): the runs above are opened with `with mlflow.start_run(...)`,
# which presumably closes them on exit -- confirm whether this call is still needed.
mlflow.end_run()