# **Setting Up**

## *Mounting Drive*

In [None]:
# Mount Google Drive so the dataset files are reachable from the Colab runtime.
from google.colab import drive
drive.mount('/content/drive')
# Base folder for every file read or written below. The trailing slash matters:
# later cells build file names with plain string concatenation (`path + name`).
path = '/content/drive/MyDrive/Favorita_Subset/'

Mounted at /content/drive


## *Importing Libraries for the entire notebook (will be continuously updated)*

In [None]:
# !pip install xgboost

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import torch.nn as nn
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
# from sklearn.ensemble import RandomForestRegressor
# import xgboost as xgb

# **Prebuilding**

## *Exploring Features*

In [None]:
# Load the auxiliary tables that are candidates for extra model features.
# The CSVs are read in the listed order, one file per target variable.
oil, weather, holidays, items, stores, transactions = (
    pd.read_csv(path + csv_name)
    for csv_name in (
        'oil_2015.csv',
        'weather_2015.csv',
        'holidays_2015.csv',
        'items.csv',
        'quito_stores.csv',
        'transactions_2015.csv',
    )
)


In [None]:
oil.head()

Unnamed: 0,date,dcoilwtico
0,2015-01-01,
1,2015-01-02,52.72
2,2015-01-05,50.05
3,2015-01-06,47.98
4,2015-01-07,48.69


In [None]:
holidays.head()

Unnamed: 0,date,type,locale,locale_name,description,transferred
0,2015-01-01,Holiday,National,Ecuador,Primer dia del ano,False
1,2015-01-02,Bridge,National,Ecuador,Puente Primer dia del ano,False
2,2015-01-10,Work Day,National,Ecuador,Recupero Puente Primer dia del ano,False
3,2015-02-16,Holiday,National,Ecuador,Carnaval,False
4,2015-02-17,Holiday,National,Ecuador,Carnaval,False


In [None]:
items.head()

Unnamed: 0,item_nbr,family,class,perishable
0,96995,GROCERY I,1093,0
1,99197,GROCERY I,1067,0
2,103501,CLEANING,3008,0
3,103520,GROCERY I,1028,0
4,103665,BREAD/BAKERY,2712,1


In [None]:
weather.head()

Unnamed: 0,Date,Avg Temperature (C),Min Temperature (C),Max Temperature (C),Precipitation (mm),Snow Depth (mm),Wind Direction (degrees),Wind Speed (km/h),Peak Wind Gust (km/h),Sea Level Pressure (hPa),Sunshine Duration (minutes)
0,2015-01-01,13.4,6.5,20.4,1.0,,,,,,
1,2015-01-02,13.8,7.0,20.4,,,,,,,
2,2015-01-03,14.8,5.4,21.0,,,,,,,
3,2015-01-04,,,,,,,,,,
4,2015-01-05,14.2,4.8,20.1,,,,,,,


In [None]:
stores.head()

Unnamed: 0,store_nbr,city,state,type,cluster
0,1,Quito,Pichincha,D,13
1,2,Quito,Pichincha,D,13
2,3,Quito,Pichincha,D,8
3,4,Quito,Pichincha,D,9
4,6,Quito,Pichincha,D,13


In [None]:
transactions.head()

Unnamed: 0,date,store_nbr,transactions
0,2015-01-01,25,2202
1,2015-01-02,1,1021
2,2015-01-02,2,1859
3,2015-01-02,3,3429
4,2015-01-02,4,1521


## *Visualization*

In [None]:
df = pd.read_csv(path + "train_subset_2015.csv")
df.head()

Unnamed: 0,id,date,store_nbr,item_nbr,unit_sales,onpromotion
0,38595440,2015-01-02,1,103665,6.0,False
1,38595441,2015-01-02,1,105575,3.0,False
2,38595442,2015-01-02,1,105577,1.0,False
3,38595443,2015-01-02,1,105693,1.0,False
4,38595444,2015-01-02,1,105857,2.0,False


In [None]:
# Check Datatypes
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11606961 entries, 0 to 11606960
Data columns (total 6 columns):
 #   Column       Dtype  
---  ------       -----  
 0   id           int64  
 1   date         object 
 2   store_nbr    int64  
 3   item_nbr     int64  
 4   unit_sales   float64
 5   onpromotion  bool   
dtypes: bool(1), float64(1), int64(3), object(1)
memory usage: 453.8+ MB


In [None]:
# Parse the `date` strings (YYYY-MM-DD) into datetimes and order the rows by date.
df = (
    df
    .assign(date=lambda frame: pd.to_datetime(frame['date'], format='%Y-%m-%d'))
    .sort_values(by='date')
)
df.head()

Unnamed: 0,id,date,store_nbr,item_nbr,unit_sales,onpromotion
0,38595440,2015-01-02,1,103665,6.0,False
16723,38642907,2015-01-02,45,213653,7.0,False
16722,38642906,2015-01-02,45,213652,17.0,False
16721,38642905,2015-01-02,45,213066,23.0,False
16720,38642904,2015-01-02,45,212552,27.0,False


In [None]:
# Simple visualization function
def plot_simple(df,date,unit_sales,figsize=(12,6)):
    """Draw a plain matplotlib line chart of one column against another.

    Args:
        df: DataFrame holding the data to plot.
        date: Name of the column used for the x axis.
        unit_sales: Name of the column used for the y axis.
        figsize: (width, height) of the figure in inches.

    The figure is shown immediately; nothing is returned.
    """
    fig, ax = plt.subplots(figsize=figsize)
    ax.plot(df[date], df[unit_sales], label='Unit Sales', color='blue')
    ax.set_title("Unit Sales over time")
    ax.set_xlabel("Date")
    ax.set_ylabel("Unit Sales")
    ax.grid(True)
    ax.legend()
    fig.tight_layout()
    plt.show()

In [None]:
# Aesthetic Visualization function
def aesthetic_plot(data,date_column,sales_column,figsize=(12,6)):
    """Draw a seaborn line chart of `sales_column` against `date_column`.

    Args:
        data: DataFrame holding the data to plot.
        date_column: Name of the column used for the x axis.
        sales_column: Name of the column used for the y axis.
        figsize: (width, height) of the figure in inches.

    How repeated x values are combined is left to seaborn's `lineplot` defaults.
    The figure is shown immediately; nothing is returned.
    """
    _, ax = plt.subplots(figsize=figsize)
    sns.lineplot(data=data, x=date_column, y=sales_column, label='Unit Sales', ax=ax)
    ax.set(title='Unit Sales Over Time', xlabel='Date', ylabel='Unit Sales')
    ax.grid(True)
    plt.show()

In [None]:
# plot_simple(df,'date','unit_sales')

In [None]:
# aesthetic_plot(df,'date','unit_sales')

### *After the visualization, begin preprocessing according to the observed figures*

## *Identifying feature importance*

In [None]:
# Count target values that cannot be used as-is: missing or negative unit sales.
vals = df['unit_sales'].isna() | (df['unit_sales'] < 0)
print(vals.sum())

# Replace those entries with 0 in a single vectorized pass. A row-wise Python
# lambda over every row of the frame gives the same result but is far slower.
df['unit_sales'] = df['unit_sales'].mask(vals, 0)

# Re-check: no missing or negative value should remain.
vals = df['unit_sales'].isna() | (df['unit_sales'] < 0)
print(vals.sum())


900
0


In [None]:
# Check for changes
# plot_simple(df,'date','unit_sales')

In [None]:
# aesthetic_plot(df,'date','unit_sales')

### Identifying important features

In [None]:
# Keep only the date and the three temperature columns; every other weather column is dropped.
cols = ['Avg Temperature (C)','Min Temperature (C)','Max Temperature (C)','Date']
weather = weather.drop(columns=weather.columns.difference(cols))
weather.head()

Unnamed: 0,Date,Avg Temperature (C),Min Temperature (C),Max Temperature (C)
0,2015-01-01,13.4,6.5,20.4
1,2015-01-02,13.8,7.0,20.4
2,2015-01-03,14.8,5.4,21.0
3,2015-01-04,,,
4,2015-01-05,14.2,4.8,20.1


In [None]:
# Fill the gaps in each temperature series by linear interpolation between neighbouring rows.
for temp_col in ('Max Temperature (C)', 'Avg Temperature (C)', 'Min Temperature (C)'):
    weather[temp_col] = weather[temp_col].interpolate(method="linear")

In [None]:
# Merge the auxiliary CSVs onto the training set, which is the base table.
# Every join is a left join against a lookup table, so the number of sales rows
# must be exactly the same before and after.

merged_df = df.merge(stores,on='store_nbr',how='left')
merged_df = merged_df.merge(items,on='item_nbr',how='left')
oil['date'] = pd.to_datetime(oil['date'],format='%Y-%m-%d')
merged_df = merged_df.merge(oil,on='date',how='left')
weather['Date'] = pd.to_datetime(weather['Date'],format='%Y-%m-%d')
merged_df = merged_df.merge(weather,left_on='date',right_on='Date',how='left')
holidays['date'] = pd.to_datetime(holidays['date'],format='%Y-%m-%d')
# A date that appears more than once in the holiday table would duplicate every
# sales row of that date in a left join (the merged frame used to come out larger
# than the training set). Keep one holiday row per date: the first one listed.
holidays_per_day = holidays.drop_duplicates(subset='date', keep='first')
merged_df = merged_df.merge(holidays_per_day,on='date',how='left')
transactions['date'] = pd.to_datetime(transactions['date'],format='%Y-%m-%d')
merged_df = merged_df.merge(transactions,on=['date','store_nbr'],how='left')

# Guard the invariant: if this fires, one of the lookup tables has duplicate keys.
assert len(merged_df) == len(df), (
    f"merging changed the row count from {len(df)} to {len(merged_df)}; "
    "a lookup table contains duplicate join keys"
)

merged_df.head()

Unnamed: 0,id,date,store_nbr,item_nbr,unit_sales,onpromotion,city,state,type_x,cluster,...,Date,Avg Temperature (C),Min Temperature (C),Max Temperature (C),type_y,locale,locale_name,description,transferred,transactions
0,38595440,2015-01-02,1,103665,6.0,False,Quito,Pichincha,D,13,...,2015-01-02,13.8,7.0,20.4,Bridge,National,Ecuador,Puente Primer dia del ano,False,1021
1,38642907,2015-01-02,45,213653,7.0,False,Quito,Pichincha,A,11,...,2015-01-02,13.8,7.0,20.4,Bridge,National,Ecuador,Puente Primer dia del ano,False,4950
2,38642906,2015-01-02,45,213652,17.0,False,Quito,Pichincha,A,11,...,2015-01-02,13.8,7.0,20.4,Bridge,National,Ecuador,Puente Primer dia del ano,False,4950
3,38642905,2015-01-02,45,213066,23.0,False,Quito,Pichincha,A,11,...,2015-01-02,13.8,7.0,20.4,Bridge,National,Ecuador,Puente Primer dia del ano,False,4950
4,38642904,2015-01-02,45,212552,27.0,False,Quito,Pichincha,A,11,...,2015-01-02,13.8,7.0,20.4,Bridge,National,Ecuador,Puente Primer dia del ano,False,4950


In [None]:
# Discard the location/duplicate-date columns and give the two suffixed `type`
# columns (store vs. holiday) and the holiday description self-explanatory names.
merged_df = (
    merged_df
    .drop(columns=['city','Date','locale','locale_name','state'])
    .rename(columns={
        'type_x':'store_type',
        'type_y':'holiday_type',
        'description':'holiday_description',
    })
)
merged_df.head()

Unnamed: 0,id,date,store_nbr,item_nbr,unit_sales,onpromotion,store_type,cluster,family,class,perishable,dcoilwtico,Avg Temperature (C),Min Temperature (C),Max Temperature (C),holiday_type,holiday_description,transferred,transactions
0,38595440,2015-01-02,1,103665,6.0,False,D,13,BREAD/BAKERY,2712,1,52.72,13.8,7.0,20.4,Bridge,Puente Primer dia del ano,False,1021
1,38642907,2015-01-02,45,213653,7.0,False,A,11,GROCERY I,1048,0,52.72,13.8,7.0,20.4,Bridge,Puente Primer dia del ano,False,4950
2,38642906,2015-01-02,45,213652,17.0,False,A,11,GROCERY I,1048,0,52.72,13.8,7.0,20.4,Bridge,Puente Primer dia del ano,False,4950
3,38642905,2015-01-02,45,213066,23.0,False,A,11,GROCERY I,1074,0,52.72,13.8,7.0,20.4,Bridge,Puente Primer dia del ano,False,4950
4,38642904,2015-01-02,45,212552,27.0,False,A,11,GROCERY I,1044,0,52.72,13.8,7.0,20.4,Bridge,Puente Primer dia del ano,False,4950


In [None]:
merged_df['onpromotion'] = merged_df['onpromotion'].astype(int)
merged_df.head()

Unnamed: 0,id,date,store_nbr,item_nbr,unit_sales,onpromotion,store_type,cluster,family,class,perishable,dcoilwtico,Avg Temperature (C),Min Temperature (C),Max Temperature (C),holiday_type,holiday_description,transferred,transactions
0,38595440,2015-01-02,1,103665,6.0,0,D,13,BREAD/BAKERY,2712,1,52.72,13.8,7.0,20.4,Bridge,Puente Primer dia del ano,False,1021
1,38642907,2015-01-02,45,213653,7.0,0,A,11,GROCERY I,1048,0,52.72,13.8,7.0,20.4,Bridge,Puente Primer dia del ano,False,4950
2,38642906,2015-01-02,45,213652,17.0,0,A,11,GROCERY I,1048,0,52.72,13.8,7.0,20.4,Bridge,Puente Primer dia del ano,False,4950
3,38642905,2015-01-02,45,213066,23.0,0,A,11,GROCERY I,1074,0,52.72,13.8,7.0,20.4,Bridge,Puente Primer dia del ano,False,4950
4,38642904,2015-01-02,45,212552,27.0,0,A,11,GROCERY I,1044,0,52.72,13.8,7.0,20.4,Bridge,Puente Primer dia del ano,False,4950


In [None]:
# Rows with no `transferred` value get 0, then the flag is stored as an integer.
merged_df['transferred'] = merged_df['transferred'].fillna(0).astype(int)
merged_df.head()

Unnamed: 0,id,date,store_nbr,item_nbr,unit_sales,onpromotion,store_type,cluster,family,class,perishable,dcoilwtico,Avg Temperature (C),Min Temperature (C),Max Temperature (C),holiday_type,holiday_description,transferred,transactions
0,38595440,2015-01-02,1,103665,6.0,0,D,13,BREAD/BAKERY,2712,1,52.72,13.8,7.0,20.4,Bridge,Puente Primer dia del ano,0,1021
1,38642907,2015-01-02,45,213653,7.0,0,A,11,GROCERY I,1048,0,52.72,13.8,7.0,20.4,Bridge,Puente Primer dia del ano,0,4950
2,38642906,2015-01-02,45,213652,17.0,0,A,11,GROCERY I,1048,0,52.72,13.8,7.0,20.4,Bridge,Puente Primer dia del ano,0,4950
3,38642905,2015-01-02,45,213066,23.0,0,A,11,GROCERY I,1074,0,52.72,13.8,7.0,20.4,Bridge,Puente Primer dia del ano,0,4950
4,38642904,2015-01-02,45,212552,27.0,0,A,11,GROCERY I,1044,0,52.72,13.8,7.0,20.4,Bridge,Puente Primer dia del ano,0,4950


In [None]:
# corr_matrix = merged_df.corr()
# print(corr_matrix)

In [None]:
# plt.figure(figsize=(10, 8))
# sns.heatmap(corr_matrix, annot=True, fmt=".2f", cmap="coolwarm")
# plt.savefig('correlation_matrix.png', dpi=300, bbox_inches='tight')  # Save as PNG
# plt.show()

In [None]:
# Derive calendar features from the sale date.
sale_dates = pd.to_datetime(merged_df['date'])  # make sure the column is datetime
merged_df['date'] = sale_dates
for date_part in ('year', 'month', 'day', 'weekday'):  # weekday: Monday=0, Sunday=6
    merged_df[date_part] = getattr(sale_dates.dt, date_part)

In [None]:
# # Prepare your data (assuming merged_df is already defined)
# X = merged_df.drop(columns=['unit_sales', 'date'])
# y = merged_df['unit_sales']

# # Initialize the XGBoost model
# model = xgb.XGBRegressor()

# # Fit the model
# model.fit(X, y)

# # Extract feature importances
# importance = pd.Series(model.feature_importances_, index=X.columns)

# # Plot the feature importances
# importance.sort_values(ascending=False).plot(kind="bar")
# plt.title('Feature Importance using XGBoost')
# plt.show()

In [None]:
merged_df.head()

Unnamed: 0,id,date,store_nbr,item_nbr,unit_sales,onpromotion,store_type,cluster,family,class,...,Min Temperature (C),Max Temperature (C),holiday_type,holiday_description,transferred,transactions,year,month,day,weekday
0,38595440,2015-01-02,1,103665,6.0,0,D,13,BREAD/BAKERY,2712,...,7.0,20.4,Bridge,Puente Primer dia del ano,0,1021,2015,1,2,4
1,38642907,2015-01-02,45,213653,7.0,0,A,11,GROCERY I,1048,...,7.0,20.4,Bridge,Puente Primer dia del ano,0,4950,2015,1,2,4
2,38642906,2015-01-02,45,213652,17.0,0,A,11,GROCERY I,1048,...,7.0,20.4,Bridge,Puente Primer dia del ano,0,4950,2015,1,2,4
3,38642905,2015-01-02,45,213066,23.0,0,A,11,GROCERY I,1074,...,7.0,20.4,Bridge,Puente Primer dia del ano,0,4950,2015,1,2,4
4,38642904,2015-01-02,45,212552,27.0,0,A,11,GROCERY I,1044,...,7.0,20.4,Bridge,Puente Primer dia del ano,0,4950,2015,1,2,4


## *Perform final preprocessing based on features identified*

### Dropping Redundant/highly correlated columns

In [None]:
merged_df.head()

Unnamed: 0,id,date,store_nbr,item_nbr,unit_sales,onpromotion,store_type,cluster,family,class,...,Min Temperature (C),Max Temperature (C),holiday_type,holiday_description,transferred,transactions,year,month,day,weekday
0,38595440,2015-01-02,1,103665,6.0,0,D,13,BREAD/BAKERY,2712,...,7.0,20.4,Bridge,Puente Primer dia del ano,0,1021,2015,1,2,4
1,38642907,2015-01-02,45,213653,7.0,0,A,11,GROCERY I,1048,...,7.0,20.4,Bridge,Puente Primer dia del ano,0,4950,2015,1,2,4
2,38642906,2015-01-02,45,213652,17.0,0,A,11,GROCERY I,1048,...,7.0,20.4,Bridge,Puente Primer dia del ano,0,4950,2015,1,2,4
3,38642905,2015-01-02,45,213066,23.0,0,A,11,GROCERY I,1074,...,7.0,20.4,Bridge,Puente Primer dia del ano,0,4950,2015,1,2,4
4,38642904,2015-01-02,45,212552,27.0,0,A,11,GROCERY I,1044,...,7.0,20.4,Bridge,Puente Primer dia del ano,0,4950,2015,1,2,4


In [None]:
# Remove the columns judged redundant or strongly correlated with columns that are kept,
# then print the resulting schema.

merged_df = merged_df.drop(
    labels=['transferred','year','month','Avg Temperature (C)'],
    axis='columns',
)
merged_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11746122 entries, 0 to 11746121
Data columns (total 19 columns):
 #   Column               Dtype         
---  ------               -----         
 0   id                   int64         
 1   date                 datetime64[ns]
 2   store_nbr            int64         
 3   item_nbr             int64         
 4   unit_sales           float64       
 5   onpromotion          int64         
 6   store_type           object        
 7   cluster              int64         
 8   family               object        
 9   class                int64         
 10  perishable           int64         
 11  dcoilwtico           float64       
 12  Min Temperature (C)  float64       
 13  Max Temperature (C)  float64       
 14  holiday_type         object        
 15  holiday_description  object        
 16  transactions         int64         
 17  day                  int32         
 18  weekday              int32         
dtypes: datetime64[ns](1

### Identify features to be embedded later on in the model

In [None]:
# for col in ['family','store_type','holiday_description','holiday_type']:
#   le = LabelEncoder()
#   merged_df[col] = le.fit_transform(merged_df[col])
#   # label_encoders[col] = le

In [None]:
merged_df['store_nbr'].nunique(),merged_df['family'].nunique(),merged_df['holiday_type'].nunique()

(18, 32, 5)

In [None]:
merged_df['store_type'].nunique(),merged_df['item_nbr'].nunique(),merged_df['class'].nunique()

(4, 3437, 313)

In [None]:
merged_df['weekday'].nunique(),merged_df['holiday_description'].nunique()

(7, 49)

### Perform Scaling on numerical features

In [None]:
# Min-max scale the continuous columns, including the target `unit_sales`.
# NOTE(review): the scaler is fitted on the whole frame, before the train/validation
# split made later in the notebook, so validation rows influence the min/max applied
# to training rows. Consider fitting on the training portion only.
# NOTE(review): one scaler covers all five columns; turning predictions back into real
# unit sales needs its parameters for the `unit_sales` column — TODO confirm this is handled.
scaler = MinMaxScaler()
numerical_features = ['unit_sales', 'transactions', 'dcoilwtico', 'Min Temperature (C)', 'Max Temperature (C)']
merged_df[numerical_features] = scaler.fit_transform(merged_df[numerical_features])
merged_df.head()

Unnamed: 0,id,date,store_nbr,item_nbr,unit_sales,onpromotion,store_type,cluster,family,class,perishable,dcoilwtico,Min Temperature (C),Max Temperature (C),holiday_type,holiday_description,transactions,day,weekday
0,38595440,2015-01-02,1,103665,0.000606,0,D,13,BREAD/BAKERY,2712,1,0.677732,0.617647,0.617021,Bridge,Puente Primer dia del ano,0.081602,2,4
1,38642907,2015-01-02,45,213653,0.000707,0,A,11,GROCERY I,1048,0,0.677732,0.617647,0.617021,Bridge,Puente Primer dia del ano,0.573342,2,4
2,38642906,2015-01-02,45,213652,0.001716,0,A,11,GROCERY I,1048,0,0.677732,0.617647,0.617021,Bridge,Puente Primer dia del ano,0.573342,2,4
3,38642905,2015-01-02,45,213066,0.002322,0,A,11,GROCERY I,1074,0,0.677732,0.617647,0.617021,Bridge,Puente Primer dia del ano,0.573342,2,4
4,38642904,2015-01-02,45,212552,0.002726,0,A,11,GROCERY I,1044,0,0.677732,0.617647,0.617021,Bridge,Puente Primer dia del ano,0.573342,2,4


# **Building a Model**

In [None]:
!pip install torch



In [None]:
!pip install pytorch-lightning pytorch-forecasting


Collecting pytorch-lightning
  Downloading pytorch_lightning-2.4.0-py3-none-any.whl.metadata (21 kB)
Collecting pytorch-forecasting
  Downloading pytorch_forecasting-1.2.0-py3-none-any.whl.metadata (13 kB)
Collecting torchmetrics>=0.7.0 (from pytorch-lightning)
  Downloading torchmetrics-1.6.0-py3-none-any.whl.metadata (20 kB)
Collecting lightning-utilities>=0.10.0 (from pytorch-lightning)
  Downloading lightning_utilities-0.11.9-py3-none-any.whl.metadata (5.2 kB)
Collecting lightning<3.0.0,>=2.0.0 (from pytorch-forecasting)
  Downloading lightning-2.4.0-py3-none-any.whl.metadata (38 kB)
Collecting aiohttp!=4.0.0a0,!=4.0.0a1 (from fsspec[http]>=2022.5.0->pytorch-lightning)
  Downloading aiohttp-3.11.10-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.7 kB)
Collecting aiohappyeyeballs>=2.3.0 (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]>=2022.5.0->pytorch-lightning)
  Downloading aiohappyeyeballs-2.4.4-py3-none-any.whl.metadata (6.1 kB)
Collecting aiosignal>=1.1.2

In [None]:
pip show pytorch-lightning pytorch-forecasting torch


Name: pytorch-lightning
Version: 2.4.0
Summary: PyTorch Lightning is the lightweight PyTorch wrapper for ML researchers. Scale your models. Write less boilerplate.
Home-page: https://github.com/Lightning-AI/lightning
Author: Lightning AI et al.
Author-email: pytorch@lightning.ai
License: Apache-2.0
Location: /usr/local/lib/python3.10/dist-packages
Requires: fsspec, lightning-utilities, packaging, PyYAML, torch, torchmetrics, tqdm, typing-extensions
Required-by: lightning
---
Name: pytorch-forecasting
Version: 1.2.0
Summary: Forecasting timeseries with PyTorch - dataloaders, normalizers, metrics and models
Home-page: 
Author: Jan Beitner
Author-email: 
License: 
Location: /usr/local/lib/python3.10/dist-packages
Requires: lightning, numpy, pandas, scikit-learn, scipy, torch
Required-by: 
---
Name: torch
Version: 2.5.1+cu121
Summary: Tensors and Dynamic neural networks in Python with strong GPU acceleration
Home-page: https://pytorch.org/
Author: PyTorch Team
Author-email: packages@pytorc

#### *NOTE: DUE TO LABEL ENCODING IN PREPROCESSING, THE FIRST MODEL PROTOTYPE WILL BE USING CUSTOM EMBEDDING LAYERS INSTEAD OF TFT'S INHERENT EMBEDDING TECHNIQUES*

## Prepare the Dataset

In [None]:
from pytorch_forecasting import TimeSeriesDataSet
from pytorch_forecasting.data import GroupNormalizer
from pytorch_forecasting.models import TemporalFusionTransformer
from pytorch_forecasting.metrics import RMSE
from torch.utils.data import DataLoader
from lightning.pytorch import Trainer
from lightning.pytorch.callbacks import EarlyStopping
import torch



In [None]:
merged_df['time_idx'] = (merged_df['date'] - merged_df['date'].min()).dt.days
merged_df.head()

Unnamed: 0,id,date,store_nbr,item_nbr,unit_sales,onpromotion,store_type,cluster,family,class,perishable,dcoilwtico,Min Temperature (C),Max Temperature (C),holiday_type,holiday_description,transactions,day,weekday,time_idx
0,38595440,2015-01-02,1,103665,0.000606,0,D,13,BREAD/BAKERY,2712,1,0.677732,0.617647,0.617021,Bridge,Puente Primer dia del ano,0.081602,2,4,0
1,38642907,2015-01-02,45,213653,0.000707,0,A,11,GROCERY I,1048,0,0.677732,0.617647,0.617021,Bridge,Puente Primer dia del ano,0.573342,2,4,0
2,38642906,2015-01-02,45,213652,0.001716,0,A,11,GROCERY I,1048,0,0.677732,0.617647,0.617021,Bridge,Puente Primer dia del ano,0.573342,2,4,0
3,38642905,2015-01-02,45,213066,0.002322,0,A,11,GROCERY I,1074,0,0.677732,0.617647,0.617021,Bridge,Puente Primer dia del ano,0.573342,2,4,0
4,38642904,2015-01-02,45,212552,0.002726,0,A,11,GROCERY I,1044,0,0.677732,0.617647,0.617021,Bridge,Puente Primer dia del ano,0.573342,2,4,0


In [None]:
# Cast the categorical feature columns to strings.
# Each column is converted from ITSELF: `onpromotion` and `weekday` must keep their
# own values rather than receiving a copy of `perishable`.
categorical_columns = [
    'store_nbr',
    'item_nbr',
    'class',
    'cluster',
    'perishable',
    'onpromotion',
    'weekday',
    'holiday_type',
    'holiday_description',
]
for column in categorical_columns:
    merged_df[column] = merged_df[column].astype(str)

In [None]:
merged_df.head()

Unnamed: 0,id,date,store_nbr,item_nbr,unit_sales,onpromotion,store_type,cluster,family,class,perishable,dcoilwtico,Min Temperature (C),Max Temperature (C),holiday_type,holiday_description,transactions,day,weekday,time_idx
0,38595440,2015-01-02,1,103665,0.000606,1,D,13,BREAD/BAKERY,2712,1,0.677732,0.617647,0.617021,Bridge,Puente Primer dia del ano,0.081602,2,1,0
1,38642907,2015-01-02,45,213653,0.000707,0,A,11,GROCERY I,1048,0,0.677732,0.617647,0.617021,Bridge,Puente Primer dia del ano,0.573342,2,0,0
2,38642906,2015-01-02,45,213652,0.001716,0,A,11,GROCERY I,1048,0,0.677732,0.617647,0.617021,Bridge,Puente Primer dia del ano,0.573342,2,0,0
3,38642905,2015-01-02,45,213066,0.002322,0,A,11,GROCERY I,1074,0,0.677732,0.617647,0.617021,Bridge,Puente Primer dia del ano,0.573342,2,0,0
4,38642904,2015-01-02,45,212552,0.002726,0,A,11,GROCERY I,1044,0,0.677732,0.617647,0.617021,Bridge,Puente Primer dia del ano,0.573342,2,0,0


In [None]:
# for column in merged_df.columns:
#     print(f"{column}: {merged_df[column].map(type).unique()}")


In [None]:
# Fill missing oil prices. The oil price is one value per calendar day, so it is
# interpolated once per date and then mapped back onto the rows. Interpolating the
# row-level column directly would ramp the price across the many rows that share a
# date whenever a whole day has no price, giving one day several different prices.
daily_oil = (
    merged_df.groupby('date')['dcoilwtico']
    .first()                      # first non-null price of each date (NaN if the day has none)
    .interpolate(method='time')   # weight by the real time gap between dates
)
merged_df['dcoilwtico'] = merged_df['date'].map(daily_oil)
merged_df['dcoilwtico'].isna().sum()

0

In [None]:
merged_df.shape

(11746122, 20)

In [None]:
sub_df = merged_df.head(1000000)

In [None]:
sub_df.shape

(1000000, 20)

In [None]:
from sklearn.model_selection import train_test_split

# Split by row position without shuffling: with test_size=0.4 the first 60% of the
# rows go to training and the remaining 40% to validation.
# NOTE(review): the cut is positional, so rows of a single calendar day may land in
# both parts — TODO confirm this is acceptable for the forecasting setup.
train_df, val_df = train_test_split(sub_df, test_size=0.4, shuffle=False)


In [None]:
from pytorch_forecasting.data import NaNLabelEncoder

# Window sizes, in time steps of `time_idx` (days).
max_pred_len = 4    # longest forecast horizon
min_enc_len = 7     # shortest history fed to the encoder
max_enc_len = 15    # longest history fed to the encoder

static_cats = ["store_nbr","family","store_type","item_nbr","perishable","class","cluster"]
known_cats = ["weekday","holiday_description","holiday_type","onpromotion"]

train_dataset = TimeSeriesDataSet(
    data = train_df,
    time_idx = "time_idx",
    target = "unit_sales",
    # One series = one item in one store. Grouping by (store, family, class) would put
    # several items into the same series, i.e. several rows per time step, while
    # `item_nbr` is declared static below.
    group_ids = ["store_nbr","item_nbr"],
    max_encoder_length = max_enc_len,
    min_encoder_length= min_enc_len,
    max_prediction_length = max_pred_len,
    static_categoricals = static_cats,
    time_varying_known_categoricals = known_cats,
    time_varying_known_reals = ["dcoilwtico","Min Temperature (C)","Max Temperature (C)"],
    # Let categories that appear only in the validation data map to an "unknown"
    # class instead of failing when the training encoders are reused.
    categorical_encoders = {
        name: NaNLabelEncoder(add_nan=True) for name in static_cats + known_cats
    },
    add_relative_time_idx = True,
    add_encoder_length = True,
    allow_missing_timesteps=True,
)

# Build the validation set FROM the training set so it reuses the same parameters,
# category encodings and scalers. A second, independent TimeSeriesDataSet would fit its
# own encoders on the validation data and number the categories differently.
val_dataset = TimeSeriesDataSet.from_dataset(
    train_dataset,
    val_df,
    stop_randomization=True,  # deterministic windows for evaluation
)



In [None]:
# Serialize both prepared datasets into the data folder.
for prepared, file_name in ((train_dataset, "train_mini.pth"), (val_dataset, "val_mini.pth")):
    torch.save(prepared, path + file_name)

In [None]:
# tft = TemporalFusionTransformer(
#     learning_rate=0.001,
#     hidden_size=16,  # size of the LSTM layers and hidden states
#     attention_head_size=4,  # number of attention heads
#     dropout=0.1,  # dropout rate for regularization
#     hidden_continuous_size=8,  # size of hidden layers for continuous variables
#     output_size=1,  # 1 for regression
#     reduce_on_plateau_patience=5,
# )

# print(type(tft))  # Check if it's an instance of LightningModule

<class 'pytorch_forecasting.models.temporal_fusion_transformer.TemporalFusionTransformer'>


/usr/local/lib/python3.10/dist-packages/lightning/pytorch/utilities/parsing.py:208: Attribute 'loss' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['loss'])`.
/usr/local/lib/python3.10/dist-packages/lightning/pytorch/utilities/parsing.py:208: Attribute 'logging_metrics' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['logging_metrics'])`.
  super().__init__(loss=loss, logging_metrics=logging_metrics, **kwargs)


In [None]:
# for batch_idx, (inputs, targets) in enumerate(train_loader):
#     # Print input shapes (assuming inputs is a dictionary)
#     print(f"Batch {batch_idx + 1}:")
#     for key, value in inputs.items():
#         print(f"  {key} Shape: {value.shape}")

#     # Print target shapes (assuming targets is a tuple)
#     print("Targets Shape:")
#     for i, target in enumerate(targets):
#         print(f"  Target {i} Shape: {target.shape}")

#     break  # Inspect only the first batch


In [None]:
# trainer = Trainer(
#     max_epochs=20,
#     gradient_clip_val=0.1,
#     limit_train_batches=1.0,  # how much of the training dataset to use
#     # callbacks=[EarlyStopping(monitor="val_loss", patience=10)],  # stop early if no improvement
# )

# trainer.fit(tft, train_loader, val_loader)

INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.utilities.rank_zero:`Trainer(limit_train_batches=1.0)` was configured so 100% of the batches per epoch will be used..
INFO: 
   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 0      | train
3  | prescalers                         | ModuleDict                      | 0      | train
4  | static_variable_selection          | VariableSelectionN

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=20` reached.


In [None]:
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# tft = tft.to(device)

# epochs = 20
# early_stopping_patience = 5
# best_loss = float('inf')
# patience_counter = 0

# for epoch in range(epochs):
#     tft.train()
#     train_loss = 0.0
#     for batch in train_loader:
#         # Check the input keys to make sure they are correct
#         inputs, targets = batch
#         print("Input keys:", inputs.keys())  # This should print the expected keys

#         # Move the inputs to the device
#         inputs = {key: value.to(device) for key, value in inputs.items()}

#         # Move the decoder target to the device
#         decoder_target = targets[0].to(device)  # Assuming targets[0] is the correct target tensor

#         # Forward pass
#         outputs = tft(inputs)
#         loss = tft.loss(outputs, decoder_target)

#         # Backpropagation and optimizer step
#         tft.optimizer.zero_grad()
#         loss.backward()
#         tft.optimizer.step()

#         train_loss += loss.item()

#     train_loss /= len(train_loader)

#     tft.eval()
#     val_loss = 0.0
#     with torch.no_grad():
#         for batch in val_loader:
#             inputs, targets = batch
#             # Check the input keys in validation
#             print("Validation Input keys:", inputs.keys())  # Same check for validation

#             # Move the inputs to the device
#             inputs = {key: value.to(device) for key, value in inputs.items()}

#             # Move the decoder target to the device
#             decoder_target = targets[0].to(device)

#             # Forward pass on validation data
#             outputs = tft(inputs)
#             loss = tft.loss(outputs, decoder_target)
#             val_loss += loss.item()

#     val_loss /= len(val_loader)

#     # Print progress
#     print(f"Epoch {epoch + 1}/{epochs} - Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")

#     # Early stopping logic
#     if val_loss < best_loss:
#         best_loss = val_loss
#         tft.save_model(path + "best_model.pth")  # Save the best model
#         patience_counter = 0
#     else:
#         patience_counter += 1
#         if patience_counter >= early_stopping_patience:
#             print("Early stopping triggered.")
#             break

# print("Training complete.")


Input keys: dict_keys(['encoder_cat', 'encoder_cont', 'encoder_target', 'encoder_lengths', 'decoder_cat', 'decoder_cont', 'decoder_target', 'decoder_lengths', 'decoder_time_idx', 'groups', 'target_scale'])


StopIteration: 