# AutoGluon Time Series Forecasting

Install AutoGluon and Bokeh (2.0.1) for AutoGluon plots. AutoGluon works with Python 3.8 to 3.11.

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import bokeh
import autogluon
from autogluon.timeseries import TimeSeriesDataFrame, TimeSeriesPredictor
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# Data Preprocessing

In [114]:
# Load the wide-format sample: one row per Market / Sales Product pair and
# one column per month.
df0 = pd.read_csv('Sample_Market_SKU_data.csv')
# Only the last expression of a cell is displayed, so the shape has to be
# printed explicitly or it is silently discarded.
print(df0.shape)
df0.head()

Unnamed: 0,Market,Sales Product,2021-12-01,2022-01-01,2022-02-01,2022-03-01,2022-04-01,2022-05-01,2022-06-01,2022-07-01,...,2024-02-01,2024-03-01,2024-04-01,2024-05-01,2024-06-01,2024-07-01,2024-08-01,2024-09-01,2024-10-01,2024-11-01
0,AFRICA,AB12345,305,302,1503,1471,1583,2080,1490,672,...,787,1417,847,777,1128,1427,846,1475,1572,927
1,ASIA,AB12346,1892,1071,2065,841,1542,2142,691,103,...,615,1166,1538,1206,1129,290,426,762,1775,777
2,OCEANIA,AC12347,1728,1912,193,102,465,1279,1748,1817,...,1834,1392,1746,759,2258,2062,223,142,635,1295
3,EUROPE,AD12348,776,1499,660,1127,214,174,351,1184,...,561,316,1961,1005,1936,331,432,873,1753,870
4,NORTH AMERICA,AS12349,720,1101,1516,1395,967,44,638,521,...,975,1329,933,310,1533,1836,1644,2095,2268,1076


In [125]:
# Build the series identifier "<Market>_<Sales Product>"; it becomes the
# AutoGluon item_id further down.
df0["SKUMarket"] = df0["Market"] + "_" + df0["Sales Product"]

# Select columns by name rather than by position so that a change in the CSV
# column order cannot silently drop the wrong columns. Everything that is not
# one of the identifier columns is treated as a monthly value column.
id_source_cols = ["Market", "Sales Product"]
value_cols = [
    c for c in df0.columns if c not in id_source_cols and c != "SKUMarket"
]

# Put 'SKUMarket' first and take an explicit copy so `df` is independent of `df0`.
df = df0.loc[:, ["SKUMarket", *value_cols]].copy()

# The identifiers become column headers after the transpose, so they must be unique.
assert df["SKUMarket"].is_unique, "duplicate Market/Sales Product combinations found"
df.head()

Unnamed: 0,SKUMarket,2021-12-01,2022-01-01,2022-02-01,2022-03-01,2022-04-01,2022-05-01,2022-06-01,2022-07-01,2022-08-01,...,2024-02-01,2024-03-01,2024-04-01,2024-05-01,2024-06-01,2024-07-01,2024-08-01,2024-09-01,2024-10-01,2024-11-01
0,AFRICA_AB12345,305,302,1503,1471,1583,2080,1490,672,472,...,787,1417,847,777,1128,1427,846,1475,1572,927
1,ASIA_AB12346,1892,1071,2065,841,1542,2142,691,103,1185,...,615,1166,1538,1206,1129,290,426,762,1775,777
2,OCEANIA_AC12347,1728,1912,193,102,465,1279,1748,1817,1494,...,1834,1392,1746,759,2258,2062,223,142,635,1295
3,EUROPE_AD12348,776,1499,660,1127,214,174,351,1184,225,...,561,316,1961,1005,1936,331,432,873,1753,870
4,NORTH AMERICA_AS12349,720,1101,1516,1395,967,44,638,521,523,...,975,1329,933,310,1533,1836,1644,2095,2268,1076


In [136]:
# Reshape to one row per month and one column per SKUMarket.
# Moving 'SKUMarket' into the index BEFORE transposing keeps the value columns
# numeric; transposing with the string id column still in the frame would
# coerce every value to `object` dtype.
dft = (
    df.set_index("SKUMarket")
    .T
    # The former column labels (the month strings) are now the index: name it
    # 'timestamp' and clear the leftover 'SKUMarket' name on the columns axis.
    .rename_axis(index="timestamp", columns=None)
    .reset_index()
)
dft.head()

Unnamed: 0,timestamp,AFRICA_AB12345,ASIA_AB12346,OCEANIA_AC12347,EUROPE_AD12348,NORTH AMERICA_AS12349,AFRICA_AB12347,ASIA_AB12348,OCEANIA_AC12348,EUROPE_AD12349,NORTH AMERICA_AS12350,AFRICA_AB12349,ASIA_AB12350,OCEANIA_AC12349,EUROPE_AD12350,NORTH AMERICA_AS12351,AFRICA_AB12351,ASIA_AB12352,OCEANIA_AC12350,EUROPE_AD12351
0,2021-12-01,305,1892,1728,776,720,1132,1699,2236,422,1465,754,936,2135,1078,1677,1384,1358,951,840
1,2022-01-01,302,1071,1912,1499,1101,2255,1165,1493,1666,757,733,1203,2157,1427,233,1022,818,1704,2032
2,2022-02-01,1503,2065,193,660,1516,1712,1238,1543,630,852,1562,2140,1862,196,1291,1915,1073,2066,2016
3,2022-03-01,1471,841,102,1127,1395,922,298,1220,114,1543,1781,1669,1340,911,42,1728,828,1923,1780
4,2022-04-01,1583,1542,465,214,967,1899,625,2241,1552,1707,2151,2169,666,1846,2101,649,377,639,1305


# Create an Autogluon DataFrame with timestamp, id_column and target


In [200]:
# Build the long-format frame (item_id, timestamp, target) for the FIRST series.
# The label is read from the column that is actually selected, and the number
# of rows follows `dft`, so the id can never drift from the data and a history
# of a different length cannot introduce misaligned NaN rows.
item_id = dft.columns[1]

# Constant id column, aligned on dft's index.
df_id = pd.DataFrame({"item_id": item_id}, index=dft.index)
# First two columns of dft: the timestamp and the selected series.
dft0 = dft.iloc[:, :2]

# Combine side by side and rename the series column to 'target'.
agl_df = pd.concat([df_id, dft0], axis=1)
agl_df = agl_df.rename(columns={agl_df.columns[2]: 'target'})
agl_df.head()

Unnamed: 0,item_id,timestamp,target
0,AFRICA_AB12345,2021-12-01,305
1,AFRICA_AB12345,2022-01-01,302
2,AFRICA_AB12345,2022-02-01,1503
3,AFRICA_AB12345,2022-03-01,1471
4,AFRICA_AB12345,2022-04-01,1583


In [215]:
# AutoGluon raises an error unless the columns have the expected dtypes:
# numeric target, datetime timestamp, string item_id.
# errors='coerce' turns unparseable entries into NaN / NaT instead of raising.
agl_df = agl_df.assign(
    target=pd.to_numeric(agl_df['target'], errors='coerce'),
    timestamp=pd.to_datetime(agl_df['timestamp'], errors='coerce'),
    item_id=agl_df['item_id'].astype(str),
)
agl_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 36 entries, 0 to 35
Data columns (total 3 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   item_id    36 non-null     object        
 1   timestamp  36 non-null     datetime64[ns]
 2   target     36 non-null     int64         
dtypes: datetime64[ns](1), int64(1), object(1)
memory usage: 996.0+ bytes


In [217]:
# Wrap the long-format frame in AutoGluon's time series container, naming the
# columns that hold the series id and the timestamp.
train_data = TimeSeriesDataFrame.from_data_frame(
    df=agl_df,
    id_column="item_id",
    timestamp_column="timestamp",
)
train_data.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,target
item_id,timestamp,Unnamed: 2_level_1
AFRICA_AB12345,2021-12-01,305
AFRICA_AB12345,2022-01-01,302
AFRICA_AB12345,2022-02-01,1503
AFRICA_AB12345,2022-03-01,1471
AFRICA_AB12345,2022-04-01,1583


In [226]:
# Configure the forecaster: predict 6 steps ahead of the 'target' column,
# score candidate models with MSE, and save artifacts under
# 'autogluon-target-monthly' (relative to the working directory).
predictor = TimeSeriesPredictor(
    target="target",
    prediction_length=6,
    eval_metric="MSE",
    path="autogluon-target-monthly",
)

# Train with the 'medium_quality' preset, capped at 600 seconds.
predictor.fit(train_data, presets="medium_quality", time_limit=600)

Beginning AutoGluon training... Time limit = 600s
AutoGluon will save models to 'C:\Users\neogi\Documents\Python_venv\autogluon-target-monthly'
AutoGluon Version:  1.2
Python Version:     3.11.9
Operating System:   Windows
Platform Machine:   AMD64
Platform Version:   10.0.26100
CPU Count:          12
GPU Count:          0
Memory Avail:       16.63 GB / 31.64 GB (52.6%)
Disk Space Avail:   372.09 GB / 475.50 GB (78.3%)
Setting presets to: medium_quality

Fitting with arguments:
{'enable_ensemble': True,
 'eval_metric': MSE,
 'hyperparameters': 'light',
 'known_covariates_names': [],
 'num_val_windows': 1,
 'prediction_length': 6,
 'quantile_levels': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
 'random_seed': 123,
 'refit_every_n_windows': 1,
 'refit_full': False,
 'skip_model_selection': False,
 'target': 'target',
 'time_limit': 600,
 'verbosity': 2}

Inferred time series frequency: 'MS'
Provided train_data has 36 rows, 1 time series. Median time series length is 36 (min=36, max=36

<autogluon.timeseries.predictor.TimeSeriesPredictor at 0x1dbe0d3f510>

In [224]:
# Forecasts cover the next `prediction_length` timesteps, starting right after
# the last observation of each time series in train_data.
predictions = predictor.predict(data=train_data)
predictions

Model not specified in predict, will default to the model with the best validation score: WeightedEnsemble


Unnamed: 0_level_0,Unnamed: 1_level_0,mean,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9
item_id,timestamp,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
AFRICA_AB12345,2024-12-01,1351.246338,550.697836,684.409167,831.826317,946.769183,1351.246324,1410.932449,1730.33314,1877.551345,2063.848034
AFRICA_AB12345,2025-01-01,818.270874,254.604583,451.881766,542.432036,685.310357,818.270829,1019.442544,1228.270495,1417.831172,1577.016535
AFRICA_AB12345,2025-02-01,846.233398,162.591801,387.981089,518.08324,675.201021,846.233384,1058.502365,1304.385069,1519.277916,1700.808561
AFRICA_AB12345,2025-03-01,880.550903,85.816928,337.419194,501.181258,667.658329,880.550889,1097.939192,1376.497837,1615.654461,1818.867164
AFRICA_AB12345,2025-04-01,921.588135,19.356395,297.377692,488.856118,663.096584,921.58809,1139.803534,1448.820641,1710.213374,1936.069533
AFRICA_AB12345,2025-05-01,968.550415,-37.639562,266.283408,481.347609,662.294465,968.550431,1184.732881,1522.409855,1803.576335,2052.766204


In [227]:
# Show the kernel's current working directory (IPython automagic for %pwd).
# The relative CSV file names and the predictor's `path` are given relative to it.
pwd

'C:\\Users\\neogi\\Documents\\Python_venv'

In [248]:
# Load the evaluation data and wrap its first three columns in a
# TimeSeriesDataFrame.
# NOTE(review): assumes those columns are item_id, timestamp and target -
# confirm against the CSV.
test = pd.read_csv('Sample_Market_SKU_data_test.csv')
test_data = TimeSeriesDataFrame(test.iloc[:, :3])

# Plot the history (at most the last 50 observations) together with the
# forecast and its 10%-90% quantile band, for at most 2 item ids.
predictor.plot(test_data, predictions, quantile_levels=[0.1, 0.9], max_history_length=50, max_num_item_ids=2);

data with frequency 'None' has been resampled to frequency 'MS'.


In [249]:
# Rank every trained model on the evaluation data. AutoGluon reports scores in
# "higher is better" form, so the MSE values appear negated.
predictor.leaderboard(data=test_data)

data with frequency 'None' has been resampled to frequency 'MS'.
Additional data provided, testing on additional data. Resulting leaderboard will be sorted according to test score (`score_test`).


Unnamed: 0,model,score_test,score_val,pred_time_test,pred_time_val,fit_time_marginal,fit_order
0,DirectTabular,-876912.265042,-100655.8125,0.125333,0.109374,0.94571,4
1,WeightedEnsemble,-885028.46578,-88709.273687,0.206862,0.141337,0.699995,9
2,Chronos[bolt_small],-891918.543986,-186060.774936,1.815791,3.00901,0.03215,7
3,Theta,-892269.16,-128229.725616,25.977143,30.50742,0.017845,6
4,SeasonalNaive,-892269.16,-692151.5,2.542435,1.860922,0.002924,2
5,RecursiveTabular,-892269.16,-695151.750014,3.186189,0.220989,0.982601,3
6,Naive,-892269.16,-282031.833333,3.222901,2.421706,0.019884,1
7,ETS,-892269.16,-109826.283615,5.377212,4.12342,0.018362,5
8,TemporalFusionTransformer,-892274.291196,-98127.959048,0.079519,0.031963,250.696053,8
