## Notebook pour l'entraînement des modèles

## Importation des bibliothèques

In [242]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error
import pickle
import warnings
warnings.filterwarnings("ignore")

## Préparation des données

In [243]:
# Relative path to the processed CSV holding the export time series per product
# (presumably resolved against the notebook's working directory — confirm)
EXPORTS_BY_PRODUCT_TIME_SERIES = r'../data/processed/exports_by_product_time_series.csv'

# Read the CSV into a DataFrame; later cells use it as the single source of data
exports_by_product_time_series = pd.read_csv(EXPORTS_BY_PRODUCT_TIME_SERIES)

In [244]:
# Sanity check of the loaded frame: column names, dtypes and non-null counts
exports_by_product_time_series.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1940 entries, 0 to 1939
Data columns (total 4 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   product  1940 non-null   object 
 1   year     1940 non-null   int64  
 2   value    1940 non-null   float64
 3   share    1940 non-null   float64
dtypes: float64(2), int64(1), object(1)
memory usage: 60.8+ KB


## Entraînement des modèles

In [245]:
# columns: product, year, value, share
product_list = exports_by_product_time_series['product'].unique()
product_store = {}

# Build one entry per product in a single pass over the frame. groupby hands
# back each product's rows directly, so the frame is not re-filtered for every
# product; sort=False keeps the products in order of first appearance, i.e. the
# same order as product_list. Rows whose product is missing are not grouped.
for product, product_rows in exports_by_product_time_series.groupby('product', sort=False):
    # reset_index returns a new frame with a fresh 0..n-1 index instead of
    # mutating the selected rows in place
    df = product_rows.reset_index(drop=True)
    # Keep the frame together with its row count
    product_store[product] = {'df': df, 'len': len(df)}

In [246]:
# List the product names that serve as keys of the store
print(product_store.keys())
# Show the stored entry (frame and row count) of one product as a spot check
print(product_store['Fertilisers'])

dict_keys(['Fertilisers', 'Vehicles other than railway or tramway rolling stock, and parts and accessories thereof', 'Electrical machinery and equipment and parts thereof; sound recorders and reproducers, television ...', 'Articles of apparel and clothing accessories, not knitted or crocheted', 'Inorganic chemicals; organic or inorganic compounds of precious metals, of rare-earth metals, ...', 'Edible fruit and nuts; peel of citrus fruit or melons', 'Edible vegetables and certain roots and tubers', 'Salt; sulphur; earths and stone; plastering materials, lime and cement', 'Fish and crustaceans, molluscs and other aquatic invertebrates', 'Aircraft, spacecraft, and parts thereof', 'Preparations of meat, of fish, of crustaceans, molluscs or other aquatic invertebrates, or ...', 'Articles of apparel and clothing accessories, knitted or crocheted', 'Nuclear reactors, boilers, machinery and mechanical appliances; parts thereof', 'Sugars and sugar confectionery', 'Mineral fuels, mineral oils a

In [247]:
# Number of distinct products held in the store
n_products = len(product_store)
print('Length of product store:', n_products)

Length of product store: 97


In [248]:
# Split the data into training and testing sets
# First year of the hold-out period: rows from this year on form the test set,
# all earlier rows form the training set.
TEST_START_YEAR = 2020

for product, entry in product_store.items():
    product_df = entry['df']
    # Both comparisons are kept explicit so each row is assigned exactly as
    # its 'year' value compares against the cut-off
    entry['train'] = product_df[product_df['year'] < TEST_START_YEAR]
    entry['test'] = product_df[product_df['year'] >= TEST_START_YEAR]

In [249]:
# Calculate the root mean squared error
def rmse(y_true, y_pred):
    """Return the root mean squared error between two sets of values.

    Args:
        y_true: Observed values, passed unchanged to ``mean_squared_error``.
        y_pred: Predicted values, passed unchanged to ``mean_squared_error``.

    Returns:
        The square root of ``mean_squared_error(y_true, y_pred)``.
    """
    mse = mean_squared_error(y_true, y_pred)
    return np.sqrt(mse)

In [250]:
# Forecast the product
def forecast_product(product, order=(5,1,0)):
    """Fit an ARIMA model on one product's training rows and score it on its test rows.

    The ``train`` and ``test`` frames are read from the module-level
    ``product_store`` entry of ``product``. The model is fitted on the
    training ``value`` column and then forecasts one step per test row.

    Args:
        product: Key of the product in ``product_store``.
        order: Order tuple passed to ``ARIMA`` as its ``order`` argument.

    Returns:
        A tuple ``(forecast, actual, error)``: the object returned by
        ``model_fit.forecast`` (one step per test row), the array of test
        ``value`` entries, and the RMSE between the two.

    Raises:
        KeyError: If ``product`` is not a key of ``product_store``.
        ValueError: If the product has no test rows to score against.
    """
    # Extract training and testing data
    splits = product_store[product]

    # The model treats the rows as consecutive observations, so make the
    # chronological order explicit instead of relying on the file's row order.
    # A stable sort keeps the existing order of rows that share a year.
    df_train = splits['train'].sort_values('year', kind='mergesort')
    df_test = splits['test'].sort_values('year', kind='mergesort')

    # Without test rows there is nothing to forecast or to score; fail with a
    # clear message instead of an error raised deep inside the libraries.
    if df_test.empty:
        raise ValueError(f"No test rows available for product {product!r}")

    # Fit the ARIMA model on the training values, re-indexed 0..n-1 so the
    # sorted series carries a plain positional index
    train_values = df_train['value'].reset_index(drop=True)
    model = ARIMA(train_values, order=order)
    model_fit = model.fit()

    # Forecast as many steps as there are test rows
    # NOTE(review): this assumes the test years directly follow the training
    # years without gaps — confirm against the data.
    forecast = model_fit.forecast(steps=len(df_test))

    # Get the actual values from the test set, in the same chronological order
    actual = df_test['value'].values

    # Calculate the error
    error = rmse(actual, forecast.values)

    return forecast, actual, error

In [251]:
# Run the forecast for every product in the store and collect the outputs
results = {}
for product in product_store:
    print('Forecasting ', product)
    forecast, actual, error = forecast_product(product)
    # One record per product: forecast, observed test values, and RMSE
    results[product] = dict(forecast=forecast, actual=actual, error=error)

Forecasting  Fertilisers
Forecasting  Vehicles other than railway or tramway rolling stock, and parts and accessories thereof
Forecasting  Electrical machinery and equipment and parts thereof; sound recorders and reproducers, television ...
Forecasting  Articles of apparel and clothing accessories, not knitted or crocheted
Forecasting  Inorganic chemicals; organic or inorganic compounds of precious metals, of rare-earth metals, ...
Forecasting  Edible fruit and nuts; peel of citrus fruit or melons
Forecasting  Edible vegetables and certain roots and tubers
Forecasting  Salt; sulphur; earths and stone; plastering materials, lime and cement
Forecasting  Fish and crustaceans, molluscs and other aquatic invertebrates
Forecasting  Aircraft, spacecraft, and parts thereof
Forecasting  Preparations of meat, of fish, of crustaceans, molluscs or other aquatic invertebrates, or ...
Forecasting  Articles of apparel and clothing accessories, knitted or crocheted
Forecasting  Nuclear reactors, boile

In [252]:
# Average the per-product RMSE values into a single summary figure
per_product_errors = [entry['error'] for entry in results.values()]
overall_error = sum(per_product_errors) / len(results)

In [253]:
# Report the RMSE of each product, followed by the average across products
for product, result in results.items():
    print("Product: ", product)
    print("RMSE: ", result['error'])
    print("---------------------------------------")
print('Overall RMSE: ', overall_error)

Product:  Fertilisers
RMSE:  5.025637578185864
---------------------------------------
Product:  Vehicles other than railway or tramway rolling stock, and parts and accessories thereof
RMSE:  2.4427141689431395
---------------------------------------
Product:  Electrical machinery and equipment and parts thereof; sound recorders and reproducers, television ...
RMSE:  0.8067556324836216
---------------------------------------
Product:  Articles of apparel and clothing accessories, not knitted or crocheted
RMSE:  0.6497241230823675
---------------------------------------
Product:  Inorganic chemicals; organic or inorganic compounds of precious metals, of rare-earth metals, ...
RMSE:  1.2224581772836463
---------------------------------------
Product:  Edible fruit and nuts; peel of citrus fruit or melons
RMSE:  0.0352126327147356
---------------------------------------
Product:  Edible vegetables and certain roots and tubers
RMSE:  0.30577503938808187
------------------------------------

In [254]:
# Persist the per-product results (forecast, actual values and RMSE) to disk.
# NOTE: despite the file name, `results` holds forecast outputs only; no fitted
# ARIMA model object is written here.
RESULTS_PICKLE_PATH = '../models/arima_model.pkl'

with open(RESULTS_PICKLE_PATH, 'wb') as f:
    pickle.dump(results, f)