<a href="https://colab.research.google.com/github/HENICHE-Thilleli/Time-Series-Forecasting-Using-Machine-Learning/blob/main/XGboost/XG_date_features.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

In [None]:
# Colab-specific: mount Google Drive at /content/drive so that later cells can
# read and write files under that path.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Read the semicolon-separated file from the mounted Drive, then convert the
# 'date' column to datetime objects.
df = pd.read_csv('/content/drive/MyDrive/external.csv', parse_dates=True, sep=";")
df['date'] = pd.to_datetime(df['date'])

# Working frame: the PriceUSD column alone, indexed by date.
df1 = df.set_index('date')[['PriceUSD']]
print(df1)

               PriceUSD
date                   
2015-01-02   315.942732
2015-01-03   285.647310
2015-01-04   263.334575
2015-01-05   275.003852
2015-01-06   287.549521
...                 ...
2017-06-26  2426.365051
2017-06-27  2530.345536
2017-06-28  2562.792264
2017-06-29  2540.443032
2017-06-30  2452.712064

[911 rows x 1 columns]


In [None]:
def create_features(df1):
    """
    Return a copy of ``df1`` extended with calendar columns read from its index.

    The index must expose the datetime accessors used below (as a
    ``DatetimeIndex`` does). The input frame is not modified. Columns are
    added in this order: ``dayofweek``, ``quarter``, ``month``, ``year``,
    ``dayofyear``, ``dayofmonth`` and ``weekofyear`` (ISO week number).
    """
    stamps = df1.index
    return df1.assign(
        dayofweek=stamps.dayofweek,
        quarter=stamps.quarter,
        month=stamps.month,
        year=stamps.year,
        dayofyear=stamps.dayofyear,
        dayofmonth=stamps.day,
        weekofyear=stamps.isocalendar().week,
    )

# Replace df1 with the version that carries the calendar columns, then print it.
df1 = create_features(df1)
print(df1)

               PriceUSD  dayofweek  quarter  month  year  dayofyear  \
date                                                                  
2015-01-02   315.942732          4        1      1  2015          2   
2015-01-03   285.647310          5        1      1  2015          3   
2015-01-04   263.334575          6        1      1  2015          4   
2015-01-05   275.003852          0        1      1  2015          5   
2015-01-06   287.549521          1        1      1  2015          6   
...                 ...        ...      ...    ...   ...        ...   
2017-06-26  2426.365051          0        2      6  2017        177   
2017-06-27  2530.345536          1        2      6  2017        178   
2017-06-28  2562.792264          2        2      6  2017        179   
2017-06-29  2540.443032          3        2      6  2017        180   
2017-06-30  2452.712064          4        2      6  2017        181   

            dayofmonth  weekofyear  
date                                
20

In [None]:
# Predictor columns fed to the model, and the column it is trained to predict.
FEATURES = ['year', 'dayofweek', 'dayofmonth']
TARGET = ['PriceUSD']

# Positional split without shuffling: the first 70% of the rows form the
# training set and the remaining rows form the test set.
split_at = int(len(df1) * 0.70)
train, test = df1[:split_at], df1[split_at:]

X_train, y_train = train[FEATURES], train[TARGET]
X_test, y_test = test[FEATURES], test[TARGET]

In [None]:
# Training and prediction with XGBoost
xgb_params = dict(
    booster='gbtree',
    n_estimators=25,
    objective='reg:squarederror',
    max_depth=19,
    learning_rate=1,
)
reg = xgb.XGBRegressor(**xgb_params)

# Both the training and the test split are handed to fit() as evaluation sets.
monitored_sets = [(X_train, y_train), (X_test, y_test)]
reg.fit(X_train, y_train, eval_set=monitored_sets, verbose=100)

# Predictions for the test rows, reused by the metric and plotting cells.
y_pred = reg.predict(X_test)

In [None]:
# Compute the evaluation metrics on the test set
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Bias is the mean signed error (prediction minus actual value).
# y_test is a single-column frame, i.e. shape (n, 1), while y_pred is 1-D.
# Subtracting them directly would broadcast to an n-by-n matrix instead of an
# element-wise difference, so the target is flattened to 1-D first.
y_true = y_test.to_numpy().ravel()
bias = np.mean(y_pred - y_true)

print(f"Biais : {bias:.2f}")
print("MSE: ", mse)
print(f"R²: {r2:.4f}")

In [None]:
# Plot the price series coloured by split, with a dashed line at the boundary.
dfplot = df[['date', 'PriceUSD']]
dfplot = dfplot.set_index('date')
dfplot.index = pd.to_datetime(dfplot.index)

# Same positional 70/30 cut as the one used to build the modelling sets.
split_idx = int(len(df1) * 0.70)
train = dfplot[:split_idx]
test = dfplot[split_idx:]
print(len(df1))
print(len(test))
print(len(train))

fig, ax = plt.subplots(figsize=(15, 5))
train.plot(ax=ax, label='Training Set', title='Data Train/Test Split')
test.plot(ax=ax, label='Test Set')
# The x-axis is the date index, so the marker must be placed at a date: the
# first test date. Passing a PriceUSD value, or a position computed with a
# fraction other than 0.70, would draw the line away from the actual split.
ax.axvline(test.index[0], color='black', ls='--')
ax.legend(['Training Set', 'Test Set'])
plt.show()

In [None]:
# Plot the predictions against the real data
test = df[int(len(df1)*0.70):]
# Take an explicit copy of the column subset before adding a column to it.
# Assigning into a subset of a slice is the chained-indexing pattern that
# triggers SettingWithCopyWarning in pandas without copy-on-write.
dfplot = test[['date', 'PriceUSD']].copy()
dfplot['prediction'] = y_pred
dfplot = dfplot.set_index('date')

fig, ax = plt.subplots(figsize=(15, 5))
dfplot.plot(ax=ax, title='Predictions/Real data')
ax.legend(['Real data', 'Predictions'])
#plt.savefig('/content/drive/MyDrive/xgdatedate.png')
# Save the actual and predicted values side by side to Drive.
dfplot.to_csv('/content/drive/MyDrive/XGBoost.csv')
plt.show()