# **Package imports**

In [5]:
import pandas as pd
from arch import arch_model
import numpy as np
import seaborn as sns
import os
import matplotlib.pyplot as plt

# **Processing MASI dataset**

In [18]:

# File paths
weekly_file = "../weekly-masi-scraper/data/weekly_masi/weekly_masi_data.csv"
cleaned_file = "MASI_cleaned.csv"
# Same path as cleaned_file: the cleaned history is refreshed in place.
# Point this somewhere else to keep the previous file untouched.
output_file = "MASI_cleaned.csv"

# The weekly scraper output is optional; without it only the cleaned history is used.
weekly_available = os.path.exists(weekly_file)
if not weekly_available:
    print(f"⚠️ Weekly MASI file not found: {weekly_file}")

# Load the cleaned history and, when present, the new weekly rows.
cleaned_df = pd.read_csv(cleaned_file, parse_dates=["Date"], dayfirst=True)
frames = [cleaned_df]
if weekly_available:
    weekly_df = pd.read_csv(weekly_file, parse_dates=["Date"], dayfirst=True)
    frames.append(weekly_df)

combined_MASI_df = pd.concat(frames, ignore_index=True)

# Both branches go through the same normalisation: read_csv leaves "Date" as
# object when it cannot parse the column, and the merge on "Date" further down
# needs a real datetime key, so coerce explicitly before deduplicating/sorting.
combined_MASI_df["Date"] = pd.to_datetime(combined_MASI_df["Date"])
combined_MASI_df = (
    combined_MASI_df
    .drop_duplicates(subset=["Date"])  # keeps the first row seen for each date
    .sort_values("Date")
    .reset_index(drop=True)
)

# Save the result
combined_MASI_df.to_csv(output_file, index=False, encoding="utf-8")
saved_kind = "Combined" if weekly_available else "Cleaned"
print(f"✅ {saved_kind} CSV saved as {output_file}")



⚠️ Weekly MASI file not found: ../weekly-masi-scraper/data/weekly_masi/weekly_masi_data.csv
✅ Cleaned CSV saved as MASI_cleaned.csv


In [17]:
# Check column dtypes and non-null counts of the MASI frame.
# NOTE(review): the merge on "Date" further down needs a datetime column here —
# confirm the reported dtype after a fresh top-to-bottom run.
combined_MASI_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 250 entries, 0 to 249
Data columns (total 3 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Date         250 non-null    object 
 1   Variation %  250 non-null    float64
 2   weekly_mean  250 non-null    float64
dtypes: float64(2), object(1)
memory usage: 6.0+ KB


# **Processing ATTIJARI DIVERSIFIE**

In [7]:
# Load the raw fund history. parse_dates=True would only try to parse the
# index, so name the "Date" column explicitly to get a datetime column.
attijari = pd.read_csv("DIVERSIFIE_ALL.csv", parse_dates=["Date"])
attijari

Unnamed: 0,Date,Fonds,Horizon minimum conseillé,Valeur Liquidative,Performances glissantes Depuis Début d'année,Performances glissantes 1 semaine,Performances glissantes 6 mois,Performances glissantes 1 an,Performances glissantes 2 ans,Performances glissantes 3 ans,Performances glissantes 5 ans
0,2021-01-08,ATTIJARI DIVERSIFIE,4,539.68,0.25%,0.18%,5.99%,0.55%,11.23%,9.21%,35.94%
1,2021-01-15,ATTIJARI DIVERSIFIE,4,543.51,0.96%,0.71%,6.43%,0.26%,11.42%,9.34%,37.15%
2,2021-01-22,ATTIJARI DIVERSIFIE,4,546.53,1.52%,0.56%,6.98%,0.31%,11.49%,9.69%,37.75%
3,2021-01-29,ATTIJARI DIVERSIFIE,4,547.58,1.72%,0.19%,7.23%,0.88%,11.53%,9.54%,37.77%
4,2021-02-12,ATTIJARI DIVERSIFIE,4,549.92,2.15%,0.23%,7.33%,2.47%,11.25%,10.05%,38.14%
...,...,...,...,...,...,...,...,...,...,...,...
244,2025-11-14,ATTIJARI DIVERSIFIE,4,726.77,13.90%,1.87%,3.89%,13.26%,29.74%,35.37%,39.45%
245,2025-11-21,ATTIJARI DIVERSIFIE,4,715.36,12.11%,1.57%,1.21%,11.56%,27.49%,32.53%,36.14%
246,2025-11-28,ATTIJARI DIVERSIFIE,4,723.81,13.43%,1.18%,2.42%,13.34%,29.42%,33.47%,38.04%
247,2025-12-05,ATTIJARI DIVERSIFIE,4,720.56,12.92%,0.45%,0.59%,13.14%,28.26%,31.91%,36.83%


In [8]:
# Drop the descriptive columns that are not used in the rest of the notebook.
# Rebinding instead of inplace=True, together with errors="ignore", lets this
# cell be re-run without a KeyError once the columns are already gone.
attijari = attijari.drop(
    columns=["Fonds", "Horizon minimum conseillé"],
    errors="ignore",
)
attijari

Unnamed: 0,Date,Valeur Liquidative,Performances glissantes Depuis Début d'année,Performances glissantes 1 semaine,Performances glissantes 6 mois,Performances glissantes 1 an,Performances glissantes 2 ans,Performances glissantes 3 ans,Performances glissantes 5 ans
0,2021-01-08,539.68,0.25%,0.18%,5.99%,0.55%,11.23%,9.21%,35.94%
1,2021-01-15,543.51,0.96%,0.71%,6.43%,0.26%,11.42%,9.34%,37.15%
2,2021-01-22,546.53,1.52%,0.56%,6.98%,0.31%,11.49%,9.69%,37.75%
3,2021-01-29,547.58,1.72%,0.19%,7.23%,0.88%,11.53%,9.54%,37.77%
4,2021-02-12,549.92,2.15%,0.23%,7.33%,2.47%,11.25%,10.05%,38.14%
...,...,...,...,...,...,...,...,...,...
244,2025-11-14,726.77,13.90%,1.87%,3.89%,13.26%,29.74%,35.37%,39.45%
245,2025-11-21,715.36,12.11%,1.57%,1.21%,11.56%,27.49%,32.53%,36.14%
246,2025-11-28,723.81,13.43%,1.18%,2.42%,13.34%,29.42%,33.47%,38.04%
247,2025-12-05,720.56,12.92%,0.45%,0.59%,13.14%,28.26%,31.91%,36.83%


In [9]:
# Rolling-performance columns stored as percentage strings (e.g. "0.25%")
# that are converted to floats in the next cell.
columns_to_clean = [
    "Performances glissantes Depuis Début d'année",
    "Performances glissantes 1 semaine",
    "Performances glissantes 6 mois",
    "Performances glissantes 1 an",
    "Performances glissantes 2 ans",
    "Performances glissantes 3 ans",
    "Performances glissantes 5 ans",
]

In [10]:
# Turn the percentage strings ("0.25%") into floats (0.25).
# Casting to str first keeps the cell re-runnable: on a second run the columns
# are already float and the .str accessor would otherwise raise.
for col in columns_to_clean:
    attijari[col] = (
        attijari[col]
        .astype(str)
        .str.replace("%", "", regex=False)
        .str.strip()
        .astype(float)
    )
attijari

Unnamed: 0,Date,Valeur Liquidative,Performances glissantes Depuis Début d'année,Performances glissantes 1 semaine,Performances glissantes 6 mois,Performances glissantes 1 an,Performances glissantes 2 ans,Performances glissantes 3 ans,Performances glissantes 5 ans
0,2021-01-08,539.68,0.25,0.18,5.99,0.55,11.23,9.21,35.94
1,2021-01-15,543.51,0.96,0.71,6.43,0.26,11.42,9.34,37.15
2,2021-01-22,546.53,1.52,0.56,6.98,0.31,11.49,9.69,37.75
3,2021-01-29,547.58,1.72,0.19,7.23,0.88,11.53,9.54,37.77
4,2021-02-12,549.92,2.15,0.23,7.33,2.47,11.25,10.05,38.14
...,...,...,...,...,...,...,...,...,...
244,2025-11-14,726.77,13.90,1.87,3.89,13.26,29.74,35.37,39.45
245,2025-11-21,715.36,12.11,1.57,1.21,11.56,27.49,32.53,36.14
246,2025-11-28,723.81,13.43,1.18,2.42,13.34,29.42,33.47,38.04
247,2025-12-05,720.56,12.92,0.45,0.59,13.14,28.26,31.91,36.83


In [11]:
# Ensure "Date" is a datetime column; it is the merge key used later on.
attijari = attijari.assign(Date=pd.to_datetime(attijari["Date"]))
attijari

Unnamed: 0,Date,Valeur Liquidative,Performances glissantes Depuis Début d'année,Performances glissantes 1 semaine,Performances glissantes 6 mois,Performances glissantes 1 an,Performances glissantes 2 ans,Performances glissantes 3 ans,Performances glissantes 5 ans
0,2021-01-08,539.68,0.25,0.18,5.99,0.55,11.23,9.21,35.94
1,2021-01-15,543.51,0.96,0.71,6.43,0.26,11.42,9.34,37.15
2,2021-01-22,546.53,1.52,0.56,6.98,0.31,11.49,9.69,37.75
3,2021-01-29,547.58,1.72,0.19,7.23,0.88,11.53,9.54,37.77
4,2021-02-12,549.92,2.15,0.23,7.33,2.47,11.25,10.05,38.14
...,...,...,...,...,...,...,...,...,...
244,2025-11-14,726.77,13.90,1.87,3.89,13.26,29.74,35.37,39.45
245,2025-11-21,715.36,12.11,1.57,1.21,11.56,27.49,32.53,36.14
246,2025-11-28,723.81,13.43,1.18,2.42,13.34,29.42,33.47,38.04
247,2025-12-05,720.56,12.92,0.45,0.59,13.14,28.26,31.91,36.83


In [12]:
# Persist the cleaned fund history without writing the index column.
attijari.to_csv(path_or_buf="attijari_diversifie.csv", index=False)

# **Merging MASI and Attijari data**

In [19]:
# Inner join on "Date": only the dates present in both the fund history and
# the MASI frame are kept.
df_merged = attijari.merge(combined_MASI_df, how="inner", on="Date")
df_merged

Unnamed: 0,Date,Valeur Liquidative,Performances glissantes Depuis Début d'année,Performances glissantes 1 semaine,Performances glissantes 6 mois,Performances glissantes 1 an,Performances glissantes 2 ans,Performances glissantes 3 ans,Performances glissantes 5 ans,Variation %,weekly_mean
0,2021-01-08,539.68,0.25,0.18,5.99,0.55,11.23,9.21,35.94,0.000000,11266.1280
1,2021-01-15,543.51,0.96,0.71,6.43,0.26,11.42,9.34,37.15,0.527129,11325.5150
2,2021-01-22,546.53,1.52,0.56,6.98,0.31,11.49,9.69,37.75,1.615229,11508.4480
3,2021-01-29,547.58,1.72,0.19,7.23,0.88,11.53,9.54,37.77,1.382306,11667.5300
4,2021-02-12,549.92,2.15,0.23,7.33,2.47,11.25,10.05,38.14,-0.827123,11613.8460
...,...,...,...,...,...,...,...,...,...,...,...
229,2025-11-14,726.77,13.90,1.87,3.89,13.26,29.74,35.37,39.45,-3.772092,18751.5620
230,2025-11-21,715.36,12.11,1.57,1.21,11.56,27.49,32.53,36.14,-2.645324,18255.5225
231,2025-11-28,723.81,13.43,1.18,2.42,13.34,29.42,33.47,38.04,1.339285,18500.0160
232,2025-12-05,720.56,12.92,0.45,0.59,13.14,28.26,31.91,36.83,-0.526713,18402.5740


# **Feature engineering**

In [20]:
# Lagged weekly performance (1 and 2 rows back) as extra columns of df_merged.
# NOTE(review): shift() moves by rows, not by calendar weeks; the displayed
# dates skip a week (2021-01-29 -> 2021-02-12), so confirm that row-based lags
# are what is intended.
weekly_perf = df_merged['Performances glissantes 1 semaine']
for lag in (1, 2):
    df_merged[f'rendement_lag_{lag}'] = weekly_perf.shift(lag)

In [21]:
# Display the merged frame to check the new rendement_lag_* columns.
df_merged

Unnamed: 0,Date,Valeur Liquidative,Performances glissantes Depuis Début d'année,Performances glissantes 1 semaine,Performances glissantes 6 mois,Performances glissantes 1 an,Performances glissantes 2 ans,Performances glissantes 3 ans,Performances glissantes 5 ans,Variation %,weekly_mean,rendement_lag_1,rendement_lag_2
0,2021-01-08,539.68,0.25,0.18,5.99,0.55,11.23,9.21,35.94,0.000000,11266.1280,,
1,2021-01-15,543.51,0.96,0.71,6.43,0.26,11.42,9.34,37.15,0.527129,11325.5150,0.18,
2,2021-01-22,546.53,1.52,0.56,6.98,0.31,11.49,9.69,37.75,1.615229,11508.4480,0.71,0.18
3,2021-01-29,547.58,1.72,0.19,7.23,0.88,11.53,9.54,37.77,1.382306,11667.5300,0.56,0.71
4,2021-02-12,549.92,2.15,0.23,7.33,2.47,11.25,10.05,38.14,-0.827123,11613.8460,0.19,0.56
...,...,...,...,...,...,...,...,...,...,...,...,...,...
229,2025-11-14,726.77,13.90,1.87,3.89,13.26,29.74,35.37,39.45,-3.772092,18751.5620,0.94,0.89
230,2025-11-21,715.36,12.11,1.57,1.21,11.56,27.49,32.53,36.14,-2.645324,18255.5225,1.87,0.94
231,2025-11-28,723.81,13.43,1.18,2.42,13.34,29.42,33.47,38.04,1.339285,18500.0160,1.57,1.87
232,2025-12-05,720.56,12.92,0.45,0.59,13.14,28.26,31.91,36.83,-0.526713,18402.5740,1.18,1.57


In [32]:
# Work on an independent copy so the following feature cells leave df_merged untouched.
df_lagged = df_merged.copy(deep=True)
df_lagged

Unnamed: 0,Date,Valeur Liquidative,Performances glissantes Depuis Début d'année,Performances glissantes 1 semaine,Performances glissantes 6 mois,Performances glissantes 1 an,Performances glissantes 2 ans,Performances glissantes 3 ans,Performances glissantes 5 ans,Variation %,weekly_mean,rendement_lag_1,rendement_lag_2
0,2021-01-08,539.68,0.25,0.18,5.99,0.55,11.23,9.21,35.94,0.000000,11266.1280,,
1,2021-01-15,543.51,0.96,0.71,6.43,0.26,11.42,9.34,37.15,0.527129,11325.5150,0.18,
2,2021-01-22,546.53,1.52,0.56,6.98,0.31,11.49,9.69,37.75,1.615229,11508.4480,0.71,0.18
3,2021-01-29,547.58,1.72,0.19,7.23,0.88,11.53,9.54,37.77,1.382306,11667.5300,0.56,0.71
4,2021-02-12,549.92,2.15,0.23,7.33,2.47,11.25,10.05,38.14,-0.827123,11613.8460,0.19,0.56
...,...,...,...,...,...,...,...,...,...,...,...,...,...
229,2025-11-14,726.77,13.90,1.87,3.89,13.26,29.74,35.37,39.45,-3.772092,18751.5620,0.94,0.89
230,2025-11-21,715.36,12.11,1.57,1.21,11.56,27.49,32.53,36.14,-2.645324,18255.5225,1.87,0.94
231,2025-11-28,723.81,13.43,1.18,2.42,13.34,29.42,33.47,38.04,1.339285,18500.0160,1.57,1.87
232,2025-12-05,720.56,12.92,0.45,0.59,13.14,28.26,31.91,36.83,-0.526713,18402.5740,1.18,1.57


In [33]:
# Weekly performance is expressed in percent; convert it to a fraction.
# NOTE(review): in the displayed data the net asset value falls from 726.77
# (2025-11-14) to 715.36 (2025-11-21) while the 1-week performance reads +1.57,
# so this column may hold magnitudes only — confirm the sign handling upstream
# before treating these values as signed returns.
df_lagged['returns'] = df_lagged['Performances glissantes 1 semaine'].div(100)

# Keep only the rows that actually have a return
returns = df_lagged.loc[df_lagged['returns'].notna(), 'returns']
returns

0      0.0018
1      0.0071
2      0.0056
3      0.0019
4      0.0023
        ...  
229    0.0187
230    0.0157
231    0.0118
232    0.0045
233    0.0033
Name: returns, Length: 234, dtype: float64

In [34]:
# GARCH(1,1) specification with a constant mean and Gaussian innovations,
# estimated on the whole `returns` series (dist='t' is a frequent alternative
# for financial data).
# NOTE(review): the saved output of this cell shows arch's data-scale warning
# (scale of y ~4e-05, recommended rescaling 100 * y). Fitting on rescaled
# returns would also change the units of the conditional volatility used by
# the following cells, so it has to be done together with them.
garch = arch_model(returns, mean='Constant', vol='GARCH', p=1, q=1, dist='normal')

# Estimate the parameters; disp="off" suppresses the optimizer output
garch_result = garch.fit(disp="off")

estimating the model parameters. The scale of y is 4.029e-05. Parameter
estimation work better when this value is between 1 and 1000. The recommended
rescaling is 100 * y.

model or by setting rescale=False.

  self._check_scale(resids)


In [35]:
# Fitted conditional volatility (σ_t), written back on the rows that fed the model
sigma_t = garch_result.conditional_volatility
df_lagged.loc[returns.index, 'garch_vol'] = sigma_t
df_lagged

Unnamed: 0,Date,Valeur Liquidative,Performances glissantes Depuis Début d'année,Performances glissantes 1 semaine,Performances glissantes 6 mois,Performances glissantes 1 an,Performances glissantes 2 ans,Performances glissantes 3 ans,Performances glissantes 5 ans,Variation %,weekly_mean,rendement_lag_1,rendement_lag_2,returns,garch_vol
0,2021-01-08,539.68,0.25,0.18,5.99,0.55,11.23,9.21,35.94,0.000000,11266.1280,,,0.0018,0.005041
1,2021-01-15,543.51,0.96,0.71,6.43,0.26,11.42,9.34,37.15,0.527129,11325.5150,0.18,,0.0071,0.005625
2,2021-01-22,546.53,1.52,0.56,6.98,0.31,11.49,9.69,37.75,1.615229,11508.4480,0.71,0.18,0.0056,0.005751
3,2021-01-29,547.58,1.72,0.19,7.23,0.88,11.53,9.54,37.77,1.382306,11667.5300,0.56,0.71,0.0019,0.005777
4,2021-02-12,549.92,2.15,0.23,7.33,2.47,11.25,10.05,38.14,-0.827123,11613.8460,0.19,0.56,0.0023,0.005896
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
229,2025-11-14,726.77,13.90,1.87,3.89,13.26,29.74,35.37,39.45,-3.772092,18751.5620,0.94,0.89,0.0187,0.006384
230,2025-11-21,715.36,12.11,1.57,1.21,11.56,27.49,32.53,36.14,-2.645324,18255.5225,1.87,0.94,0.0157,0.007349
231,2025-11-28,723.81,13.43,1.18,2.42,13.34,29.42,33.47,38.04,1.339285,18500.0160,1.57,1.87,0.0118,0.007228
232,2025-12-05,720.56,12.92,0.45,0.59,13.14,28.26,31.91,36.83,-0.526713,18402.5740,1.18,1.57,0.0045,0.006714


In [36]:
# GARCH volatility observed 1 and 2 rows earlier
for lag in (1, 2):
    df_lagged[f'garch_vol_lag_{lag}'] = df_lagged['garch_vol'].shift(lag)
df_lagged


Unnamed: 0,Date,Valeur Liquidative,Performances glissantes Depuis Début d'année,Performances glissantes 1 semaine,Performances glissantes 6 mois,Performances glissantes 1 an,Performances glissantes 2 ans,Performances glissantes 3 ans,Performances glissantes 5 ans,Variation %,weekly_mean,rendement_lag_1,rendement_lag_2,returns,garch_vol,garch_vol_lag_1,garch_vol_lag_2
0,2021-01-08,539.68,0.25,0.18,5.99,0.55,11.23,9.21,35.94,0.000000,11266.1280,,,0.0018,0.005041,,
1,2021-01-15,543.51,0.96,0.71,6.43,0.26,11.42,9.34,37.15,0.527129,11325.5150,0.18,,0.0071,0.005625,0.005041,
2,2021-01-22,546.53,1.52,0.56,6.98,0.31,11.49,9.69,37.75,1.615229,11508.4480,0.71,0.18,0.0056,0.005751,0.005625,0.005041
3,2021-01-29,547.58,1.72,0.19,7.23,0.88,11.53,9.54,37.77,1.382306,11667.5300,0.56,0.71,0.0019,0.005777,0.005751,0.005625
4,2021-02-12,549.92,2.15,0.23,7.33,2.47,11.25,10.05,38.14,-0.827123,11613.8460,0.19,0.56,0.0023,0.005896,0.005777,0.005751
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
229,2025-11-14,726.77,13.90,1.87,3.89,13.26,29.74,35.37,39.45,-3.772092,18751.5620,0.94,0.89,0.0187,0.006384,0.006902,0.008109
230,2025-11-21,715.36,12.11,1.57,1.21,11.56,27.49,32.53,36.14,-2.645324,18255.5225,1.87,0.94,0.0157,0.007349,0.006384,0.006902
231,2025-11-28,723.81,13.43,1.18,2.42,13.34,29.42,33.47,38.04,1.339285,18500.0160,1.57,1.87,0.0118,0.007228,0.007349,0.006384
232,2025-12-05,720.56,12.92,0.45,0.59,13.14,28.26,31.91,36.83,-0.526713,18402.5740,1.18,1.57,0.0045,0.006714,0.007228,0.007349


In [37]:
# Sum of the squared returns of the next two rows (t+1 and t+2)
future_sq_returns = sum(returns.shift(-step) ** 2 for step in (1, 2))

# 2-week forward volatility: root of the mean squared return over those two rows.
# The last two rows have no complete forward window and stay NaN.
df_lagged['vol_future_2w'] = np.sqrt(future_sq_returns.div(2))
df_lagged

Unnamed: 0,Date,Valeur Liquidative,Performances glissantes Depuis Début d'année,Performances glissantes 1 semaine,Performances glissantes 6 mois,Performances glissantes 1 an,Performances glissantes 2 ans,Performances glissantes 3 ans,Performances glissantes 5 ans,Variation %,weekly_mean,rendement_lag_1,rendement_lag_2,returns,garch_vol,garch_vol_lag_1,garch_vol_lag_2,vol_future_2w
0,2021-01-08,539.68,0.25,0.18,5.99,0.55,11.23,9.21,35.94,0.000000,11266.1280,,,0.0018,0.005041,,,0.006394
1,2021-01-15,543.51,0.96,0.71,6.43,0.26,11.42,9.34,37.15,0.527129,11325.5150,0.18,,0.0071,0.005625,0.005041,,0.004182
2,2021-01-22,546.53,1.52,0.56,6.98,0.31,11.49,9.69,37.75,1.615229,11508.4480,0.71,0.18,0.0056,0.005751,0.005625,0.005041,0.002110
3,2021-01-29,547.58,1.72,0.19,7.23,0.88,11.53,9.54,37.77,1.382306,11667.5300,0.56,0.71,0.0019,0.005777,0.005751,0.005625,0.002300
4,2021-02-12,549.92,2.15,0.23,7.33,2.47,11.25,10.05,38.14,-0.827123,11613.8460,0.19,0.56,0.0023,0.005896,0.005777,0.005751,0.004610
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
229,2025-11-14,726.77,13.90,1.87,3.89,13.26,29.74,35.37,39.45,-3.772092,18751.5620,0.94,0.89,0.0187,0.006384,0.006902,0.008109,0.013888
230,2025-11-21,715.36,12.11,1.57,1.21,11.56,27.49,32.53,36.14,-2.645324,18255.5225,1.87,0.94,0.0157,0.007349,0.006384,0.006902,0.008930
231,2025-11-28,723.81,13.43,1.18,2.42,13.34,29.42,33.47,38.04,1.339285,18500.0160,1.57,1.87,0.0118,0.007228,0.007349,0.006384,0.003946
232,2025-12-05,720.56,12.92,0.45,0.59,13.14,28.26,31.91,36.83,-0.526713,18402.5740,1.18,1.57,0.0045,0.006714,0.007228,0.007349,


In [39]:
# Forward volatility shifted down by 1, 2 and 3 rows.
# NOTE(review): vol_future_2w at row t is built from the returns at t+1 and
# t+2, so its 1-row shift still contains the return at t+1. If these columns
# are used as predictors of vol_future_2w, confirm that vol_future_2w_1 does
# not leak the target.
for lag in range(1, 4):
    df_lagged[f'vol_future_2w_{lag}'] = df_lagged['vol_future_2w'].shift(lag)
df_lagged

Unnamed: 0,Date,Valeur Liquidative,Performances glissantes Depuis Début d'année,Performances glissantes 1 semaine,Performances glissantes 6 mois,Performances glissantes 1 an,Performances glissantes 2 ans,Performances glissantes 3 ans,Performances glissantes 5 ans,Variation %,...,rendement_lag_1,rendement_lag_2,returns,garch_vol,garch_vol_lag_1,garch_vol_lag_2,vol_future_2w,vol_future_2w_1,vol_future_2w_2,vol_future_2w_3
0,2021-01-08,539.68,0.25,0.18,5.99,0.55,11.23,9.21,35.94,0.000000,...,,,0.0018,0.005041,,,0.006394,,,
1,2021-01-15,543.51,0.96,0.71,6.43,0.26,11.42,9.34,37.15,0.527129,...,0.18,,0.0071,0.005625,0.005041,,0.004182,0.006394,,
2,2021-01-22,546.53,1.52,0.56,6.98,0.31,11.49,9.69,37.75,1.615229,...,0.71,0.18,0.0056,0.005751,0.005625,0.005041,0.002110,0.004182,0.006394,
3,2021-01-29,547.58,1.72,0.19,7.23,0.88,11.53,9.54,37.77,1.382306,...,0.56,0.71,0.0019,0.005777,0.005751,0.005625,0.002300,0.002110,0.004182,0.006394
4,2021-02-12,549.92,2.15,0.23,7.33,2.47,11.25,10.05,38.14,-0.827123,...,0.19,0.56,0.0023,0.005896,0.005777,0.005751,0.004610,0.002300,0.002110,0.004182
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
229,2025-11-14,726.77,13.90,1.87,3.89,13.26,29.74,35.37,39.45,-3.772092,...,0.94,0.89,0.0187,0.006384,0.006902,0.008109,0.013888,0.017265,0.014799,0.009153
230,2025-11-21,715.36,12.11,1.57,1.21,11.56,27.49,32.53,36.14,-2.645324,...,1.87,0.94,0.0157,0.007349,0.006384,0.006902,0.008930,0.013888,0.017265,0.014799
231,2025-11-28,723.81,13.43,1.18,2.42,13.34,29.42,33.47,38.04,1.339285,...,1.57,1.87,0.0118,0.007228,0.007349,0.006384,0.003946,0.008930,0.013888,0.017265
232,2025-12-05,720.56,12.92,0.45,0.59,13.14,28.26,31.91,36.83,-0.526713,...,1.18,1.57,0.0045,0.006714,0.007228,0.007349,,0.003946,0.008930,0.013888


In [38]:
# Count missing values per column; NaNs are expected only in the shifted
# (lagged / forward-looking) columns.
df_lagged.isna().sum()

Date                                            0
Valeur Liquidative                              0
Performances glissantes Depuis Début d'année    0
Performances glissantes 1 semaine               0
Performances glissantes 6 mois                  0
Performances glissantes 1 an                    0
Performances glissantes 2 ans                   0
Performances glissantes 3 ans                   0
Performances glissantes 5 ans                   0
Variation %                                     0
weekly_mean                                     0
rendement_lag_1                                 1
rendement_lag_2                                 2
returns                                         0
garch_vol                                       0
garch_vol_lag_1                                 1
garch_vol_lag_2                                 2
vol_future_2w                                   2
dtype: int64

In [40]:
# Renumber the rows 0..n-1 and discard the previous index
df_lagged = df_lagged.reset_index(drop=True)
df_lagged

Unnamed: 0,Date,Valeur Liquidative,Performances glissantes Depuis Début d'année,Performances glissantes 1 semaine,Performances glissantes 6 mois,Performances glissantes 1 an,Performances glissantes 2 ans,Performances glissantes 3 ans,Performances glissantes 5 ans,Variation %,...,rendement_lag_1,rendement_lag_2,returns,garch_vol,garch_vol_lag_1,garch_vol_lag_2,vol_future_2w,vol_future_2w_1,vol_future_2w_2,vol_future_2w_3
0,2021-01-08,539.68,0.25,0.18,5.99,0.55,11.23,9.21,35.94,0.000000,...,,,0.0018,0.005041,,,0.006394,,,
1,2021-01-15,543.51,0.96,0.71,6.43,0.26,11.42,9.34,37.15,0.527129,...,0.18,,0.0071,0.005625,0.005041,,0.004182,0.006394,,
2,2021-01-22,546.53,1.52,0.56,6.98,0.31,11.49,9.69,37.75,1.615229,...,0.71,0.18,0.0056,0.005751,0.005625,0.005041,0.002110,0.004182,0.006394,
3,2021-01-29,547.58,1.72,0.19,7.23,0.88,11.53,9.54,37.77,1.382306,...,0.56,0.71,0.0019,0.005777,0.005751,0.005625,0.002300,0.002110,0.004182,0.006394
4,2021-02-12,549.92,2.15,0.23,7.33,2.47,11.25,10.05,38.14,-0.827123,...,0.19,0.56,0.0023,0.005896,0.005777,0.005751,0.004610,0.002300,0.002110,0.004182
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
229,2025-11-14,726.77,13.90,1.87,3.89,13.26,29.74,35.37,39.45,-3.772092,...,0.94,0.89,0.0187,0.006384,0.006902,0.008109,0.013888,0.017265,0.014799,0.009153
230,2025-11-21,715.36,12.11,1.57,1.21,11.56,27.49,32.53,36.14,-2.645324,...,1.87,0.94,0.0157,0.007349,0.006384,0.006902,0.008930,0.013888,0.017265,0.014799
231,2025-11-28,723.81,13.43,1.18,2.42,13.34,29.42,33.47,38.04,1.339285,...,1.57,1.87,0.0118,0.007228,0.007349,0.006384,0.003946,0.008930,0.013888,0.017265
232,2025-12-05,720.56,12.92,0.45,0.59,13.14,28.26,31.91,36.83,-0.526713,...,1.18,1.57,0.0045,0.006714,0.007228,0.007349,,0.003946,0.008930,0.013888


In [45]:
# Drop the warm-up rows at the top of the frame, i.e. the leading rows where at
# least one lagged feature is still NaN because of shift(). The number of rows
# is derived from the data instead of a hard-coded 3, so re-running this cell
# does not remove additional rows. Rows at the end of the frame are kept as-is.
lag_feature_cols = [
    col for col in df_lagged.columns
    if "_lag_" in col or col.startswith("vol_future_2w_")
]
rows_complete = df_lagged[lag_feature_cols].notna().all(axis=1).to_numpy()
# Position of the first row with every lag feature available (0 if none is)
n_warmup = int(rows_complete.argmax()) if rows_complete.any() else 0
df_lagged = df_lagged.iloc[n_warmup:].reset_index(drop=True)

In [46]:
# Display the frame after the leading rows have been removed.
df_lagged

Unnamed: 0,Date,Valeur Liquidative,Performances glissantes Depuis Début d'année,Performances glissantes 1 semaine,Performances glissantes 6 mois,Performances glissantes 1 an,Performances glissantes 2 ans,Performances glissantes 3 ans,Performances glissantes 5 ans,Variation %,...,rendement_lag_1,rendement_lag_2,returns,garch_vol,garch_vol_lag_1,garch_vol_lag_2,vol_future_2w,vol_future_2w_1,vol_future_2w_2,vol_future_2w_3
0,2021-01-29,547.58,1.72,0.19,7.23,0.88,11.53,9.54,37.77,1.382306,...,0.56,0.71,0.0019,0.005777,0.005751,0.005625,0.002300,0.002110,0.004182,0.006394
1,2021-02-12,549.92,2.15,0.23,7.33,2.47,11.25,10.05,38.14,-0.827123,...,0.19,0.56,0.0023,0.005896,0.005777,0.005751,0.004610,0.002300,0.002110,0.004182
2,2021-02-19,548.67,1.92,0.23,7.05,1.53,10.83,9.63,36.87,-0.696358,...,0.23,0.19,0.0023,0.005920,0.005896,0.005777,0.004539,0.004610,0.002300,0.002110
3,2021-02-26,545.32,1.30,0.61,6.71,1.90,10.52,8.99,36.51,-1.441762,...,0.23,0.23,0.0061,0.005929,0.005920,0.005896,0.003482,0.004539,0.004610,0.002300
4,2021-03-05,544.22,1.09,0.20,6.08,4.55,10.38,8.71,35.47,-0.126193,...,0.61,0.23,0.0020,0.005852,0.005929,0.005920,0.003377,0.003482,0.004539,0.004610
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
226,2025-11-14,726.77,13.90,1.87,3.89,13.26,29.74,35.37,39.45,-3.772092,...,0.94,0.89,0.0187,0.006384,0.006902,0.008109,0.013888,0.017265,0.014799,0.009153
227,2025-11-21,715.36,12.11,1.57,1.21,11.56,27.49,32.53,36.14,-2.645324,...,1.87,0.94,0.0157,0.007349,0.006384,0.006902,0.008930,0.013888,0.017265,0.014799
228,2025-11-28,723.81,13.43,1.18,2.42,13.34,29.42,33.47,38.04,1.339285,...,1.57,1.87,0.0118,0.007228,0.007349,0.006384,0.003946,0.008930,0.013888,0.017265
229,2025-12-05,720.56,12.92,0.45,0.59,13.14,28.26,31.91,36.83,-0.526713,...,1.18,1.57,0.0045,0.006714,0.007228,0.007349,,0.003946,0.008930,0.013888


In [47]:
# Export the engineered dataset without writing the index column.
df_lagged.to_csv(path_or_buf="final_dataset.csv", index=False)