## Import des données et pré-traitement

In [None]:
import pandas as pd
from src.utility.descriptive_statistics import descriptive_statistics
from src.graph.graph import plot_time_series

# Load the futures panel; its 'index' column holds the observation dates.
# NOTE(review): pickle files execute code on load -- only read trusted files.
df = pd.read_pickle('src/data/panel_data.pkl')
df["index"] = pd.to_datetime(df["index"])

# Dates removed from the sample as outliers.
outlier_dates = [pd.Timestamp('2001-09-11')]
is_outlier = df['index'].isin(outlier_dates)

# Sample window, both bounds inclusive.
in_window = (df['index'] >= '1988-01-01') & (df['index'] <= '2017-01-01')

# Take an explicit copy of the filtered rows: the column assignments below
# would otherwise write into a slice of the loaded frame (chained
# assignment / SettingWithCopyWarning).
df = df[~is_outlier & in_window].copy()

# Every column after the first one is coerced to numeric; values that cannot
# be parsed become NaN.
for col in df.columns[1:]:
    df[col] = pd.to_numeric(df[col], errors='coerce')
df

## Statistiques descriptives

In [None]:
import numpy as np

# descriptive_statistics returns two mappings for the "Maturity 1" series:
# one keyed by the plain statistic name and one keyed by 'Log ' + name.
original_stats, log_stats = descriptive_statistics(df["Maturity 1"])

stats_keys = [
    'Mean',
    'Median',
    'Minimum',
    'Maximum',
    'Std deviation',
    'Skewness',
    'Kurtosis',
    'Autocorrelation',
    'ADF test p-value (10 lags)',
    'Nb obs',
]

# One row per statistic, one column per mapping; a statistic missing from a
# mapping shows up as NaN. dtype=object keeps the values as returned.
df_combined_stats = pd.DataFrame(
    {
        'Prices (c/bu)': [original_stats.get(name, np.nan) for name in stats_keys],
        'Log returns': [log_stats.get('Log ' + name, np.nan) for name in stats_keys],
    },
    index=stats_keys,
    dtype=object,
)
df_combined_stats

## Graphiques


In [None]:
import matplotlib.pyplot as plt

# Index the panel by date. The guard keeps this cell re-runnable: once the
# 'index' column has been moved into the index, a second set_index('index')
# would raise KeyError.
if 'index' in df.columns:
    df = df.set_index('index')

target_date = '2004-06-17'
maturity_cols = ['Maturity 1', 'Maturity 2', 'Maturity 3', 'Maturity 4', 'Maturity 5']

# Look the date up as a Timestamp so that the result is the single row for
# that date rather than a partial-string slice of the index.
prix_17_06_2004 = df.loc[pd.Timestamp(target_date), maturity_cols]

# The x positions are the contracts themselves (this cell has no
# time-to-maturity values), and prices are labelled in cents per bushel to
# agree with the other tables and plots in this notebook.
plt.plot(maturity_cols, prix_17_06_2004.values, marker='o', linestyle='-')
plt.xlabel('Futures contract')
plt.ylabel('Settlement price (cents/bu)')
plt.title(f'Term Structure of Corn Futures Prices, {target_date}')
plt.grid()
plt.show()


In [None]:
# Plot the "Maturity 1" column of df with the project helper.
# NOTE(review): the three strings look like (title, x-label, y-label) --
# confirm against plot_time_series' signature.
plot_time_series(df,["Maturity 1"], " Front-month settlement price (cents/bu)", "Dates", "cents/bu")

# Estimation du modèle espace d'état


## Préparation des données

In [None]:
import pandas as pd
import numpy as np
from src.utility.date import get_t, get_T

# The time variables below are derived from the dates in the index.
df.index = pd.to_datetime(df.index)

# 't' is get_t evaluated on each observation date.
df['t'] = df.index.map(get_t)

# get_T is evaluated once per date and its values fill T1..T5 by position.
# Rows are built directly from its return value instead of wrapping each one
# in a pd.Series inside Index.map.
# NOTE(review): assumes get_T returns five values per date -- confirm.
maturity_time_cols = ['T1', 'T2', 'T3', 'T4', 'T5']
df[maturity_time_cols] = pd.DataFrame(
    [get_T(date) for date in df.index], index=df.index
)

# Replace the five price columns by their natural logarithm.
# NOTE: this overwrites the columns in place, so running the cell twice
# takes the log twice.
price_cols = [f'Maturity {i}' for i in range(1, 6)]
df[price_cols] = np.log(df[price_cols])

# Estimation window, both bounds inclusive; .copy() detaches the result from
# the unfiltered frame.
df = df[(df.index >= '1988-02-01') & (df.index <= '2015-01-01')].copy()
df

## Estimation

In [6]:
from scipy.optimize import minimize
from src.model.kalman import KalmanModel
from src.utility.constant import mu, kappa, sigma1, sigma2, rho, lambdaz
from tqdm import tqdm

def objective(params, observations, times, maturities, n_factors):
    """Objective function for Kalman Model optimization.

    Maps the flat parameter vector onto named model parameters, builds a
    ``KalmanModel`` and returns ``model.compute_likelihood(...)``.

    Args:
        params: Flat sequence of parameter values, ordered as ``param_keys``
            below. It must hold at least ``3 * n + n * (n - 1) // 2`` values
            for ``n = n_factors`` (3, 7, 12, 18 for 1 to 4 factors).
        observations: Passed through to ``compute_likelihood``.
        times: Stored as ``'current_time'`` in the model parameters and
            passed through to ``compute_likelihood``.
        maturities: Stored as ``'maturities'`` in the model parameters and
            passed through to ``compute_likelihood``.
        n_factors: Number of factors of the model.

    Returns:
        Whatever ``KalmanModel.compute_likelihood`` returns.
        NOTE(review): this value is minimised by the caller, so it is
        presumably a negative log-likelihood -- confirm in KalmanModel.

    Raises:
        IndexError: If ``params`` is shorter than the number of parameters
            required for ``n_factors``.
    """
    param_keys = ['mu', 'sigma1', 'lambda1', 'kappa2', 'sigma2', 'lambda2', 'rho12',
                  'kappa3', 'sigma3', 'lambda3', 'rho13', 'rho23',
                  'kappa4', 'sigma4', 'lambda4', 'rho14', 'rho24', 'rho34']

    # Factor 1 contributes 3 parameters; factor k >= 2 contributes kappa_k,
    # sigma_k, lambda_k plus (k - 1) correlations with the earlier factors.
    # Total: 3 * n + n * (n - 1) / 2. The previous 3 + 4 * (n - 1) dropped
    # rho23 for 3 factors and rho14, rho24, rho34 for 4 factors.
    n_params = 3 * n_factors + n_factors * (n_factors - 1) // 2

    model_params = {
        key: params[i] for i, key in enumerate(param_keys[:n_params])
    }
    model_params['maturities'] = maturities
    model_params['current_time'] = times

    model = KalmanModel(n_factors=n_factors, params=model_params)
    return model.compute_likelihood(observations, times, maturities)

# Select the model inputs by column name rather than by position, so the
# estimation does not depend on the exact column layout of df.
price_cols = [f'Maturity {i}' for i in range(1, 6)]
maturity_time_cols = [f'T{i}' for i in range(1, 6)]

observations = df[price_cols].values
times = df["t"].values
maturities = df[maturity_time_cols].values

# Starting values per number of factors, in the parameter order expected by
# objective(): 3, 7, 12 and 18 values for 1 to 4 factors. Factors 3 and 4
# reuse the starting values of factor 2.
initial_guesses = {
    1: [mu, sigma1, lambdaz],
    2: [mu, sigma1, lambdaz, kappa, sigma2, lambdaz, rho],
    3: [mu, sigma1, lambdaz, kappa, sigma2, lambdaz, rho, kappa, sigma2, lambdaz, rho, rho],
    4: [mu, sigma1, lambdaz, kappa, sigma2, lambdaz, rho, kappa, sigma2, lambdaz, rho, rho, kappa, sigma2, lambdaz, rho, rho, rho]
}

# Fit the 1- to 4-factor models in turn and keep each optimizer result.
results = {}
for n_factors in tqdm(range(1, 5)):
    result = minimize(
        objective,
        initial_guesses[n_factors],
        args=(observations, times, maturities, n_factors),
        method='BFGS',
        # NOTE(review): only 10 iterations are allowed, so BFGS may stop
        # before converging -- check result.success before using result.x.
        options={'maxiter': 10}
    )
    results[n_factors] = result
    print(f"Optimized parameters for {n_factors} factors:", result.x)
