In [1]:
import pandas as pd
import numpy as np
from DynamicFactor import DynamicFactorModel
from ForecastingwithDFM import ForecastingWithDFM

In [2]:
# Lecture du fichier *.txt
data = pd.read_csv('fredmq.txt', sep=',').set_index(keys='Date')
# Récupération des types de transformations dans un dictionnaire
transformations = dict(data.loc['Transform']) 
# Abandon de la ligne des transformations
data.drop(labels='Transform', inplace=True)
# Ajustement de l'indice du DataFrame au format datetime
data.index = pd.to_datetime(arg=data.index)
# Abandon des colonnes ayant un nombre de NaN's >= 30
data.dropna(thresh=len(data) - 30, axis=1, inplace=True)
# Abandon des NaN's restants
data.dropna(axis=0, inplace=True)

In [3]:
# Création du DataFrame stockant les séries transformées
transformed_data = pd.DataFrame()
# Transformation des séries selon les codes fournis par McCracken et Ng
for col in data.columns:
    # Récupération du code de transformation
    code = transformations[col]
    # Récupération de la série individuelle
    subset = data[col]
    # Application des transformées selon le code correspondant
    if code == 1:
        # Aucune transformation à appliquer
        temp = subset
    elif code == 2:
        # Différence première
        temp = subset.diff(periods=1)
    elif code == 3:
        # Différence seconde 
        temp = subset.diff(periods=1).diff(periods=1)
    elif code == 4:
        # Transformée en log
        temp = np.log(subset)
    elif code == 5:
        # Différence première du log
        temp = np.log(subset).diff(periods=1)
    elif code == 6:
        # Différence seconde du log 
        temp = np.log(subset).diff(periods=1).diff(periods=1)
    elif code == 7:
        # Différence première de la variation relative
        temp = subset.pct_change().diff(periods=1)
    # Aggrégation des séries transformées
    transformed_data = pd.concat([transformed_data, temp], axis=1)
# Abandon des NaN's causées par les différences premières et secondes
transformed_data.dropna(axis=0, inplace=True)

In [4]:
# Calcul de la médiane des séries
medians = transformed_data.median(axis=0)
# DataFrame contenant les médianes des séries en chaque point
mdf =  transformed_data * 0 + medians
# Calcul de la distance entre les observations et les médianes
z = abs(transformed_data - mdf)
# Calcul de la gamme interquartile des séries
irq = transformed_data.quantile(q=.75) - transformed_data.quantile(q=.25)
# DataFrame contenant les gammes interquartiles des séries en chaque point
irqdf = transformed_data * 0 + irq
# Détermination des outliers (x est un outlier si abs(x - mediane) > 10 * gamme interquartile)
outliers = z > 10 * irqdf
# Abandon des observations considérées comme outliers
mapping = transformed_data[outliers == False].dropna(axis=0)

In [5]:
# Sélection de la période d'estimation
estimation_sample = mapping.loc[(mapping.index >= pd.to_datetime('1983-01-01')) & (mapping.index <= pd.to_datetime('2016-12-01'))]
# Sélection de la période de prévision
forecast_sample = mapping.loc[mapping.index >= pd.to_datetime('2017-01-01')]

In [6]:
# Standardisation des données d'estimation
estimation_sample_std = (estimation_sample - estimation_sample.mean(axis=0)) / estimation_sample.std(axis=0)

In [7]:
estimation_sample_std

Unnamed: 0,RPI,W875RX1,DPCERA3M086SBEA,CMRMTSPLx,RETAILx,INDPRO,IPFPNSS,IPFINAL,IPCONGD,IPDCONGD,...,DNDGRG3M086SBEA,DSERRG3M086SBEA,CES0600000008,CES2000000008,CES3000000008,UMCSENTx,DTCOLNVHFNM,DTCTHFNM,INVEST,VIXCLSx
1983-03-01,0.449066,0.248101,1.721643,1.694292,2.098154,1.150470,1.013769,0.550512,0.393527,0.607086,...,0.642718,-1.742659,-2.142131,-1.346560,-2.392695,1.592923,1.216903,1.956168,0.784695,-0.043435
1983-04-01,-0.300639,-0.154651,0.770879,-0.326263,-0.133306,1.937413,1.933051,1.660330,2.617044,1.040198,...,0.992028,1.589171,0.723032,0.008005,0.694548,2.141377,-0.649699,-1.244423,0.005713,-0.170126
1983-05-01,0.502507,0.357311,0.726766,1.547905,1.180754,0.751510,0.681358,0.561311,0.287061,0.869108,...,-0.208190,-1.187006,0.007325,-0.670171,0.349950,1.070586,-0.115029,0.179001,-0.198108,-0.256890
1983-06-01,-0.194698,0.044041,1.528472,2.749613,1.767617,0.714177,0.541458,0.280850,0.298652,0.784570,...,-0.686630,2.249341,0.363829,0.957451,-0.331315,-0.313608,0.574007,0.976976,0.056520,-0.128748
1983-07-01,0.076724,0.575678,0.913518,-0.379462,0.159464,2.371864,2.133012,1.775402,1.641782,1.104806,...,0.238171,1.551269,0.716502,-0.127751,1.023412,0.130379,-0.499466,-0.853112,-2.157510,0.501987
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2016-08-01,-0.241775,-0.305044,-0.331446,0.603267,-0.295028,-0.594307,-0.370667,-0.088367,0.111242,-0.355952,...,1.042795,-0.283464,0.153278,0.008033,0.440231,-0.078556,0.851026,0.740891,-0.488817,-1.121881
2016-09-01,-0.010275,0.084557,0.016563,-0.143813,0.229042,-0.623833,-0.520086,-0.632517,-0.476555,0.027381,...,0.559102,-0.374126,0.298204,0.302052,-0.561487,0.339314,-0.293422,-0.342653,0.425401,-0.846650
2016-10-01,-0.129021,-0.218991,-0.666820,-0.166519,-0.197105,-0.298237,-0.631337,-0.503694,-0.901320,0.165784,...,-0.040651,0.368277,-0.139073,0.007023,0.154018,-1.070997,-0.640060,-0.602281,-0.275078,-0.780748
2016-11-01,0.046438,0.186593,-0.198498,-0.268428,-0.421430,-1.188113,-1.180510,-1.319876,-1.662740,-1.056193,...,-0.814105,-0.002943,-0.138302,-0.343980,-0.131641,1.697390,0.010194,0.101163,-0.908454,-0.717985


# Mise en place des paramètres

In [8]:

# Génération de données simulées pour tester le modèle
T, N, r = estimation_sample_std.shape[0], estimation_sample_std.shape[1], 3  # Nombre de périodes, variables observées, facteurs
### Paramètre pour KFS##########
# La matrice de transition d'état A doit être r x r
A = np.eye(r)

# Si vous avez 4 variables observées (pour CPI_diff, INDPRO_diff, RPI_diff, UNRATE_diff),
# la matrice d'observation C doit être 4 x r
C = np.random.rand(N, r)  # initialisée aléatoirement, ajustez selon votre modèle

# La matrice de covariance du bruit du processus Q doit être r x r
Q = np.eye(r) * 0.01

# La matrice de covariance du bruit d'observation R doit être 4 x 4
R = np.eye(N) * 0.02

# L'état initial estimé du système doit être de taille r
initial_state = np.zeros(r)

# La covariance initiale estimée doit être r x r
initial_covariance = np.eye(r) * 0.1

paramKFS=[A,C,Q,R,initial_state,initial_covariance]

# Mise en place du Dynamic Factor Model

In [9]:
##### DFM############
# Exemple d'initialisation et d'utilisation de la classe
dfm_PC = DynamicFactorModel(n_factors=N, n_variables=r,extraction_method="PC",params=paramKFS)
# Afficher les informations du modèle

Y_simulated =np.array(estimation_sample_std)
dfm_PC.fit(Y_simulated)


Chargements des facteurs (Lambda):
[[ 8.12522931e-02 -1.54324952e-02  7.12861441e-02 ... -1.24039484e-03
  -7.78148456e-03 -7.97405463e-04]
 [ 7.91545445e-02 -2.41513327e-02  6.77025781e-02 ...  2.15217261e-03
   6.65120193e-03 -8.22394474e-04]
 [ 7.68560403e-02  1.53087493e-02 -4.24883066e-04 ... -2.31430956e-03
   3.19594043e-03  6.73246428e-04]
 ...
 [ 2.16591114e-02 -1.63098209e-02 -3.01425349e-03 ... -1.82178032e-03
   6.14863126e-05 -2.97745359e-04]
 [ 9.52999042e-03  1.59570529e-02 -3.22643008e-02 ... -7.96960900e-04
   7.55046122e-04  3.72099986e-04]
 [-6.35659182e-02  2.46036303e-02  1.67376918e-02 ...  1.17901836e-04
  -8.68819839e-04  6.68976684e-04]]

Valeurs propres:
[1.47449252e+01 1.04999561e+01 9.84652623e+00 6.85831871e+00
 6.13602715e+00 4.19880207e+00 3.81728103e+00 3.19977513e+00
 3.10990251e+00 2.75972352e+00 2.54993096e+00 2.43599271e+00
 2.33891163e+00 2.25128930e+00 2.16694918e+00 2.04142573e+00
 1.93487927e+00 1.82500497e+00 1.81585630e+00 1.72947858e+00
 1.68

In [10]:
# Affichage des résultats
print("\nChargements de facteurs estimés (Lambda):")
print(dfm_PC.factor_loadings)
print("\nVariances idiosyncratiques estimées (diagonal de Sigma_e):")
print(dfm_PC.idiosyncratic_variances)


Chargements de facteurs estimés (Lambda):
[[ 0.13124953  0.09829087  0.02257026 ...  0.0250553   0.04263304
  -0.11093384]
 [-0.04123118 -0.03939974  0.0134871  ... -0.03032532  0.14407049
  -0.0487693 ]
 [-0.02119114  0.026811   -0.12065735 ...  0.00112719  0.05883172
   0.03906455]
 ...
 [ 0.06714585  0.06898674 -0.01333465 ...  0.00464854  0.02015706
   0.04889438]
 [-0.01558585 -0.02213924 -0.15116114 ...  0.00016613 -0.02221803
   0.07668469]
 [-0.12553251 -0.10629774 -0.05002288 ... -0.03678547 -0.02637634
  -0.07330458]]

Variances idiosyncratiques estimées (diagonal de Sigma_e):
[1.76363313 1.21275708 1.89137792 1.31474316 1.55023143 1.71838042
 1.57020983 1.21379123 1.50457703 1.51279703 1.29659517 1.44853146
 1.48949134 1.34099747 1.23723626 1.3162192  1.31129617 1.83226926
 1.55853611 1.25579365 1.40202986 1.56645398 1.83088128 1.60220099
 1.65978697 1.43780257 1.40347513 1.5654837  1.20648212 1.36209361
 1.16656906 1.40478335 1.3112743  1.76581124 1.60604639 1.55281481
 1.

In [11]:
#####Principal Components############
dfm_PC.fit(Y_simulated)  # Adapter le modèle
factor_scores = dfm_PC.transform(Y_simulated)  # Transformer les données
#####KFM#############################
dfm_KFS = DynamicFactorModel(n_factors=N, n_variables=r,extraction_method="KFS",params=paramKFS,iterations=10)
dfm_KFS.fit(Y_simulated)


Chargements des facteurs (Lambda):
[[ 8.12522931e-02 -1.54324952e-02  7.12861441e-02 ... -1.24039484e-03
  -7.78148456e-03 -7.97405463e-04]
 [ 7.91545445e-02 -2.41513327e-02  6.77025781e-02 ...  2.15217261e-03
   6.65120193e-03 -8.22394474e-04]
 [ 7.68560403e-02  1.53087493e-02 -4.24883066e-04 ... -2.31430956e-03
   3.19594043e-03  6.73246428e-04]
 ...
 [ 2.16591114e-02 -1.63098209e-02 -3.01425349e-03 ... -1.82178032e-03
   6.14863126e-05 -2.97745359e-04]
 [ 9.52999042e-03  1.59570529e-02 -3.22643008e-02 ... -7.96960900e-04
   7.55046122e-04  3.72099986e-04]
 [-6.35659182e-02  2.46036303e-02  1.67376918e-02 ...  1.17901836e-04
  -8.68819839e-04  6.68976684e-04]]

Valeurs propres:
[1.47449252e+01 1.04999561e+01 9.84652623e+00 6.85831871e+00
 6.13602715e+00 4.19880207e+00 3.81728103e+00 3.19977513e+00
 3.10990251e+00 2.75972352e+00 2.54993096e+00 2.43599271e+00
 2.33891163e+00 2.25128930e+00 2.16694918e+00 2.04142573e+00
 1.93487927e+00 1.82500497e+00 1.81585630e+00 1.72947858e+00
 1.68

In [12]:
y = np.random.randn(100, 1)  # Simuler une série temporelle
factors = np.random.randn(T, 3)  # Simuler des facteurs estimés
print(estimation_sample_std.shape)
print(factor_scores)
# Pour les prévisions avec DFM, initialiser le modèle avec le nombre correct de retards et de facteurs
forecasting_model = ForecastingWithDFM(num_factors=r, lags_y=1, lags_f=1)
# Ajuster le modèle sur les données
forecasting_model.fit(estimation_sample_std.values,factor_scores)
# Faire des prévisions avec le modèle ajusté
predictions = forecasting_model.predict(estimation_sample_std.values, factor_scores)
# Afficher les prédictions
print(predictions)

(381, 125)
[[-374.4951374   489.565521    159.19348794 ...  170.8860848
    20.33389448   95.25164369]
 [-542.90707095  752.37533709  299.56822059 ...  233.79582498
    30.68337343  174.32409157]
 [  75.07321292 -136.92399822  -56.21456845 ...  -76.28071491
   -23.36941635  -26.15490018]
 ...
 [  33.47454218  -66.62017348  -37.50927299 ...  -20.79919084
   -16.68139366  -27.55523352]
 [-504.89103002  684.34781761  248.96161611 ...  235.05371406
     8.83586506  108.5235411 ]
 [   1.57336233   24.02154593   29.15395284 ...   28.27733686
    14.4114909     5.42074751]]
[[ 4.49065847e-01  2.48100688e-01  1.72164295e+00 ...  1.70886085e+02
   2.03338945e+01  9.52516437e+01]
 [-3.00638572e-01 -1.54650948e-01  7.70878539e-01 ...  2.33795825e+02
   3.06833734e+01  1.74324092e+02]
 [ 5.02507084e-01  3.57310724e-01  7.26765705e-01 ... -7.62807149e+01
  -2.33694164e+01 -2.61549002e+01]
 ...
 [-1.02749627e-02  8.45570181e-02  1.65632336e-02 ... -1.16848155e+02
  -3.78300227e-01 -6.64053625e+01]
 

ValueError: X has 250 features, but LinearRegression is expecting 126 features as input.

In [13]:
estimation_sample_std.columns.to_list()

['RPI',
 'W875RX1',
 'DPCERA3M086SBEA',
 'CMRMTSPLx',
 'RETAILx',
 'INDPRO',
 'IPFPNSS',
 'IPFINAL',
 'IPCONGD',
 'IPDCONGD',
 'IPNCONGD',
 'IPBUSEQ',
 'IPMAT',
 'IPDMAT',
 'IPNMAT',
 'IPMANSICS',
 'IPB51222S',
 'IPFUELS',
 'CUMFNS',
 'HWI',
 'HWIURATIO',
 'CLF16OV',
 'CE16OV',
 'UNRATE',
 'UEMPMEAN',
 'UEMPLT5',
 'UEMP5TO14',
 'UEMP15OV',
 'UEMP15T26',
 'UEMP27OV',
 'CLAIMSx',
 'PAYEMS',
 'USGOOD',
 'CES1021000001',
 'USCONS',
 'MANEMP',
 'DMANEMP',
 'NDMANEMP',
 'SRVPRD',
 'USTPU',
 'USWTRADE',
 'USTRADE',
 'USFIRE',
 'USGOVT',
 'CES0600000007',
 'AWOTMAN',
 'AWHMAN',
 'HOUST',
 'HOUSTNE',
 'HOUSTMW',
 'HOUSTS',
 'HOUSTW',
 'PERMIT',
 'PERMITNE',
 'PERMITMW',
 'PERMITS',
 'PERMITW',
 'AMDMNOx',
 'ANDENOx',
 'AMDMUOx',
 'BUSINVx',
 'ISRATIOx',
 'M1SL',
 'M2SL',
 'M2REAL',
 'BOGMBASE',
 'TOTRESNS',
 'NONBORRES',
 'BUSLOANS',
 'REALLN',
 'NONREVSL',
 'CONSPI',
 'S&P 500',
 'S&P div yield',
 'S&P PE ratio',
 'FEDFUNDS',
 'CP3Mx',
 'TB3MS',
 'TB6MS',
 'GS1',
 'GS5',
 'GS10',
 'AAA',
 'BAA

# Récupération des données Hors échantillon

In [14]:
import requests
import pandas as pd

# Spécifiez les tickers des séries que vous souhaitez récupérer
tickers = estimation_sample_std.columns.to_list()
# Supprimer le 'x' à la fin de chaque terme
tickers_cleaned = [term.rstrip('x') for term in tickers]

# Base URL de l'API FRED
base_url = 'https://api.stlouisfed.org/fred/series/observations'

# Clé API FRED
api_key = 'c78f8ad50c3f014609882fc3a41d8246'

# Dictionnaire pour stocker les données
all_data = {}
ticker_empty=[]
# Récupérer les données pour chaque ticker
for ticker in tickers_cleaned:
    url = f'{base_url}?series_id={ticker}&api_key={api_key}&file_type=json&frequency=m'
    #url = f'{base_url}?series_id={ticker}&api_key={api_key}&file_type=json'
    response = requests.get(url)
    data = response.json()

    # Vérifier si le ticker existe dans les données
    if 'observations' in data:
        # Filtrer les données pour la période spécifiée
        filtered_data = [obs for obs in data['observations'] if 'date' in obs and 'value' in obs and 'realtime_start' in obs and 'realtime_end' in obs and obs['date'] >= '2017-01-01' and obs['date'] <= '2024-04-01']
        all_data[ticker] = filtered_data
    else:
        #print(f"Le ticker {ticker} n'existe pas dans les données.")
        ticker_empty.append(ticker)
# Créer un DataFrame à partir des données
dfs = {}
for ticker, data in all_data.items():
    dfs[ticker] = pd.DataFrame(data)


# Convertir les données en DataFrames pour chaque indicateur
dfscleaned = []

for indicator, values in dfs.items():
    df = pd.DataFrame(values)
    if not df.empty:
        df.set_index('date', inplace=True)
        df.rename(columns={'value': indicator}, inplace=True)
        # Supprimer les colonnes 'realtime_start' et 'realtime_end'
        df.drop(columns=['realtime_start', 'realtime_end'], inplace=True)
        dfscleaned.append(df)
    else:
        ticker_empty.append(indicator)

# Fusionner les DataFrames en un seul
df_combined = pd.concat(dfscleaned, axis=1)

# Afficher le DataFrame combiné
#df_combined



In [16]:
df_combined

Unnamed: 0_level_0,RPI,W875RX1,DPCERA3M086SBEA,CMRMTSPL,INDPRO,IPFPNSS,IPFINAL,IPCONGD,IPDCONGD,IPNCONGD,...,DNDGRG3M086SBEA,DSERRG3M086SBEA,CES0600000008,CES2000000008,CES3000000008,UMCSENT,DTCOLNVHFNM,DTCTHFNM,INVEST,VIXCLS
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-01-01,16383.236,13541.3,98.839,1365202.000,98.7987,99.1552,99.0330,100.3095,101.5192,99.9543,...,100.045,99.015,22.87,26.40,20.63,98.5,304911.01,735727.48,3321.8923,11.61
2017-02-01,16431.536,13589.2,98.892,1363025.000,98.4322,98.4654,98.0920,98.9026,101.1175,98.2521,...,99.999,99.231,22.86,26.33,20.64,96.3,302927.61,731024.65,3344.9762,11.53
2017-03-01,16491.77,13642.8,99.331,1360169.000,99.0663,98.9354,98.7285,99.5125,99.6555,99.4721,...,100.003,99.209,22.93,26.43,20.69,96.9,304015.25,731639.29,3359.2402,11.90
2017-04-01,16521.43,13682.6,99.364,1355381.000,100.0082,100.0831,100.1470,100.3026,102.0769,99.7842,...,99.912,99.534,22.94,26.37,20.75,97,302075.41,727865.34,3359.0723,13.14
2017-05-01,16626.085,13780.7,99.427,1360858.000,100.1286,100.3142,100.5119,100.7767,100.9060,100.7399,...,99.179,99.727,23.05,26.55,20.80,97.1,303306.64,730438.04,3372.0493,10.86
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-12-01,19265.016,15893.6,117.796,1523993.000,102.6149,100.8179,101.2544,101.7841,108.0061,100.0588,...,119.192,124.912,30.42,34.98,27.13,69.7,521938.91,921959.34,5085.0478,12.72
2024-01-01,19382.171,15935.5,117.425,1504096.000,101.811,100.3736,100.919,101.6965,105.3509,100.6426,...,118.755,125.799,30.57,35.30,27.21,79,524940.39,924448.27,5112.786,13.39
2024-02-01,19370.105,15913.7,117.991,1511532.000,102.2599,100.5583,100.7451,100.8224,107.4615,98.9903,...,119.537,126.13,30.67,35.23,27.35,76.9,527544.9,925641.71,5110.8945,13.98
2024-03-01,19407.154,15950.6,118.597,,102.6577,101.3064,101.7222,102.0617,109.5133,100.0167,...,119.761,126.646,30.81,35.40,27.45,79.4,,,5207.4444,13.79


In [71]:
print("les données non retrouvées")
ticker_empty

les données non retrouvées


['HWI',
 'HWIURATIO',
 'CLAIMS',
 'AMDMNO',
 'CONSPI',
 'S&P 500',
 'S&P div yield',
 'S&P PE ratio',
 'COMPAPFF',
 'UMCSENT',
 'DTCOLNVHFNM',
 'DTCTHFNM',
 'INVEST',
 'VIXCLS',
 'RETAIL',
 'CP3M',
 'OILPRICE']