# Prophet

In [12]:
from prophet import Prophet
import pandas as pd
import numpy as np

In [13]:
# Directory (with trailing slash) that holds the input CSV files.
file_path = 'data/'

train_features_csv = file_path + 'train_f_x.csv'
train_targets_csv = file_path + 'y_train_sncf.csv'
test_features_csv = file_path + 'test_f_x_THurtzP.csv'

# Training features: 1 January 2015 to 31 December 2022 (2922 days).
X_train = pd.read_csv(train_features_csv)
# Training targets; the first CSV column is used as the row index.
y_train = pd.read_csv(train_targets_csv, index_col=0)
# Test features: 1 January 2023 to 30 June 2023 (181 days); first column is the row index.
X_test = pd.read_csv(test_features_csv, index_col=0)

In [14]:
# Key every training row as "<date>_<station>" and use that key as the index.
# The key is built before the datetime conversion below, so it uses the date
# text exactly as read from the CSV.
# NOTE(review): presumably this mirrors the index format of y_train — confirm.
X_train = X_train.assign(
    index=X_train['date'].astype(str) + '_' + X_train['station'].astype(str)
).set_index('index')

# Parse the `date` column of both feature frames into datetimes.
for features in (X_train, X_test):
    features['date'] = pd.to_datetime(features['date'])

In [15]:
# Join the training features with the training targets on their shared row
# index so every variable needed for fitting lives in one frame. The inner
# join keeps only the keys present in both frames.
train = X_train.merge(y_train, left_index=True, right_index=True, how='inner')

In [16]:
# Exogenous variables (extra regressors).
extra_regressors = [
    'job',
    'ferie',
    'vacances',
]

In [17]:
# Fit one Prophet model per station on that station's training history,
# forecast the station's test dates, and write all forecasts to one CSV file.
station_frames = []  # per-station forecasts, concatenated once after the loop

stations = train['station'].unique()
for station in stations:
    # Prophet expects the timestamp column to be named `ds` (the target is already `y`).
    df_station = train[train['station'] == station].rename(columns={'date': 'ds'})

    # `rename` returns a new frame, so X_test itself is never modified.
    future = X_test[X_test['station'] == station].rename(columns={'date': 'ds'})
    if future.empty:
        # No test rows for this station: there is nothing to forecast, and
        # calling predict on an empty frame would raise, so skip the fit too.
        continue

    model = Prophet()
    for regressor in extra_regressors:
        model.add_regressor(regressor)

    model.fit(df_station[['ds', 'y'] + extra_regressors])

    # Use the same regressor list as for fitting so the two cannot drift apart.
    forecast = model.predict(future[['ds'] + extra_regressors])

    station_frames.append(
        pd.DataFrame({
            # Submission key in the form "<YYYY-MM-DD>_<station>"; str() keeps
            # this working even if station identifiers are not strings.
            'index': forecast['ds'].dt.strftime('%Y-%m-%d') + '_' + str(station),
            # Clip negative forecasts to 0, then truncate to an integer.
            'y': np.clip(forecast['yhat'], 0, None).astype(int),
        })
    )

# Concatenate once instead of growing a DataFrame inside the loop.
if station_frames:
    all_predictions = pd.concat(station_frames, axis=0, ignore_index=True)
else:
    # No station produced a forecast: still emit a CSV with the expected header.
    all_predictions = pd.DataFrame(columns=['index', 'y'])

all_predictions.to_csv('pred_prophet.csv', index=False)

19:11:21 - cmdstanpy - INFO - Chain [1] start processing
19:11:21 - cmdstanpy - INFO - Chain [1] done processing
19:11:21 - cmdstanpy - INFO - Chain [1] start processing
19:11:22 - cmdstanpy - INFO - Chain [1] done processing
19:11:22 - cmdstanpy - INFO - Chain [1] start processing
19:11:22 - cmdstanpy - INFO - Chain [1] done processing
19:11:23 - cmdstanpy - INFO - Chain [1] start processing
19:11:23 - cmdstanpy - INFO - Chain [1] done processing
19:11:23 - cmdstanpy - INFO - Chain [1] start processing
19:11:23 - cmdstanpy - INFO - Chain [1] done processing
19:11:24 - cmdstanpy - INFO - Chain [1] start processing
19:11:24 - cmdstanpy - INFO - Chain [1] done processing
19:11:24 - cmdstanpy - INFO - Chain [1] start processing
19:11:24 - cmdstanpy - INFO - Chain [1] done processing
19:11:25 - cmdstanpy - INFO - Chain [1] start processing
19:11:25 - cmdstanpy - INFO - Chain [1] done processing
19:11:25 - cmdstanpy - INFO - Chain [1] start processing
19:11:25 - cmdstanpy - INFO - Chain [1]