In [1]:
import os
import pandas as pd

pd.set_option('display.max_columns', None)

# Suppress Warnings
import warnings
warnings.filterwarnings("ignore")

import fs_modelling as fs
from prophet import Prophet

import plotly.express as px

**Data**

In [2]:
# Load the training and validation feature matrices; in both files the
# column labelled 'Unnamed: 0' becomes the DataFrame index.
X_train, X_val = (
    pd.read_csv(csv_path, index_col='Unnamed: 0')
    for csv_path in ('./data/X_train_data/X_train.csv',
                     './data/X_val_data/X_val.csv')
)

def import_all_csvs_as_vars(folder):
    """Load every ``.csv`` file in `folder` into a module-level DataFrame.

    Each file ``<name>.csv`` is read with its 'Unnamed: 0' column as the
    index and bound to the global variable ``<name>`` through ``globals()``.
    Entries that do not end in ``.csv`` are ignored.

    Parameters
    ----------
    folder : str or path-like
        Directory to scan (not recursive).

    Returns
    -------
    None
        The DataFrames are only exposed as global variables.

    Notes
    -----
    An existing global with the same name is silently overwritten.
    """
    # os.listdir returns entries in arbitrary order; sorting makes the load
    # order reproducible between runs and machines.
    for file in sorted(os.listdir(folder)):
        # splitext removes only the trailing extension. The previous
        # str.replace('.csv', '') also stripped '.csv' from the middle of a
        # file name (e.g. 'a.csv_part.csv' became 'a_part').
        df_name, extension = os.path.splitext(file)
        if extension != '.csv':
            continue
        df = pd.read_csv(os.path.join(folder, file), index_col='Unnamed: 0')
        globals()[df_name] = df

# Expose every target CSV of the training and validation folders as its own
# DataFrame variable, named after the file it was read from
for target_folder in ('data/y_train_data', 'data/y_val_data'):
    import_all_csvs_as_vars(target_folder)

In [3]:
# Preview the first five rows of the training features
X_train.head(5)

Unnamed: 0,CHI Production Index,CHI Shipments Index,FRA Production Index,FRA Shipments Index,GER Production Index,GER Shipments Index,ITA Production Index,ITA Shipments Index,JAP Production Index,JAP Shipments Index,SWI Production Index,SWI Shipments Index,UK Production Index,UK Shipments Index,USA Production Index,USA Shipments Index,Europe Production Index,Europe Shipments Index,(W) Price of Base Metals,(W) Price of Energy,(W) Price of Metals & Minerals,(W) Price of Natural gas index,"(W) Price of Crude oil, average",(W) Price of Copper,USA EUR to LCU Conversion Rate,USA EE Producer Prices,UK EE Producer Prices,ITA EE Producer Prices,FRA EE Producer Prices,GER EE Producer Prices,CHI EE Producer Prices,USA Machinery & Equipment Index,(W) Machinery & Equipment Index,SWI Machinery & Equipment Index,UK Machinery & Equipment Index,ITA Machinery & Equipment Index,JAP Machinery & Equipment Index,FRA Machinery & Equipment Index,GER Machinery & Equipment Index,USA EE Production Index,(W) EE Production Index,SWI EE Production Index,UK EE Production Index,ITA EE Production Index,JAP EE Production Index,FRA EE Production Index,GER EE Production Index,CC_CHI,CC_FRA,CC_GER,CC_ITA,CC_JAP,CC_Europe,CC_SWI,CC_UK,CC_USA,BC_CHI,BC_FRA,BC_GER,BC_ITA,BC_JAP,BC_Europe,BC_SWI,BC_UK,BC_USA,stock_price,stock_price_change,stock_volume,Covid_Chi,Covid_Fra,Covid_Ger,Covid_Ita,Covid_Swi,Covid_UK,Covid_US,Clean_Chi,Clean_Fra,Clean_Ger,Clean_Ita,Clean_Swi,Clean_UK,Clean_US,Fossil_Chi,Fossil_Fra,Fossil_Ger,Fossil_Ita,Fossil_Swi,Fossil_UK,Fossil_US,Buildings_Ger,Year,Month,Semester,sin_Semester,cos_Semester,Quarter,sin_Quarter,cos_Quarter,sin_month,cos_month,WeekendDaysCount,TotalDaysInMonth,WeekendDaysPercentage,SundayCount,GerHolidayCount
2018-10-01,211.955755,211.955755,108.280608,122.451734,124.227879,137.741953,118.350514,122.456894,125.209957,124.79325,109.077781,104.594781,113.659322,112.31803,111.90254,127.808839,124.391967,130.989253,86.102586,100.222169,79.354986,89.570796,97.072264,82.545082,1.1484,110.700409,111.463669,105.297836,102.064743,109.119614,96.318329,111.422638,131.340118,106.816937,110.89345,129.389221,131.503786,114.72081,127.461136,112.853256,129.325775,112.970843,118.670791,93.001511,112.376774,97.849541,118.298233,102.8056,98.94864,101.5778,101.7588,100.3849,100.7815,100.318,100.6982,101.5022,98.79642,101.0451,101.7476,100.9696,101.5259,100.9295,102.0386,102.0568,101.2177,91.7,-7.81,61460000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,190.49,37.32,24.59,7.42,4.27,12.65,113.47,368.15,3.84,26.43,14.74,0.3,10.65,213.52,11738.0,2018,10,2,-2.449294e-16,1.0,4,-2.449294e-16,1.0,-0.8660254,0.5,8,31,25.806452,4,1
2018-11-01,220.519655,220.519655,99.636911,115.95821,127.404132,142.732193,107.71926,120.132032,122.624695,123.289888,112.241491,107.656238,115.088417,112.801011,110.880401,117.675874,123.070091,132.93413,83.841374,84.436807,77.500875,97.362468,78.841167,82.230939,1.1367,110.994026,111.668373,105.297836,102.064743,109.224838,95.370118,109.737129,129.976456,110.792831,112.119922,117.990173,127.880755,104.8731,132.987915,113.145294,128.236176,114.736013,120.467019,84.1334,111.907535,91.15596,117.163727,103.2386,98.71818,101.5438,101.645,100.285,100.6595,100.3069,100.439,101.3958,98.45412,100.9598,101.5859,100.8509,101.5088,100.9174,101.8399,102.2486,101.0049,92.31,0.67,48250000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,167.99,39.31,22.61,8.2,4.98,13.11,119.73,406.89,5.48,29.52,14.79,0.31,11.74,204.12,11177.0,2018,11,2,-2.449294e-16,1.0,4,-2.449294e-16,1.0,-0.5,0.866025,8,30,26.666667,4,0
2018-12-01,241.846854,241.846854,94.690312,115.128469,120.518565,141.407661,88.783181,131.936099,122.991956,124.508413,115.405201,110.717696,101.556108,94.503733,106.257796,123.280134,113.858005,131.261348,82.733389,74.898746,76.071705,94.406578,68.268564,80.630361,1.1384,111.162231,112.794266,105.297836,102.16671,109.330063,94.994885,103.44828,124.202469,114.768725,99.446384,99.191734,128.125679,104.974617,137.363281,111.823624,117.043549,116.501182,105.378705,64.881248,112.524242,78.033028,89.626122,103.6305,98.62968,101.519,101.4642,100.1741,100.5454,100.1724,100.136,101.2298,98.18901,100.852,101.3715,100.7023,101.4458,100.8464,101.6098,102.3394,100.692,87.78,-4.9,48710000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,161.51,47.36,27.8,7.87,5.26,13.08,130.01,479.9,4.54,25.1,15.39,0.32,11.93,213.35,11919.0,2018,12,2,-2.449294e-16,1.0,4,-2.449294e-16,1.0,-2.449294e-16,1.0,10,31,32.258065,5,2
2019-01-01,175.668147,175.668147,90.143775,108.325154,104.776326,118.703828,99.275113,92.418842,105.514024,106.303946,109.621788,105.164427,101.799754,99.65921,107.510808,111.043755,106.418002,113.057565,81.251093,76.204771,75.774153,80.055366,71.587526,78.822478,1.1416,111.386055,112.077789,105.505936,102.778488,109.750961,94.950134,106.506844,112.1754,109.196212,98.833149,106.991089,110.200941,95.228432,104.43293,109.499725,111.522202,110.35461,107.174933,81.556343,95.957352,81.908257,105.406097,103.9405,98.97886,101.5336,101.2748,100.0272,100.518,99.97832,99.85248,101.0311,98.12537,100.7477,101.138,100.5383,101.3079,100.6773,101.4533,101.9919,100.5415,86.34,-1.64,52630000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,148.69,48.01,27.61,7.9,5.56,12.01,134.69,419.34,6.27,29.35,17.37,0.32,15.13,226.12,11089.0,2019,1,1,1.224647e-16,-1.0,1,1.0,6.123234000000001e-17,0.5,0.866025,8,31,25.806452,4,1
2019-02-01,175.668147,175.668147,92.551521,108.944656,109.597012,122.686997,104.586684,103.214212,114.466647,114.996302,103.838375,99.611158,103.495768,100.589459,109.817608,116.736921,110.42959,117.704727,84.681739,80.086039,80.117198,69.545042,77.344049,83.61877,1.1351,111.561142,111.361313,105.297836,102.676529,109.856194,94.712204,109.303719,116.603294,103.623699,99.54859,113.79052,117.976532,100.101524,112.838226,110.835655,112.666725,104.208038,110.64764,83.451233,107.309956,80.322929,103.652771,104.1535,99.37622,101.548,101.0606,99.87872,100.5431,99.80544,99.72394,101.0838,98.23363,100.6161,100.8785,100.4249,101.1329,100.5132,101.3497,101.3867,100.3775,86.66,0.37,48050000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,148.64,43.91,24.1,8.06,4.5,11.95,121.73,419.34,4.85,25.22,13.14,0.3,10.32,194.84,10465.0,2019,2,1,1.224647e-16,-1.0,1,1.0,6.123234000000001e-17,0.8660254,0.5,8,28,28.571429,4,0


In [4]:
# TODO: build `external_data`, which will hold all external data
#external_data = 

# TODO: build `product_data`, which will hold all product data, including all date-related columns
#product_data = 

In [5]:
# Independent working copy of the training features, so X_train itself is
# never modified by the steps below
original = X_train.copy()

# Empty accumulator: it will hold the future values of the features that are
# not in the test data
future_data_predictions = pd.DataFrame()

In [6]:
# Example subset of features to forecast: the Production and Shipments index
# of each listed country, ordered country by country
columns_example = [
    f"{country} {measure} Index"
    for country in ("CHI", "FRA", "GER", "ITA", "JAP", "SWI")
    for measure in ("Production", "Shipments")
]

In [7]:
# Number of monthly steps to forecast beyond the last date in the data
FORECAST_PERIODS = 10

# Fit one univariate Prophet model per column in 'columns_example' and store
# its predictions as a column of 'future_data_predictions'
for column in columns_example:
    # Prophet expects a frame with a date column 'ds' and a target column 'y'.
    # Naming the index axis before resetting it yields 'ds' whether or not the
    # index carries a name (a plain reset_index() only produces a column
    # called 'index' when the index is unnamed).
    data = (
        original[[column]]
        .rename_axis('ds')
        .reset_index()
        .rename(columns={column: 'y'})
    )

    # Initialize Prophet with its default settings and fit it to this series
    model = Prophet()
    model.fit(data)

    # Dates to predict: FORECAST_PERIODS additional month-start ('MS') dates
    # appended to the dates the model was fitted on
    future = model.make_future_dataframe(periods=FORECAST_PERIODS, freq='MS')

    # Keep only the date ('ds') and the point forecast ('yhat'), indexed by date.
    # The variable keeps its historical name (it also holds Shipments
    # forecasts) because the following cell reads the forecast dates from it
    # once the loop has finished.
    forecast_Production_Index = model.predict(future)[['ds', 'yhat']].set_index('ds')

    # Store the forecast positionally (.values drops the date index); every
    # column is fitted on the same dates, so the rows line up
    future_data_predictions[column] = forecast_Production_Index['yhat'].values

20:27:49 - cmdstanpy - INFO - Chain [1] start processing
20:27:49 - cmdstanpy - INFO - Chain [1] done processing
20:27:50 - cmdstanpy - INFO - Chain [1] start processing
20:27:50 - cmdstanpy - INFO - Chain [1] done processing
20:27:50 - cmdstanpy - INFO - Chain [1] start processing
20:27:50 - cmdstanpy - INFO - Chain [1] done processing
20:27:50 - cmdstanpy - INFO - Chain [1] start processing
20:27:50 - cmdstanpy - INFO - Chain [1] done processing
20:27:50 - cmdstanpy - INFO - Chain [1] start processing
20:27:50 - cmdstanpy - INFO - Chain [1] done processing
20:27:50 - cmdstanpy - INFO - Chain [1] start processing
20:27:51 - cmdstanpy - INFO - Chain [1] done processing
20:27:51 - cmdstanpy - INFO - Chain [1] start processing
20:27:51 - cmdstanpy - INFO - Chain [1] done processing
20:27:51 - cmdstanpy - INFO - Chain [1] start processing
20:27:51 - cmdstanpy - INFO - Chain [1] done processing
20:27:51 - cmdstanpy - INFO - Chain [1] start processing
20:27:51 - cmdstanpy - INFO - Chain [1]

In [8]:
# Turn the date index of the last forecast produced by the loop back into a
# regular 'ds' column (done in place)
forecast_Production_Index.reset_index(inplace=True)
# Label the rows of 'future_data_predictions' with those forecast dates, as an
# index named 'ds', so the frame can be sliced and plotted by date
future_data_predictions.index = pd.Index(forecast_Production_Index['ds'].values, name='ds')

In [None]:
# Inspect the last five forecasted rows
future_data_predictions.tail(5)

Unnamed: 0_level_0,CHI Production Index,CHI Shipments Index,FRA Production Index,FRA Shipments Index,GER Production Index,GER Shipments Index,ITA Production Index,ITA Shipments Index,JAP Production Index,JAP Shipments Index,SWI Production Index,SWI Shipments Index
ds,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2021-10-01,301.540607,301.540607,95.572187,112.110575,128.057386,147.787431,106.248298,119.491503,141.359838,143.00857,109.684411,105.690907
2021-11-01,315.777307,315.777307,88.765802,110.540669,133.762221,155.667993,99.275465,118.608952,143.094797,145.393562,112.113073,108.026768
2021-12-01,338.237259,338.237259,87.828027,114.174042,132.947095,160.600638,83.503887,132.086903,151.908768,155.323554,114.583723,110.351349
2022-01-01,254.833214,254.833214,81.090252,98.745987,114.907141,133.580997,89.928999,93.055194,138.295336,140.80491,111.8195,107.759796
2022-02-01,255.699789,255.699789,86.895187,106.9695,125.375934,145.754203,102.543705,108.358843,148.357399,151.45666,109.031735,105.178863


In [None]:
# Keep only the genuinely future rows, i.e. the dates after the last training
# observation, in a new DataFrame.
# The window is derived from the data instead of hard-coded dates: the former
# slice '2022-05-01':'2023-02-01' lies entirely after the last forecast date
# shown above (2022-02-01) and therefore selected no rows.
last_training_date = pd.to_datetime(original.index).max()
future_data = future_data_predictions.loc[future_data_predictions.index > last_training_date]

# TODO: afterwards, replace with future_data

In [13]:
# Draw every forecasted indicator as its own line over time.
# The x values come from the frame's index, which is named 'ds', so 'ds' is
# the key to relabel; the former "TIME_PERIOD" key matched nothing and left
# the raw name 'ds' in the hover labels.
fig = px.line(
    future_data_predictions,
    x=future_data_predictions.index,
    y=future_data_predictions.columns,
    title="Future Values",
    labels={"value": "Value", "ds": "Date"},
)
fig.update_layout(xaxis_title="Date", yaxis_title="Value",
                  legend_title="Countries")
fig.show()