In [2]:
import pandas as pd
import matplotlib.pyplot as plt
# Read the three raw indicator tables (GDP, inflation, unemployment) from the
# CSV files sitting next to the notebook.
df_GDP, df_INF, df_UNEMP = map(pd.read_csv, ("GDP.csv", "INF.csv", "UNEMP.csv"))

In [3]:
# The analysis covers a 30-year window across the three datasets (the period the
# author found most consistent), so start by removing the non-data columns and
# indexing every table by country.
METADATA_COLUMNS = ['Country Code', 'Indicator Name', 'Indicator Code', 'Unnamed: 68']
dfs = [df_GDP, df_INF, df_UNEMP]

for df in dfs:
    # errors='ignore' plus the membership check below make this cell safe to
    # re-run on the same kernel: on a second run the columns are already gone
    # and 'Country Name' is already the index, which previously raised KeyError.
    df.drop(columns=METADATA_COLUMNS, inplace=True, errors='ignore')
    if 'Country Name' in df.columns:
        df.set_index('Country Name', inplace=True)

In [4]:
# Keep only the columns whose year label is strictly greater than 1993
# (labels are cast to int just for the comparison; the frames keep their labels).
dfs2 = [df_GDP, df_INF, df_UNEMP]
nn_dfs = [frame.loc[:, frame.columns.astype(int) > 1993] for frame in dfs2]
df_GDP, df_INF, df_UNEMP = nn_dfs

# Countries that appear in all three tables.
common_index = df_GDP.index.intersection(df_UNEMP.index).intersection(df_INF.index)

# Restrict each year-filtered frame (nn_dfs) to those shared countries.
filtered_dfs = [frame.loc[common_index] for frame in nn_dfs]
df_GDP_filtered, df_INF_filtered, df_UNEMP_filtered = filtered_dfs

In [5]:
# Now that every dataframe has the same rows and columns, drop the countries that
# still contain NaN values.
# Kept from the original cell: NaN-free versions of the year-filtered (but not
# country-aligned) frames, in case another cell reads these names.
df_GDP = df_GDP.dropna()
df_INF = df_INF.dropna()
df_UNEMP = df_UNEMP.dropna()

# The cells below work on the *_filtered frames, so the NaN rows have to be removed
# from those as well; previously they were left untouched. A country is kept only if
# it is complete in all three tables, so the three frames keep identical rows.
complete_countries = (
    df_GDP_filtered.dropna().index
    .intersection(df_INF_filtered.dropna().index)
    .intersection(df_UNEMP_filtered.dropna().index)
)
df_GDP_filtered = df_GDP_filtered.loc[complete_countries]
df_INF_filtered = df_INF_filtered.loc[complete_countries]
df_UNEMP_filtered = df_UNEMP_filtered.loc[complete_countries]

In [6]:
# Make the dataframes readable: express GDP in billions and round everything to
# two decimals.
# NOTE: this cell overwrites df_GDP_filtered with its scaled version, so running it
# twice divides by one billion twice. Re-run the filtering cell first if needed.

df_GDP_filtered.columns = df_GDP_filtered.columns.astype(int)  # year labels as int
# Scale every column instead of a hard-coded 1994:2023 slice: the frame holds only
# year columns at this point, and a fixed slice would silently leave any year
# outside that range in raw units.
df_GDP_filtered = (df_GDP_filtered / 1_000_000_000).round(2)
df_INF_filtered = df_INF_filtered.round(2)
df_UNEMP_filtered = df_UNEMP_filtered.round(2)

In [7]:
# With readable dataframes in hand, convert each one from wide to long format with
# melt, then merge them into a single all-in-one dataframe.
def _to_long(wide, value_name):
    """Return `wide` (countries x years) as long rows: Country Name, Year (int), <value_name>."""
    return (
        wide.reset_index()
        .melt(id_vars=['Country Name'], var_name='Year', value_name=value_name)
        .astype({'Year': int})  # year labels may be str or int depending on the frame
    )


df_inflation_long = _to_long(df_INF_filtered, 'Inflation')
df_UNEMP_long = _to_long(df_UNEMP_filtered, 'Unemployment')
df_GDP_long = _to_long(df_GDP_filtered, 'GDP')

df_merged = df_GDP_long.merge(df_inflation_long, on=["Country Name", "Year"], how="inner") \
                  .merge(df_UNEMP_long, on=["Country Name", "Year"], how="inner")

# set_index returns a new frame; its result used to be discarded, so the CSV was
# written with a meaningless 0..N index column. Export with the country as index
# and leave df_merged itself unchanged.
df_merged.set_index('Country Name').to_csv('Macroeconomics_dataset.csv')
# Way better! This will be good for Tableau!

In [8]:
# Build a per-country dataframe (here: Italy) with one row per year and one
# column per indicator.
gdp_transposed, inf_transposed, unemp_transposed = (
    table.loc['Italy'].T
    for table in (df_GDP_filtered, df_INF_filtered, df_UNEMP_filtered)
)

# Give all three series string year labels so they align on a common index.
for yearly in (gdp_transposed, inf_transposed, unemp_transposed):
    yearly.index = yearly.index.astype(str)

# Assemble the indicators side by side and name the index 'Year'.
df_italy = pd.DataFrame({
    'GDP': gdp_transposed,
    'Inflation': inf_transposed,
    'Unemployment': unemp_transposed,
}).rename_axis('Year')

In [9]:
# Build the label: target = 1 when the following year's GDP is lower than the
# current year's GDP, 0 otherwise.
next_year_gdp = df_italy['GDP'].shift(-1)
label_known = next_year_gdp.notna() & df_italy['GDP'].notna()
# Where the next year's GDP is unknown (notably the last year) the label is left
# as NaN. A bare `NaN < x` comparison is False, which used to hand that row a
# fabricated 0 that dropna() could not remove.
df_italy['target'] = (next_year_gdp < df_italy['GDP']).astype(float).where(label_known)

# Rows usable for modelling: complete features and a known label. df_italy itself
# is not shrunk, so re-running this cell gives the same result.
model_data = df_italy.dropna().astype({'target': int})

# Split into training and test sets by year (the index holds year strings).
train_data = model_data[model_data.index < '2015']
test_data = model_data[model_data.index >= '2015']

# Input variables (X) and target variable (y).
feature_columns = ['Unemployment', 'Inflation']
X_train = train_data[feature_columns]
y_train = train_data['target']

X_test = test_data[feature_columns]
y_test = test_data['target']

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Random Forest with a fixed seed so the fit is reproducible.
model = RandomForestClassifier(n_estimators=100, random_state=42)

# Fit on the training years, then predict the held-out years.
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
# Share of held-out years classified correctly; y_pred was previously never evaluated.
test_accuracy = accuracy_score(y_test, y_pred)

# Single-row input built from Italy's 2008 unemployment and inflation.
# NOTE(review): 2008 falls inside the training window (< '2015'), so the model has
# already seen this row together with its label.
new_data = pd.DataFrame({
    'Unemployment': [df_italy.loc['2008', 'Unemployment']],
    'Inflation': [df_italy.loc['2008', 'Inflation']],
})

# Predict the class for that row.
prediction = model.predict(new_data)

# predict() returns an array with one element per input row; test that element
# explicitly rather than relying on the truth value of a whole array.
if prediction[0] == 1:
    print("Recessione")
else:
    print("Non recessione.")

Recessione
