In [1]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler, PolynomialFeatures, StandardScaler
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
# Read the raw CSV export of every instrument from the local Datasets/ folder.
# Nothing is parsed or cleaned here; that happens in the following cells.
(
    df_VWCE,
    df_SP500,
    df_APPLE,
    df_MSCI,
    df_NAS,
    df_EIMI,
) = (
    pd.read_csv(f"Datasets/{stem}.csv")
    for stem in ("vwce", "sp500", "apple", "msci", "nasdaq", "eimi")
)

# Apply seaborn's "darkgrid" style to the plots drawn after this cell.
sns.set(style="darkgrid")

Data loading and cleaning. The individual datasets are then merged into a single all-in-one DataFrame.

In [2]:
# Date conversion for every dataframe.
# First pass: parse the day-first 'Date' strings in place on the raw frames.
dfs = [df_SP500, df_MSCI, df_EIMI, df_APPLE, df_NAS]
for raw_frame in dfs:
    raw_frame['Date'] = pd.to_datetime(raw_frame['Date'], dayfirst=True)

# Second pass: keep an independent, chronologically sorted copy of each frame.
ordered_dfs = [
    raw_frame.sort_values(by='Date', ascending=True).copy()
    for raw_frame in dfs
]
(
    df_SP500_ordered,
    df_MSCI_ordered,
    df_EIMI_ordered,
    df_APPLE_ordered,
    df_NAS_ordered,
) = ordered_dfs

# VWCE is handled separately because it has a different date format, so it is
# parsed without dayfirst. assign() works on a new frame, leaving df_VWCE untouched.
df_VWCE_ordered = (
    df_VWCE
    .assign(Date=pd.to_datetime(df_VWCE['Date']))
    .sort_values(by='Date', ascending=True)
    .copy()
)

In [3]:
dfs = [df_VWCE_ordered, df_SP500_ordered, df_MSCI_ordered, df_EIMI_ordered, df_APPLE_ordered, df_NAS_ordered]

# Both steps modify the frames in place on purpose: later cells refer to them by
# their df_*_ordered names, so the objects held in `dfs` must be the ones updated.
for df in dfs:
    # Setting date as index. The guard keeps the cell re-runnable: once 'Date'
    # has become the index it is no longer a column, and set_index would raise KeyError.
    if 'Date' in df.columns:
        df.set_index('Date', inplace=True)

    # Removing % symbol and convert to float
    df['Change %'] = df['Change %'].replace('%', '', regex=True).astype(float)

In [4]:
# The S&P 500 and NASDAQ CSVs use ',' as a separator inside the price values, so
# it has to be removed before the columns can be cast to float.
columns_to_edit = ['Price', 'Open', 'High', 'Low']
for frame in (df_SP500_ordered, df_NAS_ordered):
    for col in columns_to_edit:
        # Skip columns that are already numeric: the .str accessor only exists for
        # text columns, so without this check re-running the cell would raise.
        if pd.api.types.is_numeric_dtype(frame[col]):
            continue
        frame[col] = frame[col].str.replace(',', '', regex=False).astype(float)

In [5]:
# Add a min-max scaled 'Normalized Price' column to each frame.
dfs2 = [df_SP500_ordered, df_NAS_ordered, df_EIMI_ordered, df_APPLE_ordered, df_MSCI_ordered]
columns = ['Price'] * len(dfs2)  # every frame, MSCI included, is scaled on its 'Price' column
scaler = MinMaxScaler()

for frame, price_col in zip(dfs2, columns):
    # fit_transform re-fits the shared scaler on every call, so each instrument is
    # scaled against its own minimum and maximum rather than a common range.
    frame['Normalized Price'] = scaler.fit_transform(frame[[price_col]])

In [6]:
# Inner-join the normalised price series of the five instruments on their shared
# index. Each series is renamed to 'Normalized Price_<TICKER>' before merging, so
# the result does not depend on merge suffixes or a rename afterwards.
normalized_sources = [
    ('SP500', df_SP500_ordered),
    ('NAS', df_NAS_ordered),
    ('EIMI', df_EIMI_ordered),
    ('APPLE', df_APPLE_ordered),
    ('MSCI', df_MSCI_ordered),
]

df_merged = None
for ticker, source in normalized_sources:
    labelled = source[['Normalized Price']].rename(
        columns={'Normalized Price': f'Normalized Price_{ticker}'}
    )
    if df_merged is None:
        # The first instrument seeds the merged frame.
        df_merged = labelled
    else:
        # merge() defaults to an inner join: only index values present on both sides survive.
        df_merged = df_merged.merge(labelled, left_index=True, right_index=True)

In [7]:
# Same join as for the normalised series, but on the raw 'Price' columns.
# Columns come out as Price_SP500, Price_NAS, Price_EIMI, Price_APPLE, Price_MSCI.
price_sources = {
    'SP500': df_SP500_ordered,
    'NAS': df_NAS_ordered,
    'EIMI': df_EIMI_ordered,
    'APPLE': df_APPLE_ordered,
    'MSCI': df_MSCI_ordered,
}

# Label every price series with its ticker first, so no two columns share a name.
labelled_prices = [
    source[['Price']].rename(columns={'Price': f'Price_{ticker}'})
    for ticker, source in price_sources.items()
]

# Fold the list left to right with index-on-index inner joins (merge's default `how`).
df_merged2 = labelled_prices[0]
for next_prices in labelled_prices[1:]:
    df_merged2 = df_merged2.merge(next_prices, left_index=True, right_index=True)

Exploratory Data Analysis

Prediction systems with deep learning: MLP and LSTM

In [8]:
# Some deep learning: MLP

# Inputs are the NASDAQ and S&P 500 prices (in that column order), used as-is
# without scaling; the target is the Apple price.
X = df_merged2[['Price_NAS', 'Price_SP500']].to_numpy()
y = df_merged2['Price_APPLE'].to_numpy()

# Convert to float32 tensors. The target is reshaped into a single column so it
# has the same (n_samples, 1) shape as the model output.
X_tensor = torch.tensor(X, dtype=torch.float32)
y_tensor = torch.tensor(y, dtype=torch.float32).reshape(-1, 1)

class SimpleModel(nn.Module):
    """Small fully connected regressor: 2 inputs -> 64 -> 32 -> 1 output.

    A ReLU follows each of the two hidden layers; the output layer is linear.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(in_features=2, out_features=64)   # input layer
        self.fc2 = nn.Linear(in_features=64, out_features=32)  # hidden layer
        self.fc3 = nn.Linear(in_features=32, out_features=1)   # single output
        # Dropout with p=0.3 is created here but forward() never calls it,
        # so it currently has no effect on training or inference.
        self.dropout = nn.Dropout(p=0.3)

    def forward(self, x):
        """Run a batch `x` (last dimension of size 2) through the three layers."""
        hidden = self.fc1(x).relu()
        hidden = self.fc2(hidden).relu()
        return self.fc3(hidden)
# Seed PyTorch before building the model so the initial weights, and with them
# the printed losses and the final prediction, are the same on every fresh run.
torch.manual_seed(42)
model = SimpleModel()

# Loss and optimization function
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training: full batch, i.e. one optimizer step over the whole dataset per epoch.
epochs = 100
model.train()  # the mode does not change inside the loop, so set it once

for epoch in range(epochs):
    optimizer.zero_grad()

    outputs = model(X_tensor)
    loss = criterion(outputs, y_tensor)

    # Backpropagation and optimization
    loss.backward()
    optimizer.step()

    if (epoch + 1) % 10 == 0:  # print loss value
        print(f'Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}')

# Example query, in the same column order as the training inputs:
# first the NASDAQ price, then the S&P 500 price.
prezzo1_input = 21150
prezzo2_input = 6050

input_data = torch.tensor([[prezzo1_input, prezzo2_input]], dtype=torch.float32)

# Prediction: switch to eval mode and disable autograd, since no gradients are
# needed for a forward pass that is only read out.
model.eval()
with torch.no_grad():
    predizione = model(input_data)

print(f"Prediction: {predizione.item():.2f}")

Epoch [10/100], Loss: 3525.8474
Epoch [20/100], Loss: 624.0200
Epoch [30/100], Loss: 1339.1064
Epoch [40/100], Loss: 1426.8162
Epoch [50/100], Loss: 704.7308
Epoch [60/100], Loss: 254.9856
Epoch [70/100], Loss: 265.4350
Epoch [80/100], Loss: 218.7416
Epoch [90/100], Loss: 206.1499
Epoch [100/100], Loss: 201.0036
Prediction: 229.28


In [9]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import StandardScaler
import pandas as pd
import joblib

# 1. Extract the data: NASDAQ and S&P 500 prices as inputs, Apple price as target
X = df_merged2[['Price_NAS', 'Price_SP500']].to_numpy()
y = df_merged2['Price_APPLE'].to_numpy()

# 2. Standardise the inputs (the target is left in its original units)
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

# 3. Tensors: float32, with the target reshaped to a single column
X_tensor = torch.tensor(X_scaled, dtype=torch.float32)
y_tensor = torch.tensor(y, dtype=torch.float32).reshape(-1, 1)

# 4. Model
class SimpleModel(nn.Module):
    """Fully connected regressor with layer sizes 2 -> 64 -> 32 -> 1.

    This repeats the SimpleModel defined in the earlier MLP cell; running this
    cell rebinds the name to this definition.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(in_features=2, out_features=64)
        self.fc2 = nn.Linear(in_features=64, out_features=32)
        self.fc3 = nn.Linear(in_features=32, out_features=1)
        # Created with p=0.3 but not used in forward(), so it does not affect the output.
        self.dropout = nn.Dropout(p=0.3)

    def forward(self, x):
        """Apply fc1 and fc2 with a ReLU after each, then the linear output layer fc3."""
        hidden = self.fc1(x).relu()
        hidden = self.fc2(hidden).relu()
        return self.fc3(hidden)

# Seed PyTorch before building the model so the initial weights, and therefore
# the checkpoint written below, are the same on every fresh run.
torch.manual_seed(42)
model = SimpleModel()
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# 5. Training loop (full batch: one optimizer step over all rows per epoch)
epochs = 500
model.train()  # the mode does not change inside the loop, so set it once
for epoch in range(epochs):
    optimizer.zero_grad()
    outputs = model(X_tensor)
    loss = criterion(outputs, y_tensor)
    loss.backward()
    optimizer.step()
    if (epoch + 1) % 10 == 0:
        print(f"Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}")

# 6. Save the model weights and the fitted scaler. The model was trained on
#    scaled inputs, so the two files belong together: new inputs must go through
#    this scaler before being passed to the model.
torch.save(model.state_dict(), 'mlp_model.pth')
joblib.dump(scaler, 'scaler.pkl')



Epoch [10/500], Loss: 13447.5039
Epoch [20/500], Loss: 13384.4785
Epoch [30/500], Loss: 13315.4307
Epoch [40/500], Loss: 13230.8818
Epoch [50/500], Loss: 13122.5166
Epoch [60/500], Loss: 12984.2949
Epoch [70/500], Loss: 12809.5586
Epoch [80/500], Loss: 12590.5215
Epoch [90/500], Loss: 12313.7646
Epoch [100/500], Loss: 11972.7236
Epoch [110/500], Loss: 11565.1748
Epoch [120/500], Loss: 11080.1299
Epoch [130/500], Loss: 10507.9092
Epoch [140/500], Loss: 9858.3496
Epoch [150/500], Loss: 9138.3936
Epoch [160/500], Loss: 8358.1953
Epoch [170/500], Loss: 7531.8892
Epoch [180/500], Loss: 6677.5381
Epoch [190/500], Loss: 5816.5933
Epoch [200/500], Loss: 4972.8740
Epoch [210/500], Loss: 4170.9263
Epoch [220/500], Loss: 3434.0347
Epoch [230/500], Loss: 2781.8418
Epoch [240/500], Loss: 2228.2424
Epoch [250/500], Loss: 1779.3334
Epoch [260/500], Loss: 1432.9045
Epoch [270/500], Loss: 1179.2866
Epoch [280/500], Loss: 1003.2570
Epoch [290/500], Loss: 886.3754
Epoch [300/500], Loss: 810.4812
Epoch [3

['scaler.pkl']

In [None]:
# Start a local uvicorn server for the `app` object of the `app` module, with
# auto-reload on file changes. The call does not return while the server is
# running, so this cell keeps the kernel busy until it is interrupted.
import uvicorn

server_options = {"host": "127.0.0.1", "port": 8000, "reload": True}
uvicorn.run("app:app", **server_options)

INFO:     Will watch for changes in these directories: ['c:\\Users\\chris\\Desktop\\Financial-Analytics']
INFO:     Uvicorn running on http://127.0.0.1:8000 (Press CTRL+C to quit)
INFO:     Started reloader process [8512] using StatReload
