<a href="https://colab.research.google.com/github/MicheleGiambelli/Deep-Learning-Project/blob/tommy/Untitled9.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [17]:
import pandas as pd
import numpy as np
import yfinance as yf
from google.colab import files

In [3]:
# Yahoo Finance symbols: Solana is the asset under study, Bitcoin is the
# benchmark that the rolling beta is later computed against.
ticker = "SOL-USD"
btc_ticker = "BTC-USD"

# auto_adjust=False is passed explicitly so the result keeps a separate
# "Adj Close" column, which the following cells rename and read.
# Recent yfinance releases default to auto_adjust=True, which removes that
# column and would make those cells fail on a fresh kernel.
solana_data = yf.download(ticker, start="2020-04-10", auto_adjust=False)
btc_data = yf.download(btc_ticker, start="2020-04-10", auto_adjust=False)

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


In [4]:

# Give each asset's adjusted-close column its own label so the two frames can
# sit side by side. rename() returns a new frame, so the names are bound again
# on every run of this cell.
sol_renames = {"Adj Close": "SOL_Adj_Close"}
btc_renames = {"Adj Close": "BTC_Adj_Close"}
solana_data = solana_data.rename(columns=sol_renames)
btc_data = btc_data.rename(columns=btc_renames)

# Merge the data: align both full frames on the shared index (column-wise
# concat) and keep only the rows where every column has a value.
df = pd.concat([solana_data, btc_data], axis=1).dropna()

# Daily returns: percentage change of each adjusted close from the previous row.
for return_col, price_col in (
    ("SOL_Return", "SOL_Adj_Close"),
    ("BTC_Return", "BTC_Adj_Close"),
):
    df[return_col] = df[price_col].pct_change()

# Funzione per calcolare Beta
def rolling_beta(df, window):
    """Return the rolling beta of SOL returns against BTC returns.

    Beta is the rolling covariance of ``df["SOL_Return"]`` with
    ``df["BTC_Return"]`` divided by the rolling variance of
    ``df["BTC_Return"]``, both taken over the same ``window`` rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame that provides the ``"SOL_Return"`` and ``"BTC_Return"`` columns.
    window : int
        Number of consecutive rows in each rolling window.

    Returns
    -------
    The element-wise ratio covariance / variance, aligned with ``df``'s index.
    """
    benchmark_returns = df["BTC_Return"]
    asset_returns = df["SOL_Return"]

    covariance = asset_returns.rolling(window).cov(benchmark_returns)
    benchmark_variance = benchmark_returns.rolling(window).var()
    return covariance / benchmark_variance

# Window length (in rows) for the rolling beta, e.g. 20 days.
# It has to exist before rolling_beta is called below.
n = 20

# Add the beta column, then drop every row that still contains a NaN
# (the first return and the rolling warm-up rows).
beta_series = rolling_beta(df, n)
df["Beta"] = beta_series
df = df.dropna()


In [5]:
n = 20  # look-back period for the Bollinger bands and the beta
k = 2  # number of standard deviations between the SMA and each band

# Bollinger bands: rolling mean of the SOL adjusted close, plus and minus
# k rolling standard deviations over the same n-row window.
close_window = df["SOL_Adj_Close"].rolling(window=n)
df["SMA"] = close_window.mean()
df["StdDev"] = close_window.std()

band_offset = k * df["StdDev"]
df["Upper_Band"] = df["SMA"] + band_offset
df["Lower_Band"] = df["SMA"] - band_offset

# Remove the warm-up rows for which the rolling statistics are NaN.
df = df.dropna()


In [6]:
def rolling_sharpe_ratio(df, window, risk_free_rate):
    """Return the rolling Sharpe ratio of ``df["SOL_Return"]``.

    For each window the result is
    ``(rolling mean - risk_free_rate) / rolling standard deviation``.
    No annualisation or other rescaling is applied.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame that provides the ``"SOL_Return"`` column.
    window : int
        Number of consecutive rows in each rolling window.
    risk_free_rate : float
        Value subtracted from the rolling mean; it must be expressed in the
        same per-row units as ``"SOL_Return"``.

    Returns
    -------
    The rolling Sharpe ratio, aligned with ``df``'s index.
    """
    returns_window = df["SOL_Return"].rolling(window)
    excess_mean = returns_window.mean() - risk_free_rate
    return excess_mean / returns_window.std()
# Rate subtracted from the rolling mean return inside rolling_sharpe_ratio.
# NOTE(review): SOL_Return is a row-to-row pct_change on what appears to be
# daily data, so 0.01 is applied as 1% per row. Confirm this is not an annual
# rate that still needs converting to the return frequency.
risk_free_rate = 0.01

# Add the rolling Sharpe ratio over the same n-row window, then drop the
# warm-up rows that are NaN.
sharpe_series = rolling_sharpe_ratio(df, n, risk_free_rate)
df["Sharpe_Ratio"] = sharpe_series
df = df.dropna()


In [15]:
# Remove the BTC columns that are no longer needed: every column downloaded
# for btc_ticker, plus the derived BTC_Return column.
#
# Columns are selected by label rather than by position, and df is rebound
# instead of mutated in place. Re-running this cell, or adding an indicator
# column in an earlier cell, therefore cannot drop the wrong columns: a second
# run simply finds nothing left to remove.
#
# Assumes the (Price, Ticker) MultiIndex column layout shown in this cell's
# output, where each column label is a tuple carrying the ticker symbol.
keep_mask = [
    not (btc_ticker in labels or "BTC_Return" in labels)
    for labels in (
        col if isinstance(col, tuple) else (col,) for col in df.columns
    )
]
df = df.loc[:, keep_mask]
df.head()

Price,SOL_Adj_Close,Close,High,Low,Open,Volume,SOL_Return,Beta,SMA,StdDev,Upper_Band,Lower_Band,Sharpe_Ratio
Ticker,SOL-USD,SOL-USD,SOL-USD,SOL-USD,SOL-USD,SOL-USD,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
2020-06-07 00:00:00+00:00,0.616578,0.616578,0.624444,0.593398,0.622443,716785,-0.009423,0.427358,0.595371,0.027581,0.650533,0.540209,-0.201951
2020-06-08 00:00:00+00:00,0.668313,0.668313,0.679001,0.61331,0.615078,1440234,0.083907,0.42893,0.597564,0.031483,0.660529,0.534599,-0.085599
2020-06-09 00:00:00+00:00,0.658002,0.658002,0.668088,0.627242,0.667784,988327,-0.015428,0.346123,0.601506,0.0339,0.669306,0.533706,-0.03844
2020-06-10 00:00:00+00:00,0.644867,0.644867,0.670043,0.633404,0.658038,1096203,-0.019962,0.524364,0.603304,0.03524,0.673784,0.532823,-0.103663
2020-06-11 00:00:00+00:00,0.573742,0.573742,0.650535,0.570082,0.644888,1122221,-0.110294,0.739265,0.600047,0.034787,0.66962,0.530474,-0.229623


# PyTorch Dataset

### Description of the Code below

This code demonstrates how to preprocess a dataset stored in a Pandas DataFrame and transform it into a format suitable for use with PyTorch, while ensuring the data can be efficiently loaded in batches for training or inference.

1. **Feature and Target Definition**:
   - The column `Close` is chosen as the target variable (`y`), representing the Solana price.
   - The remaining columns are used as the feature set (`X`).

2. **Feature Normalization**:
   - The features are scaled using `StandardScaler` from `sklearn` to ensure they have zero mean and unit variance. This helps improve the stability and convergence of training neural networks.

3. **Conversion to PyTorch Tensors**:
   - The normalized features (`X`) are converted into a PyTorch tensor of type `torch.float32`.
   - The target (`y`) is likewise converted into a `torch.float32` tensor and reshaped to `(-1, 1)`, i.e. a single column with one target value per row, so that it lines up row by row with `X`.

4. **Custom DataLoader Function**:
   - A function, `load_array`, is defined to create a PyTorch `DataLoader`. This function:
     - Accepts `data_arrays` (a tuple of feature and target tensors).
     - Uses unpacking (`*data_arrays`) to pass the tensors dynamically to `TensorDataset`.
     - Returns a `DataLoader` with a specified batch size.
     - Includes the `is_train` parameter to control whether data shuffling is enabled. Here, it defaults to `False` to maintain the historical order of the data.

5. **Batch Loading**:
   - The tensors for features (`X_tensor`) and target (`y_tensor`) are packed into a tuple `data_arrays`.
   - A `DataLoader` is created with a batch size of 32, ensuring that the data is processed in manageable chunks.

This code is well-suited for historical data processing (e.g., time-series or financial data) where the order of data is important, thanks to the use of `shuffle=False`. The preprocessing ensures the dataset is ready for efficient model training or evaluation in PyTorch.

In [34]:
import torch
from torch.utils.data import TensorDataset, DataLoader
from sklearn.preprocessing import StandardScaler

# `df` is the pandas DataFrame built in the cells above.
# Target: the level-0 'Close' column. Features: every remaining column.
# NOTE(review): 'SOL_Adj_Close' stays in the features and matches 'Close' in
# the displayed df.head() output — possible target leakage, please confirm.
feature_frame = df.drop(columns=['Close'], level = 0)
X = feature_frame.values  # Features
y = df['Close'].values  # Target

# Put all features on the same numerical scale (zero mean, unit variance).
# NOTE(review): the scaler is fitted on every row of X; if a train/test split
# follows, confirm that fitting on the full history is intended.
scaler = StandardScaler()
scaler.fit(X)
X_normalized = scaler.transform(X)

# Convert to float32 PyTorch tensors; the target becomes a single column.
X_tensor = torch.tensor(X_normalized, dtype=torch.float32)
y_flat = torch.tensor(y, dtype=torch.float32)
y_tensor = y_flat.reshape(-1, 1)

# Define a function for loading data with unpacking
def load_array(data_arrays, batch_size, is_train=False):
    """Wrap tensors in a TensorDataset and return a DataLoader over it.

    Parameters
    ----------
    data_arrays : sequence of tensors
        Tensors to bundle together; they are unpacked into ``TensorDataset``.
    batch_size : int
        Number of samples per batch.
    is_train : bool, default False
        Passed to ``DataLoader`` as ``shuffle``. It defaults to False so the
        data is read in its historical order.

    Returns
    -------
    torch.utils.data.DataLoader
    """
    return DataLoader(
        TensorDataset(*data_arrays),
        batch_size=batch_size,
        shuffle=is_train,
    )

# Build the DataLoader: bundle features and target into a tuple and read them
# in batches of 32, using load_array's default (unshuffled) order.
batch_size = 32
data_arrays = (X_tensor, y_tensor)
data_loader = load_array(data_arrays, batch_size)


In [35]:
# Peek at the first batch of examples produced by the loader.
batch_iterator = iter(data_loader)
next(batch_iterator)

[tensor([[-1.0299, -1.0340, -1.0272, -1.0291, -0.8493, -0.2259, -1.1023, -1.0366,
          -0.9305, -1.0500, -0.9965, -0.2898],
         [-1.0291, -1.0332, -1.0269, -1.0292, -0.8490,  1.1516, -1.1000, -1.0365,
          -0.9299, -1.0498, -0.9966,  0.1287],
         [-1.0292, -1.0334, -1.0267, -1.0284, -0.8492, -0.3146, -1.2200, -1.0365,
          -0.9296, -1.0497, -0.9966,  0.2983],
         [-1.0294, -1.0333, -1.0266, -1.0285, -0.8491, -0.3815, -0.9618, -1.0365,
          -0.9294, -1.0496, -0.9966,  0.0637],
         [-1.0305, -1.0336, -1.0276, -1.0287, -0.8491, -1.7148, -0.6504, -1.0365,
          -0.9294, -1.0497, -0.9967, -0.3893],
         [-1.0302, -1.0343, -1.0276, -1.0298, -0.8492,  0.4309, -0.6187, -1.0365,
          -0.9294, -1.0497, -0.9967, -0.1620],
         [-1.0300, -1.0342, -1.0273, -1.0295, -0.8493,  0.2878, -0.9034, -1.0365,
          -0.9297, -1.0497, -0.9966,  0.2104],
         [-1.0303, -1.0341, -1.0274, -1.0292, -0.8493, -0.6168, -0.9363, -1.0365,
          -0.92