Qui procederemo a creare un insieme di modelli che lavorano contemporaneamente producendo i loro output sulla base dei quali verrà applicata la legge della total variance.



In [55]:

! pip install wandb
! pip install finta
! pip install pandas_ta

import pandas as pd
import torch
from torch import nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import statistics as stat
import wandb

wandb.login()
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")




[34m[1mwandb[0m: Currently logged in as: [33mstefanobutera98[0m. Use [1m`wandb login --relogin`[0m to force relogin


# Model class

In [56]:
class CNNLSTMModel(nn.Module):
  def __init__(self, input_size, num_lstm_layers, dropout,
               sequence_length,nStepsAhead):
    super().__init__()
    self.jitter = 1e-6

    # model definition
    self.hybridNetwork = nn.Sequential(
        nn.Conv1d(in_channels=input_size,
                  out_channels=sequence_length,
                  kernel_size=1),
        nn.ReLU(),
        nn.LSTM(input_size=sequence_length,
                hidden_size=sequence_length,
                num_layers=num_lstm_layers,
                dropout=dropout,
                batch_first=False,bidirectional=True),
    )
    self.endLinearLayer = nn.Linear(2 * sequence_length,2 * nStepsAhead)

  def forward(self, x):
    # Apply CNN layers
    x = x.permute(0, 2, 1)
    x = self.hybridNetwork[0](x)  # Conv1d
    x = self.hybridNetwork[1](x)  # ELU activation

    # Permute the dimensions for LSTM
    x = x.permute(2, 0, 1)

    # Apply LSTM layers
    x, (hidden, cell) = self.hybridNetwork[2](x)

    # Only take the output from the last time-step
    x = x[-1, :, :]

    # Apply final linear layer
    x = self.endLinearLayer(x)
    means ,variances = x.chunk(2, dim = -1)
    variances = self.jitter + variances
    variances = torch.clamp(variances, min=1e-3)  # Enforce a minimum positive variance
    distributions = torch.distributions.Normal(means, variances)
    return distributions

class HybridModelFCLSTM(nn.Module):

  def __init__(self,input_size,num_lstm_layers,dropout,
               window_size,nStepsAhead):
    super().__init__()

    # Definizione del modello usando nn.Sequential
    self.hybridNetwork = nn.Sequential(
        nn.Linear(input_size, window_size),
        nn.ReLU(),
        nn.Linear(window_size,window_size),
        nn.ReLU(),
        nn.Dropout(dropout),
        nn.LSTM(input_size=window_size,
                hidden_size=window_size,
                num_layers=num_lstm_layers,
                dropout=dropout, # il dropout viene inserito qui o devo inserire io nella pipline lo strato di dropOut?
                batch_first=False,bidirectional=True),
    )
    self.endLinearLayer = nn.Linear(window_size,nStepsAhead)

  def forward(self, x):
    # Apply FC layers
   # x = x.permute(0, 2, 1)
    x = self.hybridNetwork[0](x)
    x = self.hybridNetwork[1](x)

    # Permute the dimensions for LSTM
    x = x.permute(1, 0, 2)


    # Apply LSTM layers
    x, (hidden, cell) = self.hybridNetwork[5](x)

    # Only take the output from the last time-step
    x = x[-1, :, :]

    # Apply final linear layer
    x = self.endLinearLayer(x)
    means ,variances = x.chunk(2, dim = -1)
    variances = self.jitter + variances
    variances = torch.clamp(variances, min=1e-4)  # Enforce a minimum positive variance
    distributions = torch.distributions.Normal(means, variances)
    return x




## Configuration

In [57]:


# farli eseguire le predizioni a tutti e gestirle, applicare la legge della varianza


# Predictions

## Make DataSet

In [58]:

import pandas as pd
from finta import TA
import pandas_ta as pta
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
def makeDataSet(extended=False):
  dataSet = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/FeatureEngineering&DATA/data/BTCUSDT-spot-1h.csv",parse_dates=True)

  SpxData = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/FeatureEngineering&DATA/data/^SPX.csv", parse_dates=True)
  SpxData.rename(columns={'Date': 'date'}, inplace=True)
  SpxData.set_index('date', inplace=True)

  DxyData = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/FeatureEngineering&DATA/data/DX-Y.NYB.csv", parse_dates=True)
  DxyData.rename(columns={'Date': 'date'}, inplace=True)
  DxyData.set_index('date', inplace=True)

  DIXGEXData = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/FeatureEngineering&DATA/data/DIX.csv", parse_dates=True)
  DIXGEXData.rename(columns={'Date': 'date'}, inplace=True)
  DIXGEXData.set_index('date', inplace=True)
  dataSet = dataSet.iloc[1:]

  # Feature extraction using both `finta` and `pandas_ta`

  # Exponential Moving Average (EMA) features
  dataSet["EMA_21"] = TA.EMA(dataSet, 21)
  dataSet["EMA_50"] = TA.EMA(dataSet, 50)
  dataSet["EMA_200"] = TA.EMA(dataSet, 200)
  dataSet["HLC3"] = (dataSet["close"]  + dataSet["high"] + dataSet["low"] ) / 3

  # Average True Range (ATR) features
  dataSet["ATR"] = TA.ATR(dataSet, 24)  # Daily ATR
  dataSet["ATR_168"] = TA.ATR(dataSet, 168)  # Weekly ATR

  # Relative Strength Index (RSI)
  dataSet["RSI"] = TA.RSI(dataSet, 14)

  # Moving Average Convergence Divergence (MACD)
  macd_data = TA.MACD(dataSet)
  dataSet["MACD_line"] = macd_data["MACD"]
  dataSet["MACD_signal"] = macd_data["SIGNAL"]

  # Hull Moving Average (HMA) features

  dataSet["HMA_24"] = pta.hma(dataSet["close"], 24)

  # Bollinger Bands (BB) features
  bbands_10_data = pta.bbands(dataSet["close"], length=10, std=1.5)
  bbands_20_data = pta.bbands(dataSet["close"], length=20, std=2)
  bbands_50_data = pta.bbands(dataSet["close"], length=50, std=2.5)

  # Join BB data to the main dataset
  dataSet = dataSet.join(bbands_10_data)
  dataSet = dataSet.join(bbands_20_data)
  dataSet = dataSet.join(bbands_50_data)

  # More indicators
  dataSet["ebws"] = pta.ebsw(dataSet["close"])
  fisher_data = pta.fisher(dataSet["high"], dataSet["low"], 16)
  dataSet["FISHER"] = fisher_data.iloc[:, 0]
  dataSet["FISHERT"] = fisher_data.iloc[:, 1]

  # Detrended Price Oscillator (DPO)
  dataSet["DPO14"] = pta.dpo(close=dataSet['close'], length=14, centered=False)
  # Convert the 'date' column to a datetime format
  dataSet['date'] = pd.to_datetime(dataSet['date'])

# Set the 'date' column as the index
  dataSet = dataSet.set_index('date')


  dataSetD = dataSet.resample('D').agg({
      'open': 'first',  # Primo valore del giorno
      'high': 'max',  # Massimo valore delle 24 ore
      'low': 'min',  # Minimo valore delle 24 ore
      'close': 'last'  # Ultimo valore del giorno
  })

  previous_day = dataSetD.shift(1)
  pp = (dataSetD['high'] + dataSetD['low'] + dataSetD['close']) / 3

  r4 = pp + ((previous_day["high"] - previous_day["low"]) * 1.382)
  r3 = pp + ((previous_day["high"] - previous_day["low"]) * 1)
  r2 = pp + ((previous_day["high"] - previous_day["low"]) * 0.618)
  r1 = pp + ((previous_day["high"] - previous_day["low"]) * 0.382)

  s1 = pp - ((previous_day["high"] - previous_day["low"]) * 0.382)
  s2 = pp - ((previous_day["high"] - previous_day["low"]) * 0.618)
  s3 = pp - ((previous_day["high"] - previous_day["low"]) * 1)
  s4 = pp - ((previous_day["high"] - previous_day["low"]) * 1.382)

  # Combine the pivot levels into a single DataFrame
  pivot_data = pd.DataFrame(
      {
          'pivot': pp,
          's1': s1,
          's2': s2,
          's3': s3,
          's4': s4,
          'r1': r1,
          'r2': r2,
          'r3': r3,
          'r4': r4
      },
      index=dataSetD.index)
  # Merge the pivot data for each day into each row of hourly data that falls under that day
  dataSet = pd.merge_asof(dataSet.sort_index(),
                          pivot_data.sort_index(),
                          left_index=True,
                          right_index=True,
                          direction='backward')

  # 35, 18, 10
  # Future Line of Demarcation (FLD)
  for period in [97, 193, 385]:  # Organize the periods to calculate FLD
    src = (dataSet["close"]  + dataSet["high"] + dataSet["low"] ) / 3
    dataSet[f'FLD{period}'] = src.shift(period)
    dataSet[f'FLD{period}'].fillna(0, inplace=True)



  # Volume Weighted Average Price (VWAP)
  dataSet["D-VWAP"] = pta.vwap(high=dataSet["high"],
                              low=dataSet["low"],
                              close=dataSet["close"],
                              volume=dataSet["volume"])
  dataSet["W-VWAP"] = pta.vwap(high=dataSet["high"],
                              low=dataSet["low"],
                              close=dataSet["close"],
                              volume=dataSet["volume"],
                              anchor="W")

  # Volume Weighted Moving Average (VWMA)
  dataSet["VWMA"] = pta.vwma(close=dataSet["close"],
                            volume=dataSet["volume"],
                            length=20)

  # Easy of market move
  dataSet["EOM"] = pta.eom(high=dataSet["high"],
                          low=dataSet["low"],
                          close=dataSet["close"],
                          volume=dataSet["volume"])

  # Money flow index:
  dataSet["MFI"] = pta.mfi(high=dataSet["high"],
                          low=dataSet["low"],
                          close=dataSet["close"],
                          volume=dataSet["volume"],
                          length=14)

  SpxData.drop(SpxData.index[:2])
  DxyData.drop(DxyData.index[:3])
  SpxData.fillna(method='ffill', inplace=True)
  DxyData.fillna(method='ffill', inplace=True)

  print("_____ test IMR 1________ ")
  print(SpxData.head())
  print(DxyData.head(-1))
  print("______________________")
  # inter-market relation (IMR)
  SpxData['SPX500-DailyReturn%'] = SpxData['Close'].pct_change() * 100
  SpxData['SPXEMA20'] = TA.EMA(SpxData, 20)

  DxyData['DXY-DailyReturn%'] = DxyData['Close'].pct_change() * 100
  DxyData['DXYEMA200'] = TA.EMA(DxyData, 200)

  dataSetIMR = dataSet  # in this way we can test if IMR is useful or not

  #SpxData.drop(SpxData.index[:7168])
  #DxyData.drop(DxyData.index[:7168])

  print("_____ test IMR 2________ ")
  #print(SpxData.head())
  #print(DxyData.head(-1))
  print("______________________")

  SpxData.index = pd.to_datetime(SpxData.index)
  DxyData.index = pd.to_datetime(DxyData.index)
  DIXGEXData.index = pd.to_datetime(DIXGEXData.index)
  SpxData.index = SpxData.index.tz_localize(None)
  DxyData.index = DxyData.index.tz_localize(None)
  DIXGEXData.index = DIXGEXData.index.tz_localize(None)
  dataSetIMR.index = dataSetIMR.index.tz_localize(
      None) if dataSetIMR.index.tz is not None else dataSetIMR.index

  print("test DIX ______________")
  #print(DIXGEXData.head())
  #print(DIXGEXData.head(-1))
  print("______________________")

  dataSetIMR = pd.merge_asof(dataSetIMR.sort_index(),
                            SpxData[['SPX500-DailyReturn%', 'SPXEMA20']],
                            left_index=True,
                            right_index=True,
                            direction='backward')
  dataSetIMR = pd.merge_asof(dataSetIMR.sort_index(),
                            DxyData[['DXY-DailyReturn%', 'DXYEMA200']],
                            left_index=True,
                            right_index=True,
                            direction='backward')
  dataSetIMR = pd.merge_asof(dataSetIMR.sort_index(),
                            DIXGEXData[['dix', 'gex']],
                            left_index=True,
                            right_index=True,
                            direction='backward')

  print("test dataSetIMR ______________")
  print(dataSetIMR.head())
  print(dataSetIMR.head(-1))
  print("______________________")
  # Find the index of the first non-NaN row across all columns
  first_valid_index = dataSetIMR.dropna().index[0]

  first_valid_location = dataSetIMR.index.get_loc(first_valid_index)

  dataSetIMRCleaned = dataSetIMR.iloc[first_valid_location:]

  print(dataSetIMR[['SPXEMA20', 'DXYEMA200']].dtypes)
  print(dataSetIMR[['SPXEMA20', 'DXYEMA200']])

  # dataSetIMR.fillna(method='ffill', inplace=True)  # ho le chiusure del weekand così risolvo il problema

  dataSetIMRCleaned = dataSetIMRCleaned.select_dtypes(
      include=['float64', 'int64'])
  dataSetIMRCleaned['SPX500-DailyReturn%'] = pd.to_numeric(
      dataSetIMRCleaned['SPX500-DailyReturn%'], errors='coerce')
  dataSetIMRCleaned['DXY-DailyReturn%'] = pd.to_numeric(
      dataSetIMRCleaned['DXY-DailyReturn%'], errors='coerce')

  dataSetIMRCleaned['EOM'].fillna(0, inplace=True)

  dataSet.replace([np.inf, -np.inf], 0, inplace=True)

  pd.set_option('display.max_columns', None)

  print("test dataSetIMR ______________")
  #print(dataSetIMRCleaned)
  #dataSetIMRCleaned.info()
  print("______________________")
  #dataSetIMRCleaned.to_csv('FeatureEngineering&DATA/dataSetIMRCleanedHLC3.csv',index=False)
  #dataSetIMRCleaned.head()
  """
  if extended == False:
     # eliminare le feature non necessarie ottenute con boruta
  """
  return dataSetIMRCleaned

In [59]:
dataSet2 = makeDataSet()
originalDataSet2 = dataSet2.copy()
colDataSet2 = dataSet2.columns.tolist()
# Dataset download
dataSet2.drop(labels=['HLC3'], axis=1, inplace=True)
oc2 = (dataSet2['open']+ dataSet2['close'])/2
dataSet2.insert(0, 'oc2', oc2)
dataSet2.replace([np.inf, -np.inf], np.nan, inplace=True)
dataSet2.fillna(method='ffill', inplace=True)
dataSet2.fillna(method='bfill', inplace=True)
dataSet2.info()
scaler2 = MinMaxScaler()
scalerOC2 = MinMaxScaler()
oc2Array = np.array(dataSet2["oc2"]).reshape(-1, 1)
scalerOC2.fit(oc2Array)
scaler2.fit(dataSet2)
dataSet2 = scaler2.transform(dataSet2)
dataSet2 = pd.DataFrame(dataSet2)

  dataSet["D-VWAP"] = pta.vwap(high=dataSet["high"],
  dataSet["W-VWAP"] = pta.vwap(high=dataSet["high"],


_____ test IMR 1________ 
                   Open         High          Low        Close    Adj Close  \
date                                                                          
2017-01-03  2251.570068  2263.879883  2245.129883  2257.830078  2257.830078   
2017-01-04  2261.600098  2272.820068  2261.600098  2270.750000  2270.750000   
2017-01-05  2268.179932  2271.500000  2260.449951  2269.000000  2269.000000   
2017-01-06  2271.139893  2282.100098  2264.060059  2276.979980  2276.979980   
2017-01-09  2273.590088  2275.489990  2268.899902  2268.899902  2268.899902   

                Volume  
date                    
2017-01-03  3773010000  
2017-01-04  3768890000  
2017-01-05  3785080000  
2017-01-06  3342080000  
2017-01-09  3219730000  
                  Open        High         Low       Close   Adj Close  Volume
date                                                                          
2017-01-03  102.870003  103.820000  102.589996  103.209999  103.209999     0.0
2017-01-

## Models service functions

In [60]:
def compute_loss(model, x, y, kl_reg=0.1):
    y_hat = model.get_NNmodel()(x)
    neg_log_likelihood = -y_hat.log_prob(y)
    return torch.mean(neg_log_likelihood),y_hat

from sklearn.metrics import mean_squared_error

def compute_mse(y_hat, y_test):
    predMeans = y_hat.loc.detach().cpu().numpy()
    mse = mean_squared_error(y_test.detach().cpu().numpy(),predMeans)

    return mse

def getNweights(model):
  nweights = 0
  for name,weights in model.get_NNmodel().named_parameters():
    if 'bias' not in name:
      nweights = nweights + weights.numel()

  return nweights

def flatten_if_nested(lst):

    # Initialize an empty list to store the flattened elements
    flat_list = []

    # Iterate through each element in the input list
    for item in lst:
        # If the element is a list, extend the flat list with the flattened version of this element
        if isinstance(item, list):
            flat_list.extend(flatten_if_nested(item))
        # If the element is not a list, simply append it to the flat list
        else:
            flat_list.append(item)

    return flat_list

class ValidationLossEarlyStopping:
    def __init__(self, patience=1, min_delta=0.0):
        self.patience = patience  # number of times to allow for no improvement before stopping the execution
        self.min_delta = min_delta  # the minimum change to be counted as improvement
        self.counter = 0  # count the number of times the validation accuracy not improving
        self.min_validation_loss = float('inf')

    # return True when validation loss is not decreased by the `min_delta` for `patience` times
    def early_stop_check(self, validation_loss,epoch):
      if epoch > 300:
        if ((validation_loss+self.min_delta) < self.min_validation_loss):
            self.min_validation_loss = validation_loss
            self.counter = 0  # reset the counter if validation loss decreased at least by min_delta
        elif ((validation_loss+self.min_delta) > self.min_validation_loss):
            self.counter += 1 # increase the counter if validation loss is not decreased by the min_delta
            if self.counter >= self.patience:
                return True
        return False
      return False
class TimeSeriesDataset(torch.utils.data.Dataset):
    def __init__(self, X_data, y_data):
        super(). __init__()
        self.X_data = X_data
        self.y_data = y_data


    def __len__(self):
        return len(self.X_data)

    def __getitem__(self, index):
        return self.X_data[index], self.y_data[index]


def getDataSet(x,y):
    dataSet = TimeSeriesDataset(x, y)
    return dataSet


def getDataLoader(dataSet,model):
  loader = DataLoader(dataset=dataSet,
                          batch_size=int(model.batch_size),
                          shuffle=False)
  return loader

In [61]:

def slidingWindowsAhead(df,windowSize, nStepsAhead,dimTrValDataSet):
  x = []
  y = []
  dataLen = df.shape[0] # n rows
  for i in range(windowSize, dataLen, nStepsAhead):
      if i + nStepsAhead > dataLen:
          break  # If we do not have enough data to predict further, we break the loop.
    # '_x' will contain 'windowSize' values as input data
      _x = df.iloc[i-windowSize:i].values
    # '_y' will contain 'nStepsAhead' values as labels
      _y = df.iloc[i:i+nStepsAhead, 0].values
      x.append(_x)
      y.append(_y)

  # convert windows in tensor
  np.array(x)
  np.array(y)
  trVlWnum = int(len(x) * dimTrValDataSet)
  testWnum = len(x) - trVlWnum  # Remaining data for testing

  trVlX = x[:trVlWnum]
  testX = x[trVlWnum:]
  trVlY = y[:trVlWnum]
  testY = y[trVlWnum:]

  #print("_______ Dim mX & mY ______")
  #print(f"dim mX:{len(x)} dim mY:{len(y)}")
  #print("_____Dimension X & Y _____")
  #print(f"dim trValX:{len(trVlX)} --- dim trValY:{len(trVlY)} ")

  #print(f"dim testX:{len(testX)} --- dim testY:{len(testY)} ")

  # Convert from numpy arrays to torch tensors
  trVlX_tensor = torch.tensor(trVlX, dtype=torch.float32)
  trVlY_tensor = torch.tensor(trVlY, dtype=torch.float32).unsqueeze(1)
  testX_tensor = torch.tensor(testX, dtype=torch.float32)
  testY_tensor = torch.tensor(testY, dtype=torch.float32).unsqueeze(1)

  #print("_____Dimension X & Y tensor _____")
  #print(f"dim TR&VAL X:{trVlX_tensor.shape} --- dim TR&VAL Y:{trVlY_tensor.shape} ")
  #print(f"dim TEST X:{testX_tensor.shape} --- dim TEST Y:{testY_tensor.shape} ")

  #print(f"trVlX_tensor{trVlX_tensor[:10]}")
  #print(f"trVlX_tensor{trVlY_tensor[:10]}")


  return trVlX_tensor,trVlY_tensor,testX_tensor,testY_tensor


### service loops

In [62]:
def testLoop(model, testX_tensor,testY_tensor):
  all_predictionsVL = []
  all_targetsVL = []
  model.get_NNmodel().eval()
  losses_v, mse_v = [], []
  dataSetTS = getDataSet(testX_tensor,testY_tensor)
  test_loader =getDataLoader(dataSetTS,model)
  with torch.no_grad():
    for batch_idx, (data, target) in enumerate(test_loader):
      data, target = data.to(device), target.to(
        device)
      target = target.squeeze(1)
      loss,y_hat = compute_loss(model, data, target)
      losses_v.append(loss.detach().cpu().numpy())
      all_predictionsVL.append(y_hat)
      all_targetsVL.extend(
            target.squeeze().tolist())
  losses_v = np.array(losses_v)
  avg_loss = np.mean(losses_v)

  return avg_loss,all_predictionsVL,all_targetsVL

In [63]:
def trainLoop(model,config,loader,optimizer,all_predictionsTR,all_targetsTR,nweights):
  model.train()
  train_loss = 0
  losses_t, mse_v = [], []
  for _, (data,target) in enumerate(loader):

    optimizer.zero_grad()
    data, target = data.to(config.device), target.to(config.device)
    target = target.squeeze(1)
    # Forward pass ➡
    loss,y_hat = compute_loss(model, data, target)
      # reg L1
    L1_term = torch.tensor(0., requires_grad=True).to(config.device)

    for name, weights in model.named_parameters():
        if 'bias' not in name:
          L1_term += torch.sum(torch.abs(weights))
    L1_term /= nweights
    loss += L1_term * config.l1_lambda
    loss.backward()
    optimizer.step()
    b_val_mse = compute_mse(y_hat, target)
    losses_t.append(loss.detach().cpu().numpy())
    mse_v.append(b_val_mse)
    #means = y_hat.loc.tolist()
    #std_devs = y_hat.scale.tolist()

    all_predictionsTR.append(y_hat)
    all_targetsTR.extend(
            target.squeeze().tolist())

  losses_t = np.array(losses_t)
  avg_loss = np.mean(losses_t)
  avg_mse_v = np.mean(np.array(mse_v))
  return avg_loss,avg_mse_v

In [64]:
def validationLoop(model,config,loader,optimizer,all_predictionsVL,all_targetsVL):
  model.eval()
  losses_v, mse_v = [], []
  with torch.no_grad():
    for _, (data,target) in enumerate(loader):
      data, target = data.to(config.device), target.to(config.device)
      target = target.squeeze(1)
      # Forward pass ➡
      loss,y_hat = compute_loss(model, data, target)
      b_val_mse = compute_mse(y_hat, target)
      losses_v.append(loss.detach().cpu().numpy())
      mse_v.append(b_val_mse)
      #means = y_hat.loc.tolist()
      #std_devs = y_hat.scale.tolist()

      all_predictionsVL.append(y_hat)
      all_targetsVL.extend(
            target.squeeze().tolist())
  losses_v = np.array(losses_v)
  avg_loss = np.mean(losses_v)
  avg_mse_v = np.mean(np.array(mse_v))
  return avg_loss,avg_mse_v

In [65]:
import math
TRLossHist = [] # per ogni epoca ho la loss di tutte le finestre training
TRRmseHist = []

VLLossHist = [] # per ogni epoca ho la loss di tutte le finestre di validazione
VLRmseHist = []

def trWvalWalg(model, trVlX_tensor,trVlY_tensor, optimizer,scheduler, config,earlyStopper,nWeights):
  """ algoritmo per il training e la validazione su rolling windows in continuo """
# aggiungere i run per hyperparameters sweep
  nweights = getNweights(model)
  wandb.watch(model, log="all")


  for epoch in range(config.num_epochs):
    train_loss = 0
    val_loss = 0

    all_avgLossTR = []
    all_avgLossVL = []

    all_avgmseTR = []
    all_avgmseVL = []

    all_predictionsTR = []
    all_targetsTR = []

    all_predictionsVL = []
    all_targetsVL = []

    print(f"_______Epoch {epoch}/{config.num_epochs}:_______ \n")

    for i in range(0, len(trVlX_tensor), config["trW"] + config["vlWl"]):
      train_indices = slice(i, i + config["trW"])
      val_indices = slice(i + config["trW"], i + config["trW"] + config["vlWl"])

      if len(trVlX_tensor[train_indices]) > 0:
        model.train()
        train_dataset = getDataSet(trVlX_tensor[train_indices], trVlY_tensor[train_indices])
        train_loader  = getDataLoader(train_dataset,config)

        # training
        avg_lossTR,avg_rmseTR = trainLoop(model,config,train_loader,optimizer,all_predictionsTR,all_targetsTR,nWeights)

        typeW = "training"
        all_avgLossTR.append(avg_lossTR)
        all_avgmseTR.append(avg_rmseTR)
        wandb.log({"trainLossWindows":avg_lossTR, "trainRmseWindows": avg_rmseTR, "nWindows":i, "Windows type":typeW, "Epoch":epoch })

      if len(trVlX_tensor[val_indices]) > 0:
        model.eval()
        val_dataset = getDataSet(trVlX_tensor[train_indices], trVlY_tensor[train_indices])
        val_loader  = getDataLoader(val_dataset,config)

        #validation
        avg_lossVL,avg_rmseVL = validationLoop(model,config,val_loader,optimizer,all_predictionsVL,all_targetsVL)

        typeW = "validation"
        all_avgLossVL.append(avg_lossVL)
        all_avgmseVL.append(avg_rmseVL)
        wandb.log({"validationLossWindows":avg_lossVL,"ValRmseWindows": avg_rmseVL, "nWindows":i, "Windows type":typeW, "Epoch":epoch })
    if not math.isnan(avg_lossVL):
          scheduler.step(avg_lossVL)

    if earlyStopper.early_stop_check(avg_lossVL,epoch):
      print(f"arlyStopper was triggered at epoch:{epoch}")
      avgLossTr = stat.mean(all_avgLossTR)
      avgmseTr = stat.mean(all_avgmseTR)

      avgLossVl = stat.mean(all_avgLossVL)
      avgmseVl = stat.mean(all_avgmseVL)

      TRLossHist.append(avgLossTr) # per ogni epoca ho la loss di tutte le finestre training
      TRRmseHist.append(avgmseTr)

      VLLossHist.append(avgLossVl) # per ogni epoca ho la loss di tutte le finestre di validazione
      VLRmseHist.append(avgmseVl)

      wandb.log({"trainLossEpoch":avgLossTr,"trainRmseEpoch":avgmseTr,   "Epoch":epoch })
      wandb.log({"valLossEpoch":avgLossVl,"validationRmseEpoch":avgmseVl,  "Epoch":epoch })
      print(f"MeanTrain Loss: {avgLossTr} \n MeanTrain RMSE: {avgmseTr} \n   MeanVal Loss : {avgLossVl} \n  MeanVal RMSE: {avgmseVl} \n")




      return all_predictionsTR,all_targetsTR,all_predictionsVL,all_targetsVL,avgLossTr,avgLossVl,avgmseTr,avgmseVl
    avgLossTr = stat.mean(all_avgLossTR)
    avgmseTr = stat.mean(all_avgmseTR)

    avgLossVl = stat.mean(all_avgLossVL)
    avgmseVl = stat.mean(all_avgmseVL)

    TRLossHist.append(avgLossTr) # per ogni epoca ho la loss di tutte le finestre training
    TRRmseHist.append(avgmseTr)

    VLLossHist.append(avgLossVl) # per ogni epoca ho la loss di tutte le finestre di validazione
    VLRmseHist.append(avgmseVl)

    wandb.log({"trainLossEpoch":avgLossTr,"trainRmseEpoch":avgmseTr,   "Epoch":epoch })
    wandb.log({"valLossEpoch":avgLossVl,"validationRmseEpoch":avgmseVl,  "Epoch":epoch })
    print(f"MeanTrain Loss: {avgLossTr} \n MeanTrain RMSE: {avgmseTr} \n   MeanVal Loss : {avgLossVl} \n  MeanVal RMSE: {avgmseVl} \n")






  return all_predictionsTR,all_targetsTR,all_predictionsVL,all_targetsVL,avgLossTr,avgLossVl,avgmseTr,avgmseVl



## Result Manager

In [66]:
from sklearn.metrics import median_absolute_error,mean_absolute_error,explained_variance_score


class ResultManager():
  def __init__(self,pred,target):
    self.pred = pred
    self.target = target

  def getPred(self):
    return self.predTR


  def getTarget(self):
    return self.targetTR


  def makeDfResult(self):
    pred = flatten_if_nested(self.pred)
    target = flatten_if_nested(self.target)
    means = []
    devs = []
    for y_hat in pred:
      means.extend(y_hat.loc.tolist())
      devs.extend(y_hat.scale.tolist())
    means = flatten_if_nested(means)
    devs = flatten_if_nested(devs)
    df = pd.DataFrame({
      "target":target,
      "mean": means,
      "std": devs,
    })
    return df

  def acper_complessivo(self,val,dfResult):
    threshold = val
    # Calcolo del limite inferiore e superiore
    lower_bound = dfResult["target"] - (threshold * dfResult["target"])
    upper_bound = dfResult["target"] + (threshold * dfResult["target"])
    # Controllo se y_pred è tra i limiti e calcolo la percentuale di True
    matches = (dfResult["mean"] >= lower_bound) & (dfResult["mean"] <= upper_bound)
    # Calcolo la percentuale di corrispondenze soddisfacenti la condizione
    percentuale_acper = (matches.sum() / len(dfResult)) * 100
    return percentuale_acper


  def overallKpiCompute(self,dfResult):


    mse = mean_squared_error(dfResult["target"],dfResult["mean"])
    rmse = mean_squared_error(dfResult["target"],dfResult["mean"], squared=False)
    r2 = r2_score(dfResult["target"],dfResult["mean"])
    mae = mean_absolute_error(dfResult["target"],dfResult["mean"])
    aae = median_absolute_error(dfResult["target"],dfResult["mean"])
    evs = explained_variance_score(dfResult["target"],dfResult["mean"])
    acper2 = self.acper_complessivo(0.02,dfResult)
    acper5 = self.acper_complessivo(0.05,dfResult)
    ape =  np.mean(np.abs((dfResult["target"]- dfResult["mean"])) / np.array(dfResult["target"]), axis=0)

    #  compute accuracy
    real = dfResult["target"]
    predict = dfResult["mean"]
    percentage = 1 - np.sqrt(np.mean(np.square((real - predict) / real)))
    acc =  percentage * 100
    index = [i for i in range(0,1,1)]

    KpiDf = pd.DataFrame({
        "mse": mse,
        "rmse": rmse,
        "r2": r2,
        "mae": mae,
        "evs": evs,
        "acper2": acper2,
        "acper5": acper5,
        "ape": ape,
        "acc": acc,
    },index = index)


    KpiDf = pd.DataFrame(KpiDf)



    return KpiDf


## Make Predictions and KPI computation

## Class model

In [67]:
class Model:
    def __init__(self, config):
      for param_name, param_value in config.items():
          setattr(self, param_name, param_value)
      self.initModel()
      self.score = list()
      self.dfResultTRVL = pd.DataFrame()
      self.dfResultTS = pd.DataFrame()
      self.dfKPI_TRVL = pd.DataFrame()
      self.dfKPI_TS = pd.DataFrame()
      self.resultManagerTRVL  = ResultManager(None,None)
      self.scoreAVG=1

    def initModel(self):
      device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
      print(f"Neuroprobabilistic network: \033[1m{self.model_name}\033[0m is available and ready to face the market  \033[92m✔️\033[0m")



      self.NNmodel = CNNLSTMModel(int(self.input_size), int(self.num_lstm_layers),float(self.dropout),int(self.window_size),int(self.nStepsAhead))
      self.NNmodel.load_state_dict(torch.load(str(self.model_path), map_location=device))
      self.NNmodel = self.NNmodel.to(device)

    def set_dfResultTRVL(self, df):
        self.dfResultTRVL = df

    def set_dfResultTS(self, df):
        self.dfResultTS = df

    def set_dfKPI_TRVL(self, df):
        self.dfKPI_TRVL = df

    def set_dfKPI_TS(self, df):
        self.dfKPI_TS = df

    def set_resultManagerTRVL(self,resultManager):
      self.resultManagerTRVL = resultManager

    def set_resultManagerTS(self,resultManager):
      self.resultManagerTS = resultManager

    def get_NNmodel(self):
      return self.NNmodel

    def __str__(self):
        """Provides a readable string representation of the model."""
        config_str = "\n".join([f"{param}: {getattr(self, param)}" for param in vars(self) if not param.startswith('_')])
        return f"Model Configuration:\n{config_str}"
    # def retrainModels()

    def computeScoreAVG(self):
      dfScore = pd.DataFrame(self.score)
      rolling_mean = dfScore.rolling(3, min_periods=1).mean()
      self.scoreAVG = float(rolling_mean.iloc[-1])  # Extract last element
      return self.scoreAVG




  # nel caso di modelli con la versione ridotta del dataSet bisogna cambiare config["input_size"] = dataSet2.shape[1] con la versione ridotta del dataSet

## Configuration

In [68]:
configFilePath = "/content/drive/MyDrive/Colab Notebooks/NeuroProbabilistico/NeuroProb/ConfigModels.csv"
modelsPTHPath = "/content/drive/MyDrive/Colab Notebooks/NeuroProbabilistico/NeuroProb/NeuroProbPTH/NeuroProbModelsSet/ModelsSet.pth"

# read file and build model using configuration dict
configFile = pd.read_csv(configFilePath, delimiter=';')
dfList =  []
configFile = configFile.T
configFile.columns = configFile.iloc[0]
configFile = configFile.iloc[1:]
configDict = configFile.to_dict(orient='records')
modelList = []
for el in configDict:
  model = Model(el)
  modelList.append(model)
print("All system is \033[92m online \033[0m and ready to face the market \n")
print("\033[1mKATY AI SYSTEM \033[0m is \033[92m online \033[0m and ready to do whatever it takes to face the market!  \033[0m")



Neuroprobabilistic network: [1mRosy[0m is available and ready to face the market  [92m✔️[0m
Neuroprobabilistic network: [1mVisionary[0m is available and ready to face the market  [92m✔️[0m
Neuroprobabilistic network: [1mBloomingSun55[0m is available and ready to face the market  [92m✔️[0m
Neuroprobabilistic network: [1mDeepSweep7[0m is available and ready to face the market  [92m✔️[0m
Neuroprobabilistic network: [1mComicSweep[0m is available and ready to face the market  [92m✔️[0m
Neuroprobabilistic network: [1mPolar[0m is available and ready to face the market  [92m✔️[0m
Neuroprobabilistic network: [1mCoolSweep[0m is available and ready to face the market  [92m✔️[0m
Neuroprobabilistic network: [1mKaty[0m is available and ready to face the market  [92m✔️[0m
All system is [92m online [0m and ready to face the market 

[1mKATY AI SYSTEM [0m is [92m online [0m and ready to do whatever it takes to face the market!  [0m


## Predictions and KPI computation

In [69]:
trVlX_tensor2,trVlY_tensor2,testX_tensor2,testY_tensor2 = slidingWindowsAhead(dataSet2,windowSize = 176,nStepsAhead = 24,dimTrValDataSet = 0.8135103108199945)


dfSetKPI_TRVL = pd.DataFrame()
dfSetKPI_TS = pd.DataFrame()
dfSetPredTS_means = pd.DataFrame()
dfSetPredTS_var = pd.DataFrame()
dfSetPredTRVL_means = pd.DataFrame()
dfSetPredTRVL_var = pd.DataFrame()
targetsTRVL = []
targetsTS = []




for model in modelList:

  avg_lossTRVL,predTRVL,targetTRVL =  testLoop(model, trVlX_tensor2,trVlY_tensor2)
  avg_lossTS2,predTS2,targetTS2 =  testLoop(model, testX_tensor2,testY_tensor2)


  model.set_resultManagerTRVL(ResultManager(predTRVL,targetTRVL))
  model.set_resultManagerTS(ResultManager(predTS2,targetTS2))


  dfResultTRVL= model.resultManagerTRVL.makeDfResult()
  model.set_dfResultTRVL(dfResultTRVL)


  targetsTRVL = pd.DataFrame(scalerOC2.inverse_transform(np.array(dfResultTRVL['target']).reshape(-1, 1)))
  means = scalerOC2.inverse_transform(np.array(dfResultTRVL["mean"]).reshape(-1, 1))
  predictStd = scalerOC2.inverse_transform(np.array(dfResultTRVL["std"]).reshape(-1, 1))
  dfSetPredTRVL_means[f"{model.model_name}"] = pd.DataFrame(means)
  dfSetPredTRVL_var[f"{model.model_name}"] = pd.DataFrame(predictStd)

  dfResultTS = model.resultManagerTS.makeDfResult()
  model.set_dfResultTS(dfResultTS)

  targetsTS = pd.DataFrame(scalerOC2.inverse_transform(np.array(dfResultTS['target']).reshape(-1, 1)))
  means = scalerOC2.inverse_transform(np.array(dfResultTS["mean"]).reshape(-1, 1))
  predictStd = scalerOC2.inverse_transform(np.array(dfResultTS["std"]).reshape(-1, 1))
  dfSetPredTS_means[f"{model.model_name}"] = pd.DataFrame(means)
  dfSetPredTS_var[f"{model.model_name}"] = pd.DataFrame(predictStd)

  kpiTRVL = model.resultManagerTRVL.overallKpiCompute(model.dfResultTRVL)
  model.set_dfKPI_TRVL(kpiTRVL)
  dfSetKPI_TRVL[f"{model.model_name}"] = kpiTRVL.T
  kpiTS = model.resultManagerTS.overallKpiCompute(model.dfResultTS)
  model.set_dfKPI_TS(kpiTS)
  dfSetKPI_TS[f"{model.model_name}"] = kpiTS.T


TSdf = originalDataSet2[(len(originalDataSet2) - len(targetsTS)):]
TRVLdf =  originalDataSet2[:(len(originalDataSet2) - len(targetsTS))]
dfSetPredTRVL_means["target"] = targetsTRVL
dfSetPredTS_means["target"] = targetsTS


### TestSet unsamble KPI

In [70]:
dfSetKPI_TS.head(100)

Unnamed: 0,Rosy,Visionary,BloomingSun55,DeepSweep7,ComicSweep,Polar,CoolSweep,Katy
mse,0.000269,0.000177,0.000394,0.000345,0.00151,0.000366,0.001269,0.000298
rmse,0.016412,0.013322,0.019851,0.018585,0.038859,0.019136,0.035627,0.017259
r2,0.988999,0.992751,0.983906,0.985892,0.938326,0.985044,0.948157,0.987834
mae,0.012092,0.00943,0.015923,0.010198,0.023089,0.01205,0.022999,0.010706
evs,0.989056,0.993991,0.991255,0.987695,0.94814,0.985047,0.950319,0.988545
acper2,43.863816,55.700713,25.999604,59.807997,38.271971,54.740697,27.414885,57.125891
acper5,83.442201,94.893112,72.406968,92.715756,70.031671,85.886778,63.014648,90.380048
ape,0.028362,0.02077,0.036854,0.020822,0.049826,0.025833,0.049823,0.02282
acc,96.413835,97.374051,95.679599,97.157442,92.488696,96.37767,93.241695,96.878043


In [71]:
dfSetPredTRVL_var.head(10)


Unnamed: 0,Rosy,Visionary,BloomingSun55,DeepSweep7,ComicSweep,Polar,CoolSweep,Katy
0,3957.051597,3717.132369,3592.238097,3508.826649,3658.40615,3874.235584,4684.994483,3886.696572
1,4170.305612,3733.00691,3485.267968,3530.797679,5360.368983,3884.093848,3662.746616,3635.711893
2,4142.725453,3720.121509,4344.555593,3538.510104,4488.560627,4034.350075,4931.075044,3638.378774
3,3966.372405,3772.081641,3776.184249,3568.483184,3742.214902,4072.455121,3553.386968,3687.520723
4,4280.711857,3811.660868,3569.818913,3248.187713,3811.249895,3936.714684,3946.591653,3579.99391
5,4026.556619,3813.454267,3513.505724,3563.646037,3753.226103,4010.922881,3823.135821,3579.471233
6,4120.863805,3808.757762,3516.431982,3547.04258,4078.810104,4747.575738,4858.911513,3653.334772
7,4148.156138,3736.582459,3564.422923,3536.723442,4562.012801,4604.732862,4447.008086,3516.641883
8,4574.334029,3707.189216,3528.839538,3549.491287,4236.030449,4119.829961,4105.230982,3634.564843
9,4056.776191,3707.885791,3963.305079,3793.577639,4226.757122,3984.704285,4990.443067,3635.66232


# Law of total variance
$$
Var(Y) = E[Var(Y|X)] + Var(E[Y|X])
$$

 TRVL set law of total variance results

In [72]:
dfSysPredTRVL = pd.DataFrame({
    "mean" : [],
    "var": [],
    "E(var)": [],
    "Var(E)": [],
    "target":[]
})

dfSysPredTS = pd.DataFrame({
    "mean" : [],
    "var": [],
    "E(var)": [],
    "Var(E)": [],
    "target":[]
})

dfSysLastPred = pd.DataFrame({
    "mean" : [],
    "MWvar": [],
    "E(var)": [],
    "Var(E)": [],
})



dfSysPredTRVL_means = dfSetPredTRVL_means.copy()
dfSysPredTRVL_var = dfSetPredTRVL_var.copy()
print(f"{dfSysPredTRVL_means.shape}")

# law of the total variance computation (the mean of prediction mean is weight based on the score error of the models)
for i, (row_means, row_var) in enumerate(zip(dfSysPredTRVL_means.iterrows(), dfSysPredTRVL_var.iterrows())):
    weights = []
    for model in modelList:
        err = abs(row_means[1]["target"] - row_means[1][f"{model.model_name}"])
        model.score.append(1/(err + 1e-10))
        weights.append(model.computeScoreAVG())

    meanOfMeans = np.average(row_means[1][:len(modelList)], weights=weights, axis=0)
    wMeanOfVar = np.average(row_var[1][:len(weights)], weights=weights, axis=0)
    meanOfVar = np.average(row_var[1][:], axis=0)
    varOfMean =  np.var(row_means[1][:len(modelList)], ddof=1,  axis=0)

    dfSysPredTRVL.loc[i,"mean"] = meanOfMeans.item()
    dfSysPredTRVL.loc[i,"var"] = wMeanOfVar.item()
    dfSysPredTRVL.loc[i,"E(var)"] = meanOfVar.item()
    dfSysPredTRVL.loc[i,"Var(E)"] = varOfMean.item()
    dfSysPredTRVL.loc[i,"target"] = row_means[1]["target"].item()


print(dfSysPredTRVL.shape)


dfSysPredTS_means = dfSetPredTS_means.copy()
dfSysPredTS_var = dfSetPredTS_var.copy()
print(f"{dfSysPredTS_means.shape}")

# law of the total variance computation (the mean of prediction mean is weight based on the score error of the models)
for i, (row_means, row_var) in enumerate(zip(dfSysPredTS_means.iterrows(), dfSysPredTS_var.iterrows())):
    weights = []
    for model in modelList:
        err = abs(row_means[1]["target"] - row_means[1][f"{model.model_name}"])
        model.score.append(1/(err + 1e-10))
        weights.append(model.computeScoreAVG())

    meanOfMeans = np.average(row_means[1][:len(modelList)], weights=weights, axis=0)
    wMeanOfVar = np.average(row_var[1][:len(weights)], weights=weights, axis=0)
    meanOfVar = np.average(row_var[1][:], axis=0)
    varOfMean =  np.var(row_means[1][:len(modelList)], ddof=1,  axis=0)

    dfSysPredTS.loc[i,"mean"] = meanOfMeans.item()
    dfSysPredTS.loc[i,"var"] = wMeanOfVar.item()
    dfSysPredTS.loc[i,"E(var)"] = meanOfVar.item()
    dfSysPredTS.loc[i,"Var(E)"] = varOfMean.item()
    dfSysPredTS.loc[i,"target"] = row_means[1]["target"].item()


print(dfSysPredTS.shape)


(43968, 9)


  self.scoreAVG = float(rolling_mean.iloc[-1])  # Extract last element


(43968, 5)
(10104, 9)
(10104, 5)


In [73]:
dfSysPredTRVL.head(-24)

Unnamed: 0,mean,var,E(var),Var(E),target
0,13328.513371,3734.771275,3859.947688,9.144560e+05,13363.720194
1,13366.778601,3769.617800,3932.787439,8.670510e+05,13456.554703
2,13348.689262,3855.174264,4104.784648,8.157522e+05,13591.000350
3,13295.367571,3769.945056,3767.337399,1.072279e+06,13652.455417
4,13455.970060,3736.400150,3773.116187,1.035585e+06,13919.999996
...,...,...,...,...,...
43939,22494.197512,4989.669306,5148.327631,8.357676e+05,22596.924984
43940,22742.852059,3977.731149,4448.953791,1.173837e+06,22633.934056
43941,23282.083103,5115.319540,5033.371509,1.637654e+06,22648.020692
43942,22718.797215,4373.988393,4395.162208,3.629024e+05,22692.588957


In [76]:
dfSysPredTS.head(-24)

Unnamed: 0,mean,var,E(var),Var(E),target
0,23250.358272,4279.821100,4326.382815,6.792611e+05,22984.840084
1,23546.378343,5708.193613,4588.255546,5.189229e+05,22986.905677
2,23526.157845,5132.331698,4885.519084,7.788365e+05,22954.980016
3,22970.269880,4236.890763,4214.606817,2.276997e+05,22966.099061
4,23840.480782,5800.519494,4287.801215,4.349788e+05,23056.524757
...,...,...,...,...,...
10075,60869.754674,7691.335394,8310.123698,9.249946e+06,66687.224242
10076,60279.521787,6312.393071,6810.259845,2.538503e+07,66864.748081
10077,62192.761045,9212.909337,8622.433020,8.674978e+06,66994.761179
10078,61454.989453,6168.880619,6122.571019,2.891228e+06,66982.602011


# Charts

In [82]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats
from plotly.subplots import make_subplots
import plotly.graph_objects as go


def plotPredChart(means, target, confidence_intervals,TSdf):

    confidence_intervals = np.asarray(confidence_intervals).flatten()
    target = np.asarray(target).flatten()
    means = np.asarray(means).flatten()

    df = pd.DataFrame({
        "means": means,
        "target": target,
        "confidence_intervals": confidence_intervals,
    })

    # Create subplots and mention plot grid size
    fig = make_subplots(rows=2, cols=1, shared_xaxes=True,

               row_width=[0.2, 0.7])

# Plot OHLC on 1st row
    fig.add_trace(go.Candlestick(open=TSdf["open"], high=TSdf["high"],
                low=TSdf["low"], close=TSdf["close"], name="OHLC"),
                row=1, col=1)

    # Plot Predicted and Target values
    fig.add_trace(go.Scatter(x=df.index, y=df["means"], mode='lines', name='Predicted Means'), row=1, col=1)
    fig.add_trace(go.Scatter(x=df.index, y=df["target"], mode='lines', name='Targets'), row=1, col=1)


    # Confidence Intervals
    fig.add_trace(go.Scatter(x=np.arange(len(df)), y=df["means"] - df["confidence_intervals"],
                             mode='lines', fill='tonexty', line=dict(color='rgba(0,100,80,0.2)'),), row=1, col=1)
    fig.add_trace(go.Scatter(x=np.arange(len(df)), y=df["means"] + df["confidence_intervals"],
                             mode='lines', fill='tonexty', line=dict(color='rgba(0,100,80,0.2)'),
                             showlegend=False), row=1, col=1)


    # Update layout

    # Show plot
    fig.show()



dt_index = pd.date_range(start='2018-01-08 00:00:00', end='2024-03-22 15:00:00', periods=len(originalDataSet2))
originalDataSet2.set_index(dt_index)  # Set the DatetimeIndex
targets = (dfSysPredTS['target'])
means = (dfSysPredTS['mean'])
predictStd = np.array(dfSysPredTS['var'])
TSdf = originalDataSet2[(len(originalDataSet2) - len(targets)):]


confidence_level = 0.20
alpha = 1 - confidence_level
z_critical = np.abs(stats.norm.ppf(alpha / 2))
print(z_critical)
OCHL4 = (TSdf["open"] + TSdf["close"] + TSdf["high"] + TSdf["low"]) / 4
OCHL4 = pd.DataFrame(OCHL4)
new_index = pd.RangeIndex(start=0, stop=len(targets))

confidence_intervals = z_critical * predictStd  # Ensure "std" column exists
confidence_intervals = pd.DataFrame(confidence_intervals)



OCHL4 = np.asarray(OCHL4).flatten()
confidence_intervals = np.asarray(confidence_intervals).flatten()
targets = np.asarray(targets).flatten()
means = np.asarray(means).flatten()

df = pd.DataFrame({
    "means": means,
    "targets": targets,
    "confidence_intervals": confidence_intervals,
},index = new_index)


plotPredChart(means[:-1], targets[:-1], confidence_intervals[:-1],TSdf[:-1])

plotPredChart(means[72:144], targets[72:144], confidence_intervals[72:144],TSdf[:-1])
plotPredChart(means[144:1500], targets[144:1500], confidence_intervals[144:1500],TSdf[144:1500])
plotPredChart(means[1500:3000],targets[1500:3000],confidence_intervals[1500:3000],TSdf[1500:3000])
plotPredChart(means[3000:3100],targets[3000:3100],confidence_intervals[3000:3100],TSdf[3000:3100])
plotPredChart(means[3100:3500],targets[3100:3500],confidence_intervals[3100:3500],TSdf[3100:3500])
plotPredChart(means[3500:4500],targets[3500:4500],confidence_intervals[3500:4500],TSdf[3500:4500])
plotPredChart(means[4500:5555],targets[4500:5555],confidence_intervals[4500:5555],TSdf[4500:5555])
plotPredChart(means[5555:6555],targets[5555:6555],confidence_intervals[5555:6555],TSdf[5555:6555])
plotPredChart(means[6555:7555],targets[6555:7555],confidence_intervals[6555:7555],TSdf[6555:7555])
plotPredChart(means[7555:8555],targets[7555:8555],confidence_intervals[7555:8555],TSdf[7555:8555])


0.2533471031357997


# System KPI

 TRVL

In [74]:
sysResultManagerTRVL = ResultManager(dfSysPredTRVL["mean"],dfSysPredTRVL["target"])
dfSysKPI = sysResultManagerTRVL.overallKpiCompute(dfSysPredTRVL)
print("sys TRVL KPI")
dfSysKPI.iloc[:, 1: ].head()

sys TRVL KPI


Unnamed: 0,rmse,r2,mae,evs,acper2,acper5,ape,acc
0,616.701052,0.998672,293.619831,0.998702,78.036299,94.889465,0.014451,97.577756


TS

In [77]:
sysResultManagerTS = ResultManager(dfSysPredTS["mean"],dfSysPredTS["target"])
dfSysKPI = sysResultManagerTS.overallKpiCompute(dfSysPredTS)
print("sys TS KPI")
dfSysKPI.iloc[:, 1: ].head()



sys TS KPI


Unnamed: 0,rmse,r2,mae,evs,acper2,acper5,ape,acc
0,881.176141,0.993569,456.555602,0.994184,82.007126,96.882423,0.012072,98.059086


In [78]:
print("\033[1mKATY AI SYSTEM \033[0m is \033[92m online \033[0m the system is tested and  ready to do whatever it takes to face the market!  \033[0m")
print("This is a \033[1mTheBlock_research's\033[0m product")

[1mKATY AI SYSTEM [0m is [92m online [0m the system is tested and  ready to do whatever it takes to face the market!  [0m
This is a [1mTheBlock_research's[0m product
