In [1267]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from minisom import MiniSom

In [1268]:
def calculate_mape(predicted_labels, actual_labels):
    """
    Calculates the Mean Absolute Percentage Error (MAPE) between predicted and actual labels.

    Args:
        predicted_labels (list): List of predicted labels (string).
        actual_labels (list): List of actual labels (string).

    Returns:
        float: Mean Absolute Percentage Error (MAPE).
    """
    assert len(predicted_labels) == len(actual_labels), "Lists must have the same length"

    # Convert string labels to numerical values
    predicted_values = [1 if label == "Up" else 0 for label in predicted_labels]
    actual_values = [1 if label == "Up" else 0 for label in actual_labels]

    # Calculate MAPE using the numerical values
    abs_perc_errors = []
    for pred, actual in zip(predicted_values, actual_values):
        if actual != 0:
            abs_perc_error = abs(actual - pred) / actual * 100
            abs_perc_errors.append(abs_perc_error)

    if len(abs_perc_errors) > 0:
        mape = sum(abs_perc_errors) / len(abs_perc_errors)
    else:
        mape = 0.0

    return mape

In [1269]:
# Load and preprocess data
data = pd.read_csv("BTC-USD.csv", index_col="Date", parse_dates=True)
df = pd.DataFrame(data)
df["Price_Change"] = df["Close"].diff()
df["label"] = (df["Price_Change"] > 0).apply(lambda x: "Up" if x else "Down")
df = df.dropna()

df

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Price_Change,label
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2023-05-21,27118.423828,27265.917969,26706.921875,26753.826172,26753.826172,8647416921,-375.759766,Down
2023-05-22,26749.892578,27045.734375,26549.734375,26851.277344,26851.277344,11056770492,97.451172,Up
2023-05-23,26855.960938,27434.683594,26816.179688,27225.726563,27225.726563,13697203143,374.449219,Up
2023-05-24,27224.603516,27224.603516,26106.576172,26334.818359,26334.818359,16299104428,-890.908204,Down
2023-05-25,26329.460938,26591.519531,25890.593750,26476.207031,26476.207031,13851122697,141.388672,Up
...,...,...,...,...,...,...,...,...
2024-05-16,66256.109375,66712.429688,64613.054688,65231.582031,65231.582031,31573077994,-1035.910157,Down
2024-05-17,65231.296875,67459.460938,65119.316406,67051.875000,67051.875000,28031279310,1820.292969,Up
2024-05-18,67066.210938,67387.328125,66663.500000,66940.804688,66940.804688,16712277406,-111.070312,Down
2024-05-19,66937.929688,67694.296875,65937.179688,66278.367188,66278.367188,19249094538,-662.437500,Down


In [1270]:
# Feature extraction and scaling
X = df["Close"].values.reshape(-1, 1)
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)
y = df["label"].values

X_scaled

array([[3.39697734e-02],
       [3.60017492e-02],
       [4.38094723e-02],
       [2.52329493e-02],
       [2.81810756e-02],
       [3.32496733e-02],
       [3.63578083e-02],
       [6.17398519e-02],
       [5.46554055e-02],
       [5.37476444e-02],
       [4.36829396e-02],
       [3.53490083e-02],
       [4.43070508e-02],
       [4.06693274e-02],
       [4.15854779e-02],
       [1.32493214e-02],
       [4.40817192e-02],
       [2.54660594e-02],
       [2.88485183e-02],
       [2.82679828e-02],
       [1.51497553e-02],
       [1.70040071e-02],
       [1.62185840e-02],
       [1.65569684e-02],
       [0.00000000e+00],
       [9.41888708e-03],
       [2.50795788e-02],
       [2.88997911e-02],
       [2.52620270e-02],
       [3.59965771e-02],
       [6.67825484e-02],
       [1.02225632e-01],
       [9.98274154e-02],
       [1.16157830e-01],
       [1.13097425e-01],
       [1.11670501e-01],
       [1.07309868e-01],
       [1.16005519e-01],
       [1.03454795e-01],
       [1.10942582e-01],


In [1271]:
# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)


In [1272]:
# Train the Naive Bayes model
nb = GaussianNB()
nb.fit(X_train, y_train)

In [1273]:
# Make predictions
y_pred_nb = nb.predict(X_test)
print(f"Predicted labels: {y_pred_nb}")
print(f"Actual labels: {y_test}")

Predicted labels: ['Up' 'Down' 'Down' 'Up' 'Down' 'Up' 'Down' 'Down' 'Down' 'Down' 'Up'
 'Down' 'Up' 'Down' 'Down' 'Up' 'Down' 'Up' 'Up' 'Down' 'Down' 'Down'
 'Down' 'Up' 'Up' 'Up' 'Up' 'Up' 'Up' 'Up' 'Down' 'Down' 'Down' 'Up' 'Up'
 'Down' 'Down' 'Down' 'Up' 'Up' 'Up' 'Up' 'Down' 'Up' 'Down' 'Up' 'Down'
 'Down' 'Down' 'Down' 'Up' 'Up' 'Up' 'Up' 'Up' 'Up' 'Down' 'Down' 'Up'
 'Down' 'Up' 'Up' 'Down' 'Down' 'Up' 'Down' 'Up' 'Down' 'Up' 'Down' 'Down'
 'Up' 'Up' 'Up']
Actual labels: ['Down' 'Up' 'Down' 'Up' 'Down' 'Up' 'Down' 'Down' 'Up' 'Down' 'Down' 'Up'
 'Down' 'Down' 'Down' 'Up' 'Down' 'Up' 'Down' 'Down' 'Down' 'Down' 'Up'
 'Up' 'Up' 'Up' 'Up' 'Up' 'Up' 'Up' 'Up' 'Down' 'Up' 'Down' 'Up' 'Up' 'Up'
 'Down' 'Up' 'Down' 'Down' 'Down' 'Up' 'Down' 'Up' 'Up' 'Up' 'Down' 'Up'
 'Down' 'Down' 'Up' 'Up' 'Up' 'Down' 'Down' 'Down' 'Down' 'Up' 'Down'
 'Down' 'Down' 'Up' 'Down' 'Up' 'Down' 'Up' 'Up' 'Up' 'Down' 'Up' 'Up'
 'Up' 'Up']


In [1274]:
nb_mape = calculate_mape(y_pred_nb, y_test)
print(f"Nive Bayes Mean Absolute Percentage Error (MAPE): {nb_mape:.2f}%")

Mean Absolute Percentage Error (MAPE): 38.46%


In [1275]:
# Train the Kohonen's SOM
som_shape = (30, 30)
som = MiniSom(som_shape[0], som_shape[1], 1, sigma=1.0, learning_rate=0.2)
som.train(X_train.reshape(-1, 1), 10000)  # Number of iterations
som

<minisom.MiniSom at 0x20d05767380>

In [1276]:
# Make predictions with Kohonen's SOM
y_pred_som = []
for x in X_test:
    winner = som.winner(x.reshape(1, -1))
    y_pred_som.append("Up" if winner[0] >= 5 else "Down")


print(f"Predicted labels: {y_pred_som}")
print(f"Actual labels: {y_test}")

Predicted labels: ['Up', 'Down', 'Up', 'Up', 'Up', 'Up', 'Up', 'Up', 'Up', 'Up', 'Up', 'Up', 'Down', 'Up', 'Up', 'Up', 'Up', 'Up', 'Up', 'Up', 'Up', 'Down', 'Up', 'Up', 'Down', 'Down', 'Up', 'Up', 'Up', 'Down', 'Up', 'Up', 'Up', 'Down', 'Down', 'Up', 'Down', 'Up', 'Up', 'Down', 'Down', 'Up', 'Up', 'Up', 'Up', 'Down', 'Up', 'Down', 'Up', 'Down', 'Up', 'Up', 'Down', 'Up', 'Up', 'Up', 'Up', 'Up', 'Up', 'Up', 'Up', 'Up', 'Up', 'Up', 'Up', 'Up', 'Up', 'Up', 'Down', 'Up', 'Up', 'Up', 'Up', 'Up']
Actual labels: ['Down' 'Up' 'Down' 'Up' 'Down' 'Up' 'Down' 'Down' 'Up' 'Down' 'Down' 'Up'
 'Down' 'Down' 'Down' 'Up' 'Down' 'Up' 'Down' 'Down' 'Down' 'Down' 'Up'
 'Up' 'Up' 'Up' 'Up' 'Up' 'Up' 'Up' 'Up' 'Down' 'Up' 'Down' 'Up' 'Up' 'Up'
 'Down' 'Up' 'Down' 'Down' 'Down' 'Up' 'Down' 'Up' 'Up' 'Up' 'Down' 'Up'
 'Down' 'Down' 'Up' 'Up' 'Up' 'Down' 'Down' 'Down' 'Down' 'Up' 'Down'
 'Down' 'Down' 'Up' 'Down' 'Up' 'Down' 'Up' 'Up' 'Up' 'Down' 'Up' 'Up'
 'Up' 'Up']


In [1277]:
som_mape = calculate_mape(y_pred_som, y_test)
print(f"SOM Mean Absolute Percentage Error (MAPE): {som_mape:.2f}%")

Mean Absolute Percentage Error (MAPE): 23.08%


In [1278]:
# Combine predictions using a weighted ensemble
nb_weight = 0.49  # Convert to probability (e.g., 0.49)
som_weight = 1 - nb_weight  # Ensure weights sum to 1

y_pred_combined = []
for nb_pred, som_pred in zip(y_pred_nb, y_pred_som):
    combined_pred = nb_weight * (1 if nb_pred == "Up" else 0) + som_weight * (1 if som_pred == "Up" else 0)
    y_pred_combined.append("Up" if combined_pred >= 0.5 else "Down")


print(f"Predicted labels: {y_pred_combined}")
print(f"Actual labels: {y_test}")

print(f"Last Predicted labels: {y_pred_combined[-1]}")
print(f"Last Actual labels: {y_test[-1]}")

Predicted labels: ['Up', 'Down', 'Up', 'Up', 'Up', 'Up', 'Up', 'Up', 'Up', 'Up', 'Up', 'Up', 'Down', 'Up', 'Up', 'Up', 'Up', 'Up', 'Up', 'Up', 'Up', 'Down', 'Up', 'Up', 'Down', 'Down', 'Up', 'Up', 'Up', 'Down', 'Up', 'Up', 'Up', 'Down', 'Down', 'Up', 'Down', 'Up', 'Up', 'Down', 'Down', 'Up', 'Up', 'Up', 'Up', 'Down', 'Up', 'Down', 'Up', 'Down', 'Up', 'Up', 'Down', 'Up', 'Up', 'Up', 'Up', 'Up', 'Up', 'Up', 'Up', 'Up', 'Up', 'Up', 'Up', 'Up', 'Up', 'Up', 'Down', 'Up', 'Up', 'Up', 'Up', 'Up']
Actual labels: ['Down' 'Up' 'Down' 'Up' 'Down' 'Up' 'Down' 'Down' 'Up' 'Down' 'Down' 'Up'
 'Down' 'Down' 'Down' 'Up' 'Down' 'Up' 'Down' 'Down' 'Down' 'Down' 'Up'
 'Up' 'Up' 'Up' 'Up' 'Up' 'Up' 'Up' 'Up' 'Down' 'Up' 'Down' 'Up' 'Up' 'Up'
 'Down' 'Up' 'Down' 'Down' 'Down' 'Up' 'Down' 'Up' 'Up' 'Up' 'Down' 'Up'
 'Down' 'Down' 'Up' 'Up' 'Up' 'Down' 'Down' 'Down' 'Down' 'Up' 'Down'
 'Down' 'Down' 'Up' 'Down' 'Up' 'Down' 'Up' 'Up' 'Up' 'Down' 'Up' 'Up'
 'Up' 'Up']
Last Predicted labels: Up
Last Actual labe

In [1279]:
com_mape = calculate_mape(y_pred_combined, y_test)
print(f"Combination Mean Absolute Percentage Error (MAPE): {com_mape:.2f}%")

Mean Absolute Percentage Error (MAPE): 23.08%
