In [1]:
import os
import random
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, KFold
from sklearn.svm import SVC, SVR
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, matthews_corrcoef, confusion_matrix
import matplotlib.pyplot as plt

In [2]:
# resetting the seeds for reproducibility
def reset_random_seeds():
    n = 1
    os.environ['PYTHONHASHSEED'] = str(n)
    tf.random.set_seed(n)
    np.random.seed(n)
    random.seed(n)

reset_random_seeds()

In [3]:
# import data
df = pd.read_csv('Boruta_onchain_data.csv', parse_dates=True)
df1 = pd.read_csv('all_data.csv', parse_dates=True)

#onchain_data_new
#TA_data
#all_data
#Boruta_data
#Boruta_TA_data
#Boruta_onchain_data


Unnamed: 0,timestamp,90d-coin-days-destroyed-cdd-90,adjusted-sopr-a-sopr,average-coin-dormancy,coin-days-destroyed-cdd,market-value-to-realized-value-ratio-mvrv,mvrv-z-score,net-realized-profit-loss-usd,net-unrealized-profit-loss-nupl,number-of-utxos-in-loss,...,price-ohlc-usd-o,puell-multiple,realized-loss-usd,realized-profit-loss-ratio,realized-profits-to-value-rpv-ratio,relative-unrealized-loss,relative-unrealized-profit,supply-adjusted-cdd,total-supply-in-loss-btc,total-supply-in-profit-btc
0,2012-12-13,254260.282905,1.028255,2.502606,9.431719e+06,1.867095,1.625901,2.927349e+05,0.464531,287854,...,13.699218,0.788782,4.352469e+04,7.725720,0.004334,0.019785,0.484316,0.893750,6.619663e+05,9.890999e+06
1,2012-12-14,254068.529782,1.017767,1.186667,3.388857e+06,1.846665,1.588458,1.333804e+05,0.456186,389415,...,13.731666,0.639479,4.126883e+04,4.231988,0.002246,0.021360,0.477545,0.321047,1.241486e+06,9.314129e+06
2,2012-12-15,253755.221000,1.006894,1.097251,1.756643e+06,1.832728,1.561044,3.217600e+04,0.454525,405622,...,13.547075,0.691617,2.847964e+04,2.129790,0.000779,0.021584,0.476109,0.166372,1.263471e+06,9.295043e+06
3,2012-12-16,249774.400407,1.014167,1.554718,2.701888e+06,1.815466,1.529419,5.433236e+04,0.447717,509838,...,13.514511,0.659471,3.742280e+04,2.451852,0.001177,0.023435,0.471153,0.255829,1.671547e+06,8.889743e+06
4,2012-12-17,243898.529720,0.995911,1.138557,1.947143e+06,1.793560,1.484611,-2.267702e+04,0.443454,568392,...,13.360134,0.779477,9.005393e+04,0.748184,0.000864,0.024595,0.468049,0.184307,1.728292e+06,8.836347e+06
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3841,2023-06-20,113777.202541,0.999152,21.698483,9.508913e+06,1.405627,0.600914,-1.734921e+06,0.305843,27309507,...,26821.524164,1.130898,3.478821e+08,0.995013,0.000884,0.128657,0.434500,0.489953,4.632063e+06,1.442963e+07
3842,2023-06-21,115940.094282,1.032519,37.659274,2.054768e+07,1.486605,0.724447,5.335385e+08,0.343615,21668268,...,28331.725994,1.082598,2.238003e+08,3.383993,0.001931,0.108524,0.452138,1.058691,4.064918e+06,1.498897e+07
3843,2023-06-22,115436.620895,1.029398,15.169519,6.027841e+06,1.478923,0.719527,4.132893e+08,0.340174,22408074,...,30038.303054,1.258383,7.269287e+07,6.685418,0.001238,0.109838,0.450012,0.310563,4.282970e+06,1.478033e+07
3844,2023-06-23,118428.877657,1.030853,50.301580,1.925797e+07,1.512369,0.764022,3.892807e+08,0.354737,20782530,...,29912.159413,1.155140,2.080021e+08,2.871523,0.001520,0.102209,0.456946,0.992157,3.890474e+06,1.517361e+07


In [4]:
df = df[df['timestamp'] >= '2013-03-11'].reset_index(drop=True)
#df1 = df1[df1['timestamp'] >= '03/11/2013']

# for onchain and all
X = df.drop('timestamp', axis=1)

# create binary classification for price movement. this assigns 1 to y if price will move upward next day.
price = pd.DataFrame()
price['today'] = df1['price-ohlc-usd-c']
price['next day'] = price['today'].shift(-1)
y = (price['next day'] > price['today']).astype(int)

# separate training data from testing data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

In [5]:
# scale the input data
scaler = StandardScaler()

# Reshape X_train and X_test if they are 1D
if X_train.ndim == 1:
    X_train = X_train.to_numpy().reshape(-1, 1)
if X_test.ndim == 1:
    X_test = X_test.to_numpy().reshape(-1, 1)

X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# reshape the input data (samples, timesteps, features)
def create_sequences(data, timesteps):
    X = []
    for i in range(len(data) - timesteps + 1):
        X.append(data[i:i + timesteps])
    return np.array(X)

timesteps = 5
X_train_reshaped = create_sequences(X_train_scaled, timesteps)
X_test_reshaped = create_sequences(X_test_scaled, timesteps)
y_train = y_train[timesteps - 1:]
y_test = y_test[timesteps - 1:]

# reshape to 2D
X_train_reshaped = np.reshape(X_train_reshaped, (X_train_reshaped.shape[0], X_train_reshaped.shape[1]*X_train_reshaped.shape[2]))
X_test_reshaped = np.reshape(X_test_reshaped, (X_test_reshaped.shape[0], X_test_reshaped.shape[1]*X_test_reshaped.shape[2]))

In [6]:
# Defining and fitting model
classifier = SVC(kernel='linear', random_state = 0)
classifier.fit(X_train_reshaped, y_train)

# Predicting the testing data
y_pred = classifier.predict(X_test_reshaped)

In [7]:
# Creating confusion matrix
cm = confusion_matrix(y_test, y_pred)
cm

array([[338,  37],
       [ 88, 285]])

In [8]:
# evaluate the prediction performance
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred))
print("Recall:", recall_score(y_test, y_pred))
print("F1-score:", f1_score(y_test, y_pred))
print("AUC-ROC Score:", roc_auc_score(y_test, y_pred))
print("MCC:", matthews_corrcoef(y_test, y_pred))

Accuracy: 0.8328877005347594
Precision: 0.8850931677018633
Recall: 0.7640750670241286
F1-score: 0.8201438848920863
AUC-ROC Score: 0.8327042001787309
MCC: 0.6719324165985338


In [9]:
time = df['timestamp']
time_train, time_test = train_test_split(time, test_size=0.2, shuffle=False)
time_test = time_test[timesteps:]

# Flatten y_pred to be a 1-dimensional array
y_pred_flat = y_pred.flatten()

# Create a DataFrame with columns time_test, y_test, and y_pred
pred_res = pd.DataFrame({'date': time_test, 'actual': y_test.values, 'prediction': y_pred_flat, 'value': price['today'][-748:]})

pred_res.to_csv('pred/svm_uni_data.csv', index=False)