In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC, LinearSVC
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Read the hourly technical-feature table that holds every coin's rows.
df = pd.read_csv("crypto_hourly_technical_features.csv")

# Label each row 1 when its close is strictly above its open, otherwise 0.
# NOTE(review): this is the direction of the *same* bar the features belong to;
# if any remaining feature is computed from that bar's close, the label leaks
# into the inputs -- confirm against how the CSV was built.
df['target'] = df['close'].gt(df['open']).astype(int)

# Identifier, timestamp, raw OHLC and label columns are kept out of the model inputs.
drop_cols = ['id', 'symbol', 'name', 'datetime', 'open', 'high', 'low', 'close', 'target']

# Model inputs: everything not listed above (errors='ignore' tolerates listed
# columns that are absent from the CSV), restricted to rows with no missing values.
features = df.drop(columns=drop_cols, errors='ignore').dropna()

# Labels for exactly the rows that survived the missing-value filter.
target = df.loc[features.index, 'target']

# Evaluation settings shared by every coin. The seed is reused for the split and
# for LinearSVC so that a rerun reproduces the same numbers.
TEST_SIZE = 0.2
RANDOM_STATE = 42

# Upper bound on the number of training rows handed to the RBF SVM
# (a smaller sample is used for faster training).
sample_size = 50000


def _report(model_name, coin, y_true, y_pred):
    """Print accuracy, confusion matrix and classification report for one model.

    Parameters
    ----------
    model_name : str
        Human-readable model label used in the heading line.
    coin : object
        Symbol of the coin the predictions belong to.
    y_true, y_pred : array-like
        Held-out labels and the model's predictions for them.
    """
    print(f"\n{model_name} for {coin}")
    print("Accuracy:", accuracy_score(y_true, y_pred))
    print("Confusion Matrix:\n", confusion_matrix(y_true, y_pred))
    print(classification_report(y_true, y_pred))


# Train and evaluate three classifiers independently for each coin symbol.
for coin in df['symbol'].unique():
    print(f"Training model for {coin}...")

    # Rows of the raw frame that belong to the current coin.
    coin_data = df[df['symbol'] == coin]

    # `features` had rows with missing values removed, so it may no longer
    # contain every label in coin_data.index; asking .loc for a missing label
    # raises KeyError. Select only the coin's rows that are still present.
    in_coin = features.index.isin(coin_data.index)
    coin_features = features.loc[in_coin]
    coin_target = target.loc[coin_features.index]

    # A split needs at least one row on each side.
    if len(coin_features) < 2:
        print(f"Skipping {coin}: only {len(coin_features)} usable row(s) after dropping missing values")
        print("="*50)
        continue

    # Train-test split for the current coin.
    # NOTE(review): train_test_split shuffles by default; if the rows are
    # time-ordered this mixes past and future bars between train and test --
    # confirm whether a chronological split is wanted.
    X_train, X_test, y_train, y_test = train_test_split(
        coin_features, coin_target, test_size=TEST_SIZE, random_state=RANDOM_STATE
    )

    # The classifiers below cannot be fitted on a single class.
    if y_train.nunique() < 2:
        print(f"Skipping {coin}: training split contains a single class")
        print("="*50)
        continue

    # Scale features; the scaler is fitted on the training rows only.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Logistic Regression
    logreg = LogisticRegression(max_iter=1000)
    logreg.fit(X_train_scaled, y_train)
    logreg_preds = logreg.predict(X_test_scaled)
    _report("Logistic Regression", coin, y_test, logreg_preds)

    # Linear SVM (seeded so repeated runs give the same model)
    linear_svm = LinearSVC(max_iter=10000, random_state=RANDOM_STATE)
    linear_svm.fit(X_train_scaled, y_train)
    linear_preds = linear_svm.predict(X_test_scaled)
    _report("Linear SVM", coin, y_test, linear_preds)

    # RBF SVM on at most `sample_size` training rows. The rows were already
    # shuffled by the split, so the leading slice is a random subset; .iloc
    # makes the positional slice of the label Series explicit.
    X_train_small = X_train_scaled[:sample_size]
    y_train_small = y_train.iloc[:sample_size]

    rbf_svm = SVC(kernel='rbf')
    rbf_svm.fit(X_train_small, y_train_small)
    rbf_preds = rbf_svm.predict(X_test_scaled)
    _report("RBF SVM", coin, y_test, rbf_preds)

    print("="*50)  # Separator between different coin results


Training model for AAVE...

Logistic Regression for AAVE
Accuracy: 0.8410220014194464
Confusion Matrix:
 [[1789  307]
 [ 365 1766]]
              precision    recall  f1-score   support

           0       0.83      0.85      0.84      2096
           1       0.85      0.83      0.84      2131

    accuracy                           0.84      4227
   macro avg       0.84      0.84      0.84      4227
weighted avg       0.84      0.84      0.84      4227






Linear SVM for AAVE
Accuracy: 0.8381831085876508
Confusion Matrix:
 [[1784  312]
 [ 372 1759]]
              precision    recall  f1-score   support

           0       0.83      0.85      0.84      2096
           1       0.85      0.83      0.84      2131

    accuracy                           0.84      4227
   macro avg       0.84      0.84      0.84      4227
weighted avg       0.84      0.84      0.84      4227


RBF SVM for AAVE
Accuracy: 0.8495386799148332
Confusion Matrix:
 [[1799  297]
 [ 339 1792]]
              precision    recall  f1-score   support

           0       0.84      0.86      0.85      2096
           1       0.86      0.84      0.85      2131

    accuracy                           0.85      4227
   macro avg       0.85      0.85      0.85      4227
weighted avg       0.85      0.85      0.85      4227

Training model for ADA...

Logistic Regression for ADA
Accuracy: 0.887940234791889
Confusion Matrix:
 [[3542  138]
 [ 387  618]]
              precision    r




Linear SVM for ADA
Accuracy: 0.8951974386339381
Confusion Matrix:
 [[3548  132]
 [ 359  646]]
              precision    recall  f1-score   support

           0       0.91      0.96      0.94      3680
           1       0.83      0.64      0.72      1005

    accuracy                           0.90      4685
   macro avg       0.87      0.80      0.83      4685
weighted avg       0.89      0.90      0.89      4685


RBF SVM for ADA
Accuracy: 0.9180362860192103
Confusion Matrix:
 [[3511  169]
 [ 215  790]]
              precision    recall  f1-score   support

           0       0.94      0.95      0.95      3680
           1       0.82      0.79      0.80      1005

    accuracy                           0.92      4685
   macro avg       0.88      0.87      0.88      4685
weighted avg       0.92      0.92      0.92      4685

Training model for BTC...

Logistic Regression for BTC
Accuracy: 0.8568840579710145
Confusion Matrix:
 [[6350 1190]
 [1022 6894]]
              precision    re




Linear SVM for BTC
Accuracy: 0.8083592132505176
Confusion Matrix:
 [[5906 1634]
 [1328 6588]]
              precision    recall  f1-score   support

           0       0.82      0.78      0.80      7540
           1       0.80      0.83      0.82      7916

    accuracy                           0.81     15456
   macro avg       0.81      0.81      0.81     15456
weighted avg       0.81      0.81      0.81     15456


RBF SVM for BTC
Accuracy: 0.8880693581780539
Confusion Matrix:
 [[6614  926]
 [ 804 7112]]
              precision    recall  f1-score   support

           0       0.89      0.88      0.88      7540
           1       0.88      0.90      0.89      7916

    accuracy                           0.89     15456
   macro avg       0.89      0.89      0.89     15456
weighted avg       0.89      0.89      0.89     15456

Training model for DOGE...

Logistic Regression for DOGE
Accuracy: 0.9314404432132964
Confusion Matrix:
 [[1195   38]
 [  61  150]]
              precision    




Linear SVM for DOGE
Accuracy: 0.9376731301939059
Confusion Matrix:
 [[1187   46]
 [  44  167]]
              precision    recall  f1-score   support

           0       0.96      0.96      0.96      1233
           1       0.78      0.79      0.79       211

    accuracy                           0.94      1444
   macro avg       0.87      0.88      0.88      1444
weighted avg       0.94      0.94      0.94      1444


RBF SVM for DOGE
Accuracy: 0.9439058171745153
Confusion Matrix:
 [[1192   41]
 [  40  171]]
              precision    recall  f1-score   support

           0       0.97      0.97      0.97      1233
           1       0.81      0.81      0.81       211

    accuracy                           0.94      1444
   macro avg       0.89      0.89      0.89      1444
weighted avg       0.94      0.94      0.94      1444

Training model for DOT...

Logistic Regression for DOT
Accuracy: 0.8294298556896144
Confusion Matrix:
 [[1998  318]
 [ 403 1508]]
              precision    




Linear SVM for DOT
Accuracy: 0.8237520700260231
Confusion Matrix:
 [[1988  328]
 [ 417 1494]]
              precision    recall  f1-score   support

           0       0.83      0.86      0.84      2316
           1       0.82      0.78      0.80      1911

    accuracy                           0.82      4227
   macro avg       0.82      0.82      0.82      4227
weighted avg       0.82      0.82      0.82      4227


RBF SVM for DOT
Accuracy: 0.8440974686538917
Confusion Matrix:
 [[2015  301]
 [ 358 1553]]
              precision    recall  f1-score   support

           0       0.85      0.87      0.86      2316
           1       0.84      0.81      0.82      1911

    accuracy                           0.84      4227
   macro avg       0.84      0.84      0.84      4227
weighted avg       0.84      0.84      0.84      4227

Training model for EOS...

Logistic Regression for EOS
Accuracy: 0.8864244315648685
Confusion Matrix:
 [[5361  415]
 [ 604 2592]]
              precision    re




Linear SVM for EOS
Accuracy: 0.8730494872938029
Confusion Matrix:
 [[5380  396]
 [ 743 2453]]
              precision    recall  f1-score   support

           0       0.88      0.93      0.90      5776
           1       0.86      0.77      0.81      3196

    accuracy                           0.87      8972
   macro avg       0.87      0.85      0.86      8972
weighted avg       0.87      0.87      0.87      8972


RBF SVM for EOS
Accuracy: 0.9021399910833705
Confusion Matrix:
 [[5323  453]
 [ 425 2771]]
              precision    recall  f1-score   support

           0       0.93      0.92      0.92      5776
           1       0.86      0.87      0.86      3196

    accuracy                           0.90      8972
   macro avg       0.89      0.89      0.89      8972
weighted avg       0.90      0.90      0.90      8972

Training model for ETH...

Logistic Regression for ETH
Accuracy: 0.8642122360584732
Confusion Matrix:
 [[4012  649]
 [ 605 3969]]
              precision    re




Linear SVM for ETH
Accuracy: 0.8516513264753655
Confusion Matrix:
 [[3953  708]
 [ 662 3912]]
              precision    recall  f1-score   support

           0       0.86      0.85      0.85      4661
           1       0.85      0.86      0.85      4574

    accuracy                           0.85      9235
   macro avg       0.85      0.85      0.85      9235
weighted avg       0.85      0.85      0.85      9235


RBF SVM for ETH
Accuracy: 0.8782891174878181
Confusion Matrix:
 [[4098  563]
 [ 561 4013]]
              precision    recall  f1-score   support

           0       0.88      0.88      0.88      4661
           1       0.88      0.88      0.88      4574

    accuracy                           0.88      9235
   macro avg       0.88      0.88      0.88      9235
weighted avg       0.88      0.88      0.88      9235

Training model for LINK...

Logistic Regression for LINK
Accuracy: 0.8535578669308238
Confusion Matrix:
 [[2850  441]
 [ 446 2320]]
              precision    




Linear SVM for LINK
Accuracy: 0.8512464916625392
Confusion Matrix:
 [[2847  444]
 [ 457 2309]]
              precision    recall  f1-score   support

           0       0.86      0.87      0.86      3291
           1       0.84      0.83      0.84      2766

    accuracy                           0.85      6057
   macro avg       0.85      0.85      0.85      6057
weighted avg       0.85      0.85      0.85      6057


RBF SVM for LINK
Accuracy: 0.8599966980353311
Confusion Matrix:
 [[2873  418]
 [ 430 2336]]
              precision    recall  f1-score   support

           0       0.87      0.87      0.87      3291
           1       0.85      0.84      0.85      2766

    accuracy                           0.86      6057
   macro avg       0.86      0.86      0.86      6057
weighted avg       0.86      0.86      0.86      6057

Training model for LTC...

Logistic Regression for LTC
Accuracy: 0.8740524149880875
Confusion Matrix:
 [[4096  619]
 [ 544 3975]]
              precision    




Linear SVM for LTC
Accuracy: 0.8688542343513104
Confusion Matrix:
 [[4072  643]
 [ 568 3951]]
              precision    recall  f1-score   support

           0       0.88      0.86      0.87      4715
           1       0.86      0.87      0.87      4519

    accuracy                           0.87      9234
   macro avg       0.87      0.87      0.87      9234
weighted avg       0.87      0.87      0.87      9234


RBF SVM for LTC
Accuracy: 0.8809833225037903
Confusion Matrix:
 [[4128  587]
 [ 512 4007]]
              precision    recall  f1-score   support

           0       0.89      0.88      0.88      4715
           1       0.87      0.89      0.88      4519

    accuracy                           0.88      9234
   macro avg       0.88      0.88      0.88      9234
weighted avg       0.88      0.88      0.88      9234

Training model for UNI...

Logistic Regression for UNI
Accuracy: 0.8472090823084201
Confusion Matrix:
 [[1998  302]
 [ 344 1584]]
              precision    re




Linear SVM for UNI
Accuracy: 0.8443708609271523
Confusion Matrix:
 [[1998  302]
 [ 356 1572]]
              precision    recall  f1-score   support

           0       0.85      0.87      0.86      2300
           1       0.84      0.82      0.83      1928

    accuracy                           0.84      4228
   macro avg       0.84      0.84      0.84      4228
weighted avg       0.84      0.84      0.84      4228


RBF SVM for UNI
Accuracy: 0.8557237464522233
Confusion Matrix:
 [[2027  273]
 [ 337 1591]]
              precision    recall  f1-score   support

           0       0.86      0.88      0.87      2300
           1       0.85      0.83      0.84      1928

    accuracy                           0.86      4228
   macro avg       0.86      0.85      0.85      4228
weighted avg       0.86      0.86      0.86      4228

Training model for USDC...

Logistic Regression for USDC
Accuracy: 0.9120879120879121
Confusion Matrix:
 [[74  4]
 [ 4  9]]
              precision    recall  




RBF SVM for USDT
Accuracy: 0.9837728194726166
Confusion Matrix:
 [[412   3]
 [  5  73]]
              precision    recall  f1-score   support

           0       0.99      0.99      0.99       415
           1       0.96      0.94      0.95        78

    accuracy                           0.98       493
   macro avg       0.97      0.96      0.97       493
weighted avg       0.98      0.98      0.98       493

Training model for XLM...

Logistic Regression for XLM
Accuracy: 0.9405852793378658
Confusion Matrix:
 [[2797   80]
 [ 121  385]]
              precision    recall  f1-score   support

           0       0.96      0.97      0.97      2877
           1       0.83      0.76      0.79       506

    accuracy                           0.94      3383
   macro avg       0.89      0.87      0.88      3383
weighted avg       0.94      0.94      0.94      3383






Linear SVM for XLM
Accuracy: 0.9544782737215489
Confusion Matrix:
 [[2801   76]
 [  78  428]]
              precision    recall  f1-score   support

           0       0.97      0.97      0.97      2877
           1       0.85      0.85      0.85       506

    accuracy                           0.95      3383
   macro avg       0.91      0.91      0.91      3383
weighted avg       0.95      0.95      0.95      3383


RBF SVM for XLM
Accuracy: 0.9524091043452557
Confusion Matrix:
 [[2801   76]
 [  85  421]]
              precision    recall  f1-score   support

           0       0.97      0.97      0.97      2877
           1       0.85      0.83      0.84       506

    accuracy                           0.95      3383
   macro avg       0.91      0.90      0.91      3383
weighted avg       0.95      0.95      0.95      3383

Training model for XRP...

Logistic Regression for XRP
Accuracy: 0.9341508483117755
Confusion Matrix:
 [[4679  170]
 [ 222  882]]
              precision    re




Linear SVM for XRP
Accuracy: 0.9371745338484797
Confusion Matrix:
 [[4671  178]
 [ 196  908]]
              precision    recall  f1-score   support

           0       0.96      0.96      0.96      4849
           1       0.84      0.82      0.83      1104

    accuracy                           0.94      5953
   macro avg       0.90      0.89      0.90      5953
weighted avg       0.94      0.94      0.94      5953


RBF SVM for XRP
Accuracy: 0.9398622543255502
Confusion Matrix:
 [[4684  165]
 [ 193  911]]
              precision    recall  f1-score   support

           0       0.96      0.97      0.96      4849
           1       0.85      0.83      0.84      1104

    accuracy                           0.94      5953
   macro avg       0.90      0.90      0.90      5953
weighted avg       0.94      0.94      0.94      5953

