In [1]:
#Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import yfinance as yf

from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.decomposition import PCA

from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn import metrics


from datetime import date, timedelta, datetime
import matplotlib.dates as mdates
import math

In [2]:
#Pre-processing & Setup
#symbol and time period
symbol = 'ETH-USD'
start_date = '2022-01-01'
end_date = '2023-01-01'

#Download
eth_data = yf.download(symbol, start=start_date, end=end_date)

#Feature selection
features = ['Open', 'High', 'Low', 'Volume']
X = eth_data[features]
y = eth_data['Close']

#Normalize the data
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)
y_scaled = scaler.fit_transform(np.array(y).reshape(-1, 1))

#Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_scaled, test_size=0.2, random_state=42)

[*********************100%%**********************]  1 of 1 completed


In [3]:
def featureReduction(x_train, x_test, K):
  pca = PCA(n_components = K)
  x_train_tran = pca.fit_transform(x_train)
  x_test_tran = pca.fit_transform(x_test)
  return x_train_tran, x_test_tran

In [4]:
#Value Arrays
high_acc = 0
accuracies = []
precision = []
recall = []
f1 = []

In [5]:
#Calculating Values
for K in range(1,31):
  xtrt, xtet = featureReduction(X_train, X_test, K)
  classifier = SVC(kernel= 'linear', C = 1E4)
  classifier.fit(xtrt, y_train)
  y_prediction = classifier.predict(xtet)

  #Fill Arrays
  accuracies.append(metrics.accuracy_score(y_test,y_prediction))
  precision.append(metrics.precision_score(y_test,y_prediction))
  recall.append(metrics.recall_score(y_test,y_prediction))
  f1.append(metrics.f1_score(y_test,y_prediction))

  #Get Best K
  if(metrics.accuracy_score(y_test,y_prediction) > high_acc):
    high_acc = metrics.accuracy_score(y_test,y_prediction)
    BestK = K

#Display Values @ best K
print("Best K Value" , BestK)
print("Accuracy" , high_acc)

  y = column_or_1d(y, warn=True)


ValueError: Unknown label type: continuous. Maybe you are trying to fit a classifier, which expects discrete classes on a regression target with continuous values.

In [None]:
#Plot Arrays
plt.plot(accuracies, label="Accuracies")
plt.plot(precision, label="Precisions")
plt.plot(recall, label="Recalls")
plt.plot(f1, label="F1 Scores")
plt.xlabel("K value")
plt.ylabel("Metric Scores")
plt.legend()
plt.title("Evaluated metrics for SVM")
plt.show()

In [None]:
#Kernal Tricks
#Linear, Poly, RBF, Sigmoid, & Precomputed
K = 12
c = 1E4

xtrt, xtet = featureReduction(X_train, X_test, K)

#Linear
lin = SVC(kernel= 'linear', C= c)
lin.fit(xtrt, y_train)
lin_pred = lin.predict(xtet)
lin_acc = metrics.accuracy_score(y_test, lin_pred)
print(lin_acc)

#Poly
poly = SVC(kernel= 'poly', C= c)
poly.fit(xtrt, y_train)
poly_pred = poly.predict(xtet)
poly_acc = metrics.accuracy_score(y_test, poly_pred)
print(poly_acc)

#RBF
RBF = SVC(kernel= 'rbf', C= c)
RBF.fit(xtrt, y_train)
RBF_pred = RBF.predict(xtet)
RBF_acc = metrics.accuracy_score(y_test, RBF_pred)
print(RBF_acc)

#Sigmoid
sig = SVC(kernel= 'sigmoid', C= c)
sig.fit(xtrt, y_train)
sig_pred = sig.predict(xtet)
sig_acc = metrics.accuracy_score(y_test, sig_pred)
print(sig_acc)

#Precomputed
#rec = SVC(kernel= 'precomputed', C= c)
#prec.fit(xtrt, y_train)
#prec_pred = prec.predict(xtet)
#prec_acc = metrics.accuracy_score(y_test, prec_pred)
#print(prec_acc)

In [None]:
#Bar Graph Visualization
#Bars
'''
plt.bar(height= lin_acc, x= 1, label= 'Linear')
plt.bar(height= poly_acc, x= 2, label= 'Poly')
plt.bar(height= RBF_acc, x=3, label= 'RBF')
plt.bar(height= sig_acc, x= 4, label= 'Sigmoid')
#General
plt.xlabel("Kernel Tricks")
plt.ylabel("Accuracy")
plt.legend()
plt.title("Accuracies of Different Kernal Tricks")
plt.ylim((0.82,1))
plt.show()
'''