In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2, f_classif, mutual_info_classif
from sklearn import preprocessing as pre
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

In [2]:
def dataset():
    data = pd.read_csv('final-dataset.csv')
    X = data.drop(columns=['Signal','Label','Hurst Component'])
    y = data['Label']
    return X, y

In [3]:
parameters = {'C':range(1,10), 'kernel':('linear', 'poly', 'rbf', 'sigmoid'),'degree':range(2,5),'gamma':('scale','auto')}

In [4]:
def feature_scaling_min_max(X):
    min_max_scaler = pre.MinMaxScaler()
    X = min_max_scaler.fit_transform(X)
    return X

In [5]:
def feature_scaling_basic(X):
    return pre.scale(X)

In [6]:
max_score = 0
best_params = {}
best_k = 2
best_val_score = 0

for k in range(2,10):
    X, y = dataset()
    X = feature_scaling_min_max(X)
    X_new = SelectKBest(mutual_info_classif, k=k).fit_transform(X, y)
    X_train, X_test, y_train, y_test = train_test_split(X_new, y, test_size=0.3, random_state=0)
    x_val, x_test, y_val, y_test = train_test_split(X_test, y_test, test_size=0.5, random_state=0)
    clf = SVC(random_state=0)
    clf = GridSearchCV(clf, parameters)
    clf.fit(X_train, y_train)
    val_score = clf.score(x_val, y_val)
    score = clf.score(x_test, y_test)
    if(score > max_score):
        max_score = score
        best_k = k
        best_params = clf.best_params_
        best_val_score = val_score
    print("passed ", k, "steps")

print("Test Score is ", max_score)
print("Best params are ", best_params)
print("Best k is ", best_k)
print("Val score is ", best_val_score)

passed  2 steps
passed  3 steps
passed  4 steps
passed  5 steps
passed  6 steps
passed  7 steps
passed  8 steps
passed  9 steps
Test Score is  0.638095238095238
Best params are  {'C': 8, 'degree': 2, 'gamma': 'scale', 'kernel': 'rbf'}
Best k is  6
Val score is  0.6523809523809524


In [None]:
max_score = 0
best_params = {}
best_k = 2
best_val_score = 0

for k in range(2,10):
    X, y = dataset()
    X = feature_scaling_min_max(X)
    X_new = SelectKBest(f_classif, k=k).fit_transform(X, y)
    X_train, X_test, y_train, y_test = train_test_split(X_new, y, test_size=0.3, random_state=0)
    x_val, x_test, y_val, y_test = train_test_split(X_test, y_test, test_size=0.5, random_state=0)
    clf = SVC(random_state=0)
    clf = GridSearchCV(clf, parameters)
    clf.fit(X_train, y_train)
    val_score = clf.score(x_val, y_val)
    score = clf.score(x_test, y_test)
    if(score > max_score):
        max_score = score
        best_k = k
        best_params = clf.best_params_
        best_val_score = val_score
    print("Passed ", k, " steps")

print("Test Score is ", max_score)
print("Best params are ", best_params)
print("Best k is ", best_k)
print("Val score is ", best_val_score)

Passed  2  steps
Passed  3  steps
Passed  4  steps
Passed  5  steps
Passed  6  steps


In [None]:
max_score = 0
best_params = {}
best_k = 2
best_val_score = 0

for k in range(2,10):
    X, y = dataset()
    X = feature_scaling_min_max(X)
    X_new = SelectKBest(chi2, k=k).fit_transform(X, y)
    X_train, X_test, y_train, y_test = train_test_split(X_new, y, test_size=0.3, random_state=0)
    x_val, x_test, y_val, y_test = train_test_split(X_test, y_test, test_size=0.5, random_state=0)
    clf = SVC(random_state=0)
    clf = GridSearchCV(clf, parameters)
    clf.fit(X_train, y_train)
    val_score = clf.score(x_val, y_val)
    score = clf.score(x_test, y_test)
    if(score > max_score):
        max_score = score
        best_k = k
        best_params = clf.best_params_
        best_val_score = val_score
    print("Passed ", k, " steps")

print("Test Score is ", max_score)
print("Best params are ", best_params)
print("Best k is ", best_k)
print("Val score is ", best_val_score)