In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import GridSearchCV, PredefinedSplit, train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix

In [37]:
# train/test set creation
#
# Reads the line-delimited JSON records, takes the 300 columns named
# "0" .. "299" as the feature matrix and the 'label' column as the target,
# splits them 50/50 with a fixed seed, and standardizes the features.

df = pd.read_json(
    "../postdatalinesvectors.json",
    orient='records',
    lines=True,
)

# Feature columns are addressed by their string names "0", "1", ..., "299".
feature_columns = list(map(str, range(300)))
X = df[feature_columns]
y = df['label']

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.5,
    random_state=101,
)

# The scaler learns its statistics from the training half only; the test
# half is transformed with those same statistics.
scaler = StandardScaler().fit(X_train)

X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

[[ 0.09164019 -0.14704137  0.15169696 ... -0.37143682  0.52256276
   0.12714074]
 [-0.04951156 -0.2400059   0.33312205 ...  1.36316263 -0.48323734
  -1.33975126]
 [-0.450122    0.5659761   1.0311193  ...  0.89700444  0.01764664
   0.06543488]
 ...
 [ 0.82290611 -0.24662231 -1.87668249 ...  1.881843   -0.35702996
   0.43469591]
 [ 0.19826358  0.50717011  0.05069702 ...  0.35478727 -0.02031197
  -1.28556876]
 [-0.32697836  1.40910033 -0.34769218 ...  0.32431822  0.56313825
  -0.78583354]]


In [21]:
# gridsearch
#
# Exhaustive search over C and gamma for an RBF-kernel SVC, scored by
# cross-validated accuracy on the training split. The refit best model is
# then evaluated once on the held-out test split, and the outcome is both
# printed and appended to 'grid_search_results.txt'.

param_grid = {'C': np.logspace(-3, 3, 7),      # 1e-3 ... 1e3, one value per decade
              'gamma': np.logspace(-3, 3, 7),  # same 7-point grid -> 7 x 7 = 49 candidates
              'kernel': ['rbf']}
cv = 3
scoring = 'accuracy'

# NOTE(review): X_train was standardized on the whole training split before
# this search, so every CV validation fold has already contributed to the
# scaling statistics. Wrapping StandardScaler + SVC in a Pipeline would keep
# the folds independent; left unchanged here so results stay comparable.
# NOTE(review): probability=True is not used anywhere in this cell (only
# predict() is called) and it makes every fit more expensive; presumably it
# is needed by later code calling predict_proba -- confirm before removing.
grid = GridSearchCV(
    SVC(random_state=0, probability=True, class_weight='balanced'),
    param_grid,
    scoring=scoring,
    verbose=3,
    cv=cv,
    n_jobs=-1,
)
grid.fit(X_train, y_train)

best_params = grid.best_params_
best_estimator = grid.best_estimator_

print(best_params)
print(best_estimator)

# Predict on the test split once and build the report once; the same objects
# feed both the printed output and the log file, so the two cannot diverge.
grid_predict = grid.predict(X_test)
best_report = classification_report(y_test, grid_predict)

print(best_report)

# Append mode: each run adds a new entry instead of overwriting earlier ones.
with open('grid_search_results.txt', 'a') as f:
    f.write(f"Best Params:\n{best_params}\n")
    f.write(f"Best Estimator:\n{best_estimator}\n")
    f.write(f"\nClassification Report:\n{best_report}")
    f.write(f"\nCross validation folds:{cv}\n")
    f.write(f"\nScoring metric:{scoring}\n")

Fitting 3 folds for each of 49 candidates, totalling 147 fits
{'C': 10.0, 'gamma': 0.001, 'kernel': 'rbf'}
SVC(C=10.0, class_weight='balanced', gamma=0.001, probability=True,
    random_state=0)
              precision    recall  f1-score   support

  depression       0.95      0.91      0.93       228
      normal       0.89      0.94      0.91       173

    accuracy                           0.92       401
   macro avg       0.92      0.92      0.92       401
weighted avg       0.92      0.92      0.92       401

