In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import GridSearchCV, PredefinedSplit, train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix

In [None]:
# Build the train/test split from the line-delimited JSON records and
# standardise the feature columns.

df = pd.read_json(
    "../data/postdatalinesvectors.json",
    orient='records',
    lines=True,
)

# Feature columns are named "0" through "299"; the target is the 'label' column.
X = df[list(map(str, range(300)))]
y = df['label']

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=101,
)

# Learn the scaling statistics from the training rows only, then apply the
# same transformation to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Grid search: tune an RBF-kernel SVC over C and gamma with cross-validation,
# evaluate the best model on the held-out test split, and append a summary
# of the run to grid_search_results.txt.

param_grid = {'C': np.logspace(-3, 3, 7),
              'gamma': np.logspace(-3, 3, 7),
              'kernel': ['rbf']}
cv = 3
scoring = 'accuracy'

# NOTE(review): probability=True adds an internal cross-validation to every
# SVC fit (see the scikit-learn SVC docs), which multiplies the cost of the
# 7 x 7 x cv search. It is kept here in case later cells call predict_proba;
# drop it if they do not.
# NOTE(review): X_train was standardised on the whole training split before
# this search, so each validation fold has already influenced the scaling
# statistics. Wrapping the scaler and the SVC in a Pipeline would avoid that.
grid = GridSearchCV(
    SVC(random_state=0, probability=True, class_weight='balanced'),
    param_grid,
    scoring=scoring,
    verbose=3,
    cv=cv,
    n_jobs=-1,
)
grid.fit(X_train, y_train)

best_params = grid.best_params_
best_estimator = grid.best_estimator_
print(best_params)
print(best_estimator)

# Predict on the test split once and build the report once; both the console
# output and the results file reuse these values instead of recomputing them.
grid_predict = grid.predict(X_test)
best_report = classification_report(y_test, grid_predict)
print(best_report)

# Append (not overwrite) so results from successive runs accumulate in one file.
# The explicit encoding keeps the output independent of the platform default.
with open('grid_search_results.txt', 'a', encoding='utf-8') as f:
    f.write(f"Best Params:\n{best_params}\n")
    f.write(f"Best Estimator:\n{best_estimator}\n")
    f.write(f"\nClassification Report:\n{best_report}")
    f.write(f"\nCross validation folds:{cv}\n")
    f.write(f"\nScoring metric:{scoring}\n")