In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split, GridSearchCV, ShuffleSplit
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import Pipeline
from sklearn.exceptions import ConvergenceWarning
from sklearn import set_config
from warnings import simplefilter
# The grid search below fits many models; suppress the convergence
# warnings they would otherwise emit.
simplefilter(action="ignore", category=ConvergenceWarning)

In [None]:
# Read the raw table. No header row is parsed, so pandas assigns integer
# column labels.
df = pd.read_csv('fulldata.csv', header=None)

# Column 0 is used as the target; the feature matrix is column 3 onwards
# (columns 1 and 2 are not used).
y = df.iloc[:, 0]
X = df.iloc[:, 3:]

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split
# repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1,
)

In [None]:
# Swap the pandas objects for plain NumPy arrays.
X_train, X_test = X_train.to_numpy(), X_test.to_numpy()
y_train, y_test = y_train.to_numpy(), y_test.to_numpy()

# Cell output: the shape of every split.
X_train.shape, X_test.shape, y_train.shape, y_test.shape

In [None]:
# Learn the scaling statistics from the training split only, then apply the
# same fitted scaler to both splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [None]:
# Summary statistics of the feature frame X as loaded; the scaled data
# lives in X_train / X_test, not in X.
X.describe()

In [None]:
# Preprocessing step of the pipeline: standardise every feature column.
#
# Columns are selected positionally because the data reaches the pipeline as
# a NumPy array (and the CSV is read with header=None), so a string label
# such as 't_half' cannot be resolved and every fit would fail. Selecting all
# columns also avoids ColumnTransformer's default remainder='drop', which
# would silently discard every feature that is not listed.
ct = ColumnTransformer(
    [('scaler', StandardScaler(), slice(0, None))]
)

In [None]:
# Candidate classifiers compared by the grid search.
# Estimators with a stochastic fit are seeded so repeated runs give the same
# scores; the MLP draws random initial weights, so it is seeded as well.
clf1 = LogisticRegression(random_state=1, multi_class='ovr')
clf2 = SVC()
clf3 = KNeighborsClassifier()
clf4 = RandomForestClassifier(random_state=1)
clf5 = DecisionTreeClassifier(random_state=1)
clf6 = MLPClassifier(random_state=1)

In [None]:
# Two-step pipeline: preprocessing followed by a classifier. clf1 is only the
# initial classifier; each parameter grid supplies its own 'classifier' value.
pipe = Pipeline(
    steps=[
        ('preprocessor', ct),
        ('classifier', clf1),
    ]
)

In [None]:
# Ask scikit-learn to use its 'diagram' display mode for estimators.
set_config(display='diagram')

In [None]:
# Show the assembled pipeline as the cell output.
pipe

In [None]:
# Search space for logistic regression (clf1).
# C is the inverse regularisation strength and must be strictly positive,
# so the grid starts at 0.1 rather than 0 (C=0 makes the fit fail).
params1 = {
    "classifier__penalty": ['l2'],
    "classifier__C": [0.1, 1, 3, 5, 10],
    "classifier__solver": ['lbfgs', 'newton-cg', 'saga', 'sag', 'liblinear'],
    "classifier": [clf1],
}

In [None]:
# Search space for the support vector classifier (clf2), RBF kernel only.
params2 = {
    "classifier__C": [0.1, 1, 10, 100, 1000],
    "classifier__gamma": [1, 0.1, 0.01, 0.001, 0.0001],
    "classifier__kernel": ['rbf'],
    "classifier": [clf2],
}

In [None]:
# Search space for k-nearest neighbours (clf3).
# 'minkowski' is left out of the metric list: with the default p=2 it is the
# Euclidean distance, so it would only repeat the 'euclidean' candidates.
params3 = {
    "classifier__n_neighbors": [1, 3, 5, 10, 15, 20],
    "classifier__weights": ['uniform', 'distance'],
    "classifier__metric": ['euclidean', 'manhattan'],
    "classifier": [clf3],
}

In [None]:
# Search space for the random forest (clf4).
params4 = {
    "classifier__n_estimators": [100, 200],
    "classifier__min_samples_leaf": [1, 2],
    "classifier": [clf4],
}

In [None]:
# Search space for the decision tree (clf5).
params5 = {
    "classifier__max_depth": [2, 3, 5, 10, 20],
    "classifier__min_samples_leaf": [5, 10, 20, 50, 100],
    "classifier__criterion": ["gini", "entropy"],
    "classifier": [clf5],
}

In [None]:
# Search space for the multi-layer perceptron (clf6).
params6 = {
    "classifier__hidden_layer_sizes": [(10, 30, 10), (20,)],
    "classifier__activation": ['tanh', 'relu'],
    "classifier__solver": ['sgd', 'adam'],
    "classifier__alpha": [0.0001, 0.05],
    "classifier__learning_rate": ['constant', 'adaptive'],
    "classifier": [clf6],
}

In [None]:
# All per-classifier grids, searched one after another.
params = [
    params1,
    params2,
    params3,
    params4,
    params5,
    params6,
]

In [None]:
# Run a separate 5-fold grid search for each classifier's grid and report the
# best cross-validated score together with the settings that produced it.
for param in params:
    grid = GridSearchCV(pipe, param, cv=5)
    grid.fit(X_train, y_train)
    print('Score:', grid.best_score_)
    # Labelled 'Params:' so the two output lines can be told apart.
    print('Params:', grid.best_params_)