In [1]:
import pandas as pd
import polars as pl
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns


In [2]:
df = pl.read_csv('../../Dementia/JanBDRcount.csv')
df = df.to_pandas()

In [3]:
# df.info
for col in df.columns:
    df[col].fillna(3, inplace=True)

X = df.drop(columns=['FID', 'IID', 'PAT', 'MAT', 'SEX', 'PHENOTYPE'])
y = df['PHENOTYPE']


In [4]:
assert X.isnull().sum().sum() == 0, "There are still missing values in X"
assert y.isnull().sum().sum() == 0, "There are still missing values in y"

In [5]:
print(X.isnull().sum().sum())

0


In [6]:
X.head(5)

Unnamed: 0,rs3131972_A,rs11240777_A,rs4970383_A,rs4475691_A,rs13302982_A,rs28391282_A,rs2341354_A,rs9777703_G,rs1891910_A,rs142743151_A,...,rs6009945_C,rs9616810_A,rs9616812_A,rs9616816_A,rs77452243_A,rs2341010_A,rs739365_A,rs6010063_G,rs10451_A,rs2285395_A
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0
1,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
2,0.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,...,0.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0
3,0.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,...,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,2.0,2.0,1.0


In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [8]:
#Hyperparameter tuning for better prediction of features
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [5, 7, 9],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

rf = RandomForestClassifier(random_state=42)
grid_search = GridSearchCV(estimator=rf, param_grid=param_grid, cv=3, verbose=3)
grid_search.fit(X_train, y_train)

Fitting 3 folds for each of 81 candidates, totalling 243 fits
[CV 1/3] END max_depth=5, min_samples_leaf=1, min_samples_split=2, n_estimators=100;, score=0.704 total time=   2.6s
[CV 2/3] END max_depth=5, min_samples_leaf=1, min_samples_split=2, n_estimators=100;, score=0.702 total time=   2.0s
[CV 3/3] END max_depth=5, min_samples_leaf=1, min_samples_split=2, n_estimators=100;, score=0.702 total time=   2.1s
[CV 1/3] END max_depth=5, min_samples_leaf=1, min_samples_split=2, n_estimators=200;, score=0.704 total time=   2.5s
[CV 2/3] END max_depth=5, min_samples_leaf=1, min_samples_split=2, n_estimators=200;, score=0.702 total time=   2.5s
[CV 3/3] END max_depth=5, min_samples_leaf=1, min_samples_split=2, n_estimators=200;, score=0.702 total time=   2.5s
[CV 1/3] END max_depth=5, min_samples_leaf=1, min_samples_split=2, n_estimators=300;, score=0.704 total time=   3.1s
[CV 2/3] END max_depth=5, min_samples_leaf=1, min_samples_split=2, n_estimators=300;, score=0.702 total time=   3.2s
[C

In [9]:
# Best parameters with random forest classifier
best_params = grid_search.best_params_
print(f'Best Parameters: {best_params}')

# Train the model with best parameters
best_rf = RandomForestClassifier(**best_params , random_state=42)
best_rf.fit(X_train, y_train)

# Important Features
feature_importances = best_rf.feature_importances_

Best Parameters: {'max_depth': 7, 'min_samples_leaf': 1, 'min_samples_split': 5, 'n_estimators': 100}


In [57]:
# Visualization

features = X.columns

In [60]:
importances = pd.DataFrame({'feature': features, 'importance': feature_importances})
importances = importances.sort_values(by='importance', ascending=False)


In [61]:
importances.head(-30)

Unnamed: 0,feature,importance
279957,rs157580_G,0.002557
279972,rs429358_C,0.002176
95749,rs4865857_G,0.002115
86968,rs1018139_A,0.002058
118910,rs9345409_A,0.001997
...,...,...
99566,rs115834413_A,0.000000
99574,rs115650403_A,0.000000
99580,rs3777217_A,0.000000
99579,rs11959859_G,0.000000


In [62]:
mean_importance = importances['importance'].mean()
print(mean_importance)
less_important_features = importances[importances['importance'] < mean_importance]['feature']


3.359334583005799e-06


In [73]:
print(less_important_features)



199504     rs2851668_G
198482      rs490589_A
199446     rs9943511_G
198483      rs675578_A
199445     rs1452672_A
              ...     
99588      rs6876015_G
99587     rs61289286_A
99586     rs28926213_G
99585      rs1047420_A
297677     rs2285395_A
Name: feature, Length: 295561, dtype: object
False


In [74]:
df = df.drop(columns=less_important_features)

In [111]:
X = df.drop(columns=['FID', 'IID', 'PAT', 'MAT', 'SEX', 'PHENOTYPE'])
y = df['PHENOTYPE']
y = y.replace({1: 0, 2: 1})


In [112]:
X.shape

(534, 2117)

In [113]:
print(type(y))

<class 'pandas.core.series.Series'>


In [114]:
assert X.isnull().sum().sum() == 0, "There are still missing values in X"
assert y.isnull().sum().sum() == 0, "There are still missing values in y"

In [115]:
print(X.isnull().sum().sum())

0


In [116]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=111)
print(type(y_test))

<class 'pandas.core.series.Series'>


## Random Forest with Selected Features

In [117]:
#Hyperparameter tuning for better prediction of features
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [5, 7, 9],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

rf = RandomForestClassifier(random_state=42)
grid_search = GridSearchCV(estimator=rf, param_grid=param_grid, cv=3, verbose=3)
grid_search.fit(X_train, y_train)

Fitting 3 folds for each of 81 candidates, totalling 243 fits
[CV 1/3] END max_depth=5, min_samples_leaf=1, min_samples_split=2, n_estimators=100;, score=0.672 total time=   0.1s
[CV 2/3] END max_depth=5, min_samples_leaf=1, min_samples_split=2, n_estimators=100;, score=0.710 total time=   0.1s
[CV 3/3] END max_depth=5, min_samples_leaf=1, min_samples_split=2, n_estimators=100;, score=0.694 total time=   0.1s
[CV 1/3] END max_depth=5, min_samples_leaf=1, min_samples_split=2, n_estimators=200;, score=0.664 total time=   0.2s
[CV 2/3] END max_depth=5, min_samples_leaf=1, min_samples_split=2, n_estimators=200;, score=0.694 total time=   0.2s
[CV 3/3] END max_depth=5, min_samples_leaf=1, min_samples_split=2, n_estimators=200;, score=0.694 total time=   0.2s
[CV 1/3] END max_depth=5, min_samples_leaf=1, min_samples_split=2, n_estimators=300;, score=0.672 total time=   0.2s
[CV 2/3] END max_depth=5, min_samples_leaf=1, min_samples_split=2, n_estimators=300;, score=0.702 total time=   0.2s
[C

In [126]:
best_params = grid_search.best_params_
print(f'Best Parameters: {best_params}')

# Train the model with best parameters
best_rf = RandomForestClassifier(**best_params , random_state=42)
best_rf.fit(X_train, y_train)

# Important Features

y_predict = best_rf.predict(X_test)
y_pred = list(y_predict)
print(y_pred)
print(y_test)
y_test = list(y_test)
count = 0
for i in range(len(y_test)):
    if(y_test[i]==y_pred[i]):
        count+=1

accuracy_score = count/len(y_test)

print(count)
print(len(y_pred))




Best Parameters: {'max_depth': 7, 'min_samples_leaf': 2, 'min_samples_split': 5, 'n_estimators': 200}
[1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
[1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,

In [123]:
print(accuracy_score)

0.7391304347826086


160.2


## SVM Classifier


In [129]:
from sklearn.svm import SVC

In [142]:
param_grid = {
    'C': [0.1, 1, 10, 100, 1000],
    'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
    'kernel': ['linear', 'rbf']
}

svc = SVC(random_state=111)
grid_search = GridSearchCV(estimator=svc, param_grid=param_grid, cv=5, verbose=2, n_jobs=-1)
grid_search.fit(X_train, y_train)

best_svc = grid_search.best_estimator_
print(f'Best Parameters: {best_svc}')

# Train the model with best parameters
best_model = SVC(C=0.1, gamma=1, kernel='linear', random_state=111)

best_model.fit(X_train, y_train)

Fitting 5 folds for each of 50 candidates, totalling 250 fits
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.2s
[CV] END ......................C=0.1, gamma=1, kernel=linear; total time=   0.2s
[CV] END ......................C=0.1, gamma=1, kernel=linear; total time=   0.2s
[CV] END ......................C=0.1, gamma=1, kernel=linear; total time=   0.2s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.2s
[CV] END ......................C=0.1, gamma=1, kernel=linear; total time=   0.2s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.2s
[CV] END ......................C=0.1, gamma=1, kernel=linear; total time=   0.2s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.3s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.3s
[CV] END ....................C=0.1, gamma=0.1, kernel=linear; total time=   0.1s
[CV] END ....................C=0.1, gamma=0.1, 

In [143]:
y_predict = best_model.predict(X_test)

y_pred = list(y_predict)
print(y_pred)
print(y_test)
y_test = list(y_test)
count = 0
for i in range(len(y_test)):
    if(y_test[i]==y_pred[i]):
        count+=1

accuracy_score = count/len(y_test)

print(count)
print(len(y_pred))


[1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1]
[1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1]
126
161


In [144]:
print(accuracy_score)

0.782608695652174


## Logistic Regression


In [150]:
from sklearn.linear_model import LogisticRegression

param_grid = {
    'C': [0.01, 0.1, 1, 10, 100],
    'solver': ['liblinear', 'saga'],
    'max_iter': [100, 200, 500]
}

log_reg = LogisticRegression(random_state=0)
grid_search = GridSearchCV(estimator=log_reg, param_grid=param_grid, cv=5, verbose=2, n_jobs=-1)
grid_search.fit(X_train, y_train)

# Get the best estimator
best_log_reg = grid_search.best_estimator_

print(best_log_reg)

# Predict on the test set with the best estimator
best_model = LogisticRegression(C=1, solver='liblinear', random_state=42)
best_model.fit(X_train, y_train)

Fitting 5 folds for each of 30 candidates, totalling 150 fits
[CV] END .............C=0.01, max_iter=100, solver=liblinear; total time=   0.0s
[CV] END .............C=0.01, max_iter=100, solver=liblinear; total time=   0.0s
[CV] END .............C=0.01, max_iter=100, solver=liblinear; total time=   0.1s
[CV] END .............C=0.01, max_iter=100, solver=liblinear; total time=   0.1s
[CV] END .............C=0.01, max_iter=100, solver=liblinear; total time=   0.1s
[CV] END .............C=0.01, max_iter=200, solver=liblinear; total time=   0.0s
[CV] END .............C=0.01, max_iter=200, solver=liblinear; total time=   0.0s
[CV] END .............C=0.01, max_iter=200, solver=liblinear; total time=   0.1s
[CV] END .............C=0.01, max_iter=200, solver=liblinear; total time=   0.0s
[CV] END .............C=0.01, max_iter=200, solver=liblinear; total time=   0.0s




[CV] END ..................C=0.01, max_iter=100, solver=saga; total time=   0.4s
[CV] END ..................C=0.01, max_iter=100, solver=saga; total time=   0.4s
[CV] END ..................C=0.01, max_iter=100, solver=saga; total time=   0.4s
[CV] END ..................C=0.01, max_iter=100, solver=saga; total time=   0.4s
[CV] END ..................C=0.01, max_iter=100, solver=saga; total time=   0.4s
[CV] END .............C=0.01, max_iter=500, solver=liblinear; total time=   0.0s
[CV] END .............C=0.01, max_iter=500, solver=liblinear; total time=   0.0s
[CV] END .............C=0.01, max_iter=500, solver=liblinear; total time=   0.0s
[CV] END .............C=0.01, max_iter=500, solver=liblinear; total time=   0.0s
[CV] END .............C=0.01, max_iter=500, solver=liblinear; total time=   0.0s
[CV] END ..............C=0.1, max_iter=100, solver=liblinear; total time=   0.0s
[CV] END ..............C=0.1, max_iter=100, solver=liblinear; total time=   0.0s
[CV] END ..............C=0.1



[CV] END ..................C=0.01, max_iter=200, solver=saga; total time=   0.7s
[CV] END ..............C=0.1, max_iter=200, solver=liblinear; total time=   0.0s
[CV] END ..................C=0.01, max_iter=200, solver=saga; total time=   0.8s
[CV] END ..................C=0.01, max_iter=200, solver=saga; total time=   0.8s
[CV] END ..................C=0.01, max_iter=200, solver=saga; total time=   0.8s
[CV] END ..............C=0.1, max_iter=200, solver=liblinear; total time=   0.0s
[CV] END ..............C=0.1, max_iter=200, solver=liblinear; total time=   0.1s
[CV] END ..............C=0.1, max_iter=200, solver=liblinear; total time=   0.0s
[CV] END ..............C=0.1, max_iter=200, solver=liblinear; total time=   0.1s
[CV] END ...................C=0.1, max_iter=100, solver=saga; total time=   0.4s
[CV] END ..............C=0.1, max_iter=500, solver=liblinear; total time=   0.1s




[CV] END ...................C=0.1, max_iter=100, solver=saga; total time=   0.4s
[CV] END ..............C=0.1, max_iter=500, solver=liblinear; total time=   0.1s
[CV] END ..............C=0.1, max_iter=500, solver=liblinear; total time=   0.1s
[CV] END ..............C=0.1, max_iter=500, solver=liblinear; total time=   0.0s
[CV] END ..............C=0.1, max_iter=500, solver=liblinear; total time=   0.1s
[CV] END ..................C=0.01, max_iter=500, solver=saga; total time=   0.8s
[CV] END ...................C=0.1, max_iter=100, solver=saga; total time=   0.4s




[CV] END ..................C=0.01, max_iter=500, solver=saga; total time=   0.9s
[CV] END ...................C=0.1, max_iter=100, solver=saga; total time=   0.4s
[CV] END ..................C=0.01, max_iter=500, solver=saga; total time=   1.0s
[CV] END ................C=1, max_iter=100, solver=liblinear; total time=   0.0s
[CV] END ................C=1, max_iter=100, solver=liblinear; total time=   0.1s
[CV] END ................C=1, max_iter=100, solver=liblinear; total time=   0.1s
[CV] END ................C=1, max_iter=100, solver=liblinear; total time=   0.1s
[CV] END ...................C=0.1, max_iter=100, solver=saga; total time=   0.4s
[CV] END ................C=1, max_iter=100, solver=liblinear; total time=   0.1s
[CV] END ...................C=0.1, max_iter=200, solver=saga; total time=   0.8s
[CV] END ...................C=0.1, max_iter=200, solver=saga; total time=   0.8s




[CV] END ...................C=0.1, max_iter=200, solver=saga; total time=   0.9s




[CV] END .....................C=1, max_iter=100, solver=saga; total time=   0.4s
[CV] END .....................C=1, max_iter=100, solver=saga; total time=   0.4s
[CV] END ..................C=0.01, max_iter=500, solver=saga; total time=   1.0s
[CV] END .....................C=1, max_iter=100, solver=saga; total time=   0.5s
[CV] END ................C=1, max_iter=200, solver=liblinear; total time=   0.1s
[CV] END ................C=1, max_iter=200, solver=liblinear; total time=   0.1s
[CV] END ................C=1, max_iter=200, solver=liblinear; total time=   0.1s
[CV] END ................C=1, max_iter=200, solver=liblinear; total time=   0.1s
[CV] END ..................C=0.01, max_iter=500, solver=saga; total time=   0.9s
[CV] END ................C=1, max_iter=200, solver=liblinear; total time=   0.0s




[CV] END .....................C=1, max_iter=100, solver=saga; total time=   0.5s
[CV] END ................C=1, max_iter=500, solver=liblinear; total time=   0.0s
[CV] END ...................C=0.1, max_iter=200, solver=saga; total time=   0.8s
[CV] END .....................C=1, max_iter=100, solver=saga; total time=   0.4s
[CV] END ................C=1, max_iter=500, solver=liblinear; total time=   0.1s
[CV] END ................C=1, max_iter=500, solver=liblinear; total time=   0.0s
[CV] END ................C=1, max_iter=500, solver=liblinear; total time=   0.0s
[CV] END ................C=1, max_iter=500, solver=liblinear; total time=   0.0s
[CV] END ...................C=0.1, max_iter=200, solver=saga; total time=   0.9s
[CV] END ...............C=10, max_iter=100, solver=liblinear; total time=   0.0s
[CV] END ...............C=10, max_iter=100, solver=liblinear; total time=   0.1s
[CV] END ...............C=10, max_iter=100, solver=liblinear; total time=   0.1s
[CV] END ...............C=10



[CV] END ...................C=0.1, max_iter=500, solver=saga; total time=   1.9s
[CV] END .....................C=1, max_iter=200, solver=saga; total time=   0.7s
[CV] END .....................C=1, max_iter=200, solver=saga; total time=   0.8s
[CV] END .....................C=1, max_iter=200, solver=saga; total time=   0.8s
[CV] END ...................C=0.1, max_iter=500, solver=saga; total time=   2.0s




[CV] END ....................C=10, max_iter=100, solver=saga; total time=   0.4s
[CV] END ...................C=0.1, max_iter=500, solver=saga; total time=   2.0s
[CV] END ...............C=10, max_iter=200, solver=liblinear; total time=   0.1s
[CV] END ...............C=10, max_iter=200, solver=liblinear; total time=   0.1s
[CV] END ...............C=10, max_iter=200, solver=liblinear; total time=   0.0s
[CV] END ...............C=10, max_iter=200, solver=liblinear; total time=   0.0s
[CV] END ....................C=10, max_iter=100, solver=saga; total time=   0.4s
[CV] END ....................C=10, max_iter=100, solver=saga; total time=   0.4s
[CV] END ...............C=10, max_iter=200, solver=liblinear; total time=   0.0s




[CV] END .....................C=1, max_iter=200, solver=saga; total time=   0.7s
[CV] END ....................C=10, max_iter=100, solver=saga; total time=   0.4s
[CV] END ....................C=10, max_iter=100, solver=saga; total time=   0.4s
[CV] END .....................C=1, max_iter=200, solver=saga; total time=   0.9s
[CV] END ...............C=10, max_iter=500, solver=liblinear; total time=   0.1s
[CV] END ...............C=10, max_iter=500, solver=liblinear; total time=   0.0s
[CV] END ...............C=10, max_iter=500, solver=liblinear; total time=   0.1s
[CV] END ...............C=10, max_iter=500, solver=liblinear; total time=   0.1s
[CV] END ...............C=10, max_iter=500, solver=liblinear; total time=   0.1s




[CV] END ....................C=10, max_iter=200, solver=saga; total time=   0.7s
[CV] END .....................C=1, max_iter=500, solver=saga; total time=   1.8s
[CV] END .....................C=1, max_iter=500, solver=saga; total time=   1.8s




[CV] END .....................C=1, max_iter=500, solver=saga; total time=   1.9s
[CV] END ..............C=100, max_iter=100, solver=liblinear; total time=   0.1s
[CV] END ....................C=10, max_iter=200, solver=saga; total time=   0.7s
[CV] END ..............C=100, max_iter=100, solver=liblinear; total time=   0.0s
[CV] END ....................C=10, max_iter=200, solver=saga; total time=   0.7s
[CV] END ..............C=100, max_iter=100, solver=liblinear; total time=   0.1s
[CV] END ..............C=100, max_iter=100, solver=liblinear; total time=   0.0s
[CV] END ..............C=100, max_iter=100, solver=liblinear; total time=   0.1s
[CV] END ...................C=0.1, max_iter=500, solver=saga; total time=   1.9s




[CV] END ...................C=100, max_iter=100, solver=saga; total time=   0.4s
[CV] END ...................C=0.1, max_iter=500, solver=saga; total time=   1.8s
[CV] END ..............C=100, max_iter=200, solver=liblinear; total time=   0.1s
[CV] END ....................C=10, max_iter=200, solver=saga; total time=   0.8s




[CV] END ...................C=100, max_iter=100, solver=saga; total time=   0.4s
[CV] END ..............C=100, max_iter=200, solver=liblinear; total time=   0.1s
[CV] END ..............C=100, max_iter=200, solver=liblinear; total time=   0.1s
[CV] END ..............C=100, max_iter=200, solver=liblinear; total time=   0.1s
[CV] END ....................C=10, max_iter=200, solver=saga; total time=   0.8s
[CV] END ..............C=100, max_iter=200, solver=liblinear; total time=   0.1s
[CV] END ...................C=100, max_iter=100, solver=saga; total time=   0.4s




[CV] END ...................C=100, max_iter=100, solver=saga; total time=   0.5s
[CV] END ..............C=100, max_iter=500, solver=liblinear; total time=   0.1s
[CV] END ....................C=10, max_iter=500, solver=saga; total time=   1.8s
[CV] END ...................C=100, max_iter=100, solver=saga; total time=   0.4s
[CV] END ....................C=10, max_iter=500, solver=saga; total time=   1.8s
[CV] END ..............C=100, max_iter=500, solver=liblinear; total time=   0.0s
[CV] END ..............C=100, max_iter=500, solver=liblinear; total time=   0.1s
[CV] END ..............C=100, max_iter=500, solver=liblinear; total time=   0.0s
[CV] END ..............C=100, max_iter=500, solver=liblinear; total time=   0.1s




[CV] END ...................C=100, max_iter=200, solver=saga; total time=   0.7s
[CV] END ...................C=100, max_iter=200, solver=saga; total time=   0.7s
[CV] END ...................C=100, max_iter=200, solver=saga; total time=   0.8s
[CV] END ....................C=10, max_iter=500, solver=saga; total time=   1.9s




[CV] END .....................C=1, max_iter=500, solver=saga; total time=   1.9s
[CV] END .....................C=1, max_iter=500, solver=saga; total time=   1.9s




[CV] END ...................C=100, max_iter=200, solver=saga; total time=   0.7s
[CV] END ...................C=100, max_iter=200, solver=saga; total time=   0.7s




[CV] END ....................C=10, max_iter=500, solver=saga; total time=   1.6s
[CV] END ...................C=100, max_iter=500, solver=saga; total time=   1.6s
[CV] END ...................C=100, max_iter=500, solver=saga; total time=   1.7s
[CV] END ...................C=100, max_iter=500, solver=saga; total time=   1.7s
[CV] END ...................C=100, max_iter=500, solver=saga; total time=   1.6s
[CV] END ....................C=10, max_iter=500, solver=saga; total time=   1.5s




[CV] END ...................C=100, max_iter=500, solver=saga; total time=   1.5s
LogisticRegression(C=1, random_state=0, solver='liblinear')


In [152]:
y_predict = best_model.predict(X_test)
y_pred = list(y_predict)
print(y_pred)
print(y_test)
y_test = list(y_test)
count = 0
for i in range(len(y_test)):
    if(y_test[i]==y_pred[i]):
        count+=1

accuracy_score = count/len(y_test)

print(count)
print(len(y_pred))

[1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1]
[1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1]
124
161


In [153]:
print(accuracy_score)

0.7701863354037267
