In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix

In [2]:
'''DATA LOAD'''
df = pd.read_excel('Final_data_binary.xlsx', sheet_name='AIN')
X = df.drop('ocena', axis=1)
y = np.array(df['ocena'])
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=101)

In [3]:
'''SCALING'''
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [4]:
lr = LogisticRegression(random_state=0)

In [5]:
'''GridSearch for best parameters'''
parameters = [{'C': [1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1, 10, 100], 'penalty': ['l1', 'l2'], 'solver':['liblinear']},]
grid_search = GridSearchCV(estimator=lr, param_grid=parameters, scoring='accuracy', cv=10, n_jobs=-1)
grid_search.fit(X_train, y_train)
best_accuracy = grid_search.best_score_
best_parameters = grid_search.best_params_
print(f'Best accuracy: {best_accuracy*100}')
print(f'Best parameters: {best_parameters}')

Best accuracy: 84.80872677825104
Best parameters: {'C': 0.001, 'penalty': 'l1', 'solver': 'liblinear'}


In [6]:
'''Prepare LR with best parameters'''
lr = LogisticRegression(random_state=0, C=0.001, penalty='l1', solver='liblinear')
lr.fit(X_train, y_train)
predictions = lr.predict(X_test)

In [8]:
print(np.concatenate((predictions.reshape(len(predictions), 1), y_test.reshape(len(y_test), 1)), 1)[0:100])


[[1 0]
 [1 1]
 [1 1]
 [1 0]
 [1 0]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 0]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 0]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 0]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 0]
 [1 1]
 [1 1]
 [1 0]
 [1 0]
 [1 0]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 0]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 0]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 0]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 0]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 0]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [1 1]]


In [46]:
'''Confusion Matrix'''
cm = confusion_matrix(y_test, predictions)
print(cm)

[[   0  247]
 [   0 1335]]
