# Logistic Regression

In [None]:
import warnings
warnings.filterwarnings('ignore')

In [None]:
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
X, y = load_iris(return_X_y=True)
clf = LogisticRegression(random_state=0).fit(X, y)

In [None]:
clf.predict(X[:2, :])

In [None]:
clf.predict_proba(X[:2, :])

In [None]:
clf.score(X, y)

# Confusion Matrix

In [None]:
from sklearn.metrics import confusion_matrix

In [None]:
confusion_matrix(y, clf.predict(X))

In [None]:
confusion_matrix(y, clf.predict(X), normalize='true')

In [None]:
i

# 1. Solve classification problem using 'classification.csv' dataset

##### target variable is 'default'. Apply feature selection, feature scaling, cross validation etc. (anything you think is needed)

In [139]:
import pandas as pd
from sklearn.model_selection import train_test_split

In [169]:
data=pd.read_csv("classification.csv")
y=data['default']
data_x=pd.get_dummies(data[data.columns.difference(['default'])])


In [180]:
#seperate 15 rows by hand for test
data_hand=data_x[data_x.index%50==0].copy() #x-s
y_hand=y[y.index%50==0].copy() #y-s

In [90]:
from sklearn.model_selection import cross_val_score, cross_val_predict

In [182]:
#cross validation
logreg = LogisticRegression(random_state=0).fit(data_x, y)
pred=cross_val_predict(logreg, data_x, y, cv=5)
pred_hand=cross_val_predict(logreg, data_hand, y_hand, cv=5)
confusion_matrix(y, pred), confusion_matrix(y_hand, pred_hand)

(array([[474,  43],
        [ 91,  92]], dtype=int64),
 array([[7, 2],
        [3, 2]], dtype=int64))

In [183]:
#train on all datas
logreg = LogisticRegression(random_state=0).fit(data_x, y)
pred=logreg.predict(data_x)
pred_hand=logreg.predict(data_test)
confusion_matrix(y, pred), confusion_matrix(y_hand, pred_hand)

(array([[479,  38],
        [ 91,  92]], dtype=int64),
 array([[8, 1],
        [3, 2]], dtype=int64))

In [181]:
#split method 
x_train,x_test,y_train,y_test=train_test_split(data_x, y, test_size=0.2, random_state=1)
logreg = LogisticRegression(random_state=0).fit(x_train, y_train)
pred_train=logreg.predict(x_train)
pred_test=logreg.predict(x_test)
pred_hand=logreg.predict(data_hand)
print(confusion_matrix(y_train, pred_train))
print(confusion_matrix(y_test, pred_test))
confusion_matrix(y_hand, pred_hand) 


[[381  33]
 [ 73  73]]
[[98  5]
 [17 20]]


array([[9, 0],
       [3, 2]], dtype=int64)

# 2. Print accuracy, confusion matrix, precision, recall, sensitivity and specifity on train and test (and maybe validation) datasets.

##### do not use any libraries for metrics, implement yourself

In [238]:
import numpy as np

In [251]:
#create confusion matrix
cmtr=np.array([[0,0],[0,0]])
cmtst=np.array([[0,0],[0,0]])
cmtsth=np.array([[0,0],[0,0]])
#train
for pred, exp in zip(pred_train,  y_train):
    cmtr[pred][exp] += 1
cmtr=cmtr.transpose()

#test
for pred, exp in zip(pred_test,  y_test):
    cmtst[pred][exp] += 1
cmtst=cmtst.transpose()

#hand
for pred, exp in zip(pred_hand,  y_hand):
    cmtsth[pred][exp] += 1
cmtsth=cmtsth.transpose()

print(confusion_matrix(y_train, pred_train))
print(confusion_matrix(y_test, pred_test))
print(confusion_matrix(y_hand, pred_hand))
cmtr,cmtst,cmtsth

[[381  33]
 [ 73  73]]
[[98  5]
 [17 20]]
[[8 1]
 [3 2]]


(array([[381,  33],
        [ 73,  73]]),
 array([[98,  5],
        [17, 20]]),
 array([[8, 1],
        [3, 2]]))

In [266]:
from sklearn import metrics
print("Conf Matrix Train: \n",cmtr)
print("Conf Matrix Test: \n",cmtst)
print("Conf Matrix Hand: \n",cmtsth) 
accuracy_train=cmtr.diagonal().sum()/len(y_train)
accuracy_test=cmtst.diagonal().sum()/len(y_test)
recall_train_p=cmtr[0][0]/cmtr[0].sum()
recall_test_p=cmtst[0][0]/cmtst[0].sum()
recall_train_n=cmtr[1][1]/cmtr[1].sum()
recall_test_n=cmtst[1][1]/cmtst[1].sum()
prec_train_p=cmtr[0][0]/cmtr[:,0].sum()
prec_test_p=cmtst[0][0]/cmtst[:,0].sum()
prec_train_n=cmtr[1][1]/cmtr[:,1].sum()
prec_test_n=cmtst[1][1]/cmtst[:,1].sum()
print("Accuracy Train:",accuracy_train)
print("Accuracy Test:",accuracy_test)
print("Recall Train Positive - Sensitivity :",recall_train_p)
print("Recall Test Positive - Sensitivity :",recall_test_p)
print("Recall Train Negative - Specificity:",recall_train_n)
print("Recall Test Negative - Specificity:",recall_test_n)
print("Precision Train Positive:",prec_train_p)
print("Precision Test Positive:",prec_test_p)
print("Precision Train Negative:",prec_train_n)
print("Precision Test Negative:",prec_test_n)
# print("Accuracy Train:",metrics.accuracy_score(y_train, pred_train))
# print("Accuracy Test:",metrics.accuracy_score(y_test, pred_test))
# print("Precision Test:",metrics.precision_score(y_test, pred_test, average=None))
# print("Precision Train:",metrics.precision_score(y_train, pred_train, average=None))
# print("Precision Test:",metrics.precision_score(y_test, pred_test))
# print("Recall:",metrics.recall_score(y_train, pred_train))
# print("Recall:",metrics.recall_score(y_test, pred_test))

Conf Matrix Train: 
 [[381  33]
 [ 73  73]]
Conf Matrix Test: 
 [[98  5]
 [17 20]]
Conf Matrix Hand: 
 [[8 1]
 [3 2]]
Accuracy Train: 0.8107142857142857
Accuracy Test: 0.8428571428571429
Recall Train Positive - Sensitivity : 0.9202898550724637
Recall Test Positive - Sensitivity : 0.9514563106796117
Recall Train Negative - Specificity: 0.5
Recall Test Negative - Specificity: 0.5405405405405406
Precision Train Positive: 0.8392070484581498
Precision Test Positive: 0.8521739130434782
Precision Train Negative: 0.6886792452830188
Precision Test Negative: 0.8
