Demo for usage of Linear classifier

In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score

In [2]:
# load data
data = pd.read_csv("diabetes_new.csv")
data.head(5)

Unnamed: 0.1,Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,0,6,148.0,72.0,35.0,155.0,33.6,0.627,50,1
1,1,1,85.0,66.0,29.0,155.0,26.6,0.351,31,0
2,2,8,183.0,64.0,29.0,155.0,23.3,0.672,32,1
3,3,1,89.0,66.0,23.0,94.0,28.1,0.167,21,0
4,4,0,137.0,40.0,35.0,168.0,43.1,2.288,33,1


In [3]:
data.shape
# data.describe()
# data['Outcome'].unique()

(768, 10)

In [4]:
# specify input and ouput data
X = data.iloc[:, 0:(len(data.columns)-1)] # input
y = data.iloc[:, len(data.columns)-1]   # output

# split data for training and testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [5]:
# standadize input data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [6]:
# define and train KNN model
model = LinearRegression().fit(X_train_scaled, y_train)
model.coef_, model.intercept_

(array([-0.01611157,  0.03032066,  0.1979481 , -0.01427682,  0.00043618,
         0.00202305,  0.11339384,  0.01574726,  0.06376314]),
 0.35009310986964626)

In [7]:
# predict on test data
y_pred = model.predict(X_test_scaled)
print(y_pred)

[ 2.91488370e-01  2.45175769e-01  1.30329034e-01  1.58138694e-01
  4.87600263e-01  4.62715517e-01 -2.23903905e-01  3.92707459e-01
  5.49194880e-01  7.23291771e-01  2.63406552e-01  9.06144597e-01
  4.96310956e-01  2.95606705e-01 -1.31472909e-02  3.85598810e-01
  1.29165313e-01  3.51487000e-02  6.79379441e-01  5.71418654e-01
  2.33996586e-01  4.98972797e-02  4.77181237e-01  1.15909519e-01
  5.39969881e-01  8.78428306e-01  1.22286599e-01 -9.41713675e-02
  3.35298748e-01  1.19195660e-01  9.04101023e-01  7.94795518e-01
  8.10642745e-01  6.18840905e-01  5.50570253e-01  6.44617923e-01
  1.02198542e+00  2.83623841e-01  4.53366733e-01  5.27936831e-01
  8.38864850e-03  5.03981439e-01  5.10729508e-01  3.09689022e-01
 -6.53430557e-02  5.83241585e-01  5.54719777e-01  2.58493891e-01
  3.42945712e-01  1.03947803e+00 -4.48712959e-02  6.13821993e-01
  7.91590206e-01  3.04501579e-01  1.87276039e-01 -6.35759045e-02
  7.14449184e-01 -1.85724546e-02  3.82260023e-01  7.11120868e-01
  6.33109735e-01  3.43960

In [8]:
y_pred[y_pred>=0.5] = 1
y_pred[y_pred<0.5] = 0
print(y_pred)

[0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 0. 1. 0. 0. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0.
 1. 1. 0. 0. 0. 0. 1. 1. 1. 1. 1. 1. 1. 0. 0. 1. 0. 1. 1. 0. 0. 1. 1. 0.
 0. 1. 0. 1. 1. 0. 0. 0. 1. 0. 0. 1. 1. 0. 0. 0. 0. 1. 0. 1. 0. 1. 1. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 1.
 0. 0. 1. 0. 1. 0. 1. 1. 1. 0. 0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 1. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 1. 1. 1. 1. 1. 0. 0. 1. 0. 0. 1. 1. 0. 0. 0. 0. 1.
 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 1. 1. 0. 0. 1. 0. 0. 0. 1. 0. 0.
 1. 0. 1. 0. 0. 1. 1. 0. 0. 0. 0. 0. 1. 1. 0. 1. 1. 0. 0. 0. 1. 0. 0. 0.
 1. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 1. 0. 0. 0. 1. 1. 0. 0. 0. 0. 1. 0.
 1. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]


In [9]:
# evaluate the model
print(accuracy_score(y_test, y_pred))

0.7445887445887446


In [10]:
print(confusion_matrix(y_test, y_pred))

[[125  26]
 [ 33  47]]
