In [1]:
import pandas as pd
# import numpy as np

In [2]:
df = pd.read_csv("diabetes.csv")

In [3]:
df

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1
...,...,...,...,...,...,...,...,...,...
764,2,122,70,27,0,36.8,0.340,27,0
765,5,121,72,23,112,26.2,0.245,30,0
766,1,126,60,0,0,30.1,0.349,47,1
767,1,93,70,31,0,30.4,0.315,23,0


In [4]:
df.isnull().sum()

Pregnancies                 0
Glucose                     0
BloodPressure               0
SkinThickness               0
Insulin                     0
BMI                         0
DiabetesPedigreeFunction    0
Age                         0
Outcome                     0
dtype: int64

In [5]:
x = df.drop("Outcome", axis=1)
# x = np.array(x)
x

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age
0,6,148,72,35,0,33.6,0.627,50
1,1,85,66,29,0,26.6,0.351,31
2,8,183,64,0,0,23.3,0.672,32
3,1,89,66,23,94,28.1,0.167,21
4,0,137,40,35,168,43.1,2.288,33
...,...,...,...,...,...,...,...,...
764,2,122,70,27,0,36.8,0.340,27
765,5,121,72,23,112,26.2,0.245,30
766,1,126,60,0,0,30.1,0.349,47
767,1,93,70,31,0,30.4,0.315,23


In [6]:
y = df["Outcome"]
y

0      1
1      0
2      1
3      0
4      1
      ..
764    0
765    0
766    1
767    0
768    1
Name: Outcome, Length: 769, dtype: int64

In [7]:
from sklearn.model_selection import train_test_split

In [8]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42, stratify=y)

In [9]:
from sklearn.linear_model import LogisticRegression

In [10]:
model = LogisticRegression(max_iter=1000).fit(x_train, y_train)
model

In [11]:
y_pred = model.predict(x_test)
y_pred

array([1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0,
       1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1,
       0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
       1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1,
       1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1,
       0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0],
      dtype=int64)

In [12]:
from sklearn.metrics import accuracy_score, mean_absolute_error, mean_squared_error, r2_score

In [13]:
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r2_sc = r2_score(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)

In [14]:
print(f"Mean Absolute Error = ", {mae})
print(f"Mean Squared Error = ", {mse})
print(f"R2 Score = ", {r2_sc})
print(f"Accuracy = ", {accuracy})

Mean Absolute Error =  {0.2792207792207792}
Mean Squared Error =  {0.2792207792207792}
R2 Score =  {-0.22629629629629644}
Accuracy =  {0.7207792207792207}


<h3>OPTIMIZATION</h3>

In [15]:
from sklearn.model_selection import GridSearchCV

In [16]:
model = LogisticRegression()
model

In [17]:
param_grid = {
     "penalty":[None, 'l2'],
     "C":[1.0, 1.5],
     "solver":['newton-cg', 'newton-cholesky']
 }

In [18]:
grid_search = GridSearchCV(model, param_grid, cv=5, n_jobs=-1)
grid_search.fit(x_train, y_train)

In [19]:
best_params = grid_search.best_params_
print("Best Parameters :", best_params)

Best Parameters : {'C': 1.0, 'penalty': None, 'solver': 'newton-cg'}


In [20]:
best_model = LogisticRegression(**best_params)

In [21]:
best_model.fit(x_train, y_train)

In [22]:
best_model

In [23]:
y_pred = best_model.predict(x_test)
y_pred

array([1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0,
       1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1,
       0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
       1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1,
       1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1,
       0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0],
      dtype=int64)

In [24]:
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r2_sc = r2_score(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)

In [25]:
print(f"Mean Absolute Error = ", {mae})
print(f"Mean Squared Error = ", {mse})
print(f"R2 Score = ", {r2_sc})
print("Best Parameters :", best_params)
print(f"Accuracy = ", {accuracy})

Mean Absolute Error =  {0.2792207792207792}
Mean Squared Error =  {0.2792207792207792}
R2 Score =  {-0.22629629629629644}
Best Parameters : {'C': 1.0, 'penalty': None, 'solver': 'newton-cg'}
Accuracy =  {0.7207792207792207}
