In [60]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [61]:
# Location of the diabetes CSV and the labels applied to its columns when it
# is read (the load cell passes header=None, so the labels come from here).
url = (
    "https://raw.githubusercontent.com/jbrownlee/Datasets/master/"
    "pima-indians-diabetes.data.csv"
)
column_names = [
    'Pregnancies',
    'Glucose',
    'BloodPressure',
    'SkinThickness',
    'Insulin',
    'BMI',
    'DiabetesPedigreeFunction',
    'Age',
    'Outcome',  # target column used for y further down
]

In [62]:
# Read the remote CSV without treating any row as a header; column labels are
# taken from column_names instead.
df = pd.read_csv(
    url,
    names=column_names,
    header=None,
)

In [63]:
# Quick look at the first five loaded rows.
df.head(n=5)

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [64]:
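# Abandoned attempt (kept for reference; superseded by the explicit filter further down):
# pima = df.iloc[:,1:6][(df != 0).all(axis=1)]
# Why it was dropped: (df != 0).all(axis=1) requires EVERY column to be non-zero,
# including Pregnancies and Outcome, and iloc[:, 1:6] keeps only columns 1-5.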

In [65]:
# Columns that the row filter below requires to be non-zero:
# ['Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI']

In [66]:
# Keep only the rows in which none of these five measurements equals 0.
# All columns are retained; only rows are removed.
pima = df[
    df[['Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI']]
    .ne(0)
    .all(axis=1)
]

In [67]:
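# Abandoned attempt (kept for reference; the explicit filter above is the one in use):
# pima = df[['Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI']][(df != 0).all(axis=1)]
# Why it was dropped: the mask tests every column of df (so rows with Pregnancies == 0
# or Outcome == 0 are lost too), and the result keeps only the five listed columns.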

In [73]:
# Preview the first five filtered rows with a fresh 0-based index.
# The result is only displayed: pima itself is not reassigned here.
pima.head().reset_index(drop=True)

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,1,89,66,23,94,28.1,0.167,21,0
1,0,137,40,35,168,43.1,2.288,33,1
2,3,78,50,32,88,31.0,0.248,26,1
3,2,197,70,45,543,30.5,0.158,53,1
4,1,189,60,23,846,30.1,0.398,59,1


In [74]:
# Split the filtered frame into the target column and the remaining predictors.
y = pima['Outcome']
X = pima.drop(columns='Outcome')

In [75]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# shuffle-based split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [76]:
# Feature standardizer, constructed with its default arguments. It is fit in
# the next cell, on the training split only.
scaler = StandardScaler()

In [77]:
# Learn the scaling statistics from the training features, then apply them to
# those same features.
X_train_scaled = scaler.fit(X_train).transform(X_train)

In [89]:
# Bare expression: lets the notebook display the scaled training array.
X_train_scaled

array([[-1.02718262e+00,  1.86334398e+00,  1.51359244e+00, ...,
         4.66955416e-01, -6.03905973e-01,  3.82185098e-01],
       [-7.22078871e-02, -1.37262755e+00,  1.19953581e+00, ...,
        -8.15248900e-01, -6.27360814e-01, -8.59916471e-01],
       [ 1.51941667e+00,  9.82686032e-02,  1.35656412e+00, ...,
         7.51889709e-01, -5.01291040e-01,  1.71983294e+00],
       ...,
       [-1.02718262e+00, -1.95910627e-01, -3.70747369e-01, ...,
        -3.45107317e-01, -7.91038891e-02, -8.59916471e-01],
       [-7.22078871e-02,  2.08859943e-04,  2.29873403e+00, ...,
         3.43027206e+00,  1.05552408e+00, -8.59916471e-01],
       [ 1.83774158e+00, -3.59343532e-01,  8.85479170e-01, ...,
         1.39280980e-01, -7.62226154e-01,  4.77731373e-01]])

In [78]:
# Apply the already-fitted scaler to the held-out features. Only transform()
# is called here, so the scaler is not refit on the test split.
X_test_scaled = scaler.transform(X_test)

In [90]:
# Bare expression: lets the notebook display the scaled test array.
# NOTE(review): this prints the whole array; a slice such as X_test_scaled[:5]
# would keep the saved output short.
X_test_scaled

array([[ 1.20109176e+00, -6.86209343e-01,  2.57365900e-01,
         1.01614390e+00, -4.58272621e-01,  5.66682419e-01,
        -9.26410046e-01,  1.33764784e+00],
       [-1.02718262e+00,  1.30955184e-01,  7.28450853e-01,
         7.36249763e-01,  4.30919119e-01,  4.38461987e-01,
         8.32703083e-01, -7.64370197e-01],
       [ 2.46117024e-01, -1.24188122e+00, -9.98860639e-01,
        -6.63220941e-01, -9.32508216e-01, -7.72508756e-01,
        -6.27360814e-01, -2.86638824e-01],
       [-1.02718262e+00,  1.27498552e+00,  4.14394218e-01,
         2.50891266e+00, -5.00615085e-01,  2.84615676e+00,
         7.00769598e-01, -5.73277648e-01],
       [-1.02718262e+00,  1.89603056e+00,  1.35656412e+00,
         1.38933609e+00,  2.97146695e+00,  1.43573201e+00,
        -8.73636652e-01, -4.77731373e-01],
       [-7.22078871e-02, -8.82328830e-01, -1.15588896e+00,
         4.56355623e-01, -3.73587694e-01, -1.21415691e+00,
         1.24316281e+00,  7.64370197e-01],
       [-7.08857709e-01, -1.699493

In [79]:
# Logistic-regression classifier with library defaults apart from the seed.
# NOTE(review): random_state may have no effect with the default solver;
# confirm against the installed scikit-learn version.
model = LogisticRegression(random_state=42)

In [80]:
# Train the classifier on the standardized training features and their labels.
# Left as a bare expression so the cell displays whatever fit() returns.
model.fit(X_train_scaled, y_train)

In [81]:
# Predictions for the held-out rows, computed from the standardized test features.
y_pred = model.predict(X_test_scaled)

In [82]:
# Accuracy of the predictions measured against the held-out labels.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

In [87]:
# Report the test accuracy to four decimal places (preceded by a blank line).
print(f"\nAccuracy: {accuracy:.4f}")


Accuracy: 0.7722


In [83]:
# Confusion matrix of held-out labels versus predictions.
conf_matrix = confusion_matrix(y_true=y_test, y_pred=y_pred)

In [86]:
# Heading and matrix on consecutive lines, emitted by one print call.
print("\nConfusion Matrix:", conf_matrix, sep="\n")


Confusion Matrix:
[[45  7]
 [11 16]]


In [84]:
# Text report built from the held-out labels and the predictions.
class_report = classification_report(y_true=y_test, y_pred=y_pred)

In [88]:
# Heading and report on consecutive lines, emitted by one print call.
print("\nClassification Report:", class_report, sep="\n")


Classification Report:
              precision    recall  f1-score   support

           0       0.80      0.87      0.83        52
           1       0.70      0.59      0.64        27

    accuracy                           0.77        79
   macro avg       0.75      0.73      0.74        79
weighted avg       0.77      0.77      0.77        79

