In [1]:
# Import Pandas.
import pandas as pd

In [2]:
# Load the diabetes dataset from disk and preview the first five rows.
df = pd.read_csv(filepath_or_buffer='Resources/diabetes.csv')
df.head(n=5)

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [3]:
# Split the frame into the feature matrix X (every column except Outcome)
# and the target vector y (the Outcome column).
X = df.drop(columns=['Outcome'])
y = df.loc[:, 'Outcome']

In [4]:
# Hold out part of the data for evaluation. `stratify=y` keeps the Outcome class
# proportions the same in both subsets, and the fixed seed makes the split repeatable.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=1,
)

In [5]:
# Inspect the dimensions of the training feature matrix as (rows, columns).
X_train.shape

(576, 8)

In [6]:
# Bring in the logistic regression estimator and configure it:
# lbfgs solver, at most 200 optimizer iterations, fixed seed.
from sklearn.linear_model import LogisticRegression

classifier = LogisticRegression(
    random_state=1,
    max_iter=200,
    solver='lbfgs',
)

In [7]:
# Fit the classifier on the training features and their Outcome labels.
classifier.fit(X=X_train, y=y_train)

LogisticRegression(max_iter=200, random_state=1)

In [8]:
# Predict an Outcome label for every row of the held-out test features.
y_pred = classifier.predict(X=X_test)

# Show each prediction beside the true label; rows keep y_test's index.
pd.DataFrame(
    data={
        "Prediction": y_pred,
        "Actual": y_test,
    }
)

Unnamed: 0,Prediction,Actual
118,0,0
132,1,1
3,0,0
693,1,1
654,0,0
...,...,...
427,1,1
135,0,0
501,0,0
392,0,0


In [9]:
# Fraction of test rows whose predicted label equals the true label.
# scikit-learn metrics take (y_true, y_pred) in that order; accuracy happens to be
# symmetric, but keeping the documented order avoids copying a swapped call into
# metrics where the order changes the result.
from sklearn.metrics import accuracy_score
accuracy_score(y_test, y_pred)

0.7760416666666666

In [10]:
# Import the confusion matrix and classification report functions to evaluate precision and recall (sensitivity).
from sklearn.metrics import confusion_matrix, classification_report

In [11]:
# Build the confusion matrix from the true labels and the predictions.
# Rows are the actual classes and columns the predicted classes, so for
# labels 0/1 the layout is [[TN, FP], [FN, TP]].
matrix = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(matrix)

[[113  12]
 [ 31  36]]


In [12]:
# Derive the class-0 ("no diabetes") metrics from the confusion matrix itself
# instead of retyping its counts, so this cell stays correct if the data, seed
# or model changes. For binary labels scikit-learn lays the matrix out as
# [[TN, FP], [FN, TP]] (rows = actual class, columns = predicted class).
tn, fp, fn, tp = matrix.ravel()

# Precision for class 0: of all rows predicted 0, the share that is actually 0.
# (The class-1 counterpart would be tp / (tp + fp).)
p = tn / (tn + fn)
# Recall for class 0, i.e. the specificity of the diabetes-positive class:
# of all rows that are actually 0, the share predicted 0.
# (The class-1 sensitivity would be tp / (tp + fn).)
s = tn / (tn + fp)

print(p)
print(s)

0.7847222222222222
0.904


In [13]:
# Have scikit-learn compute per-class precision, recall, F1 and support.
report = classification_report(y_true=y_test, y_pred=y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.78      0.90      0.84       125
           1       0.75      0.54      0.63        67

    accuracy                           0.78       192
   macro avg       0.77      0.72      0.73       192
weighted avg       0.77      0.78      0.77       192

