In [1]:
import numpy as np
import pickle
from sklearn.preprocessing import StandardScaler
import pandas as pd
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score, roc_auc_score, roc_curve


In [2]:
# Load the pickled lr object
with open('logistic_regression_model.pkl', 'rb') as lr_file:
    scaler, lr = pickle.load(lr_file)

In [3]:
unknown_data = {

    'Pregnancies': np.random.randint(1, 17, size=154),
    'Glucose': np.random.randint(44, 199, size=154),
    'BloodPressure': np.random.randint(24, 122, size=154),
    'SkinThickness': np.random.randint(7, 99, size=154),
    'Insulin': np.random.randint(18, 846, size=154),
    'BMI': np.random.randint(18, 68, size=154),
    'DiabetesPedigreeFunction': np.random.randint(0.078, 2.42, size=154),
    'Age': np.random.randint(21, 81, size=154)
    }

In [4]:
#converting the dictionar into df
unknown_df = pd.DataFrame(unknown_data)
unknown_df.describe()


Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age
count,154.0,154.0,154.0,154.0,154.0,154.0,154.0,154.0
mean,8.974026,119.766234,71.74026,54.428571,417.935065,42.461039,0.480519,51.233766
std,4.816839,46.502343,29.799505,25.94183,240.423338,14.876895,0.50125,17.914755
min,1.0,45.0,24.0,7.0,20.0,18.0,0.0,21.0
25%,5.0,73.25,44.0,32.0,202.25,29.0,0.0,36.25
50%,10.0,119.0,69.0,55.5,437.0,41.5,0.0,54.0
75%,13.0,155.0,98.0,76.75,610.75,56.0,1.0,66.75
max,16.0,198.0,120.0,98.0,833.0,67.0,1.0,80.0


In [5]:
#scaling the data according to similar scaling done in main file

# Use the scaler to transform new data
new_scaled_data = scaler.transform(unknown_df)

# Make predictions using the logistic regression model
predictions = lr.predict(new_scaled_data)



In [6]:
#opening the Y_test cases for evaluation of model
with open('y_test.pkl', 'rb') as file:
    y_test = pickle.load(file)



In [7]:
# Evaluate the model
accuracy = accuracy_score(y_test, predictions)
precision = precision_score(y_test, predictions)
recall = recall_score(y_test, predictions)
f1 = f1_score(y_test, predictions)
roc_auc = roc_auc_score(y_test, predictions)

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)
print("ROC AUC Score:", roc_auc)

Accuracy: 0.45454545454545453
Precision: 0.34065934065934067
Recall: 0.5636363636363636
F1 Score: 0.4246575342465754
ROC AUC Score: 0.47878787878787876
