# In [1]:
import pandas as pd
import numpy as np
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression


# step-1 : load data
# The CSV is fetched over HTTP from the plotly/datasets GitHub repository and
# parsed straight into a DataFrame.
url = (
    "https://raw.githubusercontent.com/"
    "plotly/datasets/master/diabetes.csv"
)
df = pd.read_csv(filepath_or_buffer=url)

# step-2: preprocessing the data

# A 0 in these columns is treated as a missing measurement (presumably because
# a zero reading is not plausible for them -- confirm against the dataset
# description), so it is converted to NaN for the imputer to fill.
cols_with_zeros = ['Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI']
df[cols_with_zeros] = df[cols_with_zeros].replace(0, np.nan)

# NOTE: the imputer is deliberately NOT fitted on the full DataFrame here.
# Computing the column means before the train/test split would let test-set
# values influence the training data (data leakage) and inflate the scores.
# It is used as the first pipeline step instead, so its means are learned from
# X_train only. As a consequence `df` keeps its NaN markers.
imputer = SimpleImputer(strategy='mean')

# step-3 : Model Training Phase

X = df[[
    'Pregnancies',
    'Glucose',
    'BloodPressure',
    'SkinThickness',
    'Insulin',
    'BMI',
    'DiabetesPedigreeFunction',
    'Age',
]]
y = df['Outcome']

# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Every step is fitted on the training split only and then re-applied,
# unchanged, to whatever is passed to predict().
# The imputer sees all feature columns; a column without NaN is returned
# unchanged by mean imputation.
pipeline = Pipeline([
    ("imputer", imputer),
    ("scaling", StandardScaler()),
    ('model', LogisticRegression()),
])
pipeline.fit(X_train, y_train)

y_pred = pipeline.predict(X_test)


from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Compare the predictions for the held-out test split against the true labels.
print("Accuracy of model", accuracy_score(y_test, y_pred))
# The confusion matrix and the classification report are multi-line; starting
# them on their own line keeps their rows and columns aligned.
print("Confusion matrix of model", confusion_matrix(y_test, y_pred), sep="\n")
print("Classification report of model", classification_report(y_test, y_pred), sep="\n")
