In [46]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score


model = LogisticRegression()
scaler = MinMaxScaler()

df = pd.read_csv("diabetes.csv")


# Dropping rows where Glucose or BMI are 0 (invalid values)
df = df[df['Glucose'] != 0]
df = df[df['BMI'] != 0]



# Replacing the 0 values in these columns with the median
df['Insulin']=df['Insulin'].replace(0, df['Insulin'].median())
df['BloodPressure']=df['BloodPressure'].replace(0, df['BloodPressure'].median())
df['SkinThickness']=df['SkinThickness'].replace(0, df['SkinThickness'].median())


# Scaling the values using MinMaxScaler and putting it back into the columns
df['Insulin'] = scaler.fit_transform(df[['Insulin']])
df['BloodPressure'] = scaler.fit_transform(df[['BloodPressure']])
df['SkinThickness'] = scaler.fit_transform(df[['SkinThickness']])
df['Glucose'] = scaler.fit_transform(df[['Glucose']])
df['BMI'] = scaler.fit_transform(df[['BMI']])

X = df.drop(columns='Outcome')
Y = df['Outcome']
X_train, X_test, y_train, y_test = train_test_split(X,Y, test_size=0.2, random_state=42)

# Splitting in to training and testing sets
model.fit(X_train, y_train)
predictions = model.predict(X_test)

# Predicting and evaluating
print(predictions)
accuracy = accuracy_score(y_test, predictions)
print("Model Accuracy:", round(accuracy * 100, 2), "%")










[1 0 1 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 1 0 0 1
 0 1 0 0 0 1 0 0 0 1 1 1 1 1 0 0 1 1 0 1 0 1 0 1 0 0 1 0 1 0 0 0 0 0 1 0 1
 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 1 0 0 0 1 0 0 1 0 0 0 1 0 0 1 0
 0 1 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 1 0 0 0 1 0 0 1 0 1 1 0 0 1 0
 0 0 1]
Model Accuracy: 78.81 %
