In [1]:
import pandas as pd

data = pd.read_csv("data/diabetes.csv")
df = data.copy()

In [2]:
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LogisticRegression
numeric_features = ["Pregnancies","Glucose","BloodPressure","SkinThickness","Insulin","BMI","DiabetesPedigreeFunction","Age"]

numeric_transformer = StandardScaler()

preprocessor = ColumnTransformer(
    transformers=[
        ("num",numeric_transformer,numeric_features)
    ]
)

clf = Pipeline(steps=[
    ("preprocessor",preprocessor),
    ("classifier",LogisticRegression())
])

In [3]:
from sklearn.model_selection import train_test_split
X = df.drop(columns=["Outcome"])
y = df["Outcome"]
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=42)

clf.fit(X_train,y_train)
prediction = clf.predict(X_test)

In [4]:
from sklearn.metrics import mean_absolute_error,mean_squared_error,r2_score, accuracy_score,precision_score,recall_score,confusion_matrix,f1_score
import numpy as np
mse = mean_squared_error(y_test,prediction)
mae = mean_absolute_error(y_test,prediction)
rmse = np.sqrt(mse)
r2 = r2_score(y_test,prediction)


# print("Accuracy: ",accuracy_score(y_test,prediction))
# print("Precision Score: ",precision_score(y_test,prediction))
# print("Recall Score: ",recall_score(y_test,prediction))
# print("F1 Score: ",f1_score(y_test,prediction))
print("Confusion Matrix: ", confusion_matrix(y_test,prediction))
print("Mean Absolute Error (MAE):", mae)
print("Mean Squared Error (MSE):", mse)
print("Root Mean Squared Error (RMSE):", rmse)
print("R² Score:", r2)

Confusion Matrix:  [[79 20]
 [18 37]]
Mean Absolute Error (MAE): 0.24675324675324675
Mean Squared Error (MSE): 0.24675324675324675
Root Mean Squared Error (RMSE): 0.496742636335202
R² Score: -0.074747474747475
