In [25]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import RobustScaler
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
import pandas as pd

In [26]:
# Read the diabetes CSV from the data directory one level above the notebook.
df = pd.read_csv(filepath_or_buffer='../data/diabetes.csv')

In [27]:
# Preview the first five rows to sanity-check the columns that were loaded.
df.head(n=5)

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [28]:
# Label column first, then every remaining column as the feature matrix.
y = df["Outcome"]
X = df.drop(columns=["Outcome"])

In [29]:
def model_comparison(split_ratio, features=None, target=None, *,
                     random_state=42, n_neighbors=5, max_iter=1000):
    """Fit logistic regression and k-NN on one train/test split and score both.

    The data is split with stratification on the labels, scaled with a
    ``RobustScaler`` that is fitted on the training portion only, and both
    classifiers are trained on the scaled training data.

    Parameters
    ----------
    split_ratio : float or int
        Forwarded to ``train_test_split`` as ``test_size`` (i.e. the size of
        the TEST portion).
    features, target : optional
        Feature matrix and labels to split. When omitted, the notebook-level
        ``X`` and ``y`` are used, so existing one-argument calls are unchanged.
    random_state : int, default 42
        Seed forwarded to ``train_test_split``.
    n_neighbors : int, default 5
        Number of neighbours for ``KNeighborsClassifier``.
    max_iter : int, default 1000
        Iteration cap for ``LogisticRegression``.

    Returns
    -------
    dict
        ``'split_ratio'`` (the TRAIN fraction, despite the key name),
        ``'lr_train_acc'``, ``'lr_test_acc'``, ``'knn_train_acc'`` and
        ``'knn_test_acc'``, in that order.
    """
    # Fall back to the notebook globals only when the caller passes no data.
    data = X if features is None else features
    labels = y if target is None else target

    X_train, X_test, y_train, y_test = train_test_split(
        data, labels,
        test_size=split_ratio,
        random_state=random_state,
        stratify=labels,
    )

    # Fit the scaler on the training rows only so no test statistics leak in.
    scaler = RobustScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    if isinstance(split_ratio, float):
        # Same value the original reported for fractional test sizes.
        train_fraction = 1 - split_ratio
    else:
        # test_size given as a sample count (or None): `1 - split_ratio` would
        # be meaningless, so derive the fraction from the realised split.
        train_fraction = len(X_train) / (len(X_train) + len(X_test))

    results = {'split_ratio': train_fraction}

    # Insertion order below fixes the column order of the results dict.
    classifiers = {
        'lr': LogisticRegression(max_iter=max_iter, solver='lbfgs'),
        'knn': KNeighborsClassifier(n_neighbors=n_neighbors),
    }
    for prefix, model in classifiers.items():
        model.fit(X_train_scaled, y_train)
        results[f'{prefix}_train_acc'] = accuracy_score(
            y_train, model.predict(X_train_scaled)
        )
        results[f'{prefix}_test_acc'] = accuracy_score(
            y_test, model.predict(X_test_scaled)
        )
    return results

In [30]:
# Evaluate both models with 30% and 40% of the rows held out for testing.
results_70_30, results_60_40 = map(model_comparison, (0.30, 0.40))

# One row per split; the bare name on the last line renders the table.
comparison_df = pd.DataFrame(data=[results_70_30, results_60_40])
comparison_df

Unnamed: 0,split_ratio,lr_train_acc,lr_test_acc,knn_train_acc,knn_test_acc
0,0.7,0.787709,0.744589,0.804469,0.731602
1,0.6,0.795652,0.737013,0.823913,0.730519
