In [54]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import ConfusionMatrixDisplay

In [55]:
# Load the Pima diabetes data from the CSV that sits next to the notebook.
df = pd.read_csv(filepath_or_buffer='pimadataorig.csv')

# Bare last expression -> rich (truncated) preview of the loaded frame.
df

Unnamed: 0,num_preg,glucose_conc,diastolic_bp,skin_thickness,insulin,bmi,diab_pred,age,diabetes
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1
...,...,...,...,...,...,...,...,...,...
763,10,101,76,48,180,32.9,0.171,63,0
764,2,122,70,27,0,36.8,0.340,27,0
765,5,121,72,23,112,26.2,0.245,30,0
766,1,126,60,0,0,30.1,0.349,47,1


### Coba training dengan algoritma lain (logistic regression, decision tree, random forest, KNN, Naive Bayes) dan hitung metriknya.

In [56]:
# Feature matrix: the eight predictor columns, in the order they appear in the file.
X = df.loc[:, [
    'num_preg',
    'glucose_conc',
    'diastolic_bp',
    'skin_thickness',
    'insulin',
    'bmi',
    'diab_pred',
    'age',
]]
# Target vector: the `diabetes` column (0/1 in the preview rows shown above).
y = df.loc[:, 'diabetes']

In [57]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    random_state=42,
    test_size=0.2,
)

In [58]:
# Standardize the features. The scaler is fitted on the training rows only and
# then applied to both partitions, so the test rows never influence the statistics.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [59]:
# Candidate classifiers, keyed by the display name used when reporting results.
# Decision Tree and Random Forest are randomized estimators: without a fixed
# random_state their scores change from run to run, which makes the notebook
# non-reproducible and blurs the comparison between scalers. The seed matches
# the one used for the train/test split.
models = {
    'Logistic Regression': LogisticRegression(),
    'Decision Tree': DecisionTreeClassifier(random_state=42),
    'Random Forest': RandomForestClassifier(random_state=42),
    'KNN': KNeighborsClassifier(),
    'Naive Bayes': GaussianNB()
}

In [60]:
# Per-model scores, filled in by the evaluation loop: {model name: {metric name: value}}.
metrics_results = dict()

In [61]:
# Fit every candidate on the scaled training data and score it on the held-out rows.
for model_name, model in models.items():
    # scikit-learn estimators return themselves from fit(), so the calls can be chained.
    y_pred = model.fit(X_train_scaled, y_train).predict(X_test_scaled)

    # Same four scores, computed in the same order, against the same predictions.
    accuracy, precision, recall, f1 = (
        score(y_test, y_pred)
        for score in (accuracy_score, precision_score, recall_score, f1_score)
    )

    metrics_results[model_name] = {
        'Accuracy': accuracy,
        'Precision': precision,
        'Recall': recall,
        'F1 Score': f1,
    }

In [62]:
# Report each model's scores rounded to two decimals, one block per model,
# closed by a 20-character dashed separator.
for model_name, metrics in metrics_results.items():
    print(
        f'{model_name}:',
        f'Accuracy: {metrics["Accuracy"]:.2f}',
        f'Precision: {metrics["Precision"]:.2f}',
        f'Recall: {metrics["Recall"]:.2f}',
        f'F1 Score: {metrics["F1 Score"]:.2f}',
        '-'*20,
        sep='\n',
    )

Logistic Regression:
Accuracy: 0.75
Precision: 0.65
Recall: 0.67
F1 Score: 0.66
--------------------
Decision Tree:
Accuracy: 0.75
Precision: 0.63
Recall: 0.73
F1 Score: 0.68
--------------------
Random Forest:
Accuracy: 0.73
Precision: 0.62
Recall: 0.65
F1 Score: 0.64
--------------------
KNN:
Accuracy: 0.69
Precision: 0.58
Recall: 0.51
F1 Score: 0.54
--------------------
Naive Bayes:
Accuracy: 0.77
Precision: 0.66
Recall: 0.71
F1 Score: 0.68
--------------------


In [65]:
    # Unrounded scores for the Logistic Regression model (StandardScaler features).
    # Predictions are recomputed from the fitted model instead of reusing `y_pred`:
    # after the evaluation loop that name holds the predictions of the last entry
    # in `models` (Naive Bayes), so on Restart & Run All the old code would
    # silently report a different model than the one whose scores were saved here.
    y_pred_logreg = models['Logistic Regression'].predict(X_test_scaled)
    print('recall_score\t:', recall_score(y_test, y_pred_logreg))
    print('precision_score\t:', precision_score(y_test, y_pred_logreg))
    print('accuracy_score\t:', accuracy_score(y_test, y_pred_logreg))

recall_score	: 0.6727272727272727
precision_score	: 0.6491228070175439
accuracy_score	: 0.7532467532467533


### Gunakan MinMaxScaler (sebagai pengganti StandardScaler) dan jalankan ulang poin 1.

In [66]:
# Rebuild the feature matrix and target from the untouched frame so this
# section starts from the same inputs as the StandardScaler run.
X = df.loc[:, [
    'num_preg',
    'glucose_conc',
    'diastolic_bp',
    'skin_thickness',
    'insulin',
    'bmi',
    'diab_pred',
    'age',
]]
y = df.loc[:, 'diabetes']

In [67]:
# Same 80/20 split and seed as the StandardScaler run, so both scalers are
# evaluated on identical train/test rows.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    random_state=42,
    test_size=0.2,
)

In [68]:
# Rescale the features with MinMaxScaler. As before, the scaler is fitted on the
# training rows only and then applied to both partitions.
scaler = MinMaxScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [69]:
# Fresh, unfitted classifiers for the MinMaxScaler run, keyed by display name.
# Decision Tree and Random Forest are randomized estimators: pinning
# random_state makes the run reproducible and ensures that any change in their
# scores relative to the StandardScaler run is not just seed noise. The seed
# matches the one used for the train/test split.
models = {
    'Logistic Regression': LogisticRegression(),
    'Decision Tree': DecisionTreeClassifier(random_state=42),
    'Random Forest': RandomForestClassifier(random_state=42),
    'KNN': KNeighborsClassifier(),
    'Naive Bayes': GaussianNB()
}

In [70]:
# Start from an empty results table so no scores carry over from the previous section.
metrics_results = dict()

In [87]:
# Fit every candidate on the min-max scaled training data and score it on the
# held-out rows.
for model_name, model in models.items():
    # scikit-learn estimators return themselves from fit(), so the calls can be chained.
    y_pred = model.fit(X_train_scaled, y_train).predict(X_test_scaled)

    # Same four scores, computed in the same order, against the same predictions.
    accuracy, precision, recall, f1 = (
        score(y_test, y_pred)
        for score in (accuracy_score, precision_score, recall_score, f1_score)
    )

    metrics_results[model_name] = {
        'Accuracy': accuracy,
        'Precision': precision,
        'Recall': recall,
        'F1 Score': f1,
    }

In [88]:
# Report each model's scores rounded to two decimals, one block per model,
# closed by a 20-character dashed separator.
for model_name, metrics in metrics_results.items():
    print(
        f'{model_name}:',
        f'Accuracy: {metrics["Accuracy"]:.2f}',
        f'Precision: {metrics["Precision"]:.2f}',
        f'Recall: {metrics["Recall"]:.2f}',
        f'F1 Score: {metrics["F1 Score"]:.2f}',
        '-'*20,
        sep='\n',
    )

Logistic Regression:
Accuracy: 0.75
Precision: 0.68
Recall: 0.58
F1 Score: 0.63
--------------------
Decision Tree:
Accuracy: 0.76
Precision: 0.66
Recall: 0.69
F1 Score: 0.67
--------------------
Random Forest:
Accuracy: 0.72
Precision: 0.61
Recall: 0.60
F1 Score: 0.61
--------------------
KNN:
Accuracy: 0.68
Precision: 0.56
Recall: 0.53
F1 Score: 0.54
--------------------
Naive Bayes:
Accuracy: 0.77
Precision: 0.66
Recall: 0.71
F1 Score: 0.68
--------------------


In [73]:
    # Unrounded scores for the Logistic Regression model (MinMaxScaler features).
    # Predictions are recomputed from the fitted model instead of reusing `y_pred`:
    # after the evaluation loop that name holds the predictions of the last entry
    # in `models` (Naive Bayes), so on Restart & Run All the old code would
    # silently report a different model than the one whose scores were saved here.
    y_pred_logreg = models['Logistic Regression'].predict(X_test_scaled)
    print('recall_score\t:', recall_score(y_test, y_pred_logreg))
    print('precision_score\t:', precision_score(y_test, y_pred_logreg))
    print('accuracy_score\t:', accuracy_score(y_test, y_pred_logreg))

recall_score	: 0.5818181818181818
precision_score	: 0.6808510638297872
accuracy_score	: 0.7532467532467533
