# Import Library

In [16]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import joblib
import numpy as np

# Import Dataset

In [2]:
# Load the diabetes dataset from a CSV file located in the current working directory.
data_from_csv = pd.read_csv(filepath_or_buffer="diabetes.csv")

In [3]:
# Preview the first five rows to sanity-check the columns that were loaded.
data_from_csv.head(n=5)

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


# Split fitur dan label

In [23]:
# Predictor columns, in the order every later cell passes them to the scaler/model.
features = [
    "Pregnancies",
    "Glucose",
    "BloodPressure",
    "SkinThickness",
    "Insulin",
    "BMI",
    "DiabetesPedigreeFunction",
    "Age",
]

# Separate the predictors from the "Outcome" label column.
X, y = data_from_csv[features], data_from_csv["Outcome"]

# Hold out 25% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=16,
)

# Normalisasi Data

In [24]:
# Fit the scaler on the training split only, then apply that same fitted
# transformation to both splits so no test-set statistics leak into training.
# NOTE: X_train / X_test are rebound to the scaled arrays here, so re-run the
# split cell before re-running this one.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Membuat Model dengan Algoritma Regresi Logistik

In [25]:
# Logistic-regression classifier with default hyper-parameters; the seed is
# pinned to the same value used for the train/test split.
model = LogisticRegression(
    random_state=16,
)

# Melatih Model dengan Data Latih dan Memprediksi Data Uji

In [26]:
# Train on the scaled training split, then predict labels for the scaled test
# split (fit() returns the estimator itself, so the calls can be chained).
y_pred = model.fit(X_train, y_train).predict(X_test)

# Melihat hasil akurasi

In [27]:
# Overall accuracy on the held-out test split, printed as a percentage.
accuracy = accuracy_score(y_test, y_pred)
print(f'Akurasi: {accuracy * 100:.2f}%')

# Each heading is followed, on the next line, by the corresponding report.
print('Confusion Matrix:', confusion_matrix(y_test, y_pred), sep='\n')
print('Classification Report:', classification_report(y_test, y_pred), sep='\n')

Akurasi: 81.77%
Confusion Matrix:
[[116   9]
 [ 26  41]]
Classification Report:
              precision    recall  f1-score   support

           0       0.82      0.93      0.87       125
           1       0.82      0.61      0.70        67

    accuracy                           0.82       192
   macro avg       0.82      0.77      0.78       192
weighted avg       0.82      0.82      0.81       192



# Tes Prediksi

In [28]:
# Single patient record; the values are mapped to `features` by position.
data_pasien = np.array([[2, 147, 83, 30, 0, 49.9, 0.547, 37]])

# The model was trained on standardized features, so a raw record must go
# through the same fitted scaler before prediction; feeding unscaled values
# makes the prediction meaningless. Wrapping the record in a DataFrame with
# the training column names matches the input the scaler was fitted on.
data_pasien_scaled = scaler.transform(pd.DataFrame(data_pasien, columns=features))
result = model.predict(data_pasien_scaled)

# Report the predicted class (0 = no diabetes, 1 = diabetes) to the user.
if result[0] == 0:
    print("Hasil data menunjukkan anda tidak menderita diabetes")
elif result[0] == 1:
    print("Hasil data menunjukkan anda menderita diabetes")

Hasil data menunjukkan anda menderita diabetes


# Membuat Dump Model

In [29]:
# Persist the fitted scaler alongside the classifier: the model was trained on
# standardized features, so it cannot be used correctly from a fresh session
# unless the exact same scaling can be restored as well.
joblib.dump(scaler, 'standard_scaler_diabetes.pkl')

# Kept as the last expression so the cell still displays the model file name.
joblib.dump(model, 'logistic_regression_diabetes_model.pkl')

['logistic_regression_diabetes_model.pkl']

In [30]:
# Reload the persisted classifier. joblib files are pickles, so only load
# files that were produced by a trusted source (here: the dump cell above).
model = joblib.load('logistic_regression_diabetes_model.pkl')

# Example of new data; the values are mapped to `features` by position.
# NOTE(review): these values do not look like they follow the `features`
# order (e.g. the last slot, Age, is 0.5) — confirm the intended ordering.
new_data = np.array([[30, 25, 5, 0, 23, 85, 40, 0.5]])

# Standardize with the scaler fitted on the training split. Passing a
# DataFrame with the training column names matches what the scaler was fitted
# on and makes the column mapping explicit.
# NOTE: `scaler` is the in-memory object from the normalization cell; in a
# fresh session it has to be restored before this cell can run.
new_data_scaled = scaler.transform(pd.DataFrame(new_data, columns=features))

# Predict and report the class label.
prediction = model.predict(new_data_scaled)
print('Prediksi:', 'Diabetes' if prediction[0] == 1 else 'Tidak Diabetes')

Prediksi: Diabetes


