**Import Library**

In [5]:
import numpy as np
import pandas as pd
from sklearn.naive_bayes import GaussianNB # import Naive Bayes model Gaussian (asumsi data terdistribusi normal)
from sklearn.svm import SVC # import SVM classifier
from sklearn.ensemble import VotingClassifier # import model Voting
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

**Persiapan Data**

In [6]:
# Load data
df = pd.read_csv('data/diabetes.csv')

In [7]:
feature_columns = ['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI', 'DiabetesPedigreeFunction', 'Age']

# Impute nilai 0 dengan mean
from sklearn.impute import SimpleImputer

fill_values = SimpleImputer(missing_values=0, strategy="mean", copy=False)

df[feature_columns] = fill_values.fit_transform(df[feature_columns])

**Split data training dan testing**

In [8]:
X = df[feature_columns]
y = df.Outcome

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

**Training dengan Logistic Regression**

In [9]:
from sklearn.linear_model import LogisticRegression
log_reg = LogisticRegression(max_iter=1000)

# Fit ke model
log_reg.fit(X_train, y_train)

# Prediksi
y_pred_log_reg = log_reg.predict(X_test)

# Evaluasi akurasi testing data
acc_log_reg = accuracy_score(y_test, y_pred_log_reg)

# Print hasil evaluasi
print("Test set accuracy: {:.2f}".format(acc_log_reg))
print(f"Test set accuracy: {acc_log_reg}")

Test set accuracy: 0.74
Test set accuracy: 0.7359307359307359


**Training dengan SVM kernel polynomial**

In [10]:
svm_poly = SVC(kernel='poly', degree=3)

# Fit ke model
svm_poly.fit(X_train, y_train)

# Prediksi
y_pred_svm_poly = svm_poly.predict(X_test)

# Evaluasi akurasi testing data
acc_svm_poly = accuracy_score(y_test, y_pred_svm_poly)

# Print hasil evaluasi
print("Test set accuracy: {:.2f}".format(acc_svm_poly))
print(f"Test set accuracy: {acc_svm_poly}")

Test set accuracy: 0.74
Test set accuracy: 0.7402597402597403


**Training dengan Decission Tree**

In [11]:
from sklearn.tree import DecisionTreeClassifier
decision_tree = DecisionTreeClassifier()

# Fit ke model
decision_tree.fit(X_train, y_train)

# Prediksi
y_pred_decision_tree = decision_tree.predict(X_test)

# Evaluasi akurasi testing data
acc_decision_tree = accuracy_score(y_test, y_pred_decision_tree)

# Print hasil evaluasi
print("Test set accuracy: {:.2f}".format(acc_decision_tree))
print(f"Test set accuracy: {acc_decision_tree}")

Test set accuracy: 0.71
Test set accuracy: 0.7142857142857143
