# Dataset Diabetes

Some classification problem on pima-indians-diabetes.csv dataset

In [1]:
import os
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler, PowerTransformer
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import f1_score, precision_score, recall_score
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC, LinearSVC
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB

In [2]:
directory = os.path.join('D:/Bootcamp ML - Mada/classification dataset/', 'pima-indians-diabetes.csv')
if os.path.isfile(directory):
  print("File ditemukan")
else:
    print("tidak ada")

File ditemukan


In [3]:
dataset = pd.read_csv(directory)
dataset.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [4]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 768 entries, 0 to 767
Data columns (total 9 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Pregnancies               768 non-null    int64  
 1   Glucose                   768 non-null    int64  
 2   BloodPressure             768 non-null    int64  
 3   SkinThickness             768 non-null    int64  
 4   Insulin                   768 non-null    int64  
 5   BMI                       768 non-null    float64
 6   DiabetesPedigreeFunction  768 non-null    float64
 7   Age                       768 non-null    int64  
 8   Outcome                   768 non-null    int64  
dtypes: float64(2), int64(7)
memory usage: 54.1 KB


In [5]:
print(dataset['Outcome'].value_counts())

0    500
1    268
Name: Outcome, dtype: int64


## Correlation matrix from every features on dataset

In [6]:
correlation_matrix = dataset.corr()
corr = correlation_matrix['Outcome'].sort_values(ascending=False)
correlation_dataframe = pd.DataFrame({'column': corr.index,
                 'Correlation with median_house_value': corr.values})
correlation_dataframe

Unnamed: 0,column,Correlation with median_house_value
0,Outcome,1.0
1,Glucose,0.466581
2,BMI,0.292695
3,Age,0.238356
4,Pregnancies,0.221898
5,DiabetesPedigreeFunction,0.173844
6,Insulin,0.130548
7,SkinThickness,0.074752
8,BloodPressure,0.065068


# Feature Engineering

In [7]:
label = dataset['Outcome'] #--> Target Prediksi
feature_used = dataset.drop(['Outcome',],axis=1) #--> Fitur yang digunakan adalah selain tabel 'Outcome'

# Menggunakan 2 metode scalling, Standard dan Power Transform dengan yeo-jhonson
scaler_1 = StandardScaler(with_std=True,with_mean=True)
scaler_2 = PowerTransformer(method='yeo-johnson', standardize=True)

feature_used_1 = scaler_1.fit_transform(feature_used)
feature_used_2 = scaler_2.fit_transform(feature_used)

data_x_1 = pd.DataFrame(feature_used_1, columns=feature_used.columns)
data_x_2 = pd.DataFrame(feature_used_2, columns=feature_used.columns)


In [8]:
def scores(obj, Predict, Feature2, Label2):
    print('Accuracy   on test set: {:.3f}'.format(obj.score(Feature2, Label2)))
    print('F1_score   on test set: {:.3f}'.format(f1_score(Label2, Predict, average='macro')))
    print('Precision  on test set: {:.3f}'.format(precision_score(Label2, Predict, average='macro')))
    print('Recall     on test set: {:.3f}'.format(recall_score(Label2, Predict, average='macro')))

# SPLIT DATA 80-20

# LOGISTIC REGRESSION

In [9]:
# Training menggunakan data hasil scalling dengan Standard Scaler
feature_train, feature_test, label_train, label_test = train_test_split(data_x_1, label, test_size = 0.20, random_state=4)

# Logistic Regression
print("Menggunakan Standard Scaler")
logReg = LogisticRegression(tol=0.001)
logReg.fit(feature_train,label_train)
cross_val= cross_val_score(logReg, feature_train,label_train, cv=5)
print("Cross Validation Score : "+str(cross_val))
prediction= logReg.predict(feature_test)
scores(logReg, prediction, feature_test, label_test)

print("-----------------------------------------------------------------------------------------------------------------------------")
print()

print("Menggunakan PowerTransform Scaller")
# Training menggunakan data hasil scalling dengan Power Transform Scaler
feature_train, feature_test, label_train, label_test = train_test_split(data_x_2, label, test_size = 0.20,random_state=4)
logReg = LogisticRegression(tol=0.001)
logReg.fit(feature_train,label_train)
cross_val_= cross_val_score(logReg, feature_train,label_train, cv=5)
print("Cross Validation Score : "+str(cross_val))
prediction= logReg.predict(feature_test)
scores(logReg, prediction, feature_test, label_test)


Menggunakan Standard Scaler
Cross Validation Score : [0.80487805 0.78861789 0.76422764 0.71544715 0.73770492]
Accuracy   on test set: 0.812
F1_score   on test set: 0.784
Precision  on test set: 0.793
Recall     on test set: 0.778
-----------------------------------------------------------------------------------------------------------------------------

Menggunakan PowerTransform Scaller
Cross Validation Score : [0.80487805 0.78861789 0.76422764 0.71544715 0.73770492]
Accuracy   on test set: 0.773
F1_score   on test set: 0.742
Precision  on test set: 0.746
Recall     on test set: 0.739


# MLP Classifier

In [10]:
print("Menggunakan Standard Scaler")
feature_train, feature_test, label_train, label_test = train_test_split(data_x_1, label, test_size = 0.20, random_state=4)
mlc = MLPClassifier(hidden_layer_sizes=8, activation='identity',solver='lbfgs',batch_size='auto', learning_rate_init=0.001, max_iter=10000,early_stopping=False)
mlc.fit(feature_train,label_train)
cross_val_= cross_val_score(mlc, feature_train,label_train, cv=5)
print("Cross Validation Score : "+str(cross_val))
prediction= mlc.predict(feature_test)
scores(mlc, prediction, feature_test, label_test)

print("-----------------------------------------------------------------------------------------------------------------------------")
print()

print("Menggunakan PowerTransform Scaller")
feature_train, feature_test, label_train, label_test = train_test_split(data_x_2, label, test_size = 0.20, random_state=4)
mlc= MLPClassifier(hidden_layer_sizes=8, activation='identity',solver='lbfgs',batch_size='auto', learning_rate_init=0.001, max_iter=10000, early_stopping=False)
mlc.fit(feature_train,label_train)
cross_val_= cross_val_score(mlc, feature_train, label_train, cv=5)
print("Cross Validation Score : "+str(cross_val))
prediction= mlc.predict(feature_test)
scores(mlc, prediction, feature_test, label_test)

Menggunakan Standard Scaler
Cross Validation Score : [0.80487805 0.78861789 0.76422764 0.71544715 0.73770492]
Accuracy   on test set: 0.805
F1_score   on test set: 0.778
Precision  on test set: 0.784
Recall     on test set: 0.773
-----------------------------------------------------------------------------------------------------------------------------

Menggunakan PowerTransform Scaller
Cross Validation Score : [0.80487805 0.78861789 0.76422764 0.71544715 0.73770492]
Accuracy   on test set: 0.773
F1_score   on test set: 0.742
Precision  on test set: 0.746
Recall     on test set: 0.739


# Random Forest Classifier

In [11]:
estimator =[5,10,20,30,50,75,100,150]
for i in estimator:
    print("Menggunakan Standard Scaler")
    feature_train, feature_test, label_train, label_test = train_test_split(data_x_1, label, test_size = 0.20, random_state=4)
    print("Untuk Estimator "+str(i))
    rfc = RandomForestClassifier(i,criterion='gini',bootstrap=True,class_weight='balanced_subsample')
    rfc.fit(feature_train,label_train)
    cross_val_= cross_val_score(rfc, feature_train,label_train, cv=5)
    print("Cross Validation Score : "+str(cross_val))
    prediction= rfc.predict(feature_test)
    scores(rfc, prediction, feature_test, label_test)
    print()

    print("Menggunakan PowerTransform Scaller")
    feature_train, feature_test, label_train, label_test = train_test_split(data_x_2, label, test_size = 0.20, random_state=4)
    rfc = RandomForestClassifier(i,criterion='gini',bootstrap=True,class_weight='balanced_subsample')
    rfc.fit(feature_train,label_train)
    cross_val_= cross_val_score(rfc, feature_train,label_train, cv=5)
    print("Cross Validation Score : "+str(cross_val))
    prediction= rfc.predict(feature_test)
    scores(rfc, prediction, feature_test, label_test)
    print()
    print("-----------------------------------------------------------------------------------------------------------------------------")


Menggunakan Standard Scaler
Untuk Estimator 5
Cross Validation Score : [0.80487805 0.78861789 0.76422764 0.71544715 0.73770492]
Accuracy   on test set: 0.682
F1_score   on test set: 0.618
Precision  on test set: 0.634
Recall     on test set: 0.614

Menggunakan PowerTransform Scaller
Cross Validation Score : [0.80487805 0.78861789 0.76422764 0.71544715 0.73770492]
Accuracy   on test set: 0.714
F1_score   on test set: 0.678
Precision  on test set: 0.679
Recall     on test set: 0.676

-----------------------------------------------------------------------------------------------------------------------------
Menggunakan Standard Scaler
Untuk Estimator 10
Cross Validation Score : [0.80487805 0.78861789 0.76422764 0.71544715 0.73770492]
Accuracy   on test set: 0.766
F1_score   on test set: 0.733
Precision  on test set: 0.739
Recall     on test set: 0.729

Menggunakan PowerTransform Scaller
Cross Validation Score : [0.80487805 0.78861789 0.76422764 0.71544715 0.73770492]
Accuracy   on test s

# Gaussian-Process Classifier

In [12]:
print("Menggunakan Standard Scaler")
feature_train, feature_test, label_train, label_test = train_test_split(data_x_1, label, test_size = 0.20, random_state=4)
kernel = 1.0 * RBF(1.0)
gpc = GaussianProcessClassifier(kernel=kernel, max_iter_predict=100, optimizer='fmin_l_bfgs_b')
gpc.fit(feature_train,label_train)
cross_val_= cross_val_score(gpc, feature_train,label_train, cv=5)
print("Cross Validation Score : "+str(cross_val))
prediction= gpc.predict(feature_test)
scores(gpc, prediction, feature_test, label_test)
print()

print("Menggunakan Power Transform Scaler")
feature_train, feature_test, label_train, label_test = train_test_split(data_x_2, label, test_size = 0.20, random_state=4)
kernel = 1.0 * RBF(1.0)
gpc = GaussianProcessClassifier(kernel=kernel, max_iter_predict=100,optimizer='fmin_l_bfgs_b')
gpc.fit(feature_train,label_train)
cross_val_= cross_val_score(gpc, feature_train,label_train, cv=5)
print("Cross Validation Score : "+str(cross_val))
prediction= gpc.predict(feature_test)
scores(gpc, prediction, feature_test, label_test)
print()

Menggunakan Standard Scaler
Cross Validation Score : [0.80487805 0.78861789 0.76422764 0.71544715 0.73770492]
Accuracy   on test set: 0.766
F1_score   on test set: 0.739
Precision  on test set: 0.739
Recall     on test set: 0.739

Menggunakan Power Transform Scaler
Cross Validation Score : [0.80487805 0.78861789 0.76422764 0.71544715 0.73770492]
Accuracy   on test set: 0.773
F1_score   on test set: 0.745
Precision  on test set: 0.746
Recall     on test set: 0.744



# KNN Classifier

In [13]:
numbers = 16
for i in range(numbers):
    if i %2 != 0 :
        print("Menggunakan nilai k : "+str(i))
        print("Menggunakan Standard Scaler")
        feature_train, feature_test, label_train, label_test = train_test_split(data_x_1, label, test_size = 0.20, random_state=4)
        knn = KNeighborsClassifier(i,weights='distance',algorithm='auto')
        knn.fit(feature_train,label_train)
        cross_val_= cross_val_score(knn, feature_train,label_train, cv=5)
        print("Cross Validation Score : "+str(cross_val))
        prediction= knn.predict(feature_test)
        scores(knn, prediction, feature_test, label_test)
        print()

        print("Menggunakan Power Transform Scaler")
        feature_train, feature_test, label_train, label_test = train_test_split(data_x_2, label, test_size = 0.20, random_state=4)
        knn = KNeighborsClassifier(i,weights='distance',algorithm='auto')
        knn.fit(feature_train,label_train)
        cross_val_= cross_val_score(knn, feature_train,label_train, cv=5)
        print("Cross Validation Score : "+str(cross_val))
        prediction= knn.predict(feature_test)
        scores(knn, prediction, feature_test, label_test)
        print()
        print("-----------------------------------------------------------------------------------------------------------------------------")

Menggunakan nilai k : 1
Menggunakan Standard Scaler
Cross Validation Score : [0.80487805 0.78861789 0.76422764 0.71544715 0.73770492]
Accuracy   on test set: 0.701
F1_score   on test set: 0.666
Precision  on test set: 0.666
Recall     on test set: 0.666

Menggunakan Power Transform Scaler
Cross Validation Score : [0.80487805 0.78861789 0.76422764 0.71544715 0.73770492]
Accuracy   on test set: 0.630
F1_score   on test set: 0.599
Precision  on test set: 0.597
Recall     on test set: 0.603

-----------------------------------------------------------------------------------------------------------------------------
Menggunakan nilai k : 3
Menggunakan Standard Scaler
Cross Validation Score : [0.80487805 0.78861789 0.76422764 0.71544715 0.73770492]
Accuracy   on test set: 0.747
F1_score   on test set: 0.713
Precision  on test set: 0.716
Recall     on test set: 0.710

Menggunakan Power Transform Scaler
Cross Validation Score : [0.80487805 0.78861789 0.76422764 0.71544715 0.73770492]
Accuracy 

# Adaboost Classifier

In [14]:
estimator =[5,10,20,30,50,75,100,150]
for i in estimator:
    print("Menggunakan Standard Scaler")
    feature_train, feature_test, label_train, label_test = train_test_split(data_x_1, label, test_size = 0.20, random_state=4)
    print("Untuk Estimator "+str(i))
    adc = AdaBoostClassifier(n_estimators=i,learning_rate=0.0001, random_state=10,algorithm='SAMME')
    adc.fit(feature_train,label_train)
    cross_val_= cross_val_score(adc, feature_train,label_train, cv=5)
    print("Cross Validation Score : "+str(cross_val))
    prediction= adc.predict(feature_test)
    scores(adc, prediction, feature_test, label_test)
    print()

    print("Menggunakan PowerTransform Scaller")
    feature_train, feature_test, label_train, label_test = train_test_split(data_x_2, label, test_size = 0.20, random_state=4)
    adc = AdaBoostClassifier(n_estimators=i,learning_rate=0.0001, random_state=10,algorithm='SAMME')
    adc.fit(feature_train,label_train)
    cross_val_= cross_val_score(adc, feature_train,label_train, cv=5)
    print("Cross Validation Score : "+str(cross_val))
    prediction= adc.predict(feature_test)
    scores(adc, prediction, feature_test, label_test)
    print()
    print("-----------------------------------------------------------------------------------------------------------------------------")

Menggunakan Standard Scaler
Untuk Estimator 5
Cross Validation Score : [0.80487805 0.78861789 0.76422764 0.71544715 0.73770492]
Accuracy   on test set: 0.792
F1_score   on test set: 0.777
Precision  on test set: 0.771
Recall     on test set: 0.791

Menggunakan PowerTransform Scaller
Cross Validation Score : [0.80487805 0.78861789 0.76422764 0.71544715 0.73770492]
Accuracy   on test set: 0.792
F1_score   on test set: 0.777
Precision  on test set: 0.771
Recall     on test set: 0.791

-----------------------------------------------------------------------------------------------------------------------------
Menggunakan Standard Scaler
Untuk Estimator 10
Cross Validation Score : [0.80487805 0.78861789 0.76422764 0.71544715 0.73770492]
Accuracy   on test set: 0.792
F1_score   on test set: 0.777
Precision  on test set: 0.771
Recall     on test set: 0.791

Menggunakan PowerTransform Scaller
Cross Validation Score : [0.80487805 0.78861789 0.76422764 0.71544715 0.73770492]
Accuracy   on test s

# Decision Tree Classifier

In [15]:
feature_train, feature_test, label_train, label_test = train_test_split(data_x_1, label, test_size = 0.20, random_state=4)
print("Menggunakan Standard Scaler")
dtc=DecisionTreeClassifier(criterion='entropy',splitter='best',class_weight='balanced')
dtc.fit(feature_train,label_train)
cross_val_= cross_val_score(dtc, feature_train,label_train, cv=5)
print("Cross Validation Score : "+str(cross_val))
prediction= dtc.predict(feature_test)
scores(dtc, prediction, feature_test, label_test)
print()

print("Menggunakan PowerTransform Scaller")
feature_train, feature_test, label_train, label_test = train_test_split(data_x_2, label, test_size = 0.20, random_state=4)
dtc = DecisionTreeClassifier(criterion='entropy',splitter='best',class_weight='balanced')
dtc.fit(feature_train,label_train)
cross_val_= cross_val_score(dtc, feature_train,label_train, cv=5)
print("Cross Validation Score : "+str(cross_val))
prediction= dtc.predict(feature_test)
scores(dtc, prediction, feature_test, label_test)
print()

Menggunakan Standard Scaler
Cross Validation Score : [0.80487805 0.78861789 0.76422764 0.71544715 0.73770492]
Accuracy   on test set: 0.727
F1_score   on test set: 0.703
Precision  on test set: 0.699
Recall     on test set: 0.709

Menggunakan PowerTransform Scaller
Cross Validation Score : [0.80487805 0.78861789 0.76422764 0.71544715 0.73770492]
Accuracy   on test set: 0.740
F1_score   on test set: 0.721
Precision  on test set: 0.717
Recall     on test set: 0.733



# Gaussian Naive-Bayes

In [16]:
feature_train, feature_test, label_train, label_test = train_test_split(data_x_1, label, test_size = 0.20, random_state=4)
print("Menggunakan Standard Scaler")
gnb=GaussianNB()
gnb.fit(feature_train,label_train)
cross_val_= cross_val_score(gnb, feature_train,label_train, cv=5)
print("Cross Validation Score : "+str(cross_val))
prediction= gnb.predict(feature_test)
scores(gnb, prediction, feature_test, label_test)
print()

print("Menggunakan PowerTransform Scaller")
feature_train, feature_test, label_train, label_test = train_test_split(data_x_2, label, test_size = 0.20, random_state=4)
gnb=GaussianNB()
gnb.fit(feature_train,label_train)
cross_val_= cross_val_score(gnb, feature_train,label_train, cv=5)
print("Cross Validation Score : "+str(cross_val))
prediction= gnb.predict(feature_test)
scores(gnb, prediction, feature_test, label_test)
print()

Menggunakan Standard Scaler
Cross Validation Score : [0.80487805 0.78861789 0.76422764 0.71544715 0.73770492]
Accuracy   on test set: 0.760
F1_score   on test set: 0.733
Precision  on test set: 0.732
Recall     on test set: 0.734

Menggunakan PowerTransform Scaller
Cross Validation Score : [0.80487805 0.78861789 0.76422764 0.71544715 0.73770492]
Accuracy   on test set: 0.773
F1_score   on test set: 0.747
Precision  on test set: 0.746
Recall     on test set: 0.748



# SVC

In [21]:
kernels=['poly','linear','rbf']
for i, value in enumerate(kernels):
    print("Menggunakan kernel : "+str(value))
    feature_train, feature_test, label_train, label_test = train_test_split(data_x_1, label, test_size = 0.20, random_state=4)
    print("Menggunakan Standard Scaler")
    svl=SVC(kernel= value, C=1000,gamma='scale',tol=0.0001,class_weight='balanced',degree=3,coef0=1)
    svl.fit(feature_train,label_train)
    cross_val_= cross_val_score(svl, feature_train,label_train, cv=5)
    print("Cross Validation Score : "+str(cross_val))
    prediction= svl.predict(feature_test)
    scores(svl, prediction, feature_test, label_test)
    print()

    feature_train, feature_test, label_train, label_test = train_test_split(data_x_2, label, test_size = 0.20, random_state=4)
    print("Menggunakan Power Transform Scaler")
    svl=SVC(kernel= value, C=1000,gamma='scale',tol=0.0001,class_weight='balanced',degree=3,coef0=1)
    svl.fit(feature_train,label_train)
    cross_val_= cross_val_score(svl, feature_train,label_train, cv=5)
    print("Cross Validation Score : "+str(cross_val))
    prediction= svl.predict(feature_test)
    scores(svl, prediction, feature_test, label_test)
    print()
    print("-----------------------------------------------------------------------------------------------------------------------------")

Menggunakan kernel : poly
Menggunakan Standard Scaler
Cross Validation Score : [0.80487805 0.78861789 0.76422764 0.71544715 0.73770492]
Accuracy   on test set: 0.682
F1_score   on test set: 0.646
Precision  on test set: 0.645
Recall     on test set: 0.647

Menggunakan Power Transform Scaler
Cross Validation Score : [0.80487805 0.78861789 0.76422764 0.71544715 0.73770492]
Accuracy   on test set: 0.675
F1_score   on test set: 0.634
Precision  on test set: 0.635
Recall     on test set: 0.632

-----------------------------------------------------------------------------------------------------------------------------
Menggunakan kernel : linear
Menggunakan Standard Scaler
Cross Validation Score : [0.80487805 0.78861789 0.76422764 0.71544715 0.73770492]
Accuracy   on test set: 0.740
F1_score   on test set: 0.719
Precision  on test set: 0.715
Recall     on test set: 0.729

Menggunakan Power Transform Scaler
Cross Validation Score : [0.80487805 0.78861789 0.76422764 0.71544715 0.73770492]
Acc

# LinearSVC

In [23]:
feature_train, feature_test, label_train, label_test = train_test_split(data_x_1, label, test_size = 0.20, random_state=4)
print("Menggunakan Standard Scaler")
linearsvc = LinearSVC(class_weight='balanced',max_iter=1000,loss='hinge')
linearsvc.fit(feature_train,label_train)
cross_val_= cross_val_score(linearsvc, feature_train,label_train, cv=5)
print("Cross Validation Score : "+str(cross_val))
prediction= linearsvc.predict(feature_test)
scores(svl, prediction, feature_test, label_test)
print()

feature_train, feature_test, label_train, label_test = train_test_split(data_x_2, label, test_size = 0.20, random_state=4)
print("Menggunakan Power Transform Scaler")
linearsvc = LinearSVC(class_weight='balanced',max_iter=1000,loss='hinge')
linearsvc.fit(feature_train,label_train)
cross_val_= cross_val_score(linearsvc, feature_train,label_train, cv=5)
print("Cross Validation Score : "+str(cross_val))
prediction= linearsvc.predict(feature_test)
scores(svl, prediction, feature_test, label_test)
print()

Menggunakan Standard Scaler
Cross Validation Score : [0.80487805 0.78861789 0.76422764 0.71544715 0.73770492]
Accuracy   on test set: 0.701
F1_score   on test set: 0.721
Precision  on test set: 0.717
Recall     on test set: 0.733

Menggunakan Power Transform Scaler
Cross Validation Score : [0.80487805 0.78861789 0.76422764 0.71544715 0.73770492]
Accuracy   on test set: 0.727
F1_score   on test set: 0.725
Precision  on test set: 0.722
Recall     on test set: 0.743



# Kesimpulan

Pada percobaan diatas, menggunakan dataset 'Diabetes'. digunakan 10 algoritma untuk mengklasifikasi data menjadi 2 kelas
yaitu 0 = 'Tidak terkena Diabetes' , 1 = 'Terkena Diabetes' .

Dataset tersebut di-scaling dengan 2 scaler, yaitu 'Standard scaler' dan 'Power Transform'.

* Algoritma LogisticRegression mendapat nilai akurasi dan precision

    Standard scaler : 0.812 dan 0.793 , Power Transform : 0.773 dan 0,743

* Algoritma MLP Classifier mendapat nilai akurasi dan precision

    Standard scaler : 0.805 dan 0.784 , Power Transform : 0.773 dan 0.746

* Algoritma RandomForest Classifier mendapat nilai akurasi dan precision
yang terbaik ada pada nilai estimator 75, dengan :

    Standard scaler : 0.773 dan 0.743 , Power Transform : 0.773 dan 0.746

* Algoritma KNN Classifier mendapat nilai akurasi dan precision
yang terbaik ada pada nilai k = 9, dengan :

    Standard scaler : 0.773 dan 0.743 , Power Transform : 0.753 dan 0.724

* Algoritma Adaboost Classifier mendapat nilai akurasi dan precision
yang terbaik ada pada nilai estimator 5, dengan :

    Standard scaler : 0.792 dan 0.771 , Power Transform : 0.792 dan 0.771

* Algoritma Decision Tree Classifier mendapat nilai akurasi dan precision

    Standard scaler : 0.727 dan 0.699 , Power Transform : 0.740 dan 0.717

* Algoritma Gaussian Naive-Bayes mendapat nilai akurasi dan precision

    Standard scaler : 0.760 dan 0.732 , Power Transform : 0.773 dan 0.746

* Algoritma SVC yang mendapat nilai terbaik menggunakan kernel 'linear'
dengan nilai akurasi dan precision

    Standard scaler : 0.740 dan 0.715 , Power Transform : 0.740 dan 0.722

* Algoritma LinearSVC mendapat nilai akurasi dan precision

    Standard scaler : 0.701 dan 0.717 , Power Transform : 0.727, 0.722

Dari hasil percobaan diatas, algoritma LogisticRegression dengan menggunakan data yang
di-scaling dengan Standar scaler mendapat hasil akurasi yang terbaik sebesar 0.812

Maka, dapat disimpulkan bahwa penggunaan Standard scaler pada dataset ini sudah mencukupi
untuk bisa mendapat nilai akurasi dan precision yang cukup baik pada kebanyakan algoritma yang digunakan. 
Tapi, dalam beberapa algoritma akan lebih baik hasilnya jika menggunakan Power Transform scaler.

Hasil klasifikasi yang didapatkan oleh model algoritma dapat dipengaruhi oleh banyak faktor,
salah satunya adalah penggunaan Hyperparameter pada tiap algoritma