In [None]:
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score, precision_score, recall_score
from sklearn.metrics import classification_report

import warnings
warnings.filterwarnings("ignore")
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import ShuffleSplit

<h1>PREPARING DATA</h1>

<h4>0. Input Data</h4>

In [None]:
# Load the raw analysis workbook into a DataFrame.
# A forward slash is used as the path separator: the previous
# 'input\Data ...' literal depended on '\D' being an unrecognised escape
# sequence (which triggers an invalid-escape warning on current Python
# versions) and on a backslash being a valid separator for the host OS.
df = pd.read_excel('input/Data untuk Analisis.xlsx')
# info() prints column dtypes / non-null counts; describe() is the cell's
# displayed result (summary statistics of the numeric columns).
df.info()
df.describe()

In [None]:
# Drop the columns that are excluded from the rest of the analysis.
# Rebinding df (instead of inplace=True) together with errors='ignore'
# keeps this cell re-runnable: executing it a second time no longer raises
# KeyError for columns that were already removed.
df = df.drop(columns=['Nitrogen_class', 'Flavonoid_class', 'Treatmen'], errors='ignore')

In [None]:
# Preview the first five rows of the frame after the column drop.
df.head(n=5)

In [None]:
# Horizontal bar chart with one bar per SPAD_class value, showing how many
# rows fall into each target class.
sns.countplot(data=df, y='SPAD_class')
plt.xlabel("Count of each Target class")
plt.ylabel("Target classes")
plt.show()

<h4>1. Membagi Data untuk Training dan Testing</h4>

In [None]:
# Separate the label vector (SPAD_class) from the feature matrix
# (every remaining column).
y = df['SPAD_class']
X = df.drop(columns=['SPAD_class'])

In [None]:
# Hold out 25% of the rows for testing; the split is seeded so it is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=0
)

# Sanity check: print the size of each partition (train X/y, then test X/y).
for part in (X_train, y_train, X_test, y_test):
    print(len(part))

<h4>2. Standarisasi Data</h4>
<p>Feature scaling bertujuan agar satu variabel independen tidak mendominasi variabel yang lain sehingga perhitungan yang dilakukan menjadi lebih mudah.</p>

In [None]:
# Learn the scaling statistics from the training features only, then apply
# the same transformation to both partitions. Both names are rebound to the
# scaled arrays returned by the scaler.
sc_X = StandardScaler()
sc_X.fit(X_train)
X_train = sc_X.transform(X_train)
X_test = sc_X.transform(X_test)

<h1>METODE 1: KNN</h1>

In [None]:
from sklearn.neighbors import KNeighborsClassifier

<h4>1. Menentukan Nilai k</h4>

In [None]:
# Candidate neighbourhood sizes: the odd integers below 50.
myList = list(range(0, 50))
neighbors = [value for value in myList if value % 2 != 0]

# Mean 10-fold cross-validated accuracy on the training set for each k.
cv_scores = []
for k in neighbors:
    knn = KNeighborsClassifier(n_neighbors=k)
    scores = cross_val_score(knn, X_train, y_train, scoring='accuracy', cv=10)
    cv_scores.append(scores.mean())

# Despite its name, MSE holds the misclassification error (1 - accuracy).
MSE = [1 - score for score in cv_scores]

# The first k with the lowest error wins.
lowest_error = min(MSE)
optimal_k = neighbors[MSE.index(lowest_error)]
print('\nThe optimal number of neighbors is %d.' % optimal_k)

# Error curve over the candidate k values.
plt.plot(neighbors, MSE)
plt.title('Optimal K-Neighbours', y=1.1)
plt.xlabel('Number of Neighbors K')
plt.ylabel('Misclassification Error')
plt.show()

<h4>2. Tune KNN</h4>

In [None]:
# Train the final KNN model with the selected k and Euclidean distance,
# then predict the held-out test set.
clf = KNeighborsClassifier(n_neighbors=optimal_k, metric='euclidean')
clf.fit(X_train, y_train)
y_pred_knn = clf.predict(X_test)

# Actual (rows) versus predicted (columns) class counts.
pd.crosstab(index=y_test, columns=y_pred_knn)

<h4>3. Menghitung Akurasi</h4>

In [None]:
# Per-class report followed by the raw confusion matrix for the KNN predictions.
print(classification_report(y_test, y_pred_knn))
cm = confusion_matrix(y_test, y_pred_knn)
print(cm)

# Macro-averaged summary metrics, printed in the order recall, precision, F1.
for template, metric in (
    ('Recall:{}', recall_score),
    ('Precision:{}', precision_score),
    ('F1-Score:{}', f1_score),
):
    print(template.format(metric(y_test, y_pred_knn, average='macro')))

In [None]:
# Overall accuracy (as a percentage) of the KNN predictions.
cm = confusion_matrix(y_test, y_pred_knn)
accuracy = 100 * accuracy_score(y_test, y_pred_knn)
print("Accuracy with K-NN: {0:.2f}%".format(accuracy))

# Annotated heatmap of the confusion matrix; labels are set on the returned axes.
plt.figure(figsize=(3.5, 2.5))
p = sns.heatmap(data=pd.DataFrame(cm), cmap="YlGnBu", annot=True, fmt='g')
p.set_title('Confusion matrix KNN', y=1.1)
p.set_ylabel('Actual label')
p.set_xlabel('Predicted label')

<h1>METODE 2 : SVM</h1>

In [None]:
from sklearn.pipeline import make_pipeline
from sklearn.svm import SVC
from transformers import pipeline

<h4>1. Menentukan Parameter Terbaik</h4>

In [None]:
# Single seeded 80/20 hold-out split of the training data used to score
# every parameter combination.
cv = ShuffleSplit(n_splits=1, test_size=0.2, random_state=8)

# Kernel / gamma combinations to evaluate.
param_grid = {
    'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
    'gamma': ['scale', 'auto'],
    }

# Use the macro-averaged scorers: the plain 'recall' / 'f1' scorers are
# binary-average and are not the metrics this notebook reports (the
# evaluation cells all use average='macro'). On a multiclass target the
# binary scorers fail, which -- with warnings silenced -- only surfaces as
# NaN scores. The best candidate is refit on macro F1.
gridSearch = GridSearchCV(SVC(), param_grid, cv=cv,
                          scoring=['recall_macro', 'f1_macro'], refit='f1_macro', verbose=2)
gridSearch.fit(X_train, y_train)
print('Score: ', gridSearch.best_score_)
print('Parameters: ', gridSearch.best_params_)

<h4>2. Tune SVM</h4>

In [None]:
# Linear-kernel SVC wrapped in a scaling pipeline, trained on the training
# partition and applied to the test partition.
# NOTE(review): X_train / X_test appear to be standardised already by sc_X
# earlier in the notebook, which would make this StandardScaler step
# redundant -- confirm before removing it.
svm = make_pipeline(StandardScaler(), SVC(kernel='linear', gamma='scale'))
svm.fit(X_train, y_train)
y_pred_svm = svm.predict(X_test)

# Actual (rows) versus predicted (columns) class counts.
pd.crosstab(index=y_test, columns=y_pred_svm)

<h4>3. Menghitung Akurasi</h4>

In [None]:
# Per-class report followed by the raw confusion matrix for the SVM predictions.
print(classification_report(y_test, y_pred_svm))
cm = confusion_matrix(y_test, y_pred_svm)
print(cm)

# Macro-averaged summary metrics, printed in the order recall, precision, F1.
for template, metric in (
    ('Recall:{}', recall_score),
    ('Precision:{}', precision_score),
    ('F1-Score:{}', f1_score),
):
    print(template.format(metric(y_test, y_pred_svm, average='macro')))

In [None]:
# Overall accuracy (as a percentage) of the SVM predictions.
cm = confusion_matrix(y_test, y_pred_svm)
accuracy = 100 * accuracy_score(y_test, y_pred_svm)
print("Accuracy with SVM: {0:.2f}%".format(accuracy))

# Annotated heatmap of the confusion matrix; labels are set on the returned axes.
plt.figure(figsize=(3.5, 2.5))
p = sns.heatmap(data=pd.DataFrame(cm), cmap="YlGnBu", annot=True, fmt='g')
p.set_title('Confusion matrix SVC', y=1.1)
p.set_ylabel('Actual label')
p.set_xlabel('Predicted label')

<h1>METODE 3: Decision Trees (dt)</h1>

In [None]:
from sklearn.tree import DecisionTreeClassifier

<h4>1. Menentukan Parameter Terbaik</h4>

In [None]:
# Single seeded 80/20 hold-out split of the training data used to score
# every parameter combination.
cv = ShuffleSplit(n_splits=1, test_size=0.2, random_state=8)

# Split-quality criteria to compare.
param_grid = {
    'criterion': ['gini', 'entropy', 'log_loss'],
    }

# The tree is seeded so repeated runs of the search rank the criteria the
# same way. Macro-averaged scorers are used because the plain 'recall' /
# 'f1' scorers are binary-average and are not the metrics this notebook
# reports (the evaluation cells all use average='macro'); on a multiclass
# target they fail and, with warnings silenced, only surface as NaN scores.
gridSearch = GridSearchCV(DecisionTreeClassifier(random_state=8), param_grid, cv=cv,
                          scoring=['recall_macro', 'f1_macro'], refit='f1_macro', verbose=2)
gridSearch.fit(X_train, y_train)
print('Score: ', gridSearch.best_score_)
print('Parameters: ', gridSearch.best_params_)

<h4>2. Tune DT</h4>

In [None]:
# Train the final decision tree (Gini criterion, depth capped at 10) and
# predict the held-out test set. random_state is fixed so the fitted tree
# -- and therefore every metric and the tree plot derived from it -- is
# reproducible across runs, in line with the seeded splitters and MLP
# used elsewhere in this notebook.
dt = DecisionTreeClassifier(criterion='gini', max_depth=10, random_state=8)
dt.fit(X_train, y_train)
y_pred_dt = dt.predict(X_test)

# Actual (rows) versus predicted (columns) class counts.
pd.crosstab(y_test, y_pred_dt)

<h4>>>>> Visualisasi</h4>

In [None]:
from dmba import plotDecisionTree

# Re-wrap the scaled arrays as DataFrames so the original feature names are
# available again; later cells keep using these DataFrame versions.
X_train = pd.DataFrame(X_train, columns = X.columns)
X_test = pd.DataFrame(X_test, columns=X.columns)

# Class labels are cast to str: the graphviz export concatenates class names
# into node labels and fails on non-string (e.g. integer) labels. For labels
# that are already strings the cast is a no-op.
plotDecisionTree(dt, feature_names=X_train.columns,
                 class_names=[str(label) for label in dt.classes_])

<h4>3. Menghitung Akurasi</h4>

In [None]:
# Per-class report followed by the raw confusion matrix for the decision-tree predictions.
print(classification_report(y_test, y_pred_dt))
cm = confusion_matrix(y_test, y_pred_dt)
print(cm)

# Macro-averaged summary metrics, printed in the order recall, precision, F1.
for template, metric in (
    ('Recall:{}', recall_score),
    ('Precision:{}', precision_score),
    ('F1-Score:{}', f1_score),
):
    print(template.format(metric(y_test, y_pred_dt, average='macro')))

In [None]:
# Overall accuracy (as a percentage) of the decision-tree predictions.
cm = confusion_matrix(y_test, y_pred_dt)
accuracy = 100 * accuracy_score(y_test, y_pred_dt)
print("Accuracy with DTC: {0:.2f}%".format(accuracy))

# Annotated heatmap of the confusion matrix; labels are set on the returned axes.
plt.figure(figsize=(3.5, 2.5))
p = sns.heatmap(data=pd.DataFrame(cm), cmap="YlGnBu", annot=True, fmt='g')
p.set_title('Confusion matrix DTC', y=1.1)
p.set_ylabel('Actual label')
p.set_xlabel('Predicted label')

<h4>1. Preprocessing</h4>

<h1>METODE 4: Multilayer Perceptron (mlp)</h1>

In [None]:
from sklearn.neural_network import MLPClassifier

<h4>1. Menentukan parameter terbaik</h4>

In [None]:
# Single seeded 80/20 hold-out split of the training data used to score
# every parameter combination.
cv = ShuffleSplit(n_splits=1, test_size=0.2, random_state=8)

# Activation / solver combinations to evaluate.
param_grid = {
    'activation': ['identity','logistic','tanh','relu'],
    'solver': ['lbfgs','sgd','adam']
    }

# The searched estimator mirrors the network that is trained afterwards in
# this notebook (one hidden layer of 10 units, max_iter=1000, random_state=8)
# so the selected activation/solver applies to that architecture and the
# search is reproducible. Macro-averaged scorers are used because the plain
# 'recall' / 'f1' scorers are binary-average and are not the metrics this
# notebook reports (the evaluation cells all use average='macro'); on a
# multiclass target they fail and, with warnings silenced, only surface as
# NaN scores.
gridSearch = GridSearchCV(MLPClassifier(hidden_layer_sizes=10, max_iter=1000, random_state=8),
                          param_grid, cv=cv,
                          scoring=['recall_macro', 'f1_macro'], refit='f1_macro', verbose=2)
gridSearch.fit(X_train, y_train)
print('Score: ', gridSearch.best_score_)
print('Parameters: ', gridSearch.best_params_)

<h4>2. Tune MLP</h4>

In [None]:
# Final MLP: one hidden layer of 10 units, identity activation, L-BFGS
# solver, seeded for repeatability.
mlp = MLPClassifier(
    hidden_layer_sizes=10,
    activation='identity',
    solver='lbfgs',
    max_iter=1000,
    random_state=8,
)
mlp.fit(X_train, y_train)
y_pred_mlp = mlp.predict(X_test)

# Actual (rows) versus predicted (columns) class counts.
pd.crosstab(index=y_test, columns=y_pred_mlp)

In [None]:
# Per-class report followed by the raw confusion matrix for the MLP predictions.
print(classification_report(y_test, y_pred_mlp))
cm = confusion_matrix(y_test, y_pred_mlp)
print(cm)

# Macro-averaged summary metrics, printed in the order recall, precision, F1.
for template, metric in (
    ('Recall:{}', recall_score),
    ('Precision:{}', precision_score),
    ('F1-Score:{}', f1_score),
):
    print(template.format(metric(y_test, y_pred_mlp, average='macro')))

In [None]:
# Overall accuracy (as a percentage) of the MLP predictions.
cm = confusion_matrix(y_test, y_pred_mlp)
accuracy = 100 * accuracy_score(y_test, y_pred_mlp)
print("Accuracy with MLP: {0:.2f}%".format(accuracy))

# Annotated heatmap of the confusion matrix; labels are set on the returned axes.
plt.figure(figsize=(3.5, 2.5))
p = sns.heatmap(data=pd.DataFrame(cm), cmap="YlGnBu", annot=True, fmt='g')
p.set_title('Confusion matrix MLP', y=1.1)
p.set_ylabel('Actual label')
p.set_xlabel('Predicted label')

<h1>COMPARING METHODS</h1>