In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, KFold
from sklearn.preprocessing import LabelEncoder, StandardScaler, MinMaxScaler, RobustScaler
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.metrics import ConfusionMatrixDisplay, plot_confusion_matrix, confusion_matrix, accuracy_score, recall_score, precision_score, classification_report
import seaborn as sns
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.utils import normalize, to_categorical
from tensorflow.keras.optimizers import RMSprop, SGD
import os


In [2]:
# List the visible GPUs and switch each one to on-demand memory allocation.
physical_devices = tf.config.experimental.list_physical_devices('GPU')
print("Num GPUs Available: {}, and its: {}".format(len(physical_devices), physical_devices) )
# Loop instead of indexing physical_devices[0]: the original raised IndexError
# when no GPU was detected, which made the notebook unusable on CPU-only machines.
for gpu_device in physical_devices:
    tf.config.experimental.set_memory_growth(gpu_device, True)

Num GPUs Available: 1, and its: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


# Load Data

In [3]:
# Read the feature table; the path is relative to the current working directory.
data = pd.read_csv(filepath_or_buffer='./9000_feature.csv')

# Preprocessing Data

In [4]:
# Separate the 'label' target column from the remaining feature columns.
y = data['label'].values
X = data.drop(['label'], axis=1)

In [5]:
# Robust scaling: fit the scaler on X, then replace X with the scaled array.
# NOTE(review): the scaler is fitted on every row before the train/test split,
# so statistics from the future test rows influence the scaling — consider
# fitting on the training rows only.
scalerRobust = RobustScaler().fit(X)
X = scalerRobust.transform(X)

In [6]:
# Map every class label to an integer id; the fitted encoder is kept so that
# predictions can be decoded back to labels later.
labelencoder = LabelEncoder().fit(y)
y = labelencoder.transform(y)

In [7]:
# Stratified 80/20 train/test split with a fixed random_state.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=47,
    stratify=y,
)

In [8]:
# One-hot encode the integer class ids.
# The width is pinned to the number of classes known to the encoder: without
# num_classes, to_categorical infers it from the largest id present in each
# array, so y_train and y_test could end up with different widths whenever one
# split does not contain the highest class id.
n_classes = len(labelencoder.classes_)
y_train = to_categorical(y_train, num_classes=n_classes)
y_test = to_categorical(y_test, num_classes=n_classes)

In [9]:
# Report the shape of every split, one line per array.
for name, arr in (
    ("X_train", X_train),
    ("y_train", y_train),
    ("X_test", X_test),
    ("y_test", y_test),
):
    print(f"{name} shape: {arr.shape}")

X_train shape: (7200, 7)
y_train shape: (7200, 90)
X_test shape: (1800, 7)
y_test shape: (1800, 90)


# Building Model

In [10]:
# Adam optimizer shared by the model-building cells below (learning rate 0.01).
adam = tf.keras.optimizers.Adam(learning_rate=1e-2)

In [11]:
# Model factory for one-hot encoded labels [0,1,0,...] (uses CategoricalCrossentropy)
def create_model(opt, input_dim=None, n_classes=None):
    """Build and compile a dense softmax classifier with two hidden layers.

    Args:
        opt: Optimizer handed to ``model.compile``.
        input_dim: Number of input features. Defaults to ``X_train.shape[1]``.
        n_classes: Number of output units. Defaults to ``y_train.shape[1]``.

    Returns:
        The compiled ``Sequential`` model.
    """
    # Fall back to the notebook-level training arrays so existing
    # ``create_model(opt)`` calls keep working unchanged.
    if input_dim is None:
        input_dim = X_train.shape[1]
    if n_classes is None:
        n_classes = y_train.shape[1]

    model = Sequential()
    model.add(Dense(128, activation='sigmoid', input_dim=input_dim, name="hidden_layer_1"))
    # Only the first layer declares the input size. The original also passed
    # input_dim to this layer, which did not describe its real input (the
    # 128 outputs of hidden_layer_1).
    model.add(Dense(256, activation='sigmoid', name="hidden_layer_2"))
    model.add(Dense(n_classes, activation='softmax', name='output'))

    model.compile(
        loss=tf.keras.losses.CategoricalCrossentropy(),
        optimizer=opt,
        metrics=['accuracy'],
    )

    return model

In [12]:
# Using kFold cross validation
n_split = 10

def kfold(n_split):
    """Run K-fold cross-validation over the training set.

    For every fold a new model is trained on the fold's training part and
    evaluated on its held-out part; each evaluation result is printed.

    Args:
        n_split: Number of folds passed to ``KFold``.

    Returns:
        A list holding the ``model.evaluate`` result of each fold, in fold order.
    """
    fold_scores = []
    for train_index, test_index in KFold(n_split).split(X_train):
        xtrain, xtest = X_train[train_index], X_train[test_index]
        ytrain, ytest = y_train[train_index], y_train[test_index]

        # Give every fold its own optimizer built from the shared configuration.
        # Reusing the single `adam` instance carries its internal state from one
        # fold's model into the next, and newer Keras versions may reject an
        # optimizer that was already built for another model's variables.
        fold_optimizer = type(adam).from_config(adam.get_config())
        model = create_model(fold_optimizer)
        model.fit(xtrain, ytrain, batch_size=32, epochs=1000, verbose=0)

        score = model.evaluate(xtest, ytest)
        print('Model evaluation ', score)
        # Collect the scores so the caller can aggregate them across folds.
        fold_scores.append(score)

    return fold_scores

In [13]:
# kfold(10)

In [None]:
# Seed NumPy and TensorFlow before building the model so that weight
# initialisation and batch shuffling start from a fixed state on every run
# (some GPU kernels may still introduce small run-to-run differences).
np.random.seed(47)
tf.random.set_seed(47)

# Train the final model on the full training split; `hist` keeps the per-epoch history.
model = create_model(adam)
hist = model.fit(X_train, y_train, batch_size=32, epochs=500, verbose=2)

In [15]:
# Model save
# model.save('model/training_v2')

# Load Model

# savedModel = tf.keras.models.load_model('model/training_v2')
# savedModel.summary()



# savedModel.evaluate(X_test, y_test)

# y_pred = savedModel.predict(X_test)

In [16]:
# Loss and accuracy of the trained model on the held-out test split.
model.evaluate(x=X_test, y=y_test)



[3.7530739307403564, 0.6861110925674438]

In [17]:
# Per-class softmax outputs for every test row.
y_pred = model.predict(x=X_test)

In [18]:
def classification_report_categorical(y_test, y_pred):
    """Print the text classification report and return the same report as a dict.

    Args:
        y_test: True labels.
        y_pred: Predicted labels.

    Returns:
        The dictionary produced by ``classification_report(..., output_dict=True)``.
    """
    print('Classification report : \n', classification_report(y_test, y_pred))
    return classification_report(y_test, y_pred, output_dict=True)

In [19]:
# Collapse each one-hot / probability row to the index of its largest entry.
max_ytest = y_test.argmax(axis=1)
max_ypred = y_pred.argmax(axis=1)

# Decode the class ids back to the original labels.
y_pred_inverted = labelencoder.inverse_transform(max_ypred)
y_test_inverted = labelencoder.inverse_transform(max_ytest)

In [20]:
# Print the per-class report and keep its dictionary form for later export.
report_data = classification_report_categorical(
    y_test=y_test_inverted,
    y_pred=y_pred_inverted,
)

Classification report : 
               precision    recall  f1-score   support

           a       0.81      0.65      0.72        20
          ba       0.81      0.85      0.83        20
          be       0.65      0.65      0.65        20
          bi       0.68      0.65      0.67        20
          bo       0.47      0.40      0.43        20
          bu       0.75      0.75      0.75        20
          ca       0.62      0.40      0.48        20
          ce       0.61      0.70      0.65        20
          ci       0.69      0.55      0.61        20
          co       0.60      0.45      0.51        20
          cu       0.88      0.75      0.81        20
          da       0.63      0.60      0.62        20
          de       0.93      0.65      0.76        20
          di       0.75      0.75      0.75        20
          do       0.81      0.65      0.72        20
          du       0.71      0.50      0.59        20
           e       0.72      0.65      0.68        20
 

In [21]:
# df = pd.DataFrame(report_data).T
# df.to_excel('no_augment.xlsx')

In [22]:
# Confusion Matrix
# `labels` orders the rows/columns of the matrix according to the given label
# list, so the ordering is known when the matrix is plotted later.
# https://stackoverflow.com/questions/63653161/how-to-correctly-label-confusion-matrix
cm = confusion_matrix(
    y_true=y_test_inverted,
    y_pred=y_pred_inverted,
    labels=labelencoder.classes_,
)

# Multiclass confusion matrix calculation
# https://www.analyticsvidhya.com/blog/2021/06/confusion-matrix-for-multi-class-classification/

In [23]:
# labels: remove duplicates while preserving the original order
# xlabel = list(dict.fromkeys(y_test_inverted))
# ylabel = list(dict.fromkeys(y_pred_inverted))

# confusion matrix ordered by y_true(in this case y_test)
# https://stackoverflow.com/questions/63649660/how-to-know-scikit-learn-confusion-matrixs-label-order-and-change-it

In [24]:
# fig, ax = plt.subplots(figsize=(30,30))
# plot = sns.heatmap(cm, annot=True, ax=ax, xticklabels=labelencoder.classes_, yticklabels=labelencoder.classes_, cmap="Blues")

# # title
# plt.title('Confusion Matrix', fontsize = 20) # title with fontsize 20
# plt.xlabel('Predicted Values', fontsize = 15) # x-axis label with fontsize 15
# plt.ylabel('Actual Values', fontsize = 15) # y-axis label with fontsize 15

In [25]:
# # save image
# figure = plot.get_figure()  

# # bg white
# figure.set_facecolor("white")

# figure.savefig('no_augment.png', dpi=400, facecolor=figure.get_facecolor())