# IMPORT


In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn import preprocessing

In [2]:
X = pd.read_csv("X_train.csv")
y = pd.read_csv("y_train.csv")
test = pd.read_csv("X_test.csv")

In [3]:
X.shape

(20758, 17)

# DATA SCALAR AND SPLITTING

In [4]:
scaler = StandardScaler()
X = scaler.fit_transform(X)
test = scaler.fit_transform(test)

In [5]:
X

array([[ 1.0041516 ,  0.10569857, -0.00282826, ...,  0.47128803,
        -0.23216606, -0.35887678],
       [-0.99586557, -1.0270519 , -1.60629083, ..., -1.63984638,
         1.72213368, -0.98169796],
       [-0.99586557, -1.0270519 ,  0.12845138, ..., -1.63984638,
        -0.23216606, -1.60451914],
       ...,
       [ 1.0041516 , -0.65766899,  1.36653688, ..., -1.63984638,
        -0.23216606,  0.2639444 ],
       [ 1.0041516 ,  1.76006735, -0.00280536, ..., -1.63984638,
         1.72213368, -0.35887678],
       [ 1.0041516 ,  0.49905134,  1.33206194, ...,  0.47128803,
        -0.23216606,  0.88676559]])

In [6]:

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=7
)

print("Shape of train set:", X_train.shape)
print("Shape of test set:", X_test.shape)


Shape of train set: (16606, 17)
Shape of test set: (4152, 17)


# MODELLING

### IMPORT METRICS

In [7]:
from sklearn.metrics import jaccard_score, f1_score

In [8]:

f1_scores = {'train':{'KNN': 0, 'DT': 0, 'RF': 0, 'LR': 0, 'NN': 0, 'EN_HARD': 0, 'EN_SOFT': 0},
             'test':{'KNN': 0, 'DT': 0, 'RF': 0, 'LR': 0, 'NN': 0, 'EN_HARD': 0, 'EN_SOFT': 0}}
jaccard_scores = {'train':{'KNN': 0, 'DT': 0, 'RF': 0, 'LR': 0, 'NN': 0, 'EN_HARD': 0, 'EN_SOFT': 0}, 
                  'test':{'KNN': 0, 'DT': 0, 'RF': 0, 'LR': 0, 'NN': 0, 'EN_HARD': 0, 'EN_SOFT': 0}}


### K Nearest Neighbor(KNN)

In [9]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV

In [10]:

param_grid = {
    "n_neighbors":range(1, 5),
    "weights": ["uniform", "distance"],
    "metric": ["euclidean", "manhattan", "chebyshev"],
}

knn = KNeighborsClassifier()
grid_search_knn = GridSearchCV(knn, param_grid, scoring="f1", cv=5, n_jobs=-1)
grid_search_knn.fit(X_train, y_train)

print("Best param_grid:")
print(grid_search_knn.best_params_)

y_pred_train = grid_search_knn.predict(X_train)
y_pred_test = grid_search_knn.predict(X_test)

f1_score_train = f1_score(y_train, y_pred_train, average='macro')
f1_score_test = f1_score(y_test, y_pred_test, average='macro')
jaccard_score_train = jaccard_score(y_train, y_pred_train, average='macro')
jaccard_score_test = jaccard_score(y_test, y_pred_test, average='macro')

f1_scores["train"]["KNN"] = f1_score_train
f1_scores["test"]["KNN"] = f1_score_test
jaccard_scores["train"]["KNN"] = jaccard_score_train
jaccard_scores["test"]["KNN"] = jaccard_score_test

print("f1 score on train set:", f1_score_train)
print("f1 score on test set:", f1_score_test)
print("Jaccard score on train set:", jaccard_score_train)
print("Jaccard score on test set:", jaccard_score_test)

 nan nan nan nan nan nan]
  return self._fit(X, y)


Best param_grid:
{'metric': 'euclidean', 'n_neighbors': 1, 'weights': 'uniform'}
f1 score on train set: 1.0
f1 score on test set: 0.733456445371721
Jaccard score on train set: 1.0
Jaccard score on test set: 0.6085192678205634


### Decision Tree(DT)

In [11]:
from sklearn.tree import DecisionTreeClassifier

In [12]:

param_grid = {
    "criterion": ["gini", "entropy"],
    "max_depth": range(5, 10),
    "min_samples_split": range(2, 5),
    "min_samples_leaf": range(1, 5),
}

dt = DecisionTreeClassifier()
grid_search_dt = GridSearchCV(dt, param_grid, scoring="f1", cv=5, n_jobs=-1)
grid_search_dt.fit(X_train, y_train)

print("Best param_grid:")
print(grid_search_dt.best_params_)

y_pred_train = grid_search_dt.predict(X_train)
y_pred_test = grid_search_dt.predict(X_test)

f1_score_train = f1_score(y_train, y_pred_train, average='macro')
f1_score_test = f1_score(y_test, y_pred_test, average='macro')
jaccard_score_train = jaccard_score(y_train, y_pred_train, average='macro')
jaccard_score_test = jaccard_score(y_test, y_pred_test, average='macro')

f1_scores["train"]["DT"] = f1_score_train
f1_scores["test"]["DT"] = f1_score_test
jaccard_scores["train"]["DT"] = jaccard_score_train
jaccard_scores["test"]["DT"] = jaccard_score_test

print("f1 score on train set:", f1_score_train)
print("f1 score on test set:", f1_score_test)
print("Jaccard score on train setn:", jaccard_score_train)
print("Jaccard score on test set:", jaccard_score_test)


 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan]


Best param_grid:
{'criterion': 'gini', 'max_depth': 5, 'min_samples_leaf': 1, 'min_samples_split': 2}
f1 score on train set: 0.8378258598569124
f1 score on test set: 0.8273167373689274
Jaccard score on train setn: 0.7368753648912393
Jaccard score on test set: 0.7225172336218805


### Random Forest(RF)

In [13]:
from sklearn.ensemble import RandomForestClassifier


In [14]:

param_grid = {
    "n_estimators": [50, 100, 150],  
    "criterion": ["gini"],
    "max_depth": range(5, 10),  
    "min_samples_split": range(2, 5),  
    "min_samples_leaf": range(1, 5),  
}

rf = RandomForestClassifier()
grid_search_rf = GridSearchCV(rf, param_grid, scoring="f1_macro", cv=5, n_jobs=-1)
grid_search_rf.fit(X_train, y_train)

print("Best param_grid :")
print(grid_search_rf.best_params_)

y_pred_train = grid_search_rf.predict(X_train)
y_pred_test = grid_search_rf.predict(X_test)

f1_score_train = f1_score(y_train, y_pred_train, average='macro')
f1_score_test = f1_score(y_test, y_pred_test, average='macro')
jaccard_score_train = jaccard_score(y_train, y_pred_train, average='macro')
jaccard_score_test = jaccard_score(y_test, y_pred_test, average='macro')

f1_scores["train"]["RF"] = f1_score_train
f1_scores["test"]["RF"] = f1_score_test
jaccard_scores["train"]["RF"] = jaccard_score_train
jaccard_scores["test"]["RF"] = jaccard_score_test

print("f1 score on train set:", f1_score_train)
print("f1 score on test set:", f1_score_test)
print("Jaccard score on train set:", jaccard_score_train)
print("Jaccard score on test set:", jaccard_score_test)



  return fit_method(estimator, *args, **kwargs)


Best param_grid :
{'criterion': 'gini', 'max_depth': 9, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 50}
f1 score on train set: 0.894828295747951
f1 score on test set: 0.8754399452253081
Jaccard score on train set: 0.8181063660949012
Jaccard score on test set: 0.7875472788896047


### Logistic Regression(LR)

In [15]:
from sklearn.linear_model import LogisticRegression

In [16]:

param_grid = {
    'penalty': [ 'l2', 'none'],
    'C': [0.001, 0.01, 0.1, 1, 10],
    'solver': ['newton-cg',  'liblinear', 'sag'],
    'max_iter': [100, 500, 1000]
}
lr = LogisticRegression()
grid_search_lr = GridSearchCV(lr, param_grid, scoring="f1", cv=5, n_jobs=-1)
grid_search_lr.fit(X_train, y_train)

print("Best param_grid:")
print(grid_search_lr.best_params_)

y_pred_train = grid_search_lr.predict(X_train)
y_pred_test = grid_search_lr.predict(X_test)

f1_score_train = f1_score(y_train, y_pred_train, average='macro')
f1_score_test = f1_score(y_test, y_pred_test, average='macro')
jaccard_score_train = jaccard_score(y_train, y_pred_train, average='macro')
jaccard_score_test = jaccard_score(y_test, y_pred_test, average='macro')

f1_scores["train"]["LR"] = f1_score_train
f1_scores["test"]["LR"] = f1_score_test
jaccard_scores["train"]["LR"] = jaccard_score_train
jaccard_scores["test"]["LR"] = jaccard_score_test

print("f1 score on train set:", f1_score_train)
print("f1 score on test set:", f1_score_test)
print("Jaccard score on train set:", jaccard_score_train)
print("Jaccard score on test set:", jaccard_score_test)



75 fits failed out of a total of 450.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
75 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\lenovo\anaconda3\Lib\site-packages\sklearn\model_selection\_validation.py", line 732, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\lenovo\anaconda3\Lib\site-packages\sklearn\base.py", line 1151, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\lenovo\anaconda3\Lib\site-packages\sklearn\linear_model\_logistic.py", line 1168, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Best param_grid:
{'C': 0.001, 'max_iter': 100, 'penalty': 'l2', 'solver': 'newton-cg'}
f1 score on train set: 0.6761349100840965
f1 score on test set: 0.6755998635835189
Jaccard score on train set: 0.5415177074113303
Jaccard score on test set: 0.54131892574617


### Neural Network(NN)

In [17]:
from sklearn.neural_network import MLPClassifier

In [18]:

param_grid = {
    "hidden_layer_sizes": [(10, 10), (20, 20), (30, 30)],
    "activation": ["relu", "tanh", "identity"],
    "solver": ["sgd","adam", "lbfgs"],
    "alpha": [0.0001, 0.001, 0.01]  
}

nn = MLPClassifier()
grid_search_nn = GridSearchCV(nn, param_grid, scoring="f1", cv=5, n_jobs=-1)
grid_search_nn.fit(X_train, y_train)

print("Best param_grid :")
print(grid_search_nn.best_params_)

y_pred_train = grid_search_nn.predict(X_train)
y_pred_test = grid_search_nn.predict(X_test)

f1_score_train = f1_score(y_train, y_pred_train, average='macro')
f1_score_test = f1_score(y_test, y_pred_test, average='macro')
jaccard_score_train = jaccard_score(y_train, y_pred_train, average='macro')
jaccard_score_test = jaccard_score(y_test, y_pred_test, average='macro')

f1_scores["train"]["NN"] = f1_score_train
f1_scores["test"]["NN"] = f1_score_test
jaccard_scores["train"]["NN"] = jaccard_score_train
jaccard_scores["test"]["NN"] = jaccard_score_test

print("f1 score on train set:", f1_score_train)
print("f1 score on test set:", f1_score_test)
print("Jaccard score on train set:", jaccard_score_train)
print("Jaccard score on test set:", jaccard_score_test)


 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan]
  y = column_or_1d(y, warn=True)


Best param_grid :
{'activation': 'relu', 'alpha': 0.0001, 'hidden_layer_sizes': (10, 10), 'solver': 'sgd'}
f1 score on train set: 0.8546711153851761
f1 score on test set: 0.847577753267237
Jaccard score on train set: 0.7593993284275681
Jaccard score on test set: 0.7489545018255402




### Soft Voting Ensemble

In [19]:
from sklearn.ensemble import VotingClassifier

In [20]:

knn = KNeighborsClassifier(metric='euclidean', n_neighbors=1, weights='uniform')
dt = DecisionTreeClassifier(criterion='gini', max_depth=10, min_samples_leaf=1, min_samples_split=2)
rf = RandomForestClassifier(criterion= 'gini', max_depth= 10, min_samples_leaf= 1, min_samples_split= 5, n_estimators= 150)
lr = LogisticRegression(penalty="l2", C=0.001,max_iter= 100, solver='newton-cg')
nn = MLPClassifier(hidden_layer_sizes=(10, 10), activation="relu", solver="adam")

ensemble_s = VotingClassifier(estimators=[
    ("knn", knn),
    ("dt", dt),
    ("rf", rf),
    ("lr", lr),
    ("nn", nn),
], voting="soft")

ensemble_s.fit(X_train, y_train)

y_pred_train = ensemble_s.predict(X_train)
y_pred_test = ensemble_s.predict(X_test)

f1_score_train = f1_score(y_train, y_pred_train, average='macro')
f1_score_test = f1_score(y_test, y_pred_test, average='macro')
jaccard_score_train = jaccard_score(y_train, y_pred_train, average='macro')
jaccard_score_test = jaccard_score(y_test, y_pred_test, average='macro')

f1_scores["train"]["EN_SOFT"] = f1_score_train
f1_scores["test"]["EN_SOFT"] = f1_score_test
jaccard_scores["train"]["EN_SOFT"] = jaccard_score_train
jaccard_scores["test"]["EN_SOFT"] = jaccard_score_test

# In ra f1 score và Jaccard score
print("f1 score on train set:", f1_score_train)
print("f1 score on test set:", f1_score_test)
print("Jaccard score on train set:", jaccard_score_train)
print("Jaccard score on test set:", jaccard_score_test)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)


f1 score on train set: 0.9478649362934652
f1 score on test set: 0.8740793858478147
Jaccard score on train set: 0.9028025707248568
Jaccard score on test set: 0.786571434363772


### Hard Voting Ensemble

In [21]:

knn = KNeighborsClassifier(metric='euclidean', n_neighbors=1, weights='uniform')
dt = DecisionTreeClassifier(criterion='gini', max_depth=10, min_samples_leaf=1, min_samples_split=2)
rf = RandomForestClassifier(criterion= 'gini', max_depth= 10, min_samples_leaf= 1, min_samples_split= 5, n_estimators= 150)
lr = LogisticRegression(penalty="l2", C=0.001,max_iter= 100, solver='newton-cg')
nn = MLPClassifier(hidden_layer_sizes=(10, 10), activation="relu", solver="adam")

ensemble_h = VotingClassifier(estimators=[
    ("knn", knn),
    ("dt", dt),
    ("rf", rf),
    ("lr", lr),
    ("nn", nn),
], voting="hard")

ensemble_h.fit(X_train, y_train)

y_pred_train = ensemble_h.predict(X_train)
y_pred_test = ensemble_h.predict(X_test)

f1_score_train = f1_score(y_train, y_pred_train, average='macro')
f1_score_test = f1_score(y_test, y_pred_test, average='macro')
jaccard_score_train = jaccard_score(y_train, y_pred_train, average='macro')
jaccard_score_test = jaccard_score(y_test, y_pred_test, average='macro')

f1_scores["train"]["EN_HARD"] = f1_score_train
f1_scores["test"]["EN_HARD"] = f1_score_test
jaccard_scores["train"]["EN_HARD"] = jaccard_score_train
jaccard_scores["test"]["EN_HARD"] = jaccard_score_test

print("f1 score on train set:", f1_score_train)
print("f1 score on test set:", f1_score_test)
print("Jaccard score on train set:", jaccard_score_train)
print("Jaccard score on test set:", jaccard_score_test)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)


f1 score on train set: 0.9246144820934988
f1 score on test set: 0.8766904995092638
Jaccard score on train set: 0.8637764879537263
Jaccard score on test set: 0.7895504752814233


## REPORT 

In [22]:
dict_data = {
    "Algorithm": ["KNN", "Decision Tree", "Random Forest", "Logistic Regression", "Neural Network", "Hard Voting", "Soft Voting"],
    "Jaccard-training": jaccard_scores['train'].values(),
    "F1-score-training": f1_scores['train'].values(),
    "Jaccard-testing": jaccard_scores['test'].values(),
    "F1-score-testing": f1_scores['test'].values()
}

df = pd.DataFrame.from_dict(dict_data)
df = df.set_index("Algorithm")
df

Unnamed: 0_level_0,Jaccard-training,F1-score-training,Jaccard-testing,F1-score-testing
Algorithm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
KNN,1.0,1.0,0.608519,0.733456
Decision Tree,0.736875,0.837826,0.722517,0.827317
Random Forest,0.818106,0.894828,0.787547,0.87544
Logistic Regression,0.541518,0.676135,0.541319,0.6756
Neural Network,0.759399,0.854671,0.748955,0.847578
Hard Voting,0.863776,0.924614,0.78955,0.87669
Soft Voting,0.902803,0.947865,0.786571,0.874079


# Building a Neural Network Model

In [23]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
model = Sequential()
model.add(Dense(24, input_dim=X_train.shape[1], activation='relu'))
model.add(Dense(48, activation='relu'))
model.add(Dense(72, activation='relu'))
model.add(Dense(48, activation='relu'))
model.add(Dense(24, activation='relu'))
model.add(Dense(7, activation='softmax'))
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=20, batch_size=50, validation_split=0.2)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.src.callbacks.History at 0x21a541f3690>

In [24]:

loss, accuracy = model.evaluate(X_test, y_test)
print("Loss on test set:", loss)
print("Accuracy on test set:", accuracy)


Loss on test set: 0.4055521488189697
Accuracy on test set: 0.8612716794013977


In [26]:
from sklearn.metrics import classification_report

y_pred_prob = model.predict(X_test)

y_pred = np.argmax(y_pred_prob, axis=1)

print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

           0       0.80      0.83      0.82       623
           1       0.91      0.84      0.88       518
           2       0.69      0.72      0.70       464
           3       0.75      0.74      0.74       499
           4       0.85      0.83      0.84       600
           5       0.94      0.96      0.95       654
           6       1.00      0.99      0.99       794

    accuracy                           0.86      4152
   macro avg       0.85      0.85      0.85      4152
weighted avg       0.86      0.86      0.86      4152



### Model Improvement

In [27]:

model1 = Sequential()

model1.add(Dense(24, input_dim=X_train.shape[1], activation='relu'))
model1.add(Dense(48, activation='relu'))
model1.add(Dense(72, activation='relu'))
model1.add(Dense(48, activation='relu'))
model1.add(Dense(24, activation='relu'))

model1.add(Dropout(0.5))

model1.add(Dense(7, activation='softmax'))

model1.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

history = model1.fit(X_train, y_train, epochs=50, batch_size=64, validation_split=0.2)


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [28]:

loss, accuracy = model1.evaluate(X_test, y_test)
print("Loss on test set:", loss)
print("Accuracy on test set:", accuracy)


Loss on test set: 0.46901148557662964
Accuracy on test set: 0.8699421882629395


In [29]:

y_pred_prob = model1.predict(X_test)

y_pred = np.argmax(y_pred_prob, axis=1)

print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

           0       0.85      0.81      0.83       623
           1       0.85      0.96      0.90       518
           2       0.71      0.72      0.72       464
           3       0.77      0.73      0.75       499
           4       0.84      0.84      0.84       600
           5       0.96      0.95      0.96       654
           6       1.00      0.99      1.00       794

    accuracy                           0.87      4152
   macro avg       0.86      0.86      0.86      4152
weighted avg       0.87      0.87      0.87      4152



In [30]:

model = Sequential()

model.add(Dense(24, input_dim=X_train.shape[1], activation='relu'))
model.add(Dropout(0.2)) 
model.add(Dense(48, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(72, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(48, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(24, activation='relu'))
model.add(Dropout(0.2))

model.add(Dense(7, activation='softmax'))

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

history = model.fit(X_train, y_train, epochs=50, batch_size=50, validation_split=0.2)



Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [31]:
loss, accuracy = model.evaluate(X_test, y_test)
print("Loss on test set:", loss)
print("Accuracy on test set:", accuracy)


Loss on test set: 0.5549184083938599
Accuracy on test set: 0.8092485666275024


In [32]:

y_pred_prob = model1.predict(X_test)

y_pred = np.argmax(y_pred_prob, axis=1)

print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

           0       0.85      0.81      0.83       623
           1       0.85      0.96      0.90       518
           2       0.71      0.72      0.72       464
           3       0.77      0.73      0.75       499
           4       0.84      0.84      0.84       600
           5       0.96      0.95      0.96       654
           6       1.00      0.99      1.00       794

    accuracy                           0.87      4152
   macro avg       0.86      0.86      0.86      4152
weighted avg       0.87      0.87      0.87      4152

