In [1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns

from imblearn.combine import SMOTEENN
from imblearn.under_sampling import EditedNearestNeighbours
from imblearn.over_sampling import RandomOverSampler, ADASYN

import keras
import tensorflow
from keras import layers
from keras import metrics
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout, Flatten, Embedding, Conv1D, MaxPooling1D, Reshape
from keras.preprocessing import sequence
from keras.utils import pad_sequences

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, plot_confusion_matrix, f1_score, precision_score, recall_score, accuracy_score

In [2]:
X_train = pd.read_csv('train_data_imputed_final.csv')
X_test = pd.read_csv('test_data_imputed_final.csv')
y_train = pd.read_csv('y_train_final.csv')
y_test = pd.read_csv('y_test_final.csv')

In [3]:
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

(11516, 263)
(11516, 2)
(2880, 263)
(2880, 2)


In [4]:
y_train = y_train.drop(columns=['Unnamed: 0'])
y_test = y_test.drop(columns=['Unnamed: 0'])

## Random Oversampler

In [5]:
ros = RandomOverSampler(sampling_strategy = 1, random_state=42)

In [6]:
X_res, y_res = ros.fit_resample(X_train, y_train)

In [7]:
y_res.value_counts()

fraudulent
0             11293
1             11293
dtype: int64

In [8]:
len(X_res)

22586

### Normal NN

In [35]:
### Model building
model = Sequential()
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(32, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy',metrics.Precision(), metrics.Recall(),metrics.AUC()])

In [36]:
model.fit(X_res, y_res, epochs=25)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<keras.callbacks.History at 0x1e40de0ceb0>

In [37]:
model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_15 (Dense)            (None, 256)               67584     
                                                                 
 dropout_12 (Dropout)        (None, 256)               0         
                                                                 
 dense_16 (Dense)            (None, 128)               32896     
                                                                 
 dropout_13 (Dropout)        (None, 128)               0         
                                                                 
 dense_17 (Dense)            (None, 64)                8256      
                                                                 
 dropout_14 (Dropout)        (None, 64)                0         
                                                                 
 dense_18 (Dense)            (None, 32)               

In [38]:
train_score = model.evaluate(X_res, y_res)
print(train_score)

[0.006984109058976173, 0.9995129704475403, 0.999026894569397, 1.0, 0.9999933242797852]


In [45]:
y_tr_pred = model.predict(X_train)



In [46]:
y_t_pred = np.where(y_tr_pred < 0.5, 0, 1)

In [47]:
print(f1_score(y_train, y_t_pred))

0.9759299781181618


In [39]:
pred = model.predict(X_test)



In [40]:
y_pred = np.where(pred < 0.5, 0, 1)

In [41]:
y_train.columns

Index(['fraudulent'], dtype='object')

In [42]:
confusion_matrix(y_test, y_pred)

array([[2815,    9],
       [  15,   41]], dtype=int64)

In [43]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.99      1.00      1.00      2824
           1       0.82      0.73      0.77        56

    accuracy                           0.99      2880
   macro avg       0.91      0.86      0.88      2880
weighted avg       0.99      0.99      0.99      2880



In [44]:
print(accuracy_score(y_test, y_pred))
print(precision_score(y_test, y_pred))
print(recall_score(y_test, y_pred))
print(f1_score(y_test, y_pred))

0.9916666666666667
0.82
0.7321428571428571
0.7735849056603773


### CNN

In [38]:
cnn_model = Sequential()
cnn_model.add(Conv1D(32, 3, activation='relu', input_shape=(X_res.shape[1], 1)))
cnn_model.add(MaxPooling1D(pool_size=2, strides=3))
cnn_model.add(Flatten())
cnn_model.add(Dense(256, activation='relu'))
cnn_model.add(Dropout(0.2))
cnn_model.add(Dense(128, activation='relu'))
cnn_model.add(Dropout(0.2))
cnn_model.add(Dense(64, activation='relu'))
cnn_model.add(Dropout(0.2))
cnn_model.add(Dense(32, activation='relu'))
cnn_model.add(Dropout(0.2))
cnn_model.add(Dense(1, activation='sigmoid'))
cnn_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [39]:
cnn_model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d_1 (Conv1D)           (None, 261, 32)           128       
                                                                 
 max_pooling1d_1 (MaxPooling  (None, 87, 32)           0         
 1D)                                                             
                                                                 
 flatten_1 (Flatten)         (None, 2784)              0         
                                                                 
 dense_15 (Dense)            (None, 256)               712960    
                                                                 
 dropout_12 (Dropout)        (None, 256)               0         
                                                                 
 dense_16 (Dense)            (None, 128)               32896     
                                                      

In [40]:
cnn_model.fit(X_res, y_res, epochs=25)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<keras.callbacks.History at 0x1916585cdf0>

In [41]:
train_score = cnn_model.evaluate(X_res, y_res)
print(train_score)

[0.000473076943308115, 0.9999114274978638]


In [42]:
pred = cnn_model.predict(X_test)



In [43]:
y_pred = np.where(pred < 0.5, 0, 1)

In [44]:
confusion_matrix(y_test, y_pred)

array([[2814,   10],
       [  14,   42]], dtype=int64)

In [45]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00      2824
           1       0.81      0.75      0.78        56

    accuracy                           0.99      2880
   macro avg       0.90      0.87      0.89      2880
weighted avg       0.99      0.99      0.99      2880



In [46]:
print(accuracy_score(y_test, y_pred))
print(precision_score(y_test, y_pred))
print(recall_score(y_test, y_pred))
print(f1_score(y_test, y_pred))

0.9916666666666667
0.8076923076923077
0.75
0.7777777777777779


## ADASYN

In [25]:
ada = ADASYN(sampling_strategy = 1, random_state=42, n_neighbors = 2)

In [26]:
X_res, y_res = ada.fit_resample(X_train, y_train)

In [27]:
y_res.value_counts()

fraudulent
0             11293
1             11286
dtype: int64

In [28]:
classifier = LogisticRegression()
classifier.fit(X_train, y_train)
print(classification_report(y_test, classifier.predict(X_test)))

  y = column_or_1d(y, warn=True)


              precision    recall  f1-score   support

           0       0.99      1.00      0.99      2824
           1       0.67      0.25      0.36        56

    accuracy                           0.98      2880
   macro avg       0.83      0.62      0.68      2880
weighted avg       0.98      0.98      0.98      2880



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [29]:
classifier = LogisticRegression()
classifier.fit(X_res, y_res)
print(classification_report(y_test, classifier.predict(X_test)))

  y = column_or_1d(y, warn=True)


              precision    recall  f1-score   support

           0       0.99      0.93      0.96      2824
           1       0.15      0.62      0.24        56

    accuracy                           0.92      2880
   macro avg       0.57      0.78      0.60      2880
weighted avg       0.98      0.92      0.95      2880



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


### NN

In [30]:
### Model building
model = Sequential()
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(32, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [31]:
model.fit(X_res, y_res, epochs=25)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<keras.callbacks.History at 0x19458a6ad90>

In [32]:
train_score = model.evaluate(X_res, y_res)
print(train_score)

[0.0030316305346786976, 0.999158501625061]


In [33]:
pred = model.predict(X_test)



In [34]:
y_pred = np.where(pred < 0.5, 0, 1)

In [35]:
y_train.columns

Index(['fraudulent'], dtype='object')

In [36]:
confusion_matrix(y_test, y_pred)

array([[2805,   19],
       [  15,   41]], dtype=int64)

In [37]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.99      0.99      0.99      2824
           1       0.68      0.73      0.71        56

    accuracy                           0.99      2880
   macro avg       0.84      0.86      0.85      2880
weighted avg       0.99      0.99      0.99      2880



### CNN

In [38]:
cnn_model = Sequential()
cnn_model.add(Conv1D(32, 3, activation='relu', input_shape=(X_res.shape[1], 1)))
cnn_model.add(MaxPooling1D(pool_size=2, strides=3))
cnn_model.add(Flatten())
cnn_model.add(Dense(128, activation='relu'))
cnn_model.add(Dropout(0.2))
cnn_model.add(Dense(64, activation='relu'))
cnn_model.add(Dropout(0.2))
cnn_model.add(Dense(64, activation='relu'))
cnn_model.add(Dropout(0.2))
cnn_model.add(Dense(32, activation='relu'))
cnn_model.add(Dropout(0.2))
cnn_model.add(Dense(1, activation='sigmoid'))
cnn_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [39]:
cnn_model.fit(X_res, y_res, epochs=25)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<keras.callbacks.History at 0x194624d2af0>

In [40]:
train_score = cnn_model.evaluate(X_res, y_res)
print(train_score)

[0.007340858690440655, 0.9981398582458496]


In [41]:
pred = cnn_model.predict(X_test)



In [42]:
y_pred = np.where(pred < 0.5, 0, 1)

In [43]:
confusion_matrix(y_test, y_pred)

array([[2806,   18],
       [  23,   33]], dtype=int64)

In [44]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.99      0.99      0.99      2824
           1       0.65      0.59      0.62        56

    accuracy                           0.99      2880
   macro avg       0.82      0.79      0.80      2880
weighted avg       0.99      0.99      0.99      2880



# GloVe

In [45]:
X_train = pd.read_csv('datasets/glovetrain_data_imputed_glove.csv')
X_test = pd.read_csv('test_data_imputed_glove.csv')
y_train = pd.read_csv('y_train_glove.csv')
y_test = pd.read_csv('y_test_glove.csv')

In [46]:
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

(11516, 263)
(11516, 1)
(2880, 263)
(2880, 1)


## Random OverSampler

In [47]:
ros = RandomOverSampler(sampling_strategy = 1, random_state=42)

In [48]:
X_res, y_res = ros.fit_resample(X_train, y_train)

In [49]:
y_res.value_counts()

fraudulent
0             11293
1             11293
dtype: int64

In [50]:
len(X_res)

22586

### Normal NN

In [51]:
### Model building
model = Sequential()
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(32, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [52]:
model.fit(X_res, y_res, epochs=25)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<keras.callbacks.History at 0x19458baa8e0>

In [53]:
train_score = model.evaluate(X_res, y_res)
print(train_score)

[0.016498316079378128, 0.9947755336761475]


In [54]:
pred = model.predict(X_test)



In [55]:
y_pred = np.where(pred < 0.5, 0, 1)

In [56]:
y_train.columns

Index(['fraudulent'], dtype='object')

In [57]:
confusion_matrix(y_test, y_pred)

array([[2778,   46],
       [  25,   31]], dtype=int64)

In [58]:
df_cm = pd.DataFrame(confusion_matrix(y_test, y_pred))

In [59]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.99      0.98      0.99      2824
           1       0.40      0.55      0.47        56

    accuracy                           0.98      2880
   macro avg       0.70      0.77      0.73      2880
weighted avg       0.98      0.98      0.98      2880



### CNN

In [60]:
cnn_model = Sequential()
cnn_model.add(Conv1D(32, 3, activation='relu', input_shape=(X_res.shape[1], 1)))
cnn_model.add(MaxPooling1D(pool_size=2, strides=3))
cnn_model.add(Flatten())
cnn_model.add(Dense(128, activation='relu'))
cnn_model.add(Dropout(0.2))
cnn_model.add(Dense(64, activation='relu'))
cnn_model.add(Dropout(0.2))
cnn_model.add(Dense(64, activation='relu'))
cnn_model.add(Dropout(0.2))
cnn_model.add(Dense(32, activation='relu'))
cnn_model.add(Dropout(0.2))
cnn_model.add(Dense(1, activation='sigmoid'))
cnn_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [61]:
cnn_model.fit(X_res, y_res, epochs=25)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<keras.callbacks.History at 0x19457228e80>

In [62]:
train_score = cnn_model.evaluate(X_res, y_res)
print(train_score)

[0.025853173807263374, 0.9915876984596252]


In [63]:
pred = cnn_model.predict(X_test)



In [64]:
y_pred = np.where(pred < 0.5, 0, 1)

In [65]:
confusion_matrix(y_test, y_pred)

array([[2732,   92],
       [  17,   39]], dtype=int64)

In [66]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.99      0.97      0.98      2824
           1       0.30      0.70      0.42        56

    accuracy                           0.96      2880
   macro avg       0.65      0.83      0.70      2880
weighted avg       0.98      0.96      0.97      2880



## ADASYN

In [67]:
ada = ADASYN(sampling_strategy = 1, random_state=42, n_neighbors = 4)

In [68]:
X_res, y_res = ada.fit_resample(X_train, y_train)

In [69]:
y_res.value_counts()

fraudulent
1             11297
0             11293
dtype: int64

In [70]:
classifier = LogisticRegression()
classifier.fit(X_train, y_train)
print(classification_report(y_test, classifier.predict(X_test)))

  y = column_or_1d(y, warn=True)


              precision    recall  f1-score   support

           0       0.98      1.00      0.99      2824
           1       0.60      0.05      0.10        56

    accuracy                           0.98      2880
   macro avg       0.79      0.53      0.54      2880
weighted avg       0.97      0.98      0.97      2880



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [71]:
classifier = LogisticRegression()
classifier.fit(X_res, y_res)
print(classification_report(y_test, classifier.predict(X_test)))

  y = column_or_1d(y, warn=True)


              precision    recall  f1-score   support

           0       0.99      0.84      0.91      2824
           1       0.07      0.66      0.13        56

    accuracy                           0.83      2880
   macro avg       0.53      0.75      0.52      2880
weighted avg       0.97      0.83      0.89      2880



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


### NN

In [72]:
### Model building
model = Sequential()
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(32, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [73]:
model.fit(X_res, y_res, epochs=25)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<keras.callbacks.History at 0x194063468b0>

In [74]:
train_score = model.evaluate(X_res, y_res)
print(train_score)

[0.019140610471367836, 0.9945994019508362]


In [75]:
pred = model.predict(X_test)



In [76]:
y_pred = np.where(pred < 0.5, 0, 1)

In [77]:
confusion_matrix(y_test, y_pred)

array([[2770,   54],
       [  22,   34]], dtype=int64)

In [78]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.99      0.98      0.99      2824
           1       0.39      0.61      0.47        56

    accuracy                           0.97      2880
   macro avg       0.69      0.79      0.73      2880
weighted avg       0.98      0.97      0.98      2880



### CNN

In [79]:
cnn_model = Sequential()
cnn_model.add(Conv1D(32, 3, activation='relu', input_shape=(X_res.shape[1], 1)))
cnn_model.add(MaxPooling1D(pool_size=2, strides=3))
cnn_model.add(Flatten())
cnn_model.add(Dense(128, activation='relu'))
cnn_model.add(Dropout(0.2))
cnn_model.add(Dense(64, activation='relu'))
cnn_model.add(Dropout(0.2))
cnn_model.add(Dense(64, activation='relu'))
cnn_model.add(Dropout(0.2))
cnn_model.add(Dense(32, activation='relu'))
cnn_model.add(Dropout(0.2))
cnn_model.add(Dense(1, activation='sigmoid'))
cnn_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [80]:
cnn_model.fit(X_res, y_res, epochs=25)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<keras.callbacks.History at 0x194083aa430>

In [81]:
train_score = cnn_model.evaluate(X_res, y_res)
print(train_score)

[0.04103609547019005, 0.9857901930809021]


In [82]:
pred = cnn_model.predict(X_test)



In [83]:
y_pred = np.where(pred < 0.5, 0, 1)

In [84]:
confusion_matrix(y_test, y_pred)

array([[2807,   17],
       [  32,   24]], dtype=int64)

In [85]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.99      0.99      0.99      2824
           1       0.59      0.43      0.49        56

    accuracy                           0.98      2880
   macro avg       0.79      0.71      0.74      2880
weighted avg       0.98      0.98      0.98      2880

