In [1]:
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.callbacks import EarlyStopping
from sklearn.model_selection import KFold
from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score, classification_report

In [2]:
df_train = pd.read_csv(r'C:\Users\nadin\OneDrive\Documents\Machine Learning Project\train_dump.csv')

In [3]:
columns_to_convert = df_train.columns[8:-1]
df_train[columns_to_convert] = df_train[columns_to_convert].astype('int8')

In [4]:
df_test = pd.read_csv(r'C:\Users\nadin\OneDrive\Documents\Machine Learning Project\test_dump.csv')

In [5]:
df_test[columns_to_convert] = df_test[columns_to_convert].astype('int8')

In [6]:
# remove the index column thats read from the csv
df_train = df_train.iloc[:, 1:]
df_test = df_test.iloc[:, 1:]

In [7]:
x_train = df_train[df_train.columns[:-1]].to_numpy()
y_train = df_train['label'].to_numpy()

x_test = df_test[df_test.columns[:-1]].to_numpy()
y_test = df_test['label'].to_numpy()

In [8]:
kf = KFold(n_splits=5)

f1s = []
accuracies = []
precisions = []
recalls = []
roc_aucs = []

In [9]:
model = Sequential()
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.2)) 
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.2)) 
model.add(Dense(2, activation='softmax'))

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [10]:
early_stopping = EarlyStopping(monitor='loss', patience=5)

fold = 1
for train, valid in kf.split(x_train, y_train):
    print(f"##### FOLD: {fold} #####")

    # Fit the model
    model.fit(x_train[train], y_train[train], epochs=200, batch_size= 256, callbacks=[early_stopping])

    # Predict on the test set
    predictions = model.predict(x_train[valid])
    predictions = np.argmax(predictions, axis=1)

    # Evaluate the model
    precision = precision_score(y_true=y_train[valid], y_pred=predictions, zero_division=0)
    recall = recall_score(y_true=y_train[valid], y_pred=predictions, zero_division=0)
    accuracy = accuracy_score(y_true=y_train[valid], y_pred=predictions)
    f1 = f1_score(y_true=y_train[valid], y_pred=predictions, zero_division=0)

    # Store the result
    f1s.append(f1)
    accuracies.append(accuracy)
    precisions.append(precision)
    recalls.append(recall)

    # Print the scores for each fold
    print(f"Precision = {precision}")
    print(f"Recall = {recall}")
    print(f"Accuracy = {accuracy}")
    print(f"F1 score = {f1}\n")

    fold += 1

print("\nMean Scores: ")
print(f"Mean F1 score = {np.mean(f1s)}")
print(f"Mean Accuracy = {np.mean(accuracies)}")
print(f"Mean Precision = {np.mean(precisions)}")
print(f"Mean Recall = {np.mean(recalls)}")

##### FOLD: 1 #####
Epoch 1/200
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 4ms/step - accuracy: 0.9957 - loss: 0.0209
Epoch 2/200
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 4ms/step - accuracy: 0.9988 - loss: 0.0033
Epoch 3/200
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 4ms/step - accuracy: 0.9988 - loss: 0.0033
Epoch 4/200
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 3ms/step - accuracy: 0.9989 - loss: 0.0031
Epoch 5/200
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 4ms/step - accuracy: 0.9989 - loss: 0.0029
Epoch 6/200
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 4ms/step - accuracy: 0.9989 - loss: 0.0029
Epoch 7/200
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 3ms/step - accuracy: 0.9989 - loss: 0.0029
Epoch 8/200
[1m3750/3750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 3ms/step - accuracy: 0.9989 - l

In [11]:
y_pred = model.predict(x_test)

[1m11311/11311[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 870us/step


In [12]:
y_pred = np.argmax(y_pred, axis=1)

classification = classification_report(y_test, y_pred, zero_division=1)

print("\nClassification Report:\n", classification)


Classification Report:
               precision    recall  f1-score   support

           0       0.99      0.96      0.98      8062
           1       1.00      1.00      1.00    353872

    accuracy                           1.00    361934
   macro avg       1.00      0.98      0.99    361934
weighted avg       1.00      1.00      1.00    361934

