In [1]:
import pandas as pd
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import numpy as np
from tensorflow.keras import layers, models, utils as keras_utils
from sklearn.preprocessing import StandardScaler


In [4]:
# Read and preprocess data
honey = pd.read_csv('honey_optimized/honey0.csv')
honey.replace('-', -1, inplace=True)
honey.dropna(inplace=True)
honey.sample(frac=1, random_state=42)

honey_train = honey.iloc[:1481]
honey_test = honey.iloc[1481:]


df0 = pd.read_csv('filtered/supersampled_detailed_label_0.csv')
df0 = df0.sample(frac=1, random_state=42)


df0_train = df0.iloc[:8000]
df0_test = df0.iloc[8000:]

# Read other datasets
dfs = []
for i in range(10):
    if i == 2 or i == 0:
        continue
    dfs.append(pd.read_csv(f'filtered/supersampled_detailed_label_{i}.csv' 
                           if i in [1,2,4,6,8] else f'filtered/detailed_label_{i}.csv'))

dfs.append(df0_train)
dfs.append(honey_train)

df_train = pd.concat(dfs)
df_train.replace('-', -1, inplace=True)
df_train = df_train.sample(frac=1, random_state=42)
df_test = pd.read_csv('filtered/supersampled_detailed_label_2.csv')


df_test = pd.concat([df_test, honey_test, df0_test])

df_train.dropna(inplace=True)
df_test.dropna(inplace=True)

print(df_train.shape)
print(df_test.shape)
X_train = df_train.iloc[:, :-2]
y_train = df_train.iloc[:, -2]
X_test = df_test.iloc[:, :-2]
y_test = df_test.iloc[:, -2]


num_classes = 2

(58928, 16)
(31673, 16)


In [5]:

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

y_test = keras_utils.to_categorical(y_test, num_classes)
y_train = keras_utils.to_categorical(y_train, num_classes)

class_weights = {}
class_weights[0] = 0.7
class_weights[1] = 0.3
model = models.Sequential()
model.add(layers.Dense(28, input_dim=14, activation='relu'))
model.add(layers.Dense(56, activation='relu'))
model.add(layers.Dense(2, activation='sigmoid')) 
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Print the model summary
model.summary()

# Train the model
model.fit(X_train, y_train, epochs=30, batch_size=512, validation_split=0.2, class_weight=class_weights)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/30
[1m93/93[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.7905 - loss: 0.2182 - val_accuracy: 0.8225 - val_loss: 0.3395
Epoch 2/30
[1m93/93[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8237 - loss: 0.1304 - val_accuracy: 0.8225 - val_loss: 0.2967
Epoch 3/30
[1m93/93[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8226 - loss: 0.1184 - val_accuracy: 0.8225 - val_loss: 0.2770
Epoch 4/30
[1m93/93[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8206 - loss: 0.1147 - val_accuracy: 0.8217 - val_loss: 0.2767
Epoch 5/30
[1m93/93[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8224 - loss: 0.1123 - val_accuracy: 0.8249 - val_loss: 0.2769
Epoch 6/30
[1m93/93[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8243 - loss: 0.1102 - val_accuracy: 0.8248 - val_loss: 0.2603
Epoch 7/30
[1m93/93[0m [32m━━━━━━━━━━

<keras.src.callbacks.history.History at 0x244bf265930>

In [6]:
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Test Accuracy: {accuracy}')

print(f'Test Accuracy: {accuracy}')

y_pred = model.predict(X_test)

y_pred_classes = np.argmax(y_pred, axis=1)
y_test_classes = np.argmax(y_test, axis=1)

report = classification_report(y_test_classes, y_pred_classes)
conf_matrix = confusion_matrix(y_test_classes, y_pred_classes)

print('Classification Report:')
print(report)
print('Confusion Matrix:')
print(conf_matrix)

[1m990/990[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 715us/step - accuracy: 0.5988 - loss: 0.6500
Test Accuracy: 0.795314610004425
Test Accuracy: 0.795314610004425
[1m990/990[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 745us/step
Classification Report:
              precision    recall  f1-score   support

           0       0.80      0.95      0.87     22370
           1       0.79      0.41      0.54      9303

    accuracy                           0.80     31673
   macro avg       0.79      0.68      0.70     31673
weighted avg       0.79      0.80      0.77     31673

Confusion Matrix:
[[21361  1009]
 [ 5474  3829]]
