In [None]:
# Import necessary libraries
from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, Activation, Flatten, Input
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from sklearn.preprocessing import StandardScaler
import numpy as np
from sklearn.metrics import f1_score, precision_recall_curve, roc_curve, confusion_matrix, auc
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import pickle
import json

# Location of the input CSV; update this path to point at your own file.
dataFileName = '/kaggle/input/amazon-kaggle-nosmote/Amazon_Kaggle.csv'

# Parse the CSV into a 2-D float array; the first row is skipped as a header.
dataset = np.genfromtxt(dataFileName, skip_header=1, delimiter=',')

# Column layout used here: the first two columns are discarded, the last
# column is the label, and everything in between is a feature.
labels = dataset[:, -1]
features = dataset[:, 2:-1]

# Hold out 20% of the rows for validation; the seed is fixed so the split
# is repeatable between runs.
x_train, x_val, y_train, y_val = train_test_split(
    features,
    labels,
    test_size=0.2,
    random_state=42,
)

# Standardize the features. The scaler is fitted on the training split only
# and then reused, unchanged, on the validation split.
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_val = scaler.transform(x_val)

# Reshape each split to a 4-D (samples, 1, n_features, 1) array for the
# 2-D convolutional model.
x_train = np.reshape(x_train, (-1, 1, x_train.shape[1], 1))
x_val = np.reshape(x_val, (-1, 1, x_val.shape[1], 1))

# Model setup
input_shape = x_train.shape[1:]  # per-sample shape, i.e. everything after the batch axis
num_classes = 1  # width of the sigmoid output layer (binary classification)

# Define a modified AlexNet model with adjusted pooling size
def alexnet_model(input_shape):
    """Build the reduced AlexNet-style network used by this script.

    The network consists of two convolution + max-pooling stages, a
    flatten step, a 1024-unit ReLU dense layer and a sigmoid output layer.
    Both pooling layers use a (1, 2) window, so they only downsample the
    second spatial axis.

    Args:
        input_shape: Per-sample input shape handed to ``Input``.

    Returns:
        An uncompiled ``Model`` from the input tensor to the sigmoid output.
        The output width is taken from the module-level ``num_classes``.
    """
    model_input = Input(shape=input_shape)

    # Stage 1: strided 3x3 convolution followed by (1, 2) pooling.
    stage_one = Conv2D(
        filters=96,
        kernel_size=(3, 3),
        strides=(2, 2),
        padding='same',
        activation='relu',
    )(model_input)
    stage_one = MaxPooling2D(pool_size=(1, 2))(stage_one)

    # Stage 2: unstrided 3x3 convolution followed by (1, 2) pooling.
    stage_two = Conv2D(
        filters=256,
        kernel_size=(3, 3),
        padding='same',
        activation='relu',
    )(stage_one)
    stage_two = MaxPooling2D(pool_size=(1, 2))(stage_two)

    # Classifier head.
    flat = Flatten()(stage_two)
    hidden = Dense(units=1024, activation='relu')(flat)
    output = Dense(units=num_classes, activation='sigmoid')(hidden)

    return Model(inputs=model_input, outputs=output)


# Initialize the model
input_shape = (1, x_train.shape[2], 1)  # Per-sample shape taken from the reshaped training data
alexnet = alexnet_model(input_shape=input_shape)
# `learning_rate` is the supported keyword; the old `lr` alias is deprecated
# and rejected by newer Keras releases.
alexnet.compile(loss='binary_crossentropy', optimizer=Adam(learning_rate=0.001), metrics=['accuracy'])


callbacks = [
    # The best-so-far checkpoint gets its own file. Writing it to the same
    # path as the final save below would let the final (possibly worse)
    # model overwrite it after training.
    ModelCheckpoint(filepath='alexnet_model_real_life_best.h5', verbose=1, save_best_only=True),
    # Multiply the learning rate by 0.1 after 10 epochs without improvement.
    ReduceLROnPlateau(factor=0.1, patience=10)
]

# Train the model
epochs = 30  # Adjust as needed
history = alexnet.fit(
    x_train,
    y_train,
    batch_size=64,
    epochs=epochs,
    validation_data=(x_val, y_val),
    shuffle=True,
    callbacks=callbacks,
)

# Save the final model (the state after the last epoch)
alexnet.save('alexnet_model_real_life.h5')

# Save training history (per-epoch metric values) for later inspection
with open('alexnet_training_history_real_life.pkl', 'wb') as file:
    pickle.dump(history.history, file)

# Evaluate the model on the validation set
scores = alexnet.evaluate(x_val, y_val, verbose=1)

# Keep the raw sigmoid scores: the ROC and precision-recall curves need
# continuous scores to sweep over thresholds. Hard 0/1 labels would collapse
# both curves to a single operating point and distort the AUC values.
y_probs = alexnet.predict(x_val)
y_preds = (y_probs > 0.5).astype(int)  # hard labels at the 0.5 threshold
y_pred_binary = y_preds  # same thresholded labels; name kept for any later code that reads it

y_true = y_val.ravel()
y_hat = y_preds.ravel()
y_score = y_probs.ravel()

f1 = f1_score(y_true, y_hat, average='macro')

# Fixing labels=[0, 1] guarantees a 2x2 matrix even if one class is missing
# from the validation split, so the four-way unpacking below cannot fail.
cm = confusion_matrix(y_true, y_hat, labels=[0, 1])
tn, fp, fn, tp = cm.ravel()
tpr1 = tp / (tp + fn)
fpr1 = fp / (fp + tn)

# Calculate ROC and Precision-Recall curves from the predicted probabilities
fpr, tpr, _ = roc_curve(y_true, y_score)
roc_auc = auc(fpr, tpr)
precision, recall, _ = precision_recall_curve(y_true, y_score)
prc_auc = auc(recall, precision)

# Save metrics and curves
metrics = {
    'Validation Loss': scores[0],
    'Validation Accuracy': scores[1],
    'F1 Score': f1,
    'True Positive Rate' : tpr1,
    'False Positive Rate' : fpr1,
    'Confusion Matrix': cm.tolist(),
    'ROC AUC': roc_auc,
    'PRC AUC': prc_auc,
    'FPR Array': fpr.tolist(),
    'TPR Array': tpr.tolist(),
    'Precision Array': precision.tolist(),
    'Recall Array': recall.tolist()
}
with open('alexnet_evaluation_metrics_real_life.json', 'w') as file:
    json.dump(metrics, file)

# ROC curve: drawn on its own figure and written to disk as a PNG.
roc_fig, roc_ax = plt.subplots()
roc_ax.plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve (area = %0.2f)' % roc_auc)
roc_ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')  # diagonal reference line
roc_ax.set_xlim([0.0, 1.0])
roc_ax.set_ylim([0.0, 1.05])
roc_ax.set_xlabel('False Positive Rate')
roc_ax.set_ylabel('True Positive Rate')
roc_ax.set_title('Receiver Operating Characteristic')
roc_ax.legend(loc="lower right")
roc_fig.savefig('alexnet_roc_curve_real_life.png')

# Precision-recall curve: separate figure, also written to disk as a PNG.
prc_fig, prc_ax = plt.subplots()
prc_ax.plot(recall, precision, color='blue', lw=2, label='Precision-Recall curve (area = %0.2f)' % prc_auc)
prc_ax.set_xlabel('Recall')
prc_ax.set_ylabel('Precision')
prc_ax.set_title('Precision-Recall Curve')
prc_ax.legend(loc="lower left")
prc_fig.savefig('alexnet_precision_recall_curve_real_life.png')

