In [4]:
import numpy as np
import pandas as pd
from numpy import loadtxt
from tensorflow import keras
from keras.layers import Dense, Dropout, LSTM, Flatten
from keras.models import Sequential
from keras.optimizers import Adam
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score, confusion_matrix, matthews_corrcoef, roc_auc_score, roc_curve, auc
import matplotlib.pyplot as plt
import itertools

#
# Load data from CSV file
file_path = r"F:\other student data\BKUC MS\BKUC Adill wifi\pca 60.csv"
data = pd.read_csv(file_path)

# Assuming the last column is the target variable and all other columns are features
X = data.iloc[:, :-1].values
Y = data.iloc[:, -1].values

# Fix random seed for reproducibility
np.random.seed(7)

# Define the number of splits for cross-validation
k_folds = 5
folds = list(KFold(n_splits=k_folds, shuffle=True).split(X, Y))
cvscores = []

# Iterate over each fold
for j, (train, test) in enumerate(folds):
    model = Sequential([
        LSTM(128, input_shape=(X.shape[1], 1), return_sequences=True),
        Dropout(0.2),
        LSTM(64),
        Dropout(0.3),
        Dense(32, kernel_initializer='he_uniform', activation='relu'),
        Dense(16, kernel_initializer='he_uniform', activation='relu'),
        Dense(8, kernel_initializer='he_uniform', activation='relu'),
        Dense(1, kernel_initializer='he_uniform', activation='sigmoid')
    ])

    optimizer = Adam(learning_rate=0.001)
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])

    print('\nFold =', j)
    X_train_cv, y_train_cv = X[train], Y[train]
    X_test_cv, y_test_cv = X[test], Y[test]

    # Reshape input to be [samples, time steps, features]
    X_train_cv = X_train_cv.reshape((X_train_cv.shape[0], X_train_cv.shape[1], 1))
    X_test_cv = X_test_cv.reshape((X_test_cv.shape[0], X_test_cv.shape[1], 1))

    # Fit the model
    history = model.fit(X_train_cv, y_train_cv, epochs=20, batch_size=len(X_train_cv), verbose=0)

    # Evaluate the model
    scores = model.evaluate(X_test_cv, y_test_cv, verbose=0)
    print("Test Accuracy:", scores[1] * 100)
    cvscores.append(scores[1] * 100)

# Print the mean and standard deviation of the test accuracies
print("Test Accuracy: %.2f%% (+/- %.2f%%)" % (np.mean(cvscores), np.std(cvscores)))




  super().__init__(**kwargs)



Fold = 0


KeyboardInterrupt: 

In [None]:
# Evaluate the model on the entire training set
_, train_acc = model.evaluate(X_train_cv, y_train_cv, verbose=0)
print('Train Accuracy: %.2f%%' % (train_acc * 100))

# Predicting the test set results
y_pred = (model.predict(X_test_cv) > 0.5).astype(int)

# Calculate accuracy
print("Accuracy:", accuracy_score(y_test_cv, y_pred))

# Confusion matrix and other metrics
conf_matrix = confusion_matrix(y_test_cv, y_pred)
TN, FP, FN, TP = conf_matrix.ravel()
print("True Negatives:", TN)
print("False Positives:", FP)
print("False Negatives:", FN)
print("True Positives:", TP)

# Calculate Precision, Recall, F1 Score, MCC, ROC AUC
Precision = TP / (TP + FP)
Recall = TP / (TP + FN)
F1_Score = 2 * Precision * Recall / (Precision + Recall)
MCC = matthews_corrcoef(y_test_cv, y_pred)
ROC_AUC = roc_auc_score(y_test_cv, y_pred)

print("Precision: {:.2f}".format(Precision))
print("Recall: {:.2f}".format(Recall))
print("F1 Score: {:.2f}".format(F1_Score))
print("MCC: {:.2f}".format(MCC))
print("ROC AUC: {:.2f}".format(ROC_AUC))

# Sensitivity and Specificity
Sensitivity = Recall  # Same as Recall
Specificity = TN / (TN + FP)
print("Sensitivity: {:.2f}".format(Sensitivity))
print("Specificity: {:.2f}".format(Specificity))

# Plot normalized confusion matrix
conf_matrix_normalized = conf_matrix.astype('float') / conf_matrix.sum(axis=1)[:, np.newaxis]
plt.figure(figsize=(8, 6))
plt.imshow(conf_matrix_normalized, interpolation='nearest', cmap=plt.cm.Blues)
plt.title('Normalized Confusion Matrix')
plt.colorbar()
tick_marks = np.arange(2)
plt.xticks(tick_marks, ['0', '1'])
plt.yticks(tick_marks, ['0', '1'])

fmt = '.2f'
thresh = conf_matrix_normalized.max() / 2.
for i, j in itertools.product(range(conf_matrix_normalized.shape[0]), range(conf_matrix_normalized.shape[1])):
    plt.text(j, i, format(conf_matrix_normalized[i, j], fmt),
             horizontalalignment="center",
             color="white" if conf_matrix_normalized[i, j] > thresh else "black")

plt.ylabel('True label')
plt.xlabel('Predicted label')
plt.tight_layout()
plt.show()

# Plotting loss during training
plt.figure(figsize=(10, 5))
plt.plot(history.history['loss'], linewidth=4)
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend(['Training Loss'])
plt.grid(True)
plt.savefig(r'F:\other student data\Abbas\loss 3.png')
plt.show()

# Plotting accuracy during training
plt.figure(figsize=(10, 5))
plt.plot(history.history['accuracy'], linewidth=4)
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend(['Training Accuracy'])
plt.grid(True)
plt.show()

# Plot ROC curve
false_positive_rate, true_positive_rate, thresholds = roc_curve(y_test_cv, y_pred)
roc_auc = auc(false_positive_rate, true_positive_rate)
plt.figure(figsize=(10, 5))
plt.plot(false_positive_rate, true_positive_rate, color='blue', label='ROC curve (area = %0.2f)' % roc_auc)
plt.plot([0, 1], [0, 1], linestyle='--')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend(loc='lower right')
plt.show()

# Comparing with other classifiers
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.33, random_state=0)

rf = RandomForestClassifier(max_features=5, n_estimators=100)
rf.fit(X_train, Y_train)

nb = GaussianNB()
nb.fit(X_train, Y_train)

knn = KNeighborsClassifier()
knn.fit(X_train, Y_train)

svm_clf = SVC(kernel='linear', probability=True)
svm_clf.fit(X_train, Y_train)

# Calculate ROC AUC for each classifier
rf_probs = rf.predict_proba(X_test)[:, 1]
nb_probs = nb.predict_proba(X_test)[:, 1]
knn_probs = knn.predict_proba(X_test)[:, 1]
svm_probs = svm_clf.predict_proba(X_test)[:, 1]

rf_auc = roc_auc_score(Y_test, rf_probs)
nb_auc = roc_auc_score(Y_test, nb_probs)
knn_auc = roc_auc_score(Y_test, knn_probs)
svm_auc = roc_auc_score(Y_test, svm_probs)

print('Random Forest: AUROC = %.3f' % rf_auc)
print('Naive Bayes: AUROC = %.3f' % nb_auc)
print('K Nearest Neighbors: AUROC = %.3f' % knn_auc)
print('SVM: AUROC = %.3f' % svm_auc)

# Plot ROC curves
r_fpr, r_tpr, _ = roc_curve(Y_test, [0 for _ in range(len(Y_test))])
rf_fpr, rf_tpr, _ = roc_curve(Y_test, rf_probs)
nb_fpr, nb_tpr, _ = roc_curve(Y_test, nb_probs)
knn_fpr, knn_tpr, _ = roc_curve(Y_test, knn_probs)
svm_fpr, svm_tpr, _ = roc_curve(Y_test, svm_probs)

plt.figure(figsize=(10, 5))
plt.plot(r_fpr, r_tpr, linestyle='--', label='Random (chance)')
plt.plot(rf_fpr, rf_tpr, marker='.', label='Random Forest')
plt.plot(nb_fpr, nb_tpr, marker='.', label='Naive Bayes')
plt.plot(knn_fpr, knn_tpr, marker='.', label='KNN')
plt.plot(svm_fpr, svm_tpr, marker='.', label='SVM')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend()
plt.show()