Binary Classification TCN

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler, OneHotEncoder
from keras.models import Sequential
from keras.layers import Dense
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score, roc_curve, roc_auc_score, confusion_matrix, classification_report
)
import geoopt
import torch
from tcn import TCN
import matplotlib.pyplot as plt


# Binary classification with a Temporal Convolutional Network (TCN).
#
# Pipeline: load CSV -> chronological 80/10/10 split -> scale features using
# statistics from the training rows only -> train TCN -> report test metrics
# and save the ROC curve.
# NOTE(review): the split is positional and fit() uses shuffle=False, so the
# CSV is presumably already in the intended (e.g. temporal) order -- confirm.

# Load the data from a local file
dataset = pd.read_csv("sorted_preprocessed_acc_data_with_hotspots.csv")
X = dataset.iloc[:, 0:37]              # first 37 columns: input features
y = dataset.iloc[:, 37:38].to_numpy()  # column 37: target, kept as an (n, 1) column

print('X.shape', X.shape)
print('y.shape', y.shape)

# Positional split boundaries (train / validation / test).
train_ratio = 0.80
val_ratio = 0.10
test_ratio = 0.10  # implied remainder; kept for readability only

train_split = int(len(dataset) * train_ratio)
val_split = int(len(dataset) * (train_ratio + val_ratio))

# Fit the scaler on the training rows ONLY, then apply it to every row.
# Fitting on the full dataset would leak validation/test statistics into
# training and make the reported metrics optimistic.
scaler = StandardScaler()
scaler.fit(X.iloc[:train_split])
dataset_X = scaler.transform(X)

# The model below is declared with input_shape=(n_features, 1), i.e. it
# expects (batch, timesteps, channels). Add the channel axis explicitly
# instead of relying on Keras' implicit rank adjustment.
dataset_X = dataset_X[..., np.newaxis]

X_train = dataset_X[:train_split]
y_train = y[:train_split]
X_val = dataset_X[train_split:val_split]
y_val = y[train_split:val_split]
X_test = dataset_X[val_split:]
y_test = y[val_split:]

# Define a TCN model for binary classification
model = Sequential()
model.add(TCN(input_shape=(X_train.shape[1], 1), nb_filters=32, kernel_size=3, activation='relu', dropout_rate=0.3))
model.add(Dense(units=1, activation='sigmoid'))  # Use sigmoid activation for binary classification

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])  # Use binary_crossentropy loss for binary classification

# Train the model (shuffle=False keeps the original row order within an epoch)
history = model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_val, y_val), shuffle=False)

# Make predictions on test data
y_pred = model.predict(X_test)               # sigmoid scores in [0, 1], shape (n, 1)
y_pred_classes = (y_pred > 0.5).astype(int)  # Convert probabilities to binary labels

# Evaluate the model on thresholded predictions
accuracy = accuracy_score(y_test, y_pred_classes)
precision = precision_score(y_test, y_pred_classes)
recall = recall_score(y_test, y_pred_classes)
f1 = f1_score(y_test, y_pred_classes)

# Compute ROC curve / AUC from the continuous scores. Using the 0/1 labels
# here would collapse the curve to a single operating point and misstate AUC.
fpr, tpr, _ = roc_curve(y_test, y_pred)
roc_auc = roc_auc_score(y_test, y_pred)

# Compute confusion matrix. labels=[0, 1] guarantees a 2x2 matrix (and hence
# four values to unpack) even if one class is absent from the test slice.
tn, fp, fn, tp = confusion_matrix(y_test, y_pred_classes, labels=[0, 1]).ravel()
tpr_score = tp / (tp + fn) if (tp + fn) else float('nan')
fpr_score = fp / (fp + tn) if (fp + tn) else float('nan')
report = classification_report(y_test, y_pred_classes, labels=[0, 1], target_names=['Class 0', 'Class 1'])

# model.evaluate returns [loss, accuracy] because the model was compiled with
# metrics=['accuracy']; unpack it so "Test Loss" prints the loss alone.
test_loss, _ = model.evaluate(X_test, y_test, verbose=0)

print(report)
print("Training Accuracy:", history.history['accuracy'][-1])
print("Validation Accuracy:", history.history['val_accuracy'][-1])
print("Test Accuracy:", accuracy)
print("Training Loss:", history.history['loss'][-1])
print("Validation Loss:", history.history['val_loss'][-1])
print("Test Loss:", test_loss)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)
print("True Positive Rate:", tpr_score)
print("False Positive Rate:", fpr_score)

# Plot ROC curve
plt.figure()
plt.plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve (area = %0.2f)' % roc_auc)
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')  # chance diagonal
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.legend(loc="lower right")
# NOTE(review): absolute Colab/Drive path; the target directory must already exist.
plt.savefig('/content/drive/MyDrive/conf_proj/figures/tcn_roc_curve.png', dpi=300, bbox_inches='tight')
plt.show()




Multi-class Classification TCN

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler, OneHotEncoder
from keras.models import Sequential
from keras.layers import Dense
from keras.utils import to_categorical
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

import torch
from tcn import TCN
import matplotlib.pyplot as plt


# Three-class classification with a Temporal Convolutional Network (TCN).
#
# Pipeline: load CSV -> one-hot encode target -> chronological 80/10/10 split
# -> scale features using statistics from the training rows only -> train TCN
# -> save training history -> report test metrics.
# NOTE(review): the split is positional and fit() uses shuffle=False, so the
# CSV is presumably already in the intended (e.g. temporal) order -- confirm.

NUM_CLASSES = 3  # must match the width of the softmax output layer below

# Load the data from a local file
dataset = pd.read_csv("sorted_preprocessed_acc_data_with_3_Class_hotspots.csv")
X = dataset.iloc[:, 0:37]  # first 37 columns: input features

# Convert the targets (column 37) to one-hot vectors. Passing num_classes
# pins the encoding width to the model's output width rather than letting it
# be inferred from the largest label present in the file.
y = to_categorical(dataset.iloc[:, 37:38], num_classes=NUM_CLASSES)

print('X.shape', X.shape)
print('y.shape', y.shape)

# Positional split boundaries (train / validation / test).
train_ratio = 0.80
val_ratio = 0.10
test_ratio = 0.10  # implied remainder; kept for readability only

train_split = int(len(dataset) * train_ratio)
val_split = int(len(dataset) * (train_ratio + val_ratio))

# Fit the scaler on the training rows ONLY, then apply it to every row.
# Fitting on the full dataset would leak validation/test statistics into
# training and make the reported metrics optimistic.
scaler = StandardScaler()
scaler.fit(X.iloc[:train_split])
dataset_X = scaler.transform(X)

# The model below is declared with input_shape=(n_features, 1), i.e. it
# expects (batch, timesteps, channels). Add the channel axis explicitly
# instead of relying on Keras' implicit rank adjustment.
dataset_X = dataset_X[..., np.newaxis]

X_train = dataset_X[:train_split]
y_train = y[:train_split]
X_val = dataset_X[train_split:val_split]
y_val = y[train_split:val_split]
X_test = dataset_X[val_split:]
y_test = y[val_split:]

# Define a TCN model for multi-class classification
model = Sequential()
model.add(TCN(input_shape=(X_train.shape[1], 1), nb_filters=64, kernel_size=3, activation='relu', dropout_rate=0.1))
model.add(Dense(units=NUM_CLASSES, activation='softmax'))  # Use softmax activation for multi-class classification

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])  # Use categorical_crossentropy loss for multi-class classification

# Train the model (shuffle=False keeps the original row order within an epoch)
history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_val, y_val), shuffle=False)

# Convert history to dataframe and save
hist_df = pd.DataFrame(history.history)
hist_csv_file = 'history_multi_tcn.csv'
hist_df.to_csv(hist_csv_file)  # pandas opens and closes the file itself

# Make predictions on test data
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)  # Convert probabilities to class labels

# Convert y_test back to class labels
y_test_classes = np.argmax(y_test, axis=1)

# Calculate accuracy
accuracy = accuracy_score(y_test_classes, y_pred_classes)

# Compute confusion matrix; explicit labels keep it NUM_CLASSES x NUM_CLASSES
# even if a class never appears in the test slice or in the predictions.
conf_mat = confusion_matrix(y_test_classes, y_pred_classes, labels=list(range(NUM_CLASSES)))

# Calculate the test loss (evaluate returns [loss, accuracy] for this model)
test_loss, _ = model.evaluate(X_test, y_test, verbose=0)

print("Test Loss:", test_loss)
print("Test Accuracy:", accuracy)
print("Training Accuracy:", history.history['accuracy'][-1])
print("Validation Accuracy:", history.history['val_accuracy'][-1])
print("Confusion Matrix:\n", conf_mat)
# If you want to plot a ROC curve for multi-class classification, it'll be a bit more involved as you'll need to do it for each class separately

# Evaluate the model using classification report
report = classification_report(y_test_classes, y_pred_classes, output_dict=True)
print(report)

# Get support-weighted precision, recall, and F1-score.
# The F1 result is stored as `f1` (not `f1_score`) so it does not reuse the
# name of sklearn.metrics.f1_score, which the binary cell imports.
precision = report['weighted avg']['precision']
recall = report['weighted avg']['recall']
f1 = report['weighted avg']['f1-score']

print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)

# Import necessary libraries
import matplotlib.pyplot as plt
plt.rcParams['font.family'] = 'serif' # specify the default font family to be "serif"

# Define a function to plot the training and validation loss
def plot_loss(history, save_path='/content/drive/MyDrive/conf_proj/figures/train_val_loss_multi_tcn.png'):
    """Plot per-epoch training and validation loss and optionally save the figure.

    Parameters
    ----------
    history : object
        Object exposing a ``history`` mapping with ``'loss'`` and
        ``'val_loss'`` sequences of equal length (e.g. the value returned
        by ``model.fit``).
    save_path : str or None, optional
        Where to write the figure at 300 dpi. Defaults to the original
        hard-coded location; pass ``None`` to display without saving.
        The target directory must already exist.
    """
    train_loss = history.history['loss']
    val_loss = history.history['val_loss']
    epochs = range(1, len(train_loss) + 1)  # 1-based epoch numbers on the x-axis

    plt.figure(figsize=(6, 6))
    plt.plot(epochs, train_loss, label='Training Loss')
    plt.plot(epochs, val_loss, label='Validation Loss')
    plt.xlabel('Epochs', fontsize=18)
    plt.ylabel('Loss', fontsize=18)
    plt.xticks(fontsize=16)
    plt.yticks(fontsize=16)
    plt.legend(fontsize=14)
    plt.grid(True)
    if save_path is not None:
        # Save before show(): some backends clear the figure once it is shown.
        plt.savefig(save_path, dpi=300)
    plt.show()

# Call the function with your history object
plot_loss(history)
