## Importing the Libraries

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
%matplotlib inline
# from tensorflow.python.keras.layers import Conv1D, MaxPooling1D, Dense, Flatten
# from tensorflow.python.keras.models import Sequential
from keras import Sequential
from keras.layers import Conv1D, Dense, Embedding, Flatten, MaxPooling1D
from keras.utils import pad_sequences
from keras_preprocessing.text import Tokenizer
from sklearn.metrics import (
    classification_report,
    confusion_matrix,
    f1_score,
    precision_score,
    recall_score,
)
from sklearn.model_selection import train_test_split

## Loading the preprocessed data from the CSV files exported by the preprocessing notebook


In [None]:
# Labels: one value per sample, read from a comma-delimited text file.
y = np.loadtxt('y.csv', delimiter=',')

# Texts: only the 'preprocessed_text' column is used, as a plain list of str.
# read_csv parses empty cells as NaN; replace those with an empty string
# before the str conversion so a missing document does not turn into the
# literal word 'nan' and end up in the tokenizer vocabulary.
X = (
    pd.read_csv('X.csv')['preprocessed_text']
    .fillna('')
    .astype(str)
    .tolist()
)

In [None]:
# Preview a few documents only; displaying the whole corpus floods the cell output.
X[:5]

## CNN Model

In [None]:
# Tokenize and pad sequences
max_words = 1000  # vocabulary cap handed to the tokenizer (num_words)
max_len = 100  # every sequence is padded/truncated to this many tokens

# NOTE(review): the tokenizer is fit on all texts, including the ones that end
# up in the test split below. Fit on the training texts only if a strict
# train/test separation of the vocabulary is required.
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(X)
sequences = tokenizer.texts_to_sequences(X)

# Store the padded matrix under its own name rather than overwriting X:
# this cell reads X as raw texts, so clobbering it made a second run of the
# cell fail (the tokenizer would be fed integer rows instead of strings).
X_padded = pad_sequences(sequences, maxlen=max_len)
y = np.asarray(y)

# Split the data into training and testing sets (80/20, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(
    X_padded, y, test_size=0.2, random_state=42
)

In [None]:
# Build the CNN model
embedding_dim = 50  # Dimension of word embeddings
filters = 100  # Number of filters in the convolutional layer
kernel_size = 3  # Size of the convolutional kernel

# Build a simple 1D CNN model.
# The convolutional layer was missing from the stack (filters and kernel_size
# were defined but never used), so the network was only
# Embedding -> pooling -> dense. Conv1D is now applied to the embedded
# sequence before pooling.
model = Sequential([
    # Token ids (bounded by max_words) -> embedding_dim-sized vectors
    Embedding(input_dim=max_words, output_dim=embedding_dim, input_length=max_len),
    # Convolution over windows of kernel_size consecutive tokens
    Conv1D(filters=filters, kernel_size=kernel_size, activation='relu'),
    MaxPooling1D(pool_size=2),
    Flatten(),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid'),  # Assuming binary classification
])


In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss,
# and accuracy tracked as the reported metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Fit on the training split, with a 0.2 fraction of it set aside for
# validation. The returned history object is kept for the plots below.
history_cnn = model.fit(
    X_train,
    y_train,
    batch_size=32,
    epochs=10,
    validation_split=0.2,
)

## Model Evaluation on Training and Validation Data

In [None]:
def _plot_train_val_curves(epochs, train_values, val_values, ylabel, legend_name,
                           train_style, ylim=None):
    """Draw one training-vs-validation curve pair on its own 15x10 figure.

    Parameters
    ----------
    epochs : sequence of int
        X-axis values (epoch numbers).
    train_values, val_values : sequence of float
        Per-epoch metric values for the training and validation data.
    ylabel : str
        Y-axis label; also used in the title 'Training and Validation <ylabel>'.
    legend_name : str
        Short metric name used in the legend ('Training <legend_name>', ...).
    train_style : str
        Matplotlib format string for the training curve; validation is drawn in blue.
    ylim : tuple of float, optional
        Y-axis limits; left to matplotlib's autoscaling when None.
    """
    fig, ax = plt.subplots(figsize=(15, 10))
    ax.plot(epochs, train_values, train_style, label=f'Training {legend_name}')
    ax.plot(epochs, val_values, 'b', label=f'Validation {legend_name}')
    ax.set_title(f'Training and Validation {ylabel}', size=20)
    ax.set_xlabel('Epochs', size=20)
    ax.set_ylabel(ylabel, size=20)
    ax.legend(prop={'size': 20})
    if ylim is not None:
        ax.set_ylim(ylim)
    plt.show()


# Extract training history
history_cnn_dict = history_cnn.history

# Extract metrics (one value per epoch)
acc_cnn = history_cnn_dict['accuracy']
val_acc_cnn = history_cnn_dict['val_accuracy']
loss_cnn = history_cnn_dict['loss']
val_loss_cnn = history_cnn_dict['val_loss']
epochs_cnn = range(1, len(acc_cnn) + 1)  # epochs are numbered from 1 on the x-axis

# Plot training and validation loss
_plot_train_val_curves(epochs_cnn, loss_cnn, val_loss_cnn,
                       ylabel='Loss', legend_name='loss', train_style='r')

# Plot training and validation accuracy; the y-axis is fixed to [0.5, 1]
_plot_train_val_curves(epochs_cnn, acc_cnn, val_acc_cnn,
                       ylabel='Accuracy', legend_name='acc', train_style='g',
                       ylim=(0.5, 1))

## Model Evaluation on Test Data

In [None]:
# Score the model on the held-out test split
evaluation = model.evaluate(x=X_test, y=y_test)

# The result is unpacked as a (loss, accuracy) pair, matching the loss and
# the single metric configured at compile time
(loss, accuracy) = evaluation

In [None]:
# Decision cut-off applied to the predicted probabilities
threshold = 0.5

# Predicted probabilities for the test split, then hard 0/1 labels
y_pred_probs = model.predict(X_test)
y_pred = np.greater(y_pred_probs, threshold).astype(int)

# Precision, recall and F1 of the hard predictions against the true labels
precision, recall, f1 = (
    metric(y_test, y_pred)
    for metric in (precision_score, recall_score, f1_score)
)

# Report all evaluation metrics, one per line
print(
    f"Loss: {loss}",
    f"Accuracy: {accuracy}",
    f"Precision: {precision}",
    f"Recall: {recall}",
    f"F1-Score: {f1}",
    sep="\n",
)

## Classification Report

In [None]:
# Build the per-class text report, then print it under a heading
class_report = classification_report(y_test, y_pred)
print("Classification Report:", class_report, sep="\n")

## Confusion Matrix

In [None]:
# Tabulate true-vs-predicted label counts
conf_matrix = confusion_matrix(y_test, y_pred)

# Render the counts as an annotated heatmap on an explicitly created Axes
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', cbar=False, ax=ax)
ax.set_xlabel('Predicted labels')
ax.set_ylabel('True labels')
ax.set_title('Confusion Matrix')
plt.show()
