In [None]:
import numpy as np
import random
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.pipeline import Pipeline

In [None]:
# Load the labelled email corpus. The sheet is expected to provide at least the
# 'Subject' and 'Email' columns used below (and 'Email_type' used later on).
data = pd.read_excel('/content/drive/MyDrive/Colab Notebooks/Security/Original_data.xlsx')

# Build one text field per email. An empty spreadsheet cell is read as NaN, and
# NaN + str stays NaN, which the TF-IDF vectorizer later rejects as a document.
# Treat missing parts as empty text and coerce non-string cells (e.g. a purely
# numeric subject) to str so every row ends up as a plain string.
subject_text = data['Subject'].fillna('').astype(str)
body_text = data['Email'].fillna('').astype(str)
data['Full_Text'] = subject_text + " " + body_text


In [None]:
from sklearn.metrics.pairwise import cosine_similarity

In [None]:

# --- Configuration -----------------------------------------------------------
SEED = 42          # shared by the train/test split and the training RNG
N_EPISODES = 1000  # number of single-step training episodes

# --- Labels and train/test split ---------------------------------------------
# Binary target: 1 for 'Phishing', 0 for every other Email_type value.
y = data['Email_type'].apply(lambda x: 1 if x == 'Phishing' else 0)

# Split the raw text *before* fitting TF-IDF so the vocabulary and IDF weights
# are learned from the training emails only (no test-set leakage). With the same
# random_state and sample count this yields the same row partition as splitting
# an already-vectorised matrix.
text_train, text_test, y_train, y_test = train_test_split(
    data['Full_Text'], y, test_size=0.2, random_state=SEED
)

vectorizer = TfidfVectorizer(stop_words='english', max_features=500)
X_train = vectorizer.fit_transform(text_train)
X_test = vectorizer.transform(text_test)
# Full-corpus matrix in the train-fitted feature space; kept so that any later
# cell referring to `X` still finds it.
X = vectorizer.transform(data['Full_Text'])

# --- Tabular Q-learning over training emails ---------------------------------
# One state per training email, two actions (0 = not phishing, 1 = phishing).
n_states = X_train.shape[0]
n_actions = 2
q_table = np.zeros((n_states, n_actions))

alpha = 0.1    # learning rate
gamma = 0.6    # discount factor
epsilon = 0.1  # exploration probability

# Seeded generator so the learned table and the reported metrics are
# reproducible under Restart & Run All.
rng = np.random.default_rng(SEED)
train_labels = y_train.to_numpy()  # positional access without per-step .iloc

for episode in range(N_EPISODES):
    state_index = rng.integers(n_states)

    # Epsilon-greedy action selection.
    if rng.random() < epsilon:
        action = rng.integers(n_actions)
    else:
        action = np.argmax(q_table[state_index])

    # +1 for matching the true label, -1 otherwise.
    reward = 1 if action == train_labels[state_index] else -1
    old_value = q_table[state_index, action]
    # NOTE(review): there is no successor state here, so the bootstrap term is
    # taken from the *same* state, as in the original formulation.
    next_max = np.max(q_table[state_index])

    q_table[state_index, action] = (1 - alpha) * old_value + alpha * (reward + gamma * next_max)

# --- Prediction ---------------------------------------------------------------
# Map every test email to its most cosine-similar training email (one batched
# call instead of one call per row) and take that state's greedy action.
similarities = cosine_similarity(X_test, X_train)
nearest_states = similarities.argmax(axis=1)
y_pred = q_table[nearest_states].argmax(axis=1).tolist()

# --- Evaluation ---------------------------------------------------------------
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print("Classification Report:\n", report)

Accuracy: 0.8775510204081632
Classification Report:
               precision    recall  f1-score   support

           0       0.90      0.64      0.75        14
           1       0.87      0.97      0.92        35

    accuracy                           0.88        49
   macro avg       0.89      0.81      0.83        49
weighted avg       0.88      0.88      0.87        49

