In [None]:
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn import svm
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.feature_extraction.text import CountVectorizer
from sklearn import metrics
import joblib

def SVC():
  """Train the baseline SVM phishing classifier and evaluate it on the test set.

  Reads 'train_data_NDS.csv' (training) and 'test_data_NDS.csv' (test),
  vectorizes the "Email Text" column with a CountVectorizer fitted on the
  training text only, and fits an `svm.SVC` with default arguments. The
  confusion matrix, classification report, accuracy, precision and recall of
  the test-set predictions are printed, and the fitted model and vectorizer
  are saved with joblib to 'SVC_model.pkl' and 'SVC_vectorizer.pkl'.

  NOTE(review): this function has the same name as the `SVC` class that the
  notebook imports via `from sklearn.svm import SVC`; whichever binding runs
  last wins. The estimator is therefore created through `svm.SVC()` below,
  never through the bare name `SVC`.

  Returns:
    The fitted `svm.SVC` model.
  """
  print("SVC\n")
  vectorizer = CountVectorizer()

  data = pd.read_csv('train_data_NDS.csv')
  data2 = pd.read_csv('test_data_NDS.csv')

  # "Safe Email" -> 0, "Phishing Email" -> 1. Any other label value would be
  # mapped to NaN; assumes both files contain only these two strings -- TODO
  # confirm against the data.
  label_map = {'Safe Email': 0, 'Phishing Email': 1}

  X = data["Email Text"]
  Y = data["Email Type"].map(label_map)
  # The vocabulary is learned from the training text only ...
  X_vectorized = vectorizer.fit_transform(X)

  X2 = data2["Email Text"]
  Y2 = data2["Email Type"].map(label_map)
  # ... and merely applied to the test text.
  X2_vectorized = vectorizer.transform(X2)

  model = svm.SVC()

  model.fit(X_vectorized, Y)

  predictions = model.predict(X2_vectorized)

  conf_matrix = confusion_matrix(Y2, predictions)
  # The predictions belong to the test rows, so they must be scored against
  # the test labels (Y2). Scoring them against the training labels (Y) fails
  # when the two sets differ in size and is meaningless when they do not.
  report = classification_report(Y2, predictions, target_names=['Safe Email', 'Phishing Email'])

  accuracy = metrics.accuracy_score(Y2, predictions)
  precision = metrics.precision_score(Y2, predictions)
  recall = metrics.recall_score(Y2, predictions)

  print(conf_matrix)
  print(report)

  print(accuracy)
  print(precision)
  print(recall)

  print("\n-----------------\n")

  # Persist both artifacts: the vectorizer is needed to encode new text the
  # same way before the saved model can be used.
  joblib.dump(model, 'SVC_model.pkl')
  joblib.dump(vectorizer, 'SVC_vectorizer.pkl')

  return model

In [None]:
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn import svm
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.feature_extraction.text import CountVectorizer
import joblib

def SVCShuffle():
  """Fit an SVM on the shuffled training file and score it on the test file.

  Loads 'train_data_NDS_shuffle.csv' for training and 'test_data_NDS.csv' for
  evaluation, vectorizes the "Email Text" column with a CountVectorizer
  fitted on the training text, and trains an `svm.SVC` with default
  arguments. The confusion matrix, classification report, accuracy, precision
  and recall of the test predictions are printed, and the classifier and
  vectorizer are written with joblib to 'SVC_shuffle_model.pkl' and
  'SVC_shuffle_vectorizer.pkl'.

  Returns:
    The fitted `svm.SVC` classifier.
  """
  print("SVC Shuffle\n")
  bag_of_words = CountVectorizer()

  train_df = pd.read_csv('train_data_NDS_shuffle.csv')
  test_df = pd.read_csv('test_data_NDS.csv')

  # Integer encoding of the two label strings used in the "Email Type" column.
  label_codes = {'Safe Email': 0, 'Phishing Email': 1}

  # Training side: the vectorizer learns its vocabulary here.
  train_text = train_df["Email Text"]
  train_labels = train_df["Email Type"].map(label_codes)
  train_features = bag_of_words.fit_transform(train_text)

  # Test side: reuse the fitted vocabulary, do not refit.
  test_text = test_df["Email Text"]
  test_labels = test_df["Email Type"].map(label_codes)
  test_features = bag_of_words.transform(test_text)

  classifier = svm.SVC()
  classifier.fit(train_features, train_labels)

  predicted = classifier.predict(test_features)

  # Compute every metric before printing anything, then emit them in a fixed
  # order: matrix, report, accuracy, precision, recall.
  conf_matrix = confusion_matrix(test_labels, predicted)
  report = classification_report(
      test_labels, predicted, target_names=['Safe Email', 'Phishing Email'])
  accuracy = metrics.accuracy_score(test_labels, predicted)
  precision = metrics.precision_score(test_labels, predicted)
  recall = metrics.recall_score(test_labels, predicted)

  for result in (conf_matrix, report, accuracy, precision, recall):
    print(result)

  print("\n-----------------\n")

  # Save the classifier first, then the vectorizer it depends on.
  artifacts = (
      (classifier, 'SVC_shuffle_model.pkl'),
      (bag_of_words, 'SVC_shuffle_vectorizer.pkl'),
  )
  for artifact, filename in artifacts:
    joblib.dump(artifact, filename)

  return classifier


In [None]:
def main():
  """Run the baseline SVM experiment, then the shuffled-training-set variant.

  Raises:
    RuntimeError: if the name `SVC` currently refers to a class instead of
      the notebook's training function (see the comment below).
  """
  # `SVC` is both the training function defined in this notebook and a name
  # brought in by `from sklearn.svm import SVC`. If that import executes after
  # the function definition (as it does when the cells run top to bottom),
  # `SVC` is rebound to the scikit-learn class and `SVC()` would only build an
  # unfitted estimator: nothing is trained, printed or saved, and no error is
  # raised. Fail loudly instead of silently skipping the baseline run.
  if isinstance(SVC, type):
    raise RuntimeError(
        "`SVC` refers to a class, not the notebook's training function; "
        "it was probably rebound by `from sklearn.svm import SVC`. "
        "Re-run the cell that defines `def SVC()` before calling main()."
    )
  SVC()
  SVCShuffle()

if __name__ == "__main__":
  main()

SVC

[[3329  153]
 [ 127 3355]]
                precision    recall  f1-score   support

    Safe Email       0.96      0.96      0.96      3482
Phishing Email       0.96      0.96      0.96      3482

      accuracy                           0.96      6964
     macro avg       0.96      0.96      0.96      6964
  weighted avg       0.96      0.96      0.96      6964

0.9597932222860425
0.9563854047890535
0.9635267087880528

-----------------

SVC Shuffle

[[3329  153]
 [ 127 3355]]
                precision    recall  f1-score   support

    Safe Email       0.96      0.96      0.96      3482
Phishing Email       0.96      0.96      0.96      3482

      accuracy                           0.96      6964
     macro avg       0.96      0.96      0.96      6964
  weighted avg       0.96      0.96      0.96      6964

0.9597932222860425
0.9563854047890535
0.9635267087880528

-----------------

