# In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report, f1_score
from nltk.classify import NaiveBayesClassifier
from nltk.classify.util import accuracy

# Load the e-mail dataset from the CSV file.
dataset = "/content/emails.csv"
emailDataFrame = pd.read_csv(dataset)

# The 'Email No.' column is not needed for classification, so remove it.
emailDataFrame = emailDataFrame.drop('Email No.', axis=1)

# 'Prediction' holds the output labels; every remaining column is an input feature.
y = emailDataFrame['Prediction']
X = emailDataFrame.drop('Prediction', axis=1)

# Hold out 33% of the rows as the test set; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.33,
)

# Convert a feature table and its labels into the format NLTK classifiers expect.
def convertFormate(X, y):
    """Build NLTK-style ``(featureset, label)`` pairs from a feature table.

    Rows of ``X`` and entries of ``y`` are paired by position, not by index
    label, so a shuffled index (e.g. after a train/test split) is fine as long
    as both inputs are in the same order.

    Args:
        X: pandas DataFrame with one row per sample and one column per feature.
        y: Labels for the rows of ``X`` (e.g. a pandas Series), in row order.

    Returns:
        A list with one ``(feature_dict, label)`` tuple per row, where
        ``feature_dict`` maps ``str(column_name)`` to that row's value.

    Raises:
        ValueError: If ``X`` and ``y`` do not contain the same number of items.
    """
    if len(X) != len(y):
        raise ValueError(
            f"X and y must have the same length, got {len(X)} and {len(y)}"
        )

    # Convert the column labels to strings once instead of once per row.
    feature_names = [str(column) for column in X.columns]

    # itertuples walks each row a single time; the previous X.iloc[i][word]
    # lookup rebuilt the whole row for every individual cell.
    return [
        (dict(zip(feature_names, row)), label)
        for row, label in zip(X.itertuples(index=False, name=None), y)
    ]

# Convert both splits into NLTK's (featureset, label) format.
dataTrain = convertFormate(X_train, y_train)
dataTest = convertFormate(X_test, y_test)

# Train NLTK's Naive Bayes classifier on the training pairs.
classifier = NaiveBayesClassifier.train(dataTrain)

# Predict the label of every test sample once; all metrics below reuse these
# predictions so the test set is not classified a second time.
predic = [classifier.classify(features) for features, _ in dataTest]
realData = y_test.values

# Accuracy is the fraction of predictions equal to the gold label, computed the
# same way as nltk.classify.util.accuracy (0 when there are no test samples).
correct = [label == guess for (_, label), guess in zip(dataTest, predic)]
accuracy_score = sum(correct) / len(correct) if correct else 0
print(f"Accuracy: {accuracy_score:.2f}")

# Confusion matrix and per-class precision/recall report.
conf_matrix = confusion_matrix(realData, predic)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_report(realData, predic))

# F1 score with scikit-learn's default settings.
f1 = f1_score(realData, predic)
print(f"F1 Score: {f1:.2f}")

