In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [2]:
#for ignoring all type of warnings
import warnings

# Ignore all warnings
warnings.filterwarnings("ignore")

In [3]:
# Read emails.csv (path is relative to the working directory) into a DataFrame
emails_df = pd.read_csv(filepath_or_buffer='emails.csv')

In [4]:
# Remove the 'Email No.' column; drop() returns a new frame, so emails_df is left untouched
emails_df_cleaned = emails_df.drop(labels=['Email No.'], axis='columns')

In [5]:
# Separate the target column ('Prediction') from the feature columns (everything else)
y = emails_df_cleaned['Prediction']
X = emails_df_cleaned.drop(columns=['Prediction'])

# Hold out 20% of the rows for testing (80% training); the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [6]:
# Standardise the features (important for scale-sensitive models such as SVM).
# The scaler is fitted on the training split only and then applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [7]:
# Define the three classifiers that are compared below:
# Logistic Regression, Decision Tree and SVM.
logistic_model = LogisticRegression()
# random_state is fixed because a decision tree randomly permutes the candidate
# features at every split, so an unseeded tree (and its metrics) can differ
# between runs. 42 matches the seed already used for the train/test split.
decision_tree_model = DecisionTreeClassifier(random_state=42)
svm_model = SVC()

In [8]:
# Fit each classifier on the training split.
# The decision tree is trained on the raw features (it does not depend on scaling);
# the other two models are trained on the standardised features.
decision_tree_model.fit(X_train, y_train)
logistic_model.fit(X_train_scaled, y_train)
svm_model.fit(X_train_scaled, y_train)

In [9]:
# Predict on the test split; each model receives the same feature
# representation (raw or standardised) that it was trained on.
decision_tree_preds = decision_tree_model.predict(X_test)
logistic_preds = logistic_model.predict(X_test_scaled)
svm_preds = svm_model.predict(X_test_scaled)

In [10]:
# Compute the evaluation metrics for one set of predictions
def evaluate_model(y_test, preds):
    """Score predictions against the true labels.

    Args:
        y_test: True labels.
        preds: Predicted labels for the same samples.

    Returns:
        A 4-tuple ``(accuracy, precision, recall, f1)``, each computed with
        the corresponding scorer's default settings.
    """
    scorers = (accuracy_score, precision_score, recall_score, f1_score)
    return tuple(score(y_test, preds) for score in scorers)

In [11]:
# Score every model's test predictions against the held-out labels
logistic_eval, decision_tree_eval, svm_eval = (
    evaluate_model(y_test, model_preds)
    for model_preds in (logistic_preds, decision_tree_preds, svm_preds)
)

In [12]:
# Print one line per model; each tuple is (accuracy, precision, recall, f1)
for label, scores in (
    ("Logistic Regression: ", logistic_eval),
    ("Decision Tree: ", decision_tree_eval),
    ("SVM: ", svm_eval),
):
    print(label, scores)

Logistic Regression:  (0.970048309178744, 0.9260450160771704, 0.972972972972973, 0.9489291598023064)
Decision Tree:  (0.9256038647342996, 0.8737201365187713, 0.8648648648648649, 0.8692699490662139)
SVM:  (0.9468599033816425, 0.9958847736625515, 0.8175675675675675, 0.8979591836734693)
