### Import Required Libraries

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

### Sample Dataset

In [2]:
data = pd.DataFrame({
    'email_text': [
        "Congratulations! You won a $1000 gift card. Click to claim now!",
        "Reminder: Your meeting is scheduled for 3 PM today.",
        "Earn money quickly by working from home. Limited offer!",
        "Project report submission deadline extended to Friday.",
        "You have been selected for a free vacation trip to Maldives!",
        "Team lunch planned for next Monday. RSVP soon.",
        "Win a free iPhone! Register at the link below.",
        "Please find attached the updated financial report.",
        "Exclusive deal! 50% discount on all products. Hurry up!",
        "Let's catch up tomorrow to discuss the new proposal."
    ],
    'label': ['spam', 'not spam', 'spam', 'not spam', 'spam',
              'not spam', 'spam', 'not spam', 'spam', 'not spam']
})

# Display first five rows
print(data.head())

                                          email_text     label
0  Congratulations! You won a $1000 gift card. Cl...      spam
1  Reminder: Your meeting is scheduled for 3 PM t...  not spam
2  Earn money quickly by working from home. Limit...      spam
3  Project report submission deadline extended to...  not spam
4  You have been selected for a free vacation tri...      spam


### Encode Target Labels

In [3]:
label_enc = LabelEncoder()
data['label'] = label_enc.fit_transform(data['label'])  # spam=1, not spam=0

### Convert Text Data to Numerical (TF-IDF)

In [4]:
vectorizer = TfidfVectorizer(stop_words='english')
X = vectorizer.fit_transform(data['email_text'])
y = data['label']

###  Split Data into Training and Testing Sets

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

###  Train Classification Models

#### 1. Logistic Regression

In [6]:
log_reg = LogisticRegression() 
log_reg.fit(X_train, y_train) 
y_pred_log = log_reg.predict(X_test)

#### 2. Decision Tree Classifier

In [7]:
dt_clf = DecisionTreeClassifier() 
dt_clf.fit(X_train, y_train) 
y_pred_dt = dt_clf.predict(X_test)

#### 3. Random Forest Classifier

In [8]:
rf_clf = RandomForestClassifier(n_estimators=100) 
rf_clf.fit(X_train, y_train) 
y_pred_rf = rf_clf.predict(X_test)

#### 4️. Support Vector Machine (SVM)

In [9]:
svm_clf = SVC() 
svm_clf.fit(X_train, y_train) 
y_pred_svm = svm_clf.predict(X_test) 

#### 5. K-Nearest Neighbors (KNN)

In [10]:
knn_clf = KNeighborsClassifier(n_neighbors=5) 
knn_clf.fit(X_train, y_train) 
y_pred_knn = knn_clf.predict(X_test) 

### Model Evaluation

In [11]:
models = { 
    "Logistic Regression": y_pred_log, 
    "Decision Tree": y_pred_dt, 
    "Random Forest": y_pred_rf, 
    "SVM": y_pred_svm, 
    "KNN": y_pred_knn 
} 
 
for model_name, y_pred in models.items(): 
    print(f"\n{model_name} Performance:") 
    print(f"Accuracy: {accuracy_score(y_test, y_pred):.2f}") 
    print(f"Precision: {precision_score(y_test, y_pred):.2f}") 
    print(f"Recall: {recall_score(y_test, y_pred):.2f}") 
    print(f"F1 Score: {f1_score(y_test, y_pred):.2f}") 
    print("-" * 50) 


Logistic Regression Performance:
Accuracy: 0.50
Precision: 0.50
Recall: 1.00
F1 Score: 0.67
--------------------------------------------------

Decision Tree Performance:
Accuracy: 0.50
Precision: 0.00
Recall: 0.00
F1 Score: 0.00
--------------------------------------------------

Random Forest Performance:
Accuracy: 0.50
Precision: 0.00
Recall: 0.00
F1 Score: 0.00
--------------------------------------------------

SVM Performance:
Accuracy: 0.50
Precision: 0.50
Recall: 1.00
F1 Score: 0.67
--------------------------------------------------

KNN Performance:
Accuracy: 0.50
Precision: 0.50
Recall: 1.00
F1 Score: 0.67
--------------------------------------------------


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
