# 🚀 Phishing Detection Baseline - SVM Classifier
Dataset: `Phishing_Legitimate_full.csv`

**Steps:**
1. Upload CSV
2. Load & inspect data
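3. Prepare features & labels (separate `CLASS_LABEL` from the feature columns)
4. Train/test split
5. Train SVM
6. Evaluate model
7. Experiment: repeat the split/train/evaluate cycle with different `random_state` values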


In [None]:
# STEP 1: Upload dataset (click the file browser in Colab)
# The upload widget only exists on Google Colab. Anywhere else (local Jupyter,
# plain Python) the import fails, so fall back to an empty mapping and let the
# user place the CSV in the working directory by hand instead of crashing the
# very first cell.
try:
    from google.colab import files
except ImportError:
    uploaded = {}
    print("google.colab is not available - skipping upload; "
          "place the CSV in the working directory instead.")
else:
    # Opens Colab's file picker and blocks until the upload finishes.
    uploaded = files.upload()

In [None]:
# STEP 2: Load & Inspect CSV
import pandas as pd
# Relative path: the CSV must be in the notebook's current working directory.
df = pd.read_csv('Phishing_Legitimate_full.csv')
# Print (rows, columns) as a quick sanity check on what was loaded.
print(df.shape)
# Trailing expression: the notebook renders the first rows of the frame.
df.head()

In [None]:
# STEP 3: Prepare Features & Labels
# Target vector: the class column the classifier has to predict.
y = df['CLASS_LABEL']

# Feature matrix: every column except the target.
X = df.drop(columns=['CLASS_LABEL'])

# A row identifier is not a feature. If the file is ordered by class, an id
# column lets the model "predict" the label from row position alone and
# inflates every metric computed afterwards.
# NOTE(review): the public copy of this CSV is understood to ship an `id`
# column - confirm with df.columns. errors='ignore' makes this a no-op when
# the column is absent.
X = X.drop(columns=['id'], errors='ignore')

In [None]:
# STEP 4: Train/Test Split
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# shuffle (and therefore the split) reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# STEP 5: Train SVM
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# SVMs are not scale invariant: features with large numeric ranges dominate
# the margin and slow the solver down. Standardising inside a pipeline keeps
# the scaler fitted on the training rows only and re-applies the same
# transform automatically whenever `model.predict` is called.
model = make_pipeline(StandardScaler(), SVC(kernel='linear'))
model.fit(X_train, y_train)

In [None]:
# STEP 6: Evaluate
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Predict labels for the held-out rows with the fitted model.
y_pred = model.predict(X_test)

# Fraction of held-out rows whose predicted label matches the true label.
test_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", test_accuracy)

# Text summary of per-class metrics.
report_text = classification_report(y_test, y_pred)
print("\nClassification Report:\n", report_text)

# Counts of true vs. predicted labels.
confusion = confusion_matrix(y_test, y_pred)
print("\nConfusion Matrix:\n", confusion)

In [None]:
# 🔁 Experiment: Try different random_state values
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

random_states = [0, 1, 7, 21, 42, 99, 123, 2024]


def accuracy_for_seed(features, labels, seed):
    """Return held-out accuracy of a linear SVM for one train/test split.

    The split, the fitted classifier and its predictions live only inside
    this function, so running the experiment does not overwrite the
    notebook-level `X_train`, `X_test`, `y_train`, `y_test`, `model` or
    `y_pred` variables.

    Args:
        features: Feature matrix to split.
        labels: Target values aligned with `features`.
        seed: `random_state` passed to `train_test_split`.

    Returns:
        Accuracy on the 20% of rows held out for this seed.
    """
    train_x, test_x, train_y, test_y = train_test_split(
        features, labels, test_size=0.2, random_state=seed
    )
    # SVMs are scale sensitive; the scaler is fitted on the training rows
    # only and re-applied to the held-out rows by the pipeline.
    classifier = make_pipeline(StandardScaler(), SVC(kernel='linear'))
    classifier.fit(train_x, train_y)
    return accuracy_score(test_y, classifier.predict(test_x))


for rs in random_states:
    acc = accuracy_for_seed(X, y, rs)
    print(f"Random State {rs} → Accuracy: {acc:.4f}")