# URL Phishing Detection

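This notebook trains an XGBoost classifier for URL phishing detection on synthetic features, then saves the fitted model and feature scaler for the backend.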

In [None]:
from pathlib import Path

import joblib
import numpy as np
import xgboost as xgb
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Configuration: named constants instead of magic numbers repeated inline.
SEED = 43
N_SAMPLES = 8000
N_FEATURES = 43
TEST_SIZE = 0.2
# Resolved relative to the current working directory of the notebook kernel.
MODEL_DIR = Path('../../backend/models')

# Generate synthetic URL features
# NOTE: the labels are drawn independently of the features, so there is no
# signal to learn; an accuracy close to 0.5 (chance) is the expected result.
np.random.seed(SEED)
X = np.random.randn(N_SAMPLES, N_FEATURES)
y = np.random.randint(0, 2, N_SAMPLES)

# Split and scale
# Pin the split seed explicitly instead of relying on NumPy's global RNG
# state, and stratify so both partitions keep the same class balance.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=SEED, stratify=y
)
# Fit the scaler on the training partition only; the test partition is
# transformed with the training statistics.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train XGBoost
model = xgb.XGBClassifier(n_estimators=150, max_depth=8, random_state=SEED)
model.fit(X_train_scaled, y_train)

# Evaluate
y_pred = model.predict(X_test_scaled)
print(f'Accuracy: {accuracy_score(y_test, y_pred):.4f}')

# Save
# Create the output directory up front so a missing folder does not raise
# FileNotFoundError after training has already completed.
MODEL_DIR.mkdir(parents=True, exist_ok=True)
joblib.dump(model, MODEL_DIR / 'url_classifier_xgb.pkl')
joblib.dump(scaler, MODEL_DIR / 'scaler_url.pkl')
print('✓ Model saved!')