In [15]:
import pandas as pd
import numpy as np
import pickle
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder

# Train a Random Forest on the OpenML "PhishingWebsites" dataset and persist
# the model together with everything needed to rebuild its inputs later:
# the feature column order and the per-column label encoders.

# Load dataset from OpenML
dataset = fetch_openml("PhishingWebsites", version=1, as_frame=True)
df = dataset.frame

# Convert target column to numeric
y = df["Result"].astype(int)

# Convert categorical features to numeric using Label Encoding.
# One encoder is fitted per column and kept, so the exact raw-value -> code
# mapping used for training can be re-applied to new samples at inference
# time. (Previously the fitted encoder was discarded, leaving consumers of
# model.pkl to guess the mapping.)
raw_features = df.drop(columns=["Result"])
encoders = {
    column: LabelEncoder().fit(raw_features[column])
    for column in raw_features.columns
}
X = raw_features.apply(lambda col: encoders[col.name].transform(col))

# Save feature names (column order matters when building inference inputs)
feature_names = X.columns.to_list()

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train Random Forest model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Evaluate on the held-out split so the saved model has a known quality;
# the test split was previously created but never used.
test_accuracy = model.score(X_test, y_test)
print(f"Held-out accuracy: {test_accuracy:.4f}")

# Save the model, feature names and fitted encoders.
# The "model" and "features" keys are unchanged; "encoders" is additive.
# NOTE: pickle files must only be loaded from trusted sources.
with open("model.pkl", "wb") as file:
    pickle.dump(
        {"model": model, "features": feature_names, "encoders": encoders},
        file,
    )

print("✅ Model trained and saved as model.pkl")


✅ Model trained and saved as model.pkl
