In [1]:
import pandas as pd
import re
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import joblib


In [2]:
df = pd.read_csv("malicious_phish.csv") 

In [3]:
def clean_url(url):
    url = re.sub(r'https?://', '', url)
    url = re.sub(r'www\.', '', url)
    return url

In [4]:
df['url'] = df['url'].apply(clean_url)

In [5]:
# Step 3: Label Mapping (malicious = 1, benign = 0)
df['label'] = df['type'].apply(lambda x: 0 if x == 'benign' else 1)

In [6]:
# Step 4: Use sample to speed up training
df = df.sample(n=30000, random_state=42)  # Reduce size for quick testing

In [7]:
# Step 5: Feature Extraction (limit features for speed)
vectorizer = TfidfVectorizer(max_features=5000)
X = vectorizer.fit_transform(df['url'])
y = df['label']

In [8]:
# Step 6: Train/Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [9]:
# Step 7: Train Faster Model (Logistic Regression)
model = LogisticRegression(max_iter=200)
model.fit(X_train, y_train)

In [10]:
# Step 8: Evaluate
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"\n✅ Model Accuracy: {accuracy * 100:.2f}%")


✅ Model Accuracy: 87.43%


In [11]:
# Step 9: Save model + vectorizer
joblib.dump((vectorizer, model), "malicious_url_model.pkl")

['malicious_url_model.pkl']

In [12]:
# Step 10: Real-time Prediction
def predict_url(input_url):
    clean = clean_url(input_url)
    features = vectorizer.transform([clean])
    result = model.predict(features)[0]
    return "Malicious" if result == 1 else "Safe"


In [13]:
# Step 11: Run Checker
while True:
    test_url = input("\n🔍 Enter a URL to check (or type 'exit' to quit): ")
    if test_url.lower() == 'exit':
        break
    prediction = predict_url(test_url)
    print(f"Result: The URL is likely ➤ {prediction}")


🔍 Enter a URL to check (or type 'exit' to quit):  http://freemoney.ru/login


Result: The URL is likely ➤ Malicious



🔍 Enter a URL to check (or type 'exit' to quit):  https://testphp.vulnweb.com/


Result: The URL is likely ➤ Safe



🔍 Enter a URL to check (or type 'exit' to quit):  exit
