## 1. Install the required packages

In [None]:
pip install lime

## 2. Import dependencies

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler, MinMaxScaler, LabelEncoder, OneHotEncoder
from ucimlrepo import fetch_ucirepo

## 3. Fetch dataset from UCI repository

In [None]:
# Fetch the dataset registered under id 967 through ucimlrepo's fetch_ucirepo.
# The returned object is read later via its .data (features/targets),
# .metadata and .variables attributes.
# NOTE(review): presumably this call needs network access — confirm.
phiusiil_phishing_url_website = fetch_ucirepo(id=967)

## 4. Extract features and target variables as pandas dataframes

# Split the fetched dataset into its feature frame (X) and target frame (y).
X, y = (
    phiusiil_phishing_url_website.data.features,
    phiusiil_phishing_url_website.data.targets,
)

## 5. Print metadata and variable information for reference

In [None]:
# Print the dataset metadata, then its variable information, each starting on
# its own line.
print(
    phiusiil_phishing_url_website.metadata,
    phiusiil_phishing_url_website.variables,
    sep="\n",
)

## 6. Combine features and target into a single dataframe

In [None]:
# Place the target column(s) next to the features, aligned on the row index,
# so the rest of the notebook can work with a single frame.
df = pd.concat((X, y), axis="columns")

## 7. Display the first few rows of the dataset

In [None]:
# Preview the first five rows of the combined frame (5 is head()'s default).
display(df.head(n=5))

## 8. Drop duplicate rows to remove redundancy

In [None]:
# Remove rows that duplicate an earlier row across every column, keeping the
# first occurrence. inplace=True mutates df itself (the call returns None), and
# the surviving rows keep their original index labels because no index reset
# is requested.
df.drop_duplicates(inplace=True)

## 9. Check for missing values before handling them

In [None]:
# Report the number of missing entries per column before any imputation.
print("Missing Values Before Handling:", df.isna().sum(), sep="\n")

## 10. Handle missing values: fill numerical columns with their median

In [None]:
# Impute in place: each numeric column's gaps are filled with that column's
# median. The medians are computed with numeric_only=True, so non-numeric
# columns have no fill value and keep any missing entries.
df.fillna(value=df.median(numeric_only=True), inplace=True)

# Re-count missing entries per column to confirm the effect of the fill.
print("Missing Values After Handling:", df.isna().sum(), sep="\n")

## 11. Scatter Plot (Without Hue)

In [None]:
# Numerical columns to compare pairwise in the scatter plots below.
# Each name is used verbatim as a key into df, so the spellings are kept
# exactly as written (e.g. "DegitRatioInURL", "SpacialCharRatioInURL").
# NOTE(review): presumably these match the dataset's own column names —
# verify against df.columns before "correcting" any of them.
features = [
    "URLLength",
    "DomainLength",
    "URLSimilarityIndex",
    "CharContinuationRate",
    "TLDLegitimateProb",
    "URLCharProb",
    "TLDLength",
    "NoOfSubDomain",
    "ObfuscationRatio",
    "LetterRatioInURL",
    "DegitRatioInURL",
    "SpacialCharRatioInURL",
    "DomainTitleMatchScore",
    "URLTitleMatchScore",
    "NoOfPopup",
    "NoOfiFrame",
    "NoOfImage",
    "NoOfCSS",
    "NoOfJS",
    "NoOfURLRedirect",
    "NoOfExternalRef",
    "NoOfSelfRedirect",
    "NoOfEmptyRef",
]

# Create scatter plots for unique feature combinations
for i in range(len(features)):
    for j in range(i + 1, len(features)):  # Ensures each pair is plotted only once
        feature_x = features[i]
        feature_y = features[j]

        plt.figure(figsize=(18, 5))
        sns.scatterplot(x=df[feature_x], y=df[feature_y], alpha=0.5)

        plt.xlabel(feature_x)
        plt.ylabel(feature_y)
        plt.title(f"{feature_x} vs. {feature_y}")

        