# Feature Selection - Top 10, 15, 20 Features

## Feature Selection Algorithms:
- **Boruta** - Wrapper method using Random Forest
- **RFE (Recursive Feature Elimination)** - Wrapper method using LightGBM
- **Correlation-based Feature Selection** - Filter method
- **ContrastFS (Contrastive Feature Selection)** - Filter method scoring each feature by its inter-class / intra-class contrast ratio

In [1]:
# Install required packages if not available
!pip install boruta lightgbm xgboost catboost scikit-learn pandas numpy




[notice] A new release of pip is available: 25.1.1 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
# Import required libraries
import pandas as pd
import numpy as np
import time
import warnings
warnings.filterwarnings('ignore')

# Sklearn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.feature_selection import RFE
from sklearn.ensemble import RandomForestClassifier

# Models
from lightgbm import LGBMClassifier

# Boruta
from boruta import BorutaPy

print("All libraries imported successfully!")

All libraries imported successfully!


## Step 1: Load Dataset

In [3]:
# Read the 63-column phishing-URL dataset from its CSV file
df = pd.read_csv('new_dataset/PhiUSIIL_Phishing_URL_63_Features.csv')

# Banner followed by the frame's dimensions (rows, then columns)
print("=" * 70, "DATASET INFORMATION", "=" * 70, sep="\n")
print(f"Number of Rows: {df.shape[0]}")
print(f"Number of Columns: {df.shape[1]}")

# Trailing expression: preview of the first rows
df.head()

DATASET INFORMATION
Number of Rows: 235795
Number of Columns: 63


Unnamed: 0,FILENAME,URL,URLLength,Domain,DomainLength,IsDomainIP,TLD,URLSimilarityIndex,CharContinuationRate,TLDLegitimateProb,...,NoOfEmptyRef,NoOfExternalRef,has_no_www,num_slashes,num_hyphens,URL_Profanity_Prob,URL_NumberOf_Profanity,URLContent_Profanity_Prob,URLContent_NumberOf_Profanity,label
0,521848.txt,https://www.southbankmosaics.com,32,www.southbankmosaics.com,24,0,com,100.0,1.0,0.522907,...,0,124,0,2,0,0.012189,1,0.01188,1,1
1,31372.txt,https://www.uni-mainz.de,24,www.uni-mainz.de,16,0,de,100.0,0.666667,0.03265,...,0,217,0,2,1,0.027988,0,0.019723,0,1
2,597387.txt,https://www.voicefmradio.co.uk,30,www.voicefmradio.co.uk,22,0,uk,100.0,0.866667,0.028555,...,2,5,0,2,0,0.015063,0,0.000294,1,1
3,554095.txt,https://www.sfnmjournal.com,27,www.sfnmjournal.com,19,0,com,100.0,1.0,0.522907,...,1,31,0,2,0,0.012189,0,0.0,0,1
4,151578.txt,https://www.rewildingargentina.org,34,www.rewildingargentina.org,26,0,org,100.0,1.0,0.079963,...,1,85,0,2,0,0.005476,0,0.002091,48,1


In [4]:
# Prepare features and target
# Exclude non-feature columns (URL, FILENAME, etc.)
exclude_cols = ['URL', 'FILENAME', 'Domain', 'TLD', 'Title']

# Accepted spellings of the target column, in order of preference
target_candidates = ['label', 'Label', 'CLASS_LABEL']
target_col = next((name for name in target_candidates if name in df.columns), None)

if target_col is None:
    # Stop here with an actionable message instead of continuing with a
    # column name that does not exist in the dataset.
    print("Target column not found! Please specify manually.")
    print("Available columns:", df.columns.tolist())
    raise KeyError(
        f"None of the expected target columns {target_candidates} exist in the dataset"
    )

print(f"Target column: {target_col}")
print(f"Target distribution:\n{df[target_col].value_counts()}")

Target column: label
Target distribution:
label
1    134850
0    100945
Name: count, dtype: int64


In [5]:
# Prepare feature matrix X and target vector y
# Keep only numeric columns that are neither excluded nor the target.
# The exclusion set is built once instead of once per column.
non_feature_cols = set(exclude_cols) | {target_col}
feature_cols = [col for col in df.columns
                if col not in non_feature_cols
                and df[col].dtype in ['int64', 'float64', 'int32', 'float32']]

print(f"Number of feature columns: {len(feature_cols)}")

X = df[feature_cols].copy()
y = df[target_col].copy()

# Encode target if needed
if y.dtype == 'object':
    le = LabelEncoder()
    # fit_transform returns a bare ndarray; wrap it so y keeps the same row
    # index as X and index-aligned operations on the split data stay correct.
    y = pd.Series(le.fit_transform(y), index=y.index, name=target_col)
    print(f"\nTarget encoded: {le.classes_}")

# Handle missing values (constant fill, so nothing is learned from the data)
X = X.fillna(0)

print(f"\nFeature matrix shape: {X.shape}")
print(f"Target vector shape: {y.shape}")

Number of feature columns: 57

Feature matrix shape: (235795, 57)
Target vector shape: (235795,)


In [6]:
# Hold out 20% of the rows as a test set, stratified on y, with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Report the sizes of the two partitions and the feature count
print("=" * 70, "DATA SPLIT", "=" * 70, sep="\n")
print(f"Training set: {len(X_train)} samples")
print(f"Test set: {len(X_test)} samples")
print(f"Number of features: {len(X_train.columns)}")

DATA SPLIT
Training set: 188636 samples
Test set: 47159 samples
Number of features: 57


---
## Step 2: Feature Selection Algorithms

### 2.1 Boruta Feature Selection

In [7]:
# Boruta Feature Selection
print("=" * 70)
print("BORUTA FEATURE SELECTION")
print("=" * 70)

# Base estimator handed to Boruta: a shallow, class-balanced Random Forest
rf = RandomForestClassifier(n_jobs=-1, class_weight='balanced', max_depth=5, random_state=42)

# Boruta wrapper around that forest, capped at 100 iterations
boruta_selector = BorutaPy(rf, n_estimators='auto', verbose=2, random_state=42, max_iter=100)

# Fit Boruta on the raw NumPy values; only the fit itself is timed
start_time = time.time()
boruta_selector.fit(X_train.values, y_train)
boruta_time = time.time() - start_time

# Names of the features flagged in Boruta's support_ mask
boruta_features = X_train.columns[boruta_selector.support_].tolist()

# A separately trained forest (same settings, 100 trees) supplies the
# importance scores that are stored next to Boruta's own ranks
rf_for_importance = RandomForestClassifier(n_jobs=-1, class_weight='balanced',
                                            max_depth=5, random_state=42, n_estimators=100)
rf_for_importance.fit(X_train, y_train)

# One row per feature: importance score, Boruta rank and selection flag
boruta_importance_df = pd.DataFrame({
    'feature': X_train.columns,
    'importance': rf_for_importance.feature_importances_,
    'boruta_rank': boruta_selector.ranking_,
    'selected': boruta_selector.support_
})

# Order by Boruta's rank first (lower is better; selected features hold the
# best rank per the BorutaPy docs) and use importance only to break ties, so
# a "top N" slice of this frame never puts an unselected feature ahead of a
# selected one.
boruta_importance_df = boruta_importance_df.sort_values(
    ['boruta_rank', 'importance'], ascending=[True, False]
)

print(f"\nBoruta completed in {boruta_time:.2f} seconds")
print(f"Selected {len(boruta_features)} features out of {X_train.shape[1]}")

BORUTA FEATURE SELECTION
Iteration: 	1 / 100
Confirmed: 	0
Tentative: 	57
Rejected: 	0
Iteration: 	2 / 100
Confirmed: 	0
Tentative: 	57
Rejected: 	0
Iteration: 	3 / 100
Confirmed: 	0
Tentative: 	57
Rejected: 	0
Iteration: 	4 / 100
Confirmed: 	0
Tentative: 	57
Rejected: 	0
Iteration: 	5 / 100
Confirmed: 	0
Tentative: 	57
Rejected: 	0
Iteration: 	6 / 100
Confirmed: 	0
Tentative: 	57
Rejected: 	0
Iteration: 	7 / 100
Confirmed: 	0
Tentative: 	57
Rejected: 	0
Iteration: 	8 / 100
Confirmed: 	49
Tentative: 	8
Rejected: 	0
Iteration: 	9 / 100
Confirmed: 	49
Tentative: 	8
Rejected: 	0
Iteration: 	10 / 100
Confirmed: 	49
Tentative: 	8
Rejected: 	0
Iteration: 	11 / 100
Confirmed: 	49
Tentative: 	8
Rejected: 	0
Iteration: 	12 / 100
Confirmed: 	49
Tentative: 	8
Rejected: 	0
Iteration: 	13 / 100
Confirmed: 	49
Tentative: 	8
Rejected: 	0
Iteration: 	14 / 100
Confirmed: 	49
Tentative: 	8
Rejected: 	0
Iteration: 	15 / 100
Confirmed: 	49
Tentative: 	8
Rejected: 	0
Iteration: 	16 / 100
Confirmed: 	49
Ten

### 2.2 RFE (Recursive Feature Elimination)

In [8]:
# RFE Feature Selection
print("=" * 70)
print("RFE (RECURSIVE FEATURE ELIMINATION)")
print("=" * 70)

# Use LightGBM as base estimator for RFE
lgbm_estimator = LGBMClassifier(n_estimators=100, random_state=42, verbose=-1)

# Keep 20 features, removing one feature per elimination round (step=1)
n_features_to_select = 20
rfe_selector = RFE(estimator=lgbm_estimator, n_features_to_select=n_features_to_select, step=1)

# Only the RFE fit itself is timed
start_time = time.time()
rfe_selector.fit(X_train, y_train)
rfe_time = time.time() - start_time

# Names of the features flagged in RFE's support_ mask
rfe_features = X_train.columns[rfe_selector.support_].tolist()

# A LightGBM model trained on all features supplies the importance scores
# that are stored next to the RFE ranks
lgbm_for_importance = LGBMClassifier(n_estimators=100, random_state=42, verbose=-1)
lgbm_for_importance.fit(X_train, y_train)

# One row per feature: importance score, RFE rank and selection flag
rfe_importance_df = pd.DataFrame({
    'feature': X_train.columns,
    'importance': lgbm_for_importance.feature_importances_,
    'rfe_rank': rfe_selector.ranking_,
    'selected': rfe_selector.support_
})

# Order by RFE rank first (selected features have rank 1 per the
# scikit-learn docs) and use importance only to break ties. Sorting by
# importance alone could put features that RFE eliminated into the "top N"
# slices used by the tables and the CSV export.
rfe_importance_df = rfe_importance_df.sort_values(
    ['rfe_rank', 'importance'], ascending=[True, False]
)

print(f"\nRFE completed in {rfe_time:.2f} seconds")
print(f"Selected {len(rfe_features)} features out of {X_train.shape[1]}")

RFE (RECURSIVE FEATURE ELIMINATION)

RFE completed in 123.02 seconds
Selected 20 features out of 57


### 2.3 Correlation-based Feature Selection

In [9]:
# Correlation-based Feature Selection
print("=" * 70)
print("CORRELATION-BASED FEATURE SELECTION")
print("=" * 70)

start_time = time.time()

# Series.corr aligns its operands on the index. Re-index the target onto
# X_train's index by position so rows are paired correctly even when y_train
# is a plain array (which would otherwise receive a fresh 0..n-1 index).
y_train_aligned = pd.Series(np.asarray(y_train), index=X_train.index)

# Absolute correlation of every feature with the target, strongest first
correlations = pd.DataFrame({
    'feature': feature_cols,
    'correlation': [abs(X_train[col].corr(y_train_aligned)) for col in feature_cols]
})
correlations = correlations.sort_values('correlation', ascending=False)

# Keep features whose absolute correlation with the target reaches the threshold
correlation_threshold = 0.1
corr_features = correlations[correlations['correlation'] >= correlation_threshold]['feature'].tolist()

# Remove redundancy among the kept features: look only at the strict upper
# triangle so each pair is inspected once. A column is dropped when it
# correlates above 0.95 with any column listed before it, i.e. with a feature
# that is more strongly correlated with the target.
corr_matrix = X_train[corr_features].corr().abs()
upper_triangle = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))
to_drop = upper_triangle.columns[(upper_triangle > 0.95).any()].tolist()
corr_features_final = [f for f in corr_features if f not in to_drop]

corr_time = time.time() - start_time

print(f"\nCorrelation-based selection completed in {corr_time:.2f} seconds")
print(f"Selected {len(corr_features_final)} features out of {X_train.shape[1]}")

CORRELATION-BASED FEATURE SELECTION

Correlation-based selection completed in 1.46 seconds
Selected 39 features out of 57


### 2.4 ContrastFS (Contrastive Feature Selection)

In [10]:
# ContrastFS (Contrastive Feature Selection)
# Section banner, emitted with a single print call
print("=" * 70, "CONTRASTFS (CONTRASTIVE FEATURE SELECTION)", "=" * 70, sep="\n")

# Reference timestamp for the ContrastFS timing
start_time = time.time()

def contrastfs_score(X, y, n_samples=5000):
    """
    Calculate ContrastFS scores for each feature.

    Contrast Score = inter-class distance / intra-class distance, where

    * intra-class distance is the size-weighted mean of the per-class
      variances of the standardised feature, and
    * inter-class distance is the weighted mean squared difference between
      class means over all class pairs (weight = product of class sizes).

    Features are standardised (zero mean, unit variance; constant columns are
    only centred) so that the 1e-10 stabiliser in the denominator acts on the
    same scale for every feature.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature matrix, one column per feature.
    y : array-like
        Class labels, positionally aligned with the rows of ``X``.
    n_samples : int, default 5000
        If ``X`` has more rows than this, a fixed-seed random subset of
        ``n_samples`` rows (without replacement) is scored instead.

    Returns
    -------
    pandas.DataFrame
        Columns ``'feature'`` and ``'contrast_score'``, sorted by score in
        descending order.

    Raises
    ------
    ValueError
        If ``X`` has no rows or ``X`` and ``y`` differ in length.
    """
    y_values = np.asarray(y)
    if len(X) == 0:
        raise ValueError("X must contain at least one row")
    if len(y_values) != len(X):
        raise ValueError(
            f"X and y must have the same length, got {len(X)} and {len(y_values)}"
        )

    # Sample data for efficiency (large datasets). A private RandomState
    # yields the same draw as np.random.seed(42) followed by np.random.choice,
    # but leaves the global NumPy random state untouched.
    if len(X) > n_samples:
        rng = np.random.RandomState(42)
        indices = rng.choice(len(X), n_samples, replace=False)
        X_sample = X.iloc[indices].values
        y_sample = y_values[indices]
    else:
        X_sample = X.values
        y_sample = y_values

    # Standardise every column; constant columns keep a unit divisor so they
    # end up as all zeros instead of dividing by zero.
    X_sample = np.asarray(X_sample, dtype=float)
    column_std = X_sample.std(axis=0)
    column_std[np.ptp(X_sample, axis=0) == 0] = 1.0
    X_scaled = (X_sample - X_sample.mean(axis=0)) / column_std

    classes = np.unique(y_sample)
    n_features = X_scaled.shape[1]

    # Per-class statistics, computed for all features at once
    class_means = []
    class_sizes = []
    intra_class_var = np.zeros(n_features)
    for c in classes:
        class_rows = X_scaled[y_sample == c]
        class_sizes.append(len(class_rows))
        class_means.append(class_rows.mean(axis=0))
        # A single-member class contributes no within-class spread
        if len(class_rows) > 1:
            intra_class_var += class_rows.var(axis=0) * len(class_rows)
    intra_class_var /= len(y_sample)

    # Weighted inter-class distance over every unordered pair of classes
    inter_class_dist = np.zeros(n_features)
    total_pairs = 0
    for i in range(len(classes)):
        for j in range(i + 1, len(classes)):
            weight = class_sizes[i] * class_sizes[j]
            inter_class_dist += weight * (class_means[i] - class_means[j]) ** 2
            total_pairs += weight
    if total_pairs > 0:
        inter_class_dist /= total_pairs

    # Contrast score; epsilon guards against a zero within-class variance
    epsilon = 1e-10
    contrast_scores = inter_class_dist / (intra_class_var + epsilon)

    results_df = pd.DataFrame({
        'feature': X.columns,
        'contrast_score': contrast_scores
    })
    results_df = results_df.sort_values('contrast_score', ascending=False)

    return results_df

# Score the training features on a 10,000-row sample
contrastfs_df = contrastfs_score(X=X_train, y=y_train, n_samples=10000)

# Express every score relative to the best-scoring feature
contrastfs_df['normalized_score'] = contrastfs_df['contrast_score'].div(
    contrastfs_df['contrast_score'].max()
)

# The frame is already sorted by score, so the first 20 rows are the top 20
n_top_features = 20
contrastfs_features = contrastfs_df['feature'].iloc[:n_top_features].tolist()

contrastfs_time = time.time() - start_time

print(f"\nContrastFS completed in {contrastfs_time:.2f} seconds")
print(f"Selected {len(contrastfs_features)} features out of {X_train.shape[1]}")

# Aliases read by the later comparison cells
contrast_features, contrast_time = contrastfs_features, contrastfs_time

CONTRASTFS (CONTRASTIVE FEATURE SELECTION)

ContrastFS completed in 0.06 seconds
Selected 20 features out of 57


---
## Step 3: Top 10, Top 15, Top 20 Features per Method

In [11]:
# Display Top 10, 15, 20 Features for Each Method

def display_top_features(importance_df, method_name, feature_col='feature'):
    """
    Print the Top 10, Top 15 and Top 20 features of one selection method.

    In the Top 15 and Top 20 listings, features that were not part of the
    previous (smaller) tier are flagged with "(NEW)".

    Parameters
    ----------
    importance_df : pandas.DataFrame
        Ranking frame already sorted best-first; only ``feature_col`` is read.
    method_name : str
        Label printed in the section banner.
    feature_col : str, default 'feature'
        Name of the column holding the feature names.

    Returns
    -------
    dict
        Keys ``'top10'``, ``'top15'`` and ``'top20'``, each mapping to the
        list of feature names in that tier (shorter if the frame has fewer rows).
    """
    print("=" * 80)
    print(f"{method_name} - TOP FEATURES")
    print("=" * 80)

    # Read the ranking once; every tier is a prefix of the same list
    ranked = importance_df[feature_col].head(20).tolist()
    top10, top15, top20 = ranked[:10], ranked[:15], ranked[:20]

    # Top 10 has no smaller tier to compare against
    print("\n📌 TOP 10:")
    for i, f in enumerate(top10, 1):
        print(f"  {i:2}. {f}")

    # Top 15 and Top 20 - flag whatever the previous tier did not contain
    for heading, current, previous in (("TOP 15", top15, top10),
                                       ("TOP 20", top20, top15)):
        newly_added = set(current) - set(previous)
        print(f"\n📌 {heading}:")
        for i, f in enumerate(current, 1):
            marker = " ⬅️ (NEW)" if f in newly_added else ""
            print(f"  {i:2}. {f}{marker}")

    return {'top10': top10, 'top15': top15, 'top20': top20}

# Print the tiered listing for every method, each preceded by two blank lines,
# and keep the returned top-10/15/20 lists per method
print(end="\n\n")
boruta_tops = display_top_features(
    importance_df=boruta_importance_df,
    method_name="BORUTA (Random Forest Importance)",
)
print(end="\n\n")
rfe_tops = display_top_features(
    importance_df=rfe_importance_df,
    method_name="RFE (LightGBM Importance)",
)
print(end="\n\n")
corr_tops = display_top_features(
    importance_df=correlations,
    method_name="CORRELATION-BASED",
    feature_col='feature',
)
print(end="\n\n")
contrast_tops = display_top_features(
    importance_df=contrastfs_df,
    method_name="CONTRASTFS (Contrastive Selection)",
)



BORUTA (Random Forest Importance) - TOP FEATURES

📌 TOP 10:
   1. URLSimilarityIndex
   2. LineOfCode
   3. NoOfExternalRef
   4. NoOfSelfRef
   5. NoOfJS
   6. HasDescription
   7. NoOfImage
   8. HasSocialNet
   9. NoOfCSS
  10. HasCopyrightInfo

📌 TOP 15:
   1. URLSimilarityIndex
   2. LineOfCode
   3. NoOfExternalRef
   4. NoOfSelfRef
   5. NoOfJS
   6. HasDescription
   7. NoOfImage
   8. HasSocialNet
   9. NoOfCSS
  10. HasCopyrightInfo
  11. has_no_www ⬅️ (NEW)
  12. LargestLineLength ⬅️ (NEW)
  13. IsHTTPS ⬅️ (NEW)
  14. num_slashes ⬅️ (NEW)
  15. DomainTitleMatchScore ⬅️ (NEW)

📌 TOP 20:
   1. URLSimilarityIndex
   2. LineOfCode
   3. NoOfExternalRef
   4. NoOfSelfRef
   5. NoOfJS
   6. HasDescription
   7. NoOfImage
   8. HasSocialNet
   9. NoOfCSS
  10. HasCopyrightInfo
  11. has_no_www
  12. LargestLineLength
  13. IsHTTPS
  14. num_slashes
  15. DomainTitleMatchScore
  16. NoOfOtherSpecialCharsInURL ⬅️ (NEW)
  17. NoOfiFrame ⬅️ (NEW)


---
## Step 4: Feature Selection Summary

In [12]:
# Summary of all feature selection methods
print("=" * 70)
print("FEATURE SELECTION SUMMARY")
print("=" * 70)

# One row per method: number of features kept and the time selection took.
# "All Features" is the unfiltered baseline, hence a selection time of 0.
selection_summary = {
    'Method': ['All Features', 'Boruta', 'RFE', 'Correlation-based', 'ContrastFS'],
    'Num Features': [
        len(feature_cols),
        len(boruta_features),
        len(rfe_features),
        len(corr_features_final),
        len(contrast_features)
    ],
    'Selection Time (s)': [
        0,
        round(boruta_time, 2),
        round(rfe_time, 2),
        round(corr_time, 2),
        round(contrast_time, 2)
    ]
}

summary_df = pd.DataFrame(selection_summary)
print(summary_df.to_string(index=False))

# Features picked by every one of the four methods
common_features = set(boruta_features) & set(rfe_features) & set(corr_features_final) & set(contrast_features)
print(f"\nCommon features across all methods: {len(common_features)}")
# Sets of strings have no stable iteration order between runs; print the
# names sorted so the listing is reproducible.
for f in sorted(common_features):
    print(f"  - {f}")

FEATURE SELECTION SUMMARY
           Method  Num Features  Selection Time (s)
     All Features            57                0.00
           Boruta            52             2029.06
              RFE            20              123.02
Correlation-based            39                1.46
       ContrastFS            20                0.06

Common features across all methods: 6
  - CharContinuationRate
  - URLCharProb
  - URLSimilarityIndex
  - IsHTTPS
  - HasDescription
  - SpacialCharRatioInURL


### Top 10, 15, 20 Comparison Tables

In [13]:
# Create comparison tables for Top 10, 15, 20

def create_comparison_table(n_top):
    """
    Build a side-by-side table of the first ``n_top`` features per method.

    Reads the notebook-level ranking frames (Boruta, RFE, correlation and
    ContrastFS) and returns a DataFrame with a 1-based 'Rank' column followed
    by one feature-name column per method.
    """
    rankings_by_method = {
        'Boruta': boruta_importance_df,
        'RFE': rfe_importance_df,
        'Correlation': correlations,
        'ContrastFS': contrastfs_df,
    }

    # 'Rank' goes in first so it stays the left-most column
    columns = {'Rank': range(1, n_top + 1)}
    for method, ranking in rankings_by_method.items():
        columns[method] = ranking.head(n_top)['feature'].values

    return pd.DataFrame(columns)

# Top 10: banner (no leading blank line for the first table), then the table
print("=" * 100, "TOP 10 FEATURES COMPARISON", "=" * 100, sep="\n")
top10_table = create_comparison_table(10)
print(top10_table.to_string(index=False))

# Top 15: a blank line separates it from the previous table
print("\n" + "=" * 100, "TOP 15 FEATURES COMPARISON", "=" * 100, sep="\n")
top15_table = create_comparison_table(15)
print(top15_table.to_string(index=False))

# Top 20
print("\n" + "=" * 100, "TOP 20 FEATURES COMPARISON", "=" * 100, sep="\n")
top20_table = create_comparison_table(20)
print(top20_table.to_string(index=False))

TOP 10 FEATURES COMPARISON
 Rank             Boruta                   RFE           Correlation            ContrastFS
    1 URLSimilarityIndex            LineOfCode    URLSimilarityIndex    URLSimilarityIndex
    2         LineOfCode     LargestLineLength          HasSocialNet          HasSocialNet
    3    NoOfExternalRef       NoOfExternalRef      HasCopyrightInfo      HasCopyrightInfo
    4        NoOfSelfRef           URLCharProb        HasDescription        HasDescription
    5             NoOfJS      LetterRatioInURL            has_no_www            has_no_www
    6     HasDescription SpacialCharRatioInURL               IsHTTPS               IsHTTPS
    7          NoOfImage               IsHTTPS DomainTitleMatchScore       HasSubmitButton
    8       HasSocialNet             URLLength       HasSubmitButton DomainTitleMatchScore
    9            NoOfCSS                NoOfJS          IsResponsive          IsResponsive
   10   HasCopyrightInfo    URLSimilarityIndex    URLTitleMatch

---
## Step 5: Export Results to CSV

In [14]:
# Export all Top Features to CSV: TopFeaturesSelectionAll.csv

# Number of ranks to export. Clamped to the shortest ranking so every column
# has the same length even if fewer than 20 features are available.
max_features = min(
    20,
    len(boruta_importance_df),
    len(rfe_importance_df),
    len(correlations),
    len(contrastfs_df),
)

# Slice each ranking once; all export columns are read from these slices
boruta_top = boruta_importance_df.head(max_features)
rfe_top = rfe_importance_df.head(max_features)
corr_top = correlations.head(max_features)
contrast_top = contrastfs_df.head(max_features)

# Tier in which each rank first appears: 1-10 -> Top10, 11-15 -> Top15, rest -> Top20
top_category = [
    'Top10' if rank <= 10 else 'Top15' if rank <= 15 else 'Top20'
    for rank in range(1, max_features + 1)
]

# Columns are listed in their final order, so no reordering step is needed
export_data = {
    'Rank': list(range(1, max_features + 1)),
    'Top_Category': top_category,
    'Boruta_Feature': boruta_top['feature'].values.tolist(),
    'Boruta_Score': boruta_top['importance'].round(6).values.tolist(),
    'RFE_Feature': rfe_top['feature'].values.tolist(),
    'RFE_Score': rfe_top['importance'].values.tolist(),
    'Correlation_Feature': corr_top['feature'].values.tolist(),
    'Correlation_Score': corr_top['correlation'].round(6).values.tolist(),
    'ContrastFS_Feature': contrast_top['feature'].values.tolist(),
    'ContrastFS_Score': contrast_top['normalized_score'].round(6).values.tolist(),
}

# Create DataFrame
export_df = pd.DataFrame(export_data)

# Save to CSV
output_filename = 'TopFeaturesSelectionAll.csv'
export_df.to_csv(output_filename, index=False)

print("=" * 70)
print("EXPORT COMPLETED")
print("=" * 70)
print(f"\n✅ Saved to: {output_filename}")
print(f"\nFile contains {len(export_df)} rows with Top 10, 15, 20 features from all 4 methods.")
print("\nPreview:")
print(export_df.to_string(index=False))

EXPORT COMPLETED

✅ Saved to: TopFeaturesSelectionAll.csv

File contains 20 rows with Top 10, 15, 20 features from all 4 methods.

Preview:
 Rank Top_Category                Boruta_Feature  Boruta_Score               RFE_Feature  RFE_Score   Correlation_Feature  Correlation_Score    ContrastFS_Feature  ContrastFS_Score
    1        Top10            URLSimilarityIndex      0.216753                LineOfCode        499    URLSimilarityIndex           0.860443    URLSimilarityIndex          1.000000
    2        Top10                    LineOfCode      0.154901         LargestLineLength        471          HasSocialNet           0.783682          HasSocialNet          0.570522
    3        Top10               NoOfExternalRef      0.115893           NoOfExternalRef        277      HasCopyrightInfo           0.742820      HasCopyrightInfo          0.456686
    4        Top10                   NoOfSelfRef      0.093042               URLCharProb        269        HasDescription           0.

In [15]:
# Final Summary - Top 15 Additional Features (compared to Top 10)
print("=" * 80)
print("TOP 15 ADDITIONAL FEATURES (5 new features compared to Top 10)")
print("=" * 80)

# Ranking frame per method; each is already sorted best-first
methods = {
    'Boruta': boruta_importance_df,
    'RFE': rfe_importance_df,
    'Correlation': correlations,
    'ContrastFS': contrastfs_df
}

# The loop variable is deliberately not called `df`: that name holds the
# loaded dataset, and rebinding it here would break re-running earlier cells.
for method_name, ranking_df in methods.items():
    top10 = ranking_df.head(10)['feature'].tolist()
    top15 = ranking_df.head(15)['feature'].tolist()
    additional_5 = [f for f in top15 if f not in top10]

    print(f"\n📌 {method_name} - 5 Additional Features (Rank 11-15):")
    # Numbering starts at 11 because these features follow the Top 10
    for i, f in enumerate(additional_5, 11):
        print(f"   {i}. {f}")

TOP 15 ADDITIONAL FEATURES (5 new features compared to Top 10)

📌 Boruta - 5 Additional Features (Rank 11-15):
   11. has_no_www
   12. LargestLineLength
   13. IsHTTPS
   14. num_slashes
   15. DomainTitleMatchScore

📌 RFE - 5 Additional Features (Rank 11-15):
   11. NoOfCSS
   12. URL_Profanity_Prob
   13. NoOfLettersInURL
   14. NoOfSelfRef
   15. NoOfSubDomain

📌 Correlation - 5 Additional Features (Rank 11-15):
   11. SpacialCharRatioInURL
   12. HasHiddenFields
   13. HasFavicon
   14. num_slashes
   15. URLCharProb

📌 ContrastFS - 5 Additional Features (Rank 11-15):
   11. SpacialCharRatioInURL
   12. HasHiddenFields
   13. num_slashes
   14. HasFavicon
   15. URLCharProb
