# Python Code Examples Collection

This notebook contains various Python code examples demonstrating different concepts and libraries.

In [None]:
# Shared setup for the "Python Code Examples" cells.
# These imports and sample values are used by this cell and by the example
# cells that follow; without them the notebook fails with NameError on a
# fresh kernel (Restart Kernel -> Run All).
import datetime
import random
from collections import Counter, defaultdict

numbers = list(range(1, 11))                               # sample integers 1..10
names = ['Alice', 'Bob', 'Charlie', 'Diana', 'Eve']        # sample first names
colors = ['red', 'green', 'blue', 'yellow', 'purple']      # sample color labels

# List comprehensions and functional programming
squares = [x**2 for x in range(1, 11)]
even_squares = [x**2 for x in range(1, 11) if x % 2 == 0]

# Using map, filter, and lambda functions
doubled = list(map(lambda x: x * 2, numbers))
evens = list(filter(lambda x: x % 2 == 0, numbers))
total = sum(numbers)

print("Squares:", squares)
print("Even squares:", even_squares)
print("Doubled numbers:", doubled)
print("Even numbers:", evens)
print("Sum of numbers:", total)

In [None]:
# Object-oriented programming example
class Person:
    """A person described by a name, an age and a list of hobbies."""

    def __init__(self, name, age):
        """Store ``name`` and ``age`` and start with no hobbies."""
        self.name, self.age = name, age
        self.hobbies = []  # grows through add_hobby()

    def add_hobby(self, hobby):
        """Append ``hobby`` to this person's hobby list."""
        self.hobbies.append(hobby)

    def introduce(self):
        """Return a one-line introduction; hobbies are comma-separated, or "none" if the list is empty."""
        if self.hobbies:
            hobbies_str = ", ".join(self.hobbies)
        else:
            hobbies_str = "none"
        return f"Hi, I'm {self.name}, {self.age} years old. My hobbies: {hobbies_str}"

    def is_adult(self):
        """Return True when the person's age is 18 or more."""
        adult = self.age >= 18
        return adult

# Build one Person for each of the first three names: a random age between
# 16 and 65 (inclusive) is drawn first, then one random hobby.
hobby_pool = ['reading', 'gaming', 'cooking', 'traveling', 'music']
people = []
for first_name in names[:3]:
    person = Person(first_name, random.randint(16, 65))
    person.add_hobby(random.choice(hobby_pool))
    people.append(person)

# Show each introduction followed by the adult flag and a blank line
for person in people:
    print(person.introduce())
    print(f"Adult: {person.is_adult()}\n")

In [None]:
# Data analysis with dictionaries and collections
# Sample data: one random sales figure (1000-5000) per month, drawn in
# calendar order
months = ('January', 'February', 'March', 'April', 'May')
sales_data = {month: random.randint(1000, 5000) for month in months}

# Word frequency counter
text = "python is great python is powerful python is versatile programming language"
word_count = Counter(text.split())

# Group people by age bracket: under 30 is "Young", everyone else "Adult"
groups = defaultdict(list)
for member in people:
    bracket = "Young" if member.age < 30 else "Adult"
    groups[bracket].append(member.name)

# The total is computed once and reused for the average
total_sales = sum(sales_data.values())
print("Sales Data:", sales_data)
print("Total Sales:", total_sales)
print("Average Sales:", total_sales / len(sales_data))
print("\nWord Frequencies:", dict(word_count))
print("\nAge Groups:", dict(groups))

In [None]:
# File operations and error handling
import json
import os

# Assemble the sample payload: one record per name with a random color,
# the shared number list, and the current time as a string
user_records = [{'name': name, 'color': random.choice(colors)} for name in names]
sample_data = {
    'users': user_records,
    'numbers': numbers,
    'timestamp': str(datetime.datetime.now()),
}

try:
    # Serialize the payload to disk
    with open('sample_data.json', 'w') as out_file:
        json.dump(sample_data, out_file, indent=2)
    print("Data written to sample_data.json")

    # Load it back and summarize each top-level entry
    with open('sample_data.json', 'r') as in_file:
        loaded_data = json.load(in_file)

    print("Loaded data structure:")
    for key, value in loaded_data.items():
        # Sized values report their length; anything else reports 'N/A'
        size = len(value) if hasattr(value, '__len__') else 'N/A'
        print(f"  {key}: {type(value).__name__} with {size} items")

except FileNotFoundError:
    print("File not found!")
except json.JSONDecodeError:
    print("Invalid JSON format!")
except Exception as e:
    print(f"An error occurred: {e}")
finally:
    # Runs whether or not the round trip succeeded
    print("File operation completed.")

In [None]:
# Decorators and generators
import time
import functools

# Decorator example
def timing_decorator(func):
    """Wrap ``func`` so that each call prints how long it took.

    Args:
        func: The callable to time.

    Returns:
        A wrapper with the same name and docstring as ``func`` (via
        ``functools.wraps``). It forwards all positional and keyword
        arguments, prints ``"<name> took <seconds> seconds"`` after the call
        returns, and passes the result through. If ``func`` raises, the
        exception propagates and nothing is printed.
    """
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # perf_counter() is monotonic and high-resolution, so the measured
        # interval cannot be skewed by system clock adjustments the way a
        # time.time() difference can.
        start_time = time.perf_counter()
        result = func(*args, **kwargs)
        elapsed = time.perf_counter() - start_time
        print(f"{func.__name__} took {elapsed:.4f} seconds")
        return result
    return wrapper

@timing_decorator
def slow_function():
    """Sleep for 0.1 seconds to mimic slow work, then return the sum of 0..999."""
    time.sleep(0.1)  # stand-in for a slow operation
    total = sum(range(1000))
    return total

# Generator example
def fibonacci_generator(n):
    """Yield Fibonacci numbers starting at 0 for as long as fewer than ``n`` have been produced."""
    produced = 0
    current, upcoming = 0, 1
    while produced < n:
        yield current
        # Slide the pair forward by one position in the sequence
        current, upcoming = upcoming, current + upcoming
        produced += 1

def prime_generator(limit):
    """Yield, in increasing order, every prime p with 2 <= p < ``limit`` (trial division)."""
    def is_prime(n):
        """Return True when ``n`` is at least 2 and has no divisor in 2..int(sqrt(n))."""
        if n < 2:
            return False
        upper = int(n**0.5) + 1
        return all(n % divisor != 0 for divisor in range(2, upper))

    yield from (candidate for candidate in range(2, limit) if is_prime(candidate))

# Exercise the decorated function and both generators
print("Testing slow function:")
result = slow_function()  # the decorator prints the elapsed time
print(f"Result: {result}")

print("\nFirst 10 Fibonacci numbers:")
fib_numbers = [*fibonacci_generator(10)]
print(fib_numbers)

print("\nPrime numbers under 30:")
primes = [*prime_generator(30)]
print(primes)

In [None]:
# Regular expressions and string manipulation
import re

# Sample text data
sample_text = """
Contact us at info@example.com or support@company.org
Phone numbers: (555) 123-4567, 555.987.6543, +1-800-555-0199
Visit our website: https://www.example.com or http://company.org
"""

# Regular expression patterns
# Email: local part, "@", domain, then a top-level domain of two or more
# letters. Inside a character class "|" is a literal pipe rather than
# alternation, so the TLD class is [A-Za-z]; [A-Z|a-z] would also accept "|"
# as part of the TLD.
email_pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b'
# Phone: optional "(" or "+", a non-zero leading digit, at least eight
# digits/separators, and a final digit
phone_pattern = r'[\(\+]?[1-9][0-9 .\-\(\)]{8,}[0-9]'
# URL: http:// or https:// followed by characters up to whitespace or one of
# the excluded delimiter characters
url_pattern = r'https?://[^\s<>"{}|\\^`\[\]]+'

# Extract information using regex
emails = re.findall(email_pattern, sample_text)
phones = re.findall(phone_pattern, sample_text)
urls = re.findall(url_pattern, sample_text)

# String manipulation examples
words = ['hello', 'world', 'python', 'programming', 'awesome']
capitalized = [word.capitalize() for word in words]
reversed_words = [word[::-1] for word in words]
long_words = [word for word in words if len(word) > 5]

# Create a formatted string
formatted_string = f"""
Extracted Data:
- Emails found: {len(emails)} -> {emails}
- Phone numbers found: {len(phones)} -> {phones}
- URLs found: {len(urls)} -> {urls}

String Operations:
- Original words: {words}
- Capitalized: {capitalized}
- Reversed: {reversed_words}
- Long words (>5 chars): {long_words}
"""

print(formatted_string)

# Bonus: Password generator
import string
def generate_password(length=12):
    """Return a random password made of ``length`` characters.

    Characters are drawn independently from ASCII letters, digits and the
    symbols ``!@#$%^&*``.

    Args:
        length: Number of characters to generate (default 12). A value of 0
            yields an empty string.

    Returns:
        The generated password as a ``str``.
    """
    # Passwords are security-sensitive, so draw from the `secrets` module
    # (OS-backed CSPRNG) rather than the predictable `random` generator.
    # Imported locally so the function does not depend on cell-level imports.
    import secrets

    chars = string.ascii_letters + string.digits + "!@#$%^&*"
    return ''.join(secrets.choice(chars) for _ in range(length))

# Demo: print one password generated with the default length
print(f"Random password: {generate_password()}")

# Phishpedia Analysis and Alternative Setup

Based on the Phishpedia GitHub repository (https://github.com/lindsey98/Phishpedia), this section shows how to set up and run Phishpedia without installing Pixi.

In [None]:
# Phishpedia Alternative Setup Without Pixi
import subprocess
import sys
import os

def analyze_phishpedia_setup():
    """
    Print the packages Phishpedia is expected to need and three ways to
    install them without Pixi (pip, conda, requirements.txt).

    Returns:
        list[str]: the package names printed in the dependency overview.
    """

    def show_commands(commands):
        """Print each command on its own line, indented by two spaces."""
        for command in commands:
            print(f"  {command}")

    print("🔍 PHISHPEDIA ANALYSIS - Running Without Pixi")
    print("=" * 60)

    # Packages Phishpedia likely needs, in the order they are displayed:
    # deep learning (torch, torchvision), image handling (opencv-python,
    # Pillow), numerics (numpy), web access and scraping (requests,
    # beautifulsoup4, selenium), ML utilities (scikit-learn), plotting
    # (matplotlib) and data manipulation (pandas).
    key_dependencies = [
        'torch', 'torchvision', 'opencv-python', 'Pillow', 'numpy',
        'requests', 'beautifulsoup4', 'selenium', 'scikit-learn',
        'matplotlib', 'pandas',
    ]

    print("📦 CORE DEPENDENCIES PHISHPEDIA LIKELY NEEDS:")
    for position, package in enumerate(key_dependencies, 1):
        print(f"{position:2d}. {package}")

    print("\n🛠️ ALTERNATIVE INSTALLATION METHODS:")
    print("-" * 40)

    # Method 1: pip
    print("METHOD 1: Using pip (Recommended)")
    show_commands([
        "pip install torch torchvision",
        "pip install opencv-python pillow numpy",
        "pip install requests beautifulsoup4 selenium",
        "pip install scikit-learn matplotlib pandas",
        "pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cpu/torch1.10/index.html",
    ])

    # Method 2: conda
    print("\nMETHOD 2: Using conda")
    show_commands([
        "conda install pytorch torchvision cpuonly -c pytorch",
        "conda install opencv pillow numpy requests",
        "conda install beautifulsoup4 selenium scikit-learn",
        "conda install matplotlib pandas",
    ])

    # Method 3: a requirements.txt the reader can create by hand
    print("\nMETHOD 3: Create requirements.txt file")
    requirement_lines = [
        "torch>=1.10.0",
        "torchvision>=0.11.0",
        "opencv-python>=4.5.0",
        "Pillow>=8.0.0",
        "numpy>=1.21.0",
        "requests>=2.25.0",
        "beautifulsoup4>=4.9.0",
        "selenium>=3.141.0",
        "scikit-learn>=1.0.0",
        "matplotlib>=3.3.0",
        "pandas>=1.3.0",
        "detectron2",
    ]
    # The text ends with a newline, so print() leaves a blank line after it
    requirements = "\n".join(requirement_lines) + "\n"

    print("Create a requirements.txt file with:")
    print(requirements)
    print("Then run: pip install -r requirements.txt")

    return key_dependencies

# Run the analysis when the cell executes; the returned list of package
# names is kept in `dependencies`
dependencies = analyze_phishpedia_setup()

In [None]:
# Create Windows Setup Scripts for Phishpedia (Alternative to Pixi)

def create_windows_setup_scripts():
    """
    Write Windows setup scripts for Phishpedia into the current directory.

    Two files are created (existing files with the same names are
    overwritten):

    * ``setup_phishpedia.bat``  - batch script for cmd.exe
    * ``setup_phishpedia.ps1``  - PowerShell script

    Both scripts check for Python, install the Python dependencies with pip
    and remind the user to obtain the model files manually. Progress and any
    file-system error are reported with print(); nothing is returned.
    """

    print("🪟 CREATING WINDOWS SETUP SCRIPTS")
    print("=" * 50)

    # Batch script content
    batch_script = """@echo off
echo Installing Phishpedia dependencies without Pixi...
echo.

REM Check if Python is installed
python --version >nul 2>&1
if %errorlevel% neq 0 (
    echo ERROR: Python is not installed or not in PATH
    echo Please install Python from https://python.org
    pause
    exit /b 1
)

REM Check if pip is available
python -m pip --version >nul 2>&1
if %errorlevel% neq 0 (
    echo ERROR: pip is not available
    pause
    exit /b 1
)

echo Installing core dependencies...
python -m pip install --upgrade pip
python -m pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu
python -m pip install opencv-python pillow numpy
python -m pip install requests beautifulsoup4 selenium
python -m pip install scikit-learn matplotlib pandas

echo.
echo Downloading models and setting up environment...
REM You would need to download the model files manually
echo Please download the following model files:
echo - rcnn_bet365.pth
echo - faster_rcnn.yaml  
echo - resnetv2_rgb_new.pth.tar
echo From the original repository or provided links

echo.
echo Setup completed! You can now run:
echo python phishpedia.py --folder ./datasets/test_sites
pause
"""

    # PowerShell script content.
    # Each $dependencies entry is split on spaces and splatted (@pipArgs) so
    # that an entry such as "torch torchvision --index-url <url>" reaches pip
    # as separate arguments. Passing the string variable directly would hand
    # pip a single argument containing spaces, which it rejects.
    powershell_script = """# Phishpedia Setup Script for Windows PowerShell
Write-Host "Installing Phishpedia dependencies without Pixi..." -ForegroundColor Green

# Check Python installation
try {
    $pythonVer = python --version 2>&1
    Write-Host "Found Python: $pythonVer" -ForegroundColor Green
} catch {
    Write-Host "ERROR: Python not found. Install from https://python.org" -ForegroundColor Red
    exit 1
}

# Install dependencies
Write-Host "Installing dependencies..." -ForegroundColor Yellow

$dependencies = @(
    "torch torchvision --index-url https://download.pytorch.org/whl/cpu",
    "opencv-python",
    "pillow",
    "numpy",
    "requests",
    "beautifulsoup4",
    "selenium",
    "scikit-learn",
    "matplotlib",
    "pandas"
)

foreach ($dep in $dependencies) {
    Write-Host "Installing $dep..." -ForegroundColor Cyan
    # Split the entry so every word is passed to pip as its own argument
    $pipArgs = $dep -split ' '
    python -m pip install @pipArgs
    if ($LASTEXITCODE -ne 0) {
        Write-Host "Failed to install $dep" -ForegroundColor Red
    }
}

Write-Host "Setup completed!" -ForegroundColor Green
Write-Host "Run: python phishpedia.py --folder ./datasets/test_sites" -ForegroundColor Yellow
"""

    scripts = (
        ('setup_phishpedia.bat', batch_script),
        ('setup_phishpedia.ps1', powershell_script),
    )

    # Create the files
    try:
        for filename, content in scripts:
            # newline='\r\n' gives the scripts Windows line endings even when
            # this cell runs on Linux/macOS (e.g. Colab); both scripts are
            # plain ASCII, so utf-8 writes the same bytes on every platform.
            with open(filename, 'w', encoding='utf-8', newline='\r\n') as f:
                f.write(content)
            print(f"✅ Created: {filename}")

        print("\n📋 TO USE THESE SCRIPTS:")
        print("1. Save the scripts to your Phishpedia project folder")
        print("2. Run setup_phishpedia.bat OR setup_phishpedia.ps1")
        print("3. Manually download model files if needed")

    except OSError as e:
        # File-system problems (permissions, missing directory, full disk)
        print(f"❌ Error creating files: {e}")

# Create the setup scripts (writes setup_phishpedia.bat and
# setup_phishpedia.ps1 into the current working directory)
create_windows_setup_scripts()

In [None]:
# Step-by-Step Manual Installation Guide

def print_manual_installation_guide():
    """
    Print a numbered, step-by-step guide for installing Phishpedia by hand.

    Returns:
        list[dict]: one dictionary per step with the keys "step", "title",
        "commands" and "description", in display order.
    """

    def make_step(number, title, description, commands):
        """Bundle one step of the guide into its dictionary form."""
        return {
            "step": number,
            "title": title,
            "commands": commands,
            "description": description,
        }

    print("📚 STEP-BY-STEP MANUAL INSTALLATION GUIDE")
    print("=" * 60)

    steps = [
        make_step(
            1,
            "Clone the Repository",
            "Download the Phishpedia source code",
            [
                "git clone https://github.com/lindsey98/Phishpedia.git",
                "cd Phishpedia",
            ],
        ),
        make_step(
            2,
            "Set up Python Environment (Optional but Recommended)",
            "Create isolated environment to avoid conflicts",
            [
                "python -m venv phishpedia_env",
                "# On Windows:",
                "phishpedia_env\\Scripts\\activate",
                "# On Linux/Mac:",
                "source phishpedia_env/bin/activate",
            ],
        ),
        make_step(
            3,
            "Install Core Dependencies",
            "Install essential Python packages",
            [
                "pip install --upgrade pip",
                "pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu",
                "pip install opencv-python pillow numpy",
                "pip install requests beautifulsoup4 selenium",
            ],
        ),
        make_step(
            4,
            "Install Additional ML Libraries",
            "Install machine learning and data processing libraries",
            [
                "pip install scikit-learn",
                "pip install matplotlib pandas",
                "pip install webdriver-manager",  # used with Selenium
            ],
        ),
        make_step(
            5,
            "Download Model Files",
            "Download pre-trained models and configuration files",
            [
                "# Create models directory if it doesn't exist",
                "mkdir models (if not exists)",
                "# Download these files manually:",
                "# - rcnn_bet365.pth",
                "# - faster_rcnn.yaml",
                "# - resnetv2_rgb_new.pth.tar",
                "# - expand_targetlist/ folder",
                "# - domain_map.pkl",
            ],
        ),
        make_step(
            6,
            "Test the Installation",
            "Run Phishpedia to verify everything works",
            [
                "python phishpedia.py --folder ./datasets/test_sites",
            ],
        ),
    ]

    for entry in steps:
        print(f"\n🔸 STEP {entry['step']}: {entry['title']}")
        print(f"   {entry['description']}")
        print("   Commands:")
        for line in entry['commands']:
            # Lines starting with "#" are shown as comments; everything else
            # gets a "> " prompt in front
            prefix = "   " if line.startswith("#") else "   > "
            print(f"{prefix}{line}")

    print("\n⚠️  IMPORTANT NOTES:")
    for note in (
        "- The CPU version of PyTorch is used (slower but works without GPU)",
        "- You may need to download ChromeDriver for Selenium web scraping",
        "- Some model files might be large (several GB)",
        "- Check the original repo for any additional configuration files",
    ):
        print(note)

    return steps

# Print the guide and keep the returned list of step dictionaries
installation_steps = print_manual_installation_guide()

In [None]:
# Troubleshooting Common Issues

def print_troubleshooting_guide():
    """
    Print common problems seen when running Phishpedia without Pixi, each
    with its suggested fixes, followed by a short list of general tips.
    """

    print("🔧 TROUBLESHOOTING COMMON ISSUES")
    print("=" * 50)

    # (symptom, suggested fixes) pairs, in display order
    known_issues = (
        (
            "ModuleNotFoundError: No module named 'torch'",
            (
                "Install PyTorch: pip install torch torchvision",
                "Or use CPU version: pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu",
            ),
        ),
        (
            "cv2 import error",
            (
                "Install OpenCV: pip install opencv-python",
                "Or try: pip install opencv-python-headless",
            ),
        ),
        (
            "Model files not found",
            (
                "Download model files manually from the repository",
                "Check if models/ directory exists",
                "Verify file paths in configs.yaml",
            ),
        ),
        (
            "Selenium WebDriver issues",
            (
                "Install webdriver-manager: pip install webdriver-manager",
                "Download ChromeDriver manually",
                "Make sure Chrome browser is installed",
            ),
        ),
        (
            "Permission denied errors",
            (
                "Run terminal as administrator",
                "Use --user flag: pip install --user package_name",
                "Check file permissions",
            ),
        ),
    )

    for number, (symptom, fixes) in enumerate(known_issues, 1):
        print(f"\n❌ ISSUE {number}: {symptom}")
        print("   Solutions:")
        for fix in fixes:
            print(f"   • {fix}")

    print("\n💡 ADDITIONAL TIPS:")
    for tip in (
        "Use virtual environment to avoid dependency conflicts",
        "Check Python version compatibility (3.7+ recommended)",
        "Monitor GPU/CPU usage during model inference",
        "Verify internet connection for downloading models",
        "Check disk space (models can be several GB)",
    ):
        print(f"   • {tip}")

def create_requirements_file(path='requirements.txt'):
    """
    Write a requirements file for Phishpedia and print its content.

    Args:
        path: Where to write the file. Defaults to ``requirements.txt`` in
            the current working directory. An existing file at that location
            is overwritten.

    Returns:
        None. Success, the file content and any file-system error are
        reported with print().
    """

    requirements_content = """# Phishpedia Requirements (Alternative to Pixi)
# Core dependencies
torch>=1.10.0
torchvision>=0.11.0
opencv-python>=4.5.0
numpy>=1.21.0
Pillow>=8.0.0

# Web scraping and requests
requests>=2.25.0
beautifulsoup4>=4.9.0
selenium>=3.141.0
webdriver-manager>=3.8.0

# Machine learning
scikit-learn>=1.0.0

# Data processing and visualization  
pandas>=1.3.0
matplotlib>=3.3.0

# Additional utilities
tqdm>=4.60.0
pyyaml>=5.4.0
"""

    try:
        # Explicit encoding keeps the written bytes independent of the
        # platform's default locale.
        with open(path, 'w', encoding='utf-8') as f:
            f.write(requirements_content)
        print(f"✅ Created {path} file")
        print(f"   Run: pip install -r {path}")

        # Also show the content
        print("\n📄 REQUIREMENTS.TXT CONTENT:")
        print(requirements_content)

    except OSError as e:
        # File-system problems (permissions, missing directory, full disk)
        print(f"❌ Error creating {path}: {e}")

# Show the troubleshooting guide, print a divider line, then write
# requirements.txt
print_troubleshooting_guide()
divider = "=" * 60
print(f"\n{divider}")
create_requirements_file()

# 🚀 Running Phishpedia on Google Colab

Google Colab is perfect for Phishpedia since it provides free GPU access and most dependencies are pre-installed!

In [None]:
# 🔧 COMPLETE COLAB SETUP FOR PHISHPEDIA
# Run this cell first in Google Colab!

print("🚀 Setting up Phishpedia on Google Colab...")
print("=" * 60)

# Step 1: Clone the repository
print("📥 Step 1: Cloning Phishpedia repository...")
!git clone https://github.com/lindsey98/Phishpedia.git
%cd Phishpedia

# Step 2: Install additional dependencies (most are already in Colab)
print("\n📦 Step 2: Installing additional dependencies...")
!pip install selenium webdriver-manager
!pip install opencv-python-headless  # Headless version for Colab
!apt-get update
!apt-get install -y chromium-browser chromium-chromedriver

# Step 3: Set up ChromeDriver for Selenium
print("\n🌐 Step 3: Setting up ChromeDriver...")
import os
os.environ['PATH'] += ':/usr/lib/chromium-browser/'

# Step 4: Check what's already available in Colab
print("\n✅ Step 4: Checking pre-installed packages in Colab...")
import sys
import pkg_resources

colab_packages = ['torch', 'torchvision', 'numpy', 'opencv-python', 'PIL', 'requests', 'beautifulsoup4', 'matplotlib', 'pandas', 'scikit-learn']

print("Pre-installed packages in Colab:")
for package in colab_packages:
    try:
        version = pkg_resources.get_distribution(package).version
        print(f"  ✅ {package}: {version}")
    except:
        try:
            # Try alternative names
            if package == 'PIL':
                version = pkg_resources.get_distribution('Pillow').version
                print(f"  ✅ Pillow (PIL): {version}")
            elif package == 'opencv-python':
                import cv2
                print(f"  ✅ opencv-python: {cv2.__version__}")
            else:
                print(f"  ❌ {package}: Not found")
        except:
            print(f"  ❌ {package}: Not found")

print("\n🎯 Colab setup completed! Ready to run Phishpedia.")

In [None]:
# 📁 DOWNLOAD AND VERIFY MODEL FILES
# Run this cell after the setup cell
# This cell only reports which model files are present; it does not
# download anything itself.

import os
import requests  # NOTE(review): not used in this cell - confirm whether a download step was intended
from itertools import islice
from pathlib import Path

print("📁 Downloading and setting up model files...")
print("=" * 50)

# Check if models directory exists
models_dir = Path("models")
if not models_dir.exists():
    print("Creating models directory...")
    models_dir.mkdir(exist_ok=True)

# List required model files
required_files = [
    "rcnn_bet365.pth",
    "faster_rcnn.yaml",
    "resnetv2_rgb_new.pth.tar",
    "domain_map.pkl"
]

print("Required model files:")
for i, file in enumerate(required_files, 1):
    file_path = models_dir / file
    if file_path.exists():
        size = file_path.stat().st_size / (1024*1024)  # Size in MB
        print(f"  {i}. ✅ {file} ({size:.1f} MB)")
    else:
        print(f"  {i}. ❌ {file} (missing)")

# Check for expand_targetlist directory.
# Its list number is derived from the file list so the numbering stays
# correct if required_files changes.
targetlist_index = len(required_files) + 1
targetlist_dir = models_dir / "expand_targetlist"
if targetlist_dir.exists():
    # Counts every entry directly inside the directory
    brand_count = len(list(targetlist_dir.glob("*")))
    print(f"  {targetlist_index}. ✅ expand_targetlist/ ({brand_count} brands)")
else:
    print(f"  {targetlist_index}. ❌ expand_targetlist/ (missing)")

print("\n💡 Note: Model files are large and need to be downloaded separately.")
print("The original repository should contain download links or the files directly.")

# Check if we can access the main script
main_script = Path("phishpedia.py")
if main_script.exists():
    print(f"\n✅ Main script found: {main_script}")

    # Show first few lines of the script.
    # islice stops after ten lines instead of reading the whole file, and
    # errors='replace' keeps an undecodable byte from aborting the preview.
    print("\n📄 Preview of phishpedia.py:")
    with open(main_script, 'r', encoding='utf-8', errors='replace') as f:
        for i, line in enumerate(islice(f, 10), 1):
            print(f"  {i:2d}: {line.rstrip()}")
else:
    print(f"\n❌ Main script not found: {main_script}")

print("\n🔍 Current directory contents:")
for item in sorted(Path(".").iterdir()):
    if item.is_dir():
        print(f"  📁 {item.name}/")
    else:
        print(f"  📄 {item.name}")

In [None]:
# 🧪 TEST PHISHPEDIA IN COLAB
# Run this cell to test if everything works

import importlib
import os
import subprocess
import sys
from pathlib import Path

print("🧪 Testing Phishpedia setup...")
print("=" * 40)

# Test 1: Check if we can import the main modules
print("🔍 Test 1: Import test")
try:
    # Add current directory to Python path (once)
    current_dir = str(Path(".").absolute())
    if current_dir not in sys.path:
        sys.path.append(current_dir)

    # (module name to import, label to display)
    imports_to_test = [
        ('torch', 'PyTorch'),
        ('torchvision', 'TorchVision'),
        ('cv2', 'OpenCV'),
        ('numpy', 'NumPy'),
        ('requests', 'Requests'),
        ('selenium', 'Selenium'),
        ('PIL', 'Pillow')
    ]

    for module, name in imports_to_test:
        try:
            importlib.import_module(module)
            print(f"  ✅ {name}")
        except ImportError as e:
            print(f"  ❌ {name}: {e}")

except Exception as e:
    print(f"  ❌ Import test failed: {e}")

# Test 2: Check Selenium WebDriver
print("\n🌐 Test 2: Selenium WebDriver test")
driver = None
try:
    from selenium import webdriver
    from selenium.webdriver.chrome.options import Options

    # Set up Chrome options for Colab
    chrome_options = Options()
    chrome_options.add_argument('--headless')
    chrome_options.add_argument('--no-sandbox')
    chrome_options.add_argument('--disable-dev-shm-usage')
    chrome_options.add_argument('--disable-gpu')

    # Try to create a driver instance
    driver = webdriver.Chrome(options=chrome_options)
    print("  ✅ Chrome WebDriver initialized successfully")

    # Test a simple page load
    driver.get("https://www.google.com")
    title = driver.title
    print(f"  ✅ Page loaded successfully: {title}")

except Exception as e:
    print(f"  ❌ WebDriver test failed: {e}")
finally:
    # Always shut the browser down, even when the page load failed, so no
    # Chrome process is left running in the background.
    if driver is not None:
        driver.quit()

# Test 3: GPU availability
print("\n🔥 Test 3: GPU availability")
try:
    import torch
    if torch.cuda.is_available():
        gpu_count = torch.cuda.device_count()
        gpu_name = torch.cuda.get_device_name(0)
        print(f"  ✅ GPU available: {gpu_name} ({gpu_count} device(s))")
        print(f"  ✅ CUDA version: {torch.version.cuda}")
    else:
        print("  ⚠️  GPU not available, using CPU")
        print("  💡 In Colab: Runtime > Change runtime type > Hardware accelerator > GPU")
except Exception as e:
    print(f"  ❌ GPU test failed: {e}")

# Test 4: Try running Phishpedia (if possible)
print("\n🎯 Test 4: Phishpedia execution test")
if Path("phishpedia.py").exists():
    print("  ✅ Main script found")
    print("  💡 To run: !python phishpedia.py --folder ./datasets/test_sites")

    # Show help if available.
    # sys.executable runs the script with the same interpreter as this
    # kernel. The output is captured so a failure can be reported in the
    # cell; a non-zero exit code is reported instead of being ignored.
    try:
        completed = subprocess.run(
            [sys.executable, "phishpedia.py", "--help"],
            capture_output=True,
            text=True,
        )
    except OSError as e:
        print(f"  ⚠️  Could not test script execution: {e}")
    else:
        if completed.returncode == 0:
            print("  ✅ Script can be executed")
        else:
            print(f"  ⚠️  Script exited with code {completed.returncode}")
            error_lines = completed.stderr.strip().splitlines()
            if error_lines:
                # The last stderr line usually carries the exception message
                print(f"     {error_lines[-1]}")
else:
    print("  ❌ Main script not found")

print("\n🎉 Test completed! Check results above.")

In [None]:
# 📋 COMPLETE COLAB USAGE GUIDE FOR PHISHPEDIA

def show_colab_usage_guide():
    """
    Print a usage guide for running Phishpedia in Google Colab.

    The guide covers the overall workflow, ready-to-use notebook commands,
    advantages and limitations of Colab, and tips for saving work. Nothing
    is returned.
    """

    print("📋 COMPLETE GOOGLE COLAB USAGE GUIDE")
    print("=" * 60)

    print("🎯 STEP-BY-STEP COLAB WORKFLOW:")
    print("-" * 40)

    steps = [
        "1. Open Google Colab (colab.research.google.com)",
        "2. Create a new notebook or upload this one",
        "3. Enable GPU: Runtime → Change runtime type → GPU",
        "4. Run the setup cell (installs everything)",
        "5. Run the model verification cell",
        "6. Run the testing cell",
        "7. Create test data or use provided datasets",
        "8. Execute Phishpedia on your data"
    ]

    for step in steps:
        print(f"   {step}")

    print("\n🚀 READY-TO-USE COLAB COMMANDS:")
    print("-" * 40)

    # "!cmd" runs in a throw-away subshell, so "!git clone ... && cd X" does
    # not change the notebook's working directory. The directory change is
    # therefore listed as its own %cd command, which persists across cells.
    commands = {
        "Clone repository": "!git clone https://github.com/lindsey98/Phishpedia.git",
        "Enter repository (use %cd, not !cd, so the change persists)": "%cd Phishpedia",
        "Install dependencies": "!pip install selenium webdriver-manager opencv-python-headless",
        "Run on test data": "!python phishpedia.py --folder ./datasets/test_sites",
        "Check GPU": "import torch; print(f'GPU: {torch.cuda.is_available()}')",
        "List contents": "!ls -la",
        "Show help": "!python phishpedia.py --help"
    }

    for desc, cmd in commands.items():
        print(f"   {desc}:")
        print(f"     {cmd}")
        print()

    print("⚡ COLAB ADVANTAGES:")
    advantages = [
        "✅ Free GPU access (Tesla T4/K80)",
        "✅ Pre-installed PyTorch, OpenCV, NumPy",
        "✅ No local installation needed",
        "✅ Easy sharing and collaboration",
        "✅ Automatic dependency management",
        "✅ 12+ hours of continuous runtime"
    ]

    for advantage in advantages:
        print(f"   {advantage}")

    print("\n⚠️  COLAB LIMITATIONS:")
    limitations = [
        "❌ Session timeout after inactivity",
        "❌ Files deleted when session ends",
        "❌ Limited storage space (~78GB)",
        "❌ Network restrictions on some sites",
        "❌ Cannot install system packages easily"
    ]

    for limitation in limitations:
        print(f"   {limitation}")

    print("\n💾 SAVE YOUR WORK:")
    save_tips = [
        "• Save notebooks to Google Drive",
        "• Download results before session ends",
        "• Use !zip to compress output files",
        "• Mount Google Drive for persistent storage"
    ]

    for tip in save_tips:
        print(f"   {tip}")

# Create sample test data structure
def create_sample_test_data(base_dir="sample_test_sites"):
    """
    Create a sample test-site folder structure for Phishpedia.

    One sub-folder is created per sample site, each containing an
    ``info.txt`` file that holds the site's URL. Screenshots (``shot.png``)
    are not created; the printed instructions ask the user to add them.

    Args:
        base_dir: Directory in which the site folders are created. Defaults
            to ``sample_test_sites`` relative to the current working
            directory. Missing parent directories are created; existing
            folders are reused and existing ``info.txt`` files overwritten.

    Returns:
        None. Progress is reported with print().
    """

    print("\n📁 CREATING SAMPLE TEST DATA:")
    print("-" * 30)

    from pathlib import Path

    # Create test directory structure
    test_dir = Path(base_dir)
    test_dir.mkdir(parents=True, exist_ok=True)

    # Sample sites data
    sample_sites = [
        {
            "name": "test_site_1",
            "url": "https://example-phishing-site.com",
            "description": "Sample phishing site mimicking a bank"
        },
        {
            "name": "test_site_2",
            "url": "https://legitimate-site.com",
            "description": "Sample legitimate site"
        }
    ]

    for site in sample_sites:
        site_dir = test_dir / site["name"]
        site_dir.mkdir(exist_ok=True)

        # Create info.txt; explicit encoding keeps the written bytes
        # independent of the platform's default locale
        info_file = site_dir / "info.txt"
        info_file.write_text(site["url"], encoding="utf-8")

        print(f"✅ Created: {site_dir}/")
        print(f"   └── info.txt (contains: {site['url']})")
        print("   └── shot.png (you need to add screenshot)")
        print()

    # Relative targets are shown with a leading "./" (the default prints
    # "./sample_test_sites"); absolute targets are shown unchanged.
    if test_dir.is_absolute():
        folder_arg = test_dir.as_posix()
    else:
        folder_arg = f"./{test_dir.as_posix()}"

    print("📝 To complete the test data:")
    print("   1. Add screenshot files (shot.png) to each test site folder")
    print(f"   2. Run: !python phishpedia.py --folder {folder_arg}")

# Run the guide: print the Colab usage notes, then create the sample
# test-site folders under the current working directory
show_colab_usage_guide()
create_sample_test_data()