# In [ ]:  (notebook cell prompt left over from export)
import json
import os
import re
import time
from urllib.parse import urljoin, urlparse

import requests
from selenium import webdriver
from selenium.common.exceptions import TimeoutException, WebDriverException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

from src.config import config

class FanDuelPageSaver:
    """Snapshot a FanDuel sportsbook page (HTML, CSS, metadata) to disk.

    The instance owns a Selenium Chrome driver for its whole lifetime. Use it
    as a context manager, or call :meth:`close`, so the browser is shut down.
    """

    # Single source of truth for the user agent: it is passed both as a
    # Chrome command-line switch and as a CDP override.
    USER_AGENT = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36"
    )
    HOME_URL = "https://sportsbook.fanduel.com/"
    ODDS_WAIT_SECONDS = 20   # max wait for a moneyline element to appear
    SETTLE_SECONDS = 8       # extra pause for dynamic content after the wait
    AMERICAN_ODDS_PATTERN = r'[+-]\d{2,4}'

    # Patterns used by clean_html_for_offline. They are case-insensitive and
    # accept either quote style so hand-written HTML is handled as well as
    # browser-serialized HTML.
    _SCRIPT_RE = re.compile(
        r"<script\b[^>]*>.*?</script\s*>", re.IGNORECASE | re.DOTALL
    )
    _STYLESHEET_LINK_RE = re.compile(
        r"<link\b[^>]*\srel\s*=\s*[\"']?stylesheet\b[^>]*>", re.IGNORECASE
    )
    _HTML_OPEN_RE = re.compile(r"<html(?=[\s>])", re.IGNORECASE)

    _OFFLINE_NOTE = (
        "\n"
        "        <!-- \n"
        "        This is a cleaned version of the FanDuel page for scraping tests.\n"
        "        External scripts and stylesheets have been removed.\n"
        "        Look for data-test-id attributes and button elements for odds.\n"
        "        -->\n"
        "        "
    )

    def __init__(self, save_directory="fanduel_page_data"):
        """Create the output directories and launch the Chrome driver.

        Args:
            save_directory: Directory that receives every saved artifact and
                the persistent Chrome profile.
        """
        self.save_dir = save_directory
        self.setup_directories()
        self.setup_driver()

    def setup_directories(self) -> None:
        """Create the output directory and its css/js/images subdirectories."""
        for subdir in ("", "css", "js", "images"):
            os.makedirs(os.path.join(self.save_dir, subdir), exist_ok=True)

    def setup_driver(self) -> None:
        """Launch Chrome with a persistent profile and store it in ``self.driver``.

        If any configuration step after the launch fails, the browser is shut
        down before the exception propagates so no Chrome process is leaked.
        """
        # Persistent profile so cookies survive between runs. An absolute path
        # keeps the location independent of how Chrome resolves relative paths.
        user_data_dir = os.path.abspath(
            os.path.join(self.save_dir, "chrome_profile")
        )
        os.makedirs(user_data_dir, exist_ok=True)

        switches = (
            # Essential options
            "--no-sandbox",
            "--disable-dev-shm-usage",
            "--disable-blink-features=AutomationControlled",
            # Cookie and session management
            "--enable-cookies",
            # NOTE(review): this switch relaxes the browser's same-origin
            # checks for the whole session; confirm it is really required.
            "--disable-web-security",
            "--allow-running-insecure-content",
            "--disable-features=VizDisplayCompositor",
            f"--user-data-dir={user_data_dir}",
            # Browser behavior
            "--disable-extensions",
            "--disable-plugins-discovery",
            "--start-maximized",
            "--disable-default-apps",
            f"--user-agent={self.USER_AGENT}",
            # Quieter logging
            "--disable-logging",
            "--disable-gpu-logging",
            "--silent",
        )

        chrome_options = Options()
        for switch in switches:
            chrome_options.add_argument(switch)
        chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
        chrome_options.add_experimental_option('useAutomationExtension', False)

        self.driver = webdriver.Chrome(options=chrome_options)
        try:
            # NOTE(review): execute_script runs against the document that is
            # loaded right now; confirm this override still matters once the
            # first navigation has happened.
            self.driver.execute_script(
                "Object.defineProperty(navigator, 'webdriver', {get: () => undefined})"
            )
            self.driver.execute_cdp_cmd(
                'Network.setUserAgentOverride', {"userAgent": self.USER_AGENT}
            )
            self.driver.set_window_size(1920, 1080)
        except Exception:
            # Do not leave an orphaned browser behind when setup fails.
            self.close()
            raise

        print("✅ Chrome driver setup with cookies enabled")

    def save_complete_page(self, url: str) -> bool:
        """Load ``url`` with the configured cookies and save it with its assets.

        The FanDuel home page is visited first because cookies can only be
        added while the browser is on the matching domain.

        Args:
            url: Address of the FanDuel page to capture.

        Returns:
            True when every step completed, False when an error was reported.
            Errors are printed rather than raised.
        """
        print(f"Preparing to load FanDuel page: {url}")

        try:
            print("🍪 Step 1: Setting up cookies on FanDuel domain...")
            self.driver.get(self.HOME_URL)
            self.add_realistic_cookies()

            print(f"🎯 Step 2: Loading target page with cookies: {url}")
            self.driver.get(url)

            print("⏳ Step 3: Waiting for moneyline odds to load...")
            self._wait_for_moneyline_odds()

            print("⏳ Step 4: Waiting for dynamic content to load...")
            time.sleep(self.SETTLE_SECONDS)

            print("✅ FanDuel page loaded successfully with cookies!")

            self.save_html()
            self.save_css_files()
            self.save_page_metadata(url)
            self.create_offline_version()

            print(f"🎉 FanDuel page saved successfully to: {self.save_dir}")
            return True

        except Exception as e:
            print(f"❌ Error saving FanDuel page: {e}")
            self._report_failure_state()
            return False

    def _wait_for_moneyline_odds(self) -> None:
        """Block until a moneyline odds element is present, or raise RuntimeError."""
        locators = (
            # Primary target: moneyline odds elements
            (By.CSS_SELECTOR, "[aria-label*='Moneyline'][aria-label*='Odds']"),
            (By.XPATH, "//div[contains(@aria-label, 'Moneyline') and contains(@aria-label, 'Odds')]"),
            # Fallback selectors in case the structure changes
            (By.CSS_SELECTOR, "[data-test-id*='moneyline']"),
            (By.CSS_SELECTOR, "[aria-label*='Moneyline']"),
            (By.CSS_SELECTOR, "div[role='button'][aria-label*='Odds']"),
        )
        conditions = [EC.presence_of_element_located(locator) for locator in locators]

        try:
            WebDriverWait(self.driver, self.ODDS_WAIT_SECONDS).until(
                EC.any_of(*conditions)
            )
        except TimeoutException as exc:
            self._report_odds_timeout()
            raise RuntimeError(
                "Failed to find moneyline odds elements - see debug info above"
            ) from exc

        print("✅ Moneyline odds elements found!")

    def _report_odds_timeout(self) -> None:
        """Print diagnostics for a moneyline wait timeout and dump the page source."""
        print(f"❌ TIMEOUT: Moneyline odds elements not found after {self.ODDS_WAIT_SECONDS} seconds")
        print("🔍 Debugging info:")
        current_url = self.driver.current_url
        print(f"   Current URL: {current_url}")
        print(f"   Page title: {self.driver.title}")

        # Show a sample of what the page does expose.
        labelled = self.driver.find_elements(By.CSS_SELECTOR, "[aria-label]")
        if labelled:
            print(f"   Found {len(labelled)} elements with aria-label:")
            for position, elem in enumerate(labelled[:5], start=1):
                label = (elem.get_attribute("aria-label") or "")[:100]
                print(f"     {position}. {label}")
            if len(labelled) > 5:
                print(f"     ... and {len(labelled) - 5} more")
        else:
            print("   No elements with aria-label found")

        # Fetch the source once; each access is a round trip to the browser.
        page_source = self.driver.page_source
        lowered = page_source.lower()

        if "fanduel" not in current_url.lower():
            print("❗ ERROR: Not on FanDuel page - check URL and cookies")
        elif "verify" in lowered or "human" in lowered:
            print("❗ ERROR: Still getting bot verification - cookies may be expired")
        elif "blocked" in lowered or "access denied" in lowered:
            print("❗ ERROR: Access blocked - may need fresh VPN/cookies")
        else:
            print("❗ ERROR: On FanDuel page but moneyline odds not loading")
            print("   Possible causes:")
            print("   - Game may not be available for betting")
            print("   - Page structure changed")
            print("   - Odds not yet published")
            print("   - Region restrictions")

        debug_file = os.path.join(self.save_dir, "debug_page_source.html")
        with open(debug_file, "w", encoding="utf-8") as f:
            f.write(page_source)
        print(f"   💾 Page source saved to: {debug_file}")

    def _report_failure_state(self) -> None:
        """Print the page title and a bot-detection hint after a failed save."""
        try:
            print("Current page title:", self.driver.title)
            lowered = self.driver.page_source.lower()
        except (WebDriverException, AttributeError) as driver_error:
            # The browser itself may be the thing that failed; reporting must
            # not raise out of the error handler.
            print(f"   (could not inspect browser state: {driver_error})")
            return

        if "verify" in lowered or "human" in lowered:
            print("🤖 Still getting bot detection. The cookies might be session-bound.")
            print("💡 Try: Export fresh cookies and run script immediately after export")

    def add_realistic_cookies(self) -> None:
        """Add the cookies listed in ``config.FANDUEL_COOKIES`` to the driver.

        Best effort: a cookie that cannot be added is reported and skipped,
        and the final message states how many were actually added.
        """
        try:
            cookies = config.FANDUEL_COOKIES
            if not cookies:
                print("❌ No FanDuel cookies found in configuration")
                print("💡 Make sure fanduel_cookies.yml file exists and contains valid cookies")
                return

            added = 0
            for cookie_data in cookies:
                try:
                    cookie = {
                        'name': cookie_data['name'],
                        'value': cookie_data['value'],
                        'domain': cookie_data['domain'],
                        'path': cookie_data.get('path', '/'),
                        'secure': cookie_data.get('secure', False)
                    }
                    self.driver.add_cookie(cookie)
                except Exception as e:
                    # Look the name up defensively: a missing key may be the
                    # very reason this cookie failed.
                    if isinstance(cookie_data, dict):
                        label = cookie_data.get('name', '<unnamed>')
                    else:
                        label = repr(cookie_data)
                    print(f"  ❌ Failed: {label} - {e}")
                else:
                    added += 1
                    print(f"  ✅ Added: {cookie['name']} (domain: {cookie['domain']})")

            print(f"✅ Added {added}/{len(cookies)} FanDuel cookies from configuration")

        except Exception as e:
            print(f"❌ Error adding cookies: {e}")

    def save_html(self) -> None:
        """Write the current page source as original.html and cleaned.html."""
        print("Saving FanDuel HTML...")

        html_content = self.driver.page_source

        with open(os.path.join(self.save_dir, "original.html"), "w", encoding="utf-8") as f:
            f.write(html_content)

        cleaned_html = self.clean_html_for_offline(html_content)
        with open(os.path.join(self.save_dir, "cleaned.html"), "w", encoding="utf-8") as f:
            f.write(cleaned_html)

        print("✅ FanDuel HTML saved")

    def save_css_files(self) -> None:
        """Download linked FanDuel stylesheets and dump inline ``<style>`` blocks.

        Failures are printed and never raised. Linked stylesheets are written
        as the raw bytes received, so their original encoding is preserved.
        """
        print("Saving FanDuel CSS files...")
        css_dir = os.path.join(self.save_dir, "css")

        try:
            css_links = self.driver.find_elements(By.CSS_SELECTOR, "link[rel='stylesheet']")

            for i, link in enumerate(css_links):
                try:
                    href = link.get_attribute("href")
                    if not href or "fanduel" not in href:  # Only save FanDuel CSS
                        continue
                    response = requests.get(href, timeout=10)
                    if response.status_code != 200:
                        print(f"  ⚠️ Skipped (HTTP {response.status_code}): {href}")
                        continue
                    filename = f"fanduel_styles_{i}.css"
                    # Write bytes: response.text may be decoded with a wrong
                    # fallback charset when the server declares none.
                    with open(os.path.join(css_dir, filename), "wb") as f:
                        f.write(response.content)
                    print(f"  ✅ Saved: {filename}")
                except Exception as e:
                    print(f"  ❌ Failed to save CSS: {e}")

            inline_styles = self.driver.find_elements(By.TAG_NAME, "style")
            for i, style in enumerate(inline_styles):
                style_content = style.get_attribute("innerHTML")
                if style_content:
                    with open(os.path.join(css_dir, f"inline_{i}.css"), "w", encoding="utf-8") as f:
                        f.write(style_content)

        except Exception as e:
            print(f"❌ Error saving CSS: {e}")

    def save_page_metadata(self, url: str) -> None:
        """Write metadata.json describing the page and the selectors found on it.

        Args:
            url: Address recorded in the metadata as the page's origin.
        """
        print("Saving FanDuel page metadata...")

        try:
            metadata = {
                "url": url,
                "site": "FanDuel",
                "title": self.driver.title,
                "timestamp": time.strftime('%Y-%m-%d %H:%M:%S'),
                "page_structure": self.analyze_fanduel_structure(),
                "odds_elements": self.find_fanduel_odds_elements(),
                "useful_selectors": self.find_fanduel_selectors()
            }

            with open(os.path.join(self.save_dir, "metadata.json"), "w", encoding="utf-8") as f:
                json.dump(metadata, f, indent=2)

            print("✅ FanDuel metadata saved")

        except Exception as e:
            print(f"❌ Error saving metadata: {e}")

    def analyze_fanduel_structure(self) -> dict:
        """Count elements matching a fixed set of probes on the current page.

        Returns:
            Mapping of probe name to match count, or ``{"error": message}``
            if a lookup fails.
        """
        probes = {
            "total_elements": (By.XPATH, "//*"),
            "data_test_ids": (By.CSS_SELECTOR, "[data-test-id]"),
            "bet_buttons": (By.CSS_SELECTOR, "button[class*='bet'], button[class*='outcome']"),
            "odds_containers": (By.CSS_SELECTOR, "[class*='odds'], [class*='price']"),
            "moneyline_elements": (By.CSS_SELECTOR, "[data-test-id*='moneyline'], [class*='moneyline']"),
            "team_elements": (By.CSS_SELECTOR, "[class*='team'], [class*='participant']"),
        }
        try:
            return {
                name: len(self.driver.find_elements(by, query))
                for name, (by, query) in probes.items()
            }
        except Exception as e:
            return {"error": str(e)}

    def find_fanduel_odds_elements(self) -> list:
        """Report, per candidate selector, how many elements match plus samples.

        Returns:
            One dict per selector that matched at least one element, or
            ``[{"error": message}]`` if a lookup fails.
        """
        fanduel_selectors = [
            "[data-test-id*='odd']",
            "[data-test-id*='moneyline']",
            "[data-test-id*='outcome']",
            ".sportsbook-odds",
            "button[class*='bet']",
            "button[class*='outcome']",
            "[class*='odds']",
            "[class*='price']"
        ]
        odds_info = []

        try:
            for selector in fanduel_selectors:
                elements = self.driver.find_elements(By.CSS_SELECTOR, selector)
                if not elements:
                    continue
                # Read each element's text once; every access is a round trip.
                texts = [elem.text for elem in elements[:3]]
                odds_info.append({
                    "selector": selector,
                    "count": len(elements),
                    "sample_texts": [text[:50] for text in texts if text.strip()],
                    "sample_attributes": [
                        {
                            "data-test-id": elem.get_attribute("data-test-id"),
                            "class": elem.get_attribute("class")
                        } for elem in elements[:2]
                    ]
                })

            return odds_info

        except Exception as e:
            return [{"error": str(e)}]

    def _sample_selector(self, selector: str):
        """Return a summary dict for ``selector``, or None when nothing matches."""
        elements = self.driver.find_elements(By.CSS_SELECTOR, selector)
        if not elements:
            return None
        texts = [elem.text for elem in elements[:3]]
        return {
            # Record exactly the selector that was queried so the reported
            # count can be reproduced.
            "selector": selector,
            "count": len(elements),
            "samples": [text[:50] for text in texts if text.strip()]
        }

    def find_fanduel_selectors(self) -> dict:
        """Collect selectors and text patterns that look useful for scraping.

        Returns:
            Dict with optional ``game_info``, ``moneyline`` and
            ``american_odds_found`` entries, or ``{"error": message}`` if a
            lookup fails.
        """
        candidates = {
            "game_info": "h1, h2, [data-test-id*='game'], [data-test-id*='event'], "
                         "[class*='game'], [class*='event']",
            "moneyline": "[data-test-id*='moneyline'], [class*='moneyline']",
        }
        selectors = {}

        try:
            for key, selector in candidates.items():
                summary = self._sample_selector(selector)
                if summary is not None:
                    selectors[key] = summary

            # Look for American odds patterns in the visible text.
            all_text = self.driver.find_element(By.TAG_NAME, "body").text
            american_odds_matches = re.findall(self.AMERICAN_ODDS_PATTERN, all_text)
            if american_odds_matches:
                selectors["american_odds_found"] = {
                    "pattern": self.AMERICAN_ODDS_PATTERN,
                    "samples": american_odds_matches[:10]
                }

            return selectors

        except Exception as e:
            return {"error": str(e)}

    def clean_html_for_offline(self, html_content: str) -> str:
        """Strip scripts and stylesheet links and prepend an explanatory comment.

        Matching ignores tag case and attribute quote style. The comment is
        inserted once, before the first opening ``<html`` tag; if there is no
        such tag, no comment is added.

        Args:
            html_content: Page source to clean.

        Returns:
            The cleaned HTML.
        """
        # One pass removes external and inline scripts alike.
        html_content = self._SCRIPT_RE.sub('', html_content)

        # Stylesheet links are dropped; the stylesheets are saved separately.
        html_content = self._STYLESHEET_LINK_RE.sub('', html_content)

        # A callable replacement keeps the tag as written and avoids any
        # backslash interpretation in the inserted text.
        return self._HTML_OPEN_RE.sub(
            lambda match: self._OFFLINE_NOTE + '\n' + match.group(0),
            html_content,
            count=1,
        )

    def create_offline_version(self) -> None:
        """Write test_page.html, a small hand-written stand-in for the real page."""
        print("Creating FanDuel offline test version...")

        # Plain string: the template has no placeholders, so CSS braces need
        # no escaping.
        offline_html = """
<!DOCTYPE html>
<html>
<head>
    <title>FanDuel Test Page - Padres vs Brewers</title>
    <style>
        body { font-family: Arial, sans-serif; margin: 20px; background: #1a1a1a; color: white; }
        .game-header { background: #0f4c75; padding: 20px; margin-bottom: 20px; }
        .moneyline-container { background: #2a2a2a; padding: 20px; border-radius: 8px; }
        .bet-button { 
            background: #4caf50; color: white; border: none; padding: 12px 24px; 
            margin: 5px; border-radius: 4px; cursor: pointer; font-weight: bold;
        }
        .bet-button:hover { background: #45a049; }
        .team-row { display: flex; justify-content: space-between; align-items: center; margin: 10px 0; }
        .team-name { font-weight: bold; font-size: 16px; }
        .odds-display { font-size: 18px; font-weight: bold; }
    </style>
</head>
<body>
    <div class="game-header">
        <h1 data-test-id="game-title">San Diego Padres @ Milwaukee Brewers</h1>
        <p>MLB - Moneyline</p>
    </div>
    
    <div class="moneyline-container" data-test-id="moneyline-market">
        <h2>Moneyline</h2>
        
        <div class="team-row" data-test-id="padres-row">
            <span class="team-name">San Diego Padres</span>
            <button class="bet-button" data-test-id="padres-moneyline-outcome">
                <span class="odds-display">-102</span>
            </button>
        </div>
        
        <div class="team-row" data-test-id="brewers-row">
            <span class="team-name">Milwaukee Brewers</span>
            <button class="bet-button" data-test-id="brewers-moneyline-outcome">
                <span class="odds-display">-116</span>
            </button>
        </div>
    </div>
    
    <!-- Additional test elements -->
    <div class="sportsbook-odds" data-test-id="all-odds">
        <div data-test-id="outcome-padres">Padres -102</div>
        <div data-test-id="outcome-brewers">Brewers -116</div>
    </div>
    
</body>
</html>
        """

        with open(os.path.join(self.save_dir, "test_page.html"), "w", encoding="utf-8") as f:
            f.write(offline_html)

        print("✅ FanDuel offline test page created")

    def close(self) -> None:
        """Quit the browser if one is running; safe to call more than once."""
        driver = getattr(self, 'driver', None)
        if driver is not None:
            # Clear the reference first so a second call is a no-op even if
            # quit() raises.
            self.driver = None
            driver.quit()

    def __enter__(self):
        """Return this saver for use in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Close the browser on leaving the ``with`` block; exceptions propagate."""
        self.close()

# Usage
def main():
    """Save a snapshot of one hard-coded FanDuel game page and list the outputs."""
    # FanDuel URL (corrected)
    target_url = "https://sportsbook.fanduel.com/baseball/mlb/san-diego-padres-@-milwaukee-brewers-34394252"

    intro_lines = (
        "🚀 Starting FanDuel page saver with enhanced bot detection evasion...",
        "💡 If you still get blocked, try:",
        "   1. Run with headless=False to see what's happening",
        "   2. Use a VPN to change your IP",
        "   3. Wait a few minutes between attempts",
    )
    for line in intro_lines:
        print(line)

    divider = "=" * 50
    summary_lines = (
        "\n" + divider,
        "FANDUEL FILES SAVED:",
        divider,
        "📁 fanduel_page_data/",
        "   📄 original.html      - Full FanDuel page source",
        "   📄 cleaned.html       - Cleaned for testing",
        "   📄 test_page.html     - Simplified test version",
        "   📄 metadata.json      - FanDuel page analysis",
        "   📁 css/               - FanDuel stylesheets",
        "   📁 chrome_profile/    - Persistent browser profile",
        "\n💡 Key FanDuel selectors to look for:",
        "   - [data-test-id*='moneyline']",
        "   - [data-test-id*='outcome']",
        "   - button[class*='bet']",
        "   - American odds pattern: [+-]\\d{2,4}",
    )

    # The summary is printed while the browser is still open, exactly as the
    # save step finishes; the context manager closes the browser afterwards.
    with FanDuelPageSaver() as saver:
        saver.save_complete_page(target_url)
        for line in summary_lines:
            print(line)

# Alternative: Manual cookie setup for testing
def test_with_manual_cookies():
    """Open FanDuel in Chrome using an existing user profile for a manual check.

    Prints the manual preparation steps, launches Chrome against the
    hard-coded profile directory, loads the sportsbook home page and waits
    for the operator to press Enter. The browser is always closed afterwards,
    even if loading the page or reading input fails.
    """
    print("🛠️  MANUAL SETUP APPROACH:")
    print("1. Open Chrome manually")
    print("2. Go to FanDuel and accept cookies")
    print("3. Close Chrome")
    print("4. Run this script - it will use the same profile")

    # Use system Chrome profile. The path is a placeholder and must be edited
    # to match the local account.
    chrome_options = Options()
    chrome_options.add_argument("--user-data-dir=/Users/yourname/Library/Application Support/Google/Chrome")  # macOS
    # Windows equivalent:
    #   --user-data-dir=C:\Users\yourname\AppData\Local\Google\Chrome\User Data

    driver = webdriver.Chrome(options=chrome_options)
    try:
        driver.get("https://sportsbook.fanduel.com")
        input("Press Enter after you see the page loads correctly...")
    finally:
        # Guarantee the browser is shut down on every exit path.
        driver.quit()

# Run the page saver only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()