# In [None]:
# Scoping Gambling-Automation
# Run Page Scoring Algorithms

import re
from bs4 import BeautifulSoup
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer
from nltk.tokenize import sent_tokenize, word_tokenize
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from datetime import datetime
import time
import os
import logging
import json
import keyboard
import glob

# Fetch the NLTK resources this module relies on: the VADER lexicon backs
# SentimentIntensityAnalyzer and 'punkt' backs the imported tokenizers.
for _resource in ('vader_lexicon', 'punkt'):
    nltk.download(_resource, quiet=True)

class OnlineGamblingScoring:
    """
    Main class for analyzing and scoring online gambling websites.
    Includes methods for analyzing mobile optimization, game rules,
    terms and conditions, responsible gaming features, and language use.

    Every analysis method returns a dict holding at least a 'score' entry
    (a number capped at 10, or None when nothing could be analyzed) and a
    'findings' list of human-readable strings.
    """

    # Relative weight of each category in the overall score.
    CATEGORY_WEIGHTS = {
        'mobile_optimization': 0.15,
        'game_rules': 0.25,
        'tc_accessibility': 0.20,
        'responsible_gaming': 0.25,
        'language_neutrality': 0.15
    }

    # Highest raw total the language analysis can award:
    # odds (2 + 2 + 1) + prizes (1 + 2) + tone (2 + 2).
    LANGUAGE_MAX_RAW_SCORE = 12

    # A digit immediately followed by a relative CSS unit ("100vw", "1.5rem",
    # "50%"). A plain \b around the unit cannot be used: there is no word
    # boundary between a digit and a letter, nor after a '%'.
    FLEXIBLE_UNIT_PATTERN = re.compile(r'\d(?:vw|vh|vmin|vmax|rem|em|%)(?![\w-])')

    def __init__(self, html_content):
        """Initialize with HTML content to analyze"""
        self.html_content = html_content
        self.soup = BeautifulSoup(html_content, 'html.parser')

        # Setup logging (has no effect when the root logger is already configured)
        logging.basicConfig(
            filename='gambling_analysis.log',
            level=logging.INFO,
            format='%(asctime)s - %(levelname)s - %(message)s'
        )

    def get_complete_analysis(self):
        """
        Run all analysis methods and return comprehensive results

        Returns a dict keyed by category name whose values are the result
        dicts of the individual analysis methods.
        """
        return {
            'mobile_optimization': self.score_mobile_optimization(),
            'game_rules': self.analyze_game_rules_clarity(),
            'tc_accessibility': self.assess_tc_accessibility(),
            'responsible_gaming': self.extract_responsible_gaming_features(),
            'language_neutrality': self.analyze_language_neutrality()
        }

    def calculate_overall_score(self):
        """
        Calculate overall site score based on all metrics

        Returns a dict with:
          'total_score'         - weighted sum of the category scores
          'weighted_scores'     - per-category weighted contribution
          'unscored_categories' - categories whose score was None; they
                                  contribute 0 to the total
          'analysis'            - the full per-category analysis
        """
        analysis = self.get_complete_analysis()

        weighted_scores = {}
        unscored_categories = []
        total_score = 0

        for category, weight in self.CATEGORY_WEIGHTS.items():
            if category not in analysis:
                continue
            score = analysis[category].get('score', 0)
            if score is None:
                # e.g. language analysis found no text; do not crash on None * weight
                unscored_categories.append(category)
                score = 0
            weighted_scores[category] = score * weight
            total_score += weighted_scores[category]

        return {
            'total_score': total_score,
            'weighted_scores': weighted_scores,
            'unscored_categories': unscored_categories,
            'analysis': analysis
        }

    @staticmethod
    def _count_flexible_units(css_text):
        """Count values in css_text that use a relative unit (vw, vh, vmin, vmax, rem, em, %)."""
        return len(OnlineGamblingScoring.FLEXIBLE_UNIT_PATTERN.findall(css_text))

    def score_mobile_optimization(self):
        """
        Analyze mobile optimization features and score them

        Looks at the viewport meta tag, @media rules and relative units in
        inline <style> blocks, and responsive / touch related class names.
        Returns {'score': number capped at 10, 'findings': list of str}.
        """
        score = 0
        findings = []

        # Check viewport meta tag
        viewport = self.soup.find('meta', attrs={'name': 'viewport'})
        if viewport is not None:
            score += 1
            findings.append("Viewport meta tag present")
            content = viewport.get('content', '').lower()
            if 'width=device-width' in content:
                score += 0.5
                findings.append("Responsive width configuration")
            if 'initial-scale=1' in content:
                score += 0.5
                findings.append("Proper initial scaling")

        # Text of the inline stylesheets (tags without a single string child are skipped)
        css_blocks = [tag.string for tag in self.soup.find_all('style') if tag.string]

        # Check for media queries: count the queries themselves, not the tags holding them
        media_queries = sum(css.count('@media') for css in css_blocks)
        if media_queries:
            score += min(media_queries, 3)
            findings.append(f"Media queries present: {media_queries}")

        # Check for responsive classes
        responsive_patterns = r'(responsive|mobile|sm-|md-|lg-|xl-|flex-|grid-)'
        responsive_classes = len(self.soup.find_all(class_=re.compile(responsive_patterns)))
        if responsive_classes:
            score += min(responsive_classes / 10, 2)
            findings.append(f"Responsive classes found: {responsive_classes}")

        # Check for flexible units
        flexible_units = self._count_flexible_units(' '.join(css_blocks))
        if flexible_units:
            score += min(flexible_units / 20, 1)
            findings.append(f"Flexible units used: {flexible_units}")

        # Check for touch-friendly elements
        touch_elements = len(self.soup.find_all(class_=re.compile(r'(touch|swipe|tap)')))
        if touch_elements:
            score += min(touch_elements / 5, 1)
            findings.append(f"Touch-friendly elements: {touch_elements}")

        return {
            'score': min(score, 10),  # Cap at 10
            'findings': findings
        }

    def analyze_game_rules_clarity(self):
        """
        Analyze the clarity and accessibility of game rules

        Inspects every <div>/<section> whose id mentions rules, procedures,
        instructions or how-to-play. Returns {'score', 'findings',
        'rules_structure'}; a page without such a container scores 0.
        """
        score = 0
        findings = []
        rules_structure = {
            'sections_found': [],
            'has_odds': False,
            'has_prize_structure': False,
            'has_gameplay_instructions': False
        }

        # Find rules section
        rules_containers = self.soup.find_all(
            ['div', 'section'],
            id=re.compile(r'(rules|procedures|instructions|how-to-play)', re.I)
        )

        # Any one of these phrases counts as odds information
        odds_patterns = [
            r'1 in \d+(?:\.\d+)?',
            r'odds of winning',
            r'chance of winning',
            r'probability'
        ]
        odds_regex = re.compile('|'.join(odds_patterns), re.I)

        list_items = 0
        for container in rules_containers:
            # Check for structured sections
            for header in container.find_all(['h1', 'h2', 'h3', 'h4']):
                rules_structure['sections_found'].append(header.text.strip())
                score += 0.5  # Points for each structured section

            # Check for odds information
            if container.find(string=odds_regex):
                rules_structure['has_odds'] = True
                score += 2
                findings.append("Odds information clearly stated")

            # Check for prize structure
            prize_table = container.find('table', class_=re.compile(r'(odds|prize)', re.I))
            if prize_table is not None:
                rules_structure['has_prize_structure'] = True
                score += 2
                findings.append("Prize structure table present")

            # Check for gameplay instructions
            if container.find(string=re.compile(r'how to play|gameplay|instructions', re.I)):
                rules_structure['has_gameplay_instructions'] = True
                score += 2
                findings.append("Clear gameplay instructions provided")

            # Tally list markup per container so the check below also works
            # when there are no containers or more than one
            list_items += len(container.find_all(['li', 'ul', 'ol']))

        # Check for bullet points or numbered lists (easy-to-follow format)
        if list_items:
            score += min(list_items / 10, 1)
            findings.append(f"Structured list items found: {list_items}")

        return {
            'score': min(score, 10),  # Cap at 10
            'findings': findings,
            'rules_structure': rules_structure
        }

    # Part 3 - T&C accessibility and responsible gaming analysis methods

    def assess_tc_accessibility(self):
        """
        Analyze the accessibility of Terms and Conditions

        Scores T&C links (placement, target, labelling), T&C content
        embedded in the page, and how early in the markup either appears.
        Returns {'score', 'findings', 'link_count', 'has_direct_content'}.
        """
        score = 0
        findings = []

        # Look for T&C links
        tc_patterns = [
            r'terms|conditions|rules|procedures',
            r'important information',
            r'legal|disclaimer',
            r'privacy policy'
        ]
        tc_regex = re.compile('|'.join(tc_patterns), re.I)

        tc_links = list(self.soup.find_all('a', string=tc_regex))

        # Score based on link placement and visibility
        for link in tc_links:
            # Check if link is in navigation/header
            if link.find_parent(['nav', 'header']) is not None:
                score += 2
                findings.append("T&C link in primary navigation")

            # Check if link opens in new window (user-friendly)
            if link.get('target') == '_blank':
                score += 0.5
                findings.append("T&C opens in new window")

            # Check if link has clear labeling
            if tc_regex.search(link.text):
                score += 0.5
                findings.append(f"Clear T&C link text: {link.text}")

        # Check for direct T&C content
        tc_content = self.soup.find('div', id=re.compile(r'(terms|conditions|rules)[-_]?content', re.I))
        if tc_content is not None:
            score += 2
            findings.append("Direct T&C content on page")

            # Check content structure
            headers = tc_content.find_all(['h1', 'h2', 'h3'])
            if headers:
                score += min(len(headers) / 2, 1)
                findings.append(f"Structured T&C content with {len(headers)} sections")

        # Check for prominence/visibility: how far into the serialized page
        # the earliest T&C element appears
        html_string = str(self.soup)
        candidates = tc_links + ([tc_content] if tc_content is not None else [])
        positions = []
        for element in candidates:
            position = html_string.find(str(element))
            if position != -1:
                positions.append(position)

        if positions:
            relative_position = min(positions) / len(html_string)
            if relative_position < 0.3:
                score += 2
                findings.append("T&C information prominently placed")
            elif relative_position < 0.6:
                score += 1
                findings.append("T&C information moderately placed")

        return {
            'score': min(score, 10),
            'findings': findings,
            'link_count': len(tc_links),
            'has_direct_content': tc_content is not None
        }

    def extract_responsible_gaming_features(self):
        """
        Analyze responsible gaming features and their prominence

        Returns {'score', 'findings', 'has_nav_links',
        'has_age_verification'}. 'has_nav_links' is True only when a
        responsible-gaming link was found inside a <nav> or <header>.
        """
        score = 0
        findings = []

        # Define responsible gaming indicators
        rg_patterns = {
            'primary': [
                r'responsible\s+(?:gaming|gambling|play)',
                r'healthy\s+play',
                r'safer\s+gambling',
                r'play\s+safe',
                r'gambling\s+limits'
            ],
            'secondary': [
                r'take\s+a\s+break',
                r'set\s+a\s+limit',
                r'play\s+within\s+your\s+means',
                r'age\s+verification',
                r'self[-\s]exclusion'
            ]
        }

        # Check for responsible gaming links in navigation
        has_nav_links = False
        for nav in self.soup.find_all(['nav', 'header']):
            for pattern in rg_patterns['primary']:
                rg_links = nav.find_all('a', string=re.compile(pattern, re.I))
                if rg_links:
                    has_nav_links = True
                    score += 2
                    findings.append(f"Primary RG link in navigation: {rg_links[0].text.strip()}")

        # Check for age verification messaging
        age_elements = self.soup.find_all(string=re.compile(r'18\+|18\s+or\s+over|age\s+verification', re.I))
        if age_elements:
            score += 1
            findings.append("Age verification messaging present")

            # Check prominence of age verification
            for element in age_elements:
                if element.parent.name in ['strong', 'b', 'h1', 'h2', 'h3']:
                    score += 0.5
                    findings.append("Prominent age verification display")

        # Check for responsible gaming content
        for pattern_type, patterns in rg_patterns.items():
            for pattern in patterns:
                elements = self.soup.find_all(string=re.compile(pattern, re.I))
                if elements:
                    score += 2 if pattern_type == 'primary' else 1
                    findings.append(f"Found {pattern_type} RG messaging: {elements[0].strip()}")

        # Check for responsible gaming tools/features mentions
        tool_patterns = [
            r'deposit\s+limit',
            r'loss\s+limit',
            r'session\s+(?:limit|timer)',
            r'reality\s+check',
            r'time\s+out'
        ]

        for pattern in tool_patterns:
            if self.soup.find(string=re.compile(pattern, re.I)):
                score += 0.5
                findings.append(f"RG tool mentioned: {pattern}")

        return {
            'score': min(score, 10),
            'findings': findings,
            'has_nav_links': has_nav_links,
            'has_age_verification': bool(age_elements)
        }

    # Language Neutrality

    def _get_game_content(self):
        """Extract game-related text, keyed by the page area it came from."""
        content_areas = {
            'description': self.soup.find('div', class_='copy_block'),
            'procedures': self.soup.find('div', id='procedures_content'),
            'rules': self.soup.find('div', class_=re.compile(r'rules|instructions', re.I))
        }
        return {
            area: element.get_text(strip=True) if element is not None else ''
            for area, element in content_areas.items()
        }

    @staticmethod
    def _score_odds_presentation(text):
        """Score how odds and probabilities are presented; returns (score, findings)."""
        odds_patterns = [
            (r'1 in \d+(?:\.\d+)?', 2, "Clear numerical odds"),
            (r'\d+(?:\.\d+)?\s*%\s*chance', 2, "Percentage chance stated"),
            (r'probability of winning', 1, "Probability mentioned")
        ]
        score = 0
        findings = []
        for pattern, points, message in odds_patterns:
            if re.search(pattern, text, re.I):
                score += points
                findings.append(message)
        return score, findings

    def _score_prize_presentation(self, text):
        """Score how prizes are presented; returns (score, findings)."""
        score = 0
        findings = []

        # Check for clear monetary values
        if re.search(r'£\d+(?:,\d+)*(?:\.\d+)?', text):
            score += 1
            findings.append("Clear currency presentation")

        # Check for structured prize information
        if self.soup.find('table', class_='odds_table') is not None:
            score += 2
            findings.append("Structured prize information")

        return score, findings

    @staticmethod
    def _score_language_tone(text):
        """Score punctuation emphasis and VADER sentiment; returns (score, findings)."""
        score = 0
        findings = []

        # Count exclamation marks
        exclamation_count = text.count('!')
        if exclamation_count == 0:
            score += 2
            findings.append("Neutral punctuation use")
        elif exclamation_count <= 2:
            score += 1
            findings.append("Moderate emphasis in punctuation")

        # Analyze sentiment: the closer the compound score is to 0, the more neutral
        compound = abs(SentimentIntensityAnalyzer().polarity_scores(text)['compound'])
        if compound < 0.3:
            score += 2
            findings.append("Neutral emotional tone")
        elif compound < 0.6:
            score += 1
            findings.append("Moderate emotional tone")

        return score, findings

    def analyze_language_neutrality(self):
        """
        Analyze language neutrality in gambling content,
        focusing only on verifiable content from the HTML

        Returns {'score', 'findings', 'analyzable'} plus, when text was
        found, 'details' with the raw odds / prize / tone sub-scores.
        'score' is None (and 'analyzable' False) when the page has no game
        text; otherwise it is the raw total rescaled from
        0..LANGUAGE_MAX_RAW_SCORE to 0..10.
        """
        # Combine all content for analysis
        full_text = ' '.join(self._get_game_content().values())

        if not full_text.strip():
            return {
                'score': None,
                'findings': ['Insufficient text content for analysis'],
                'analyzable': False
            }

        # Analyze different aspects
        odds_score, odds_findings = self._score_odds_presentation(full_text)
        prize_score, prize_findings = self._score_prize_presentation(full_text)
        tone_score, tone_findings = self._score_language_tone(full_text)

        total_score = odds_score + prize_score + tone_score
        # Normalize to 0-10 scale
        normalized_score = min(total_score / self.LANGUAGE_MAX_RAW_SCORE * 10, 10)

        return {
            'score': normalized_score,
            'findings': odds_findings + prize_findings + tone_findings,
            'analyzable': True,
            'details': {
                'odds_score': odds_score,
                'prize_score': prize_score,
                'tone_score': tone_score
            }
        }

class GamblingSiteMonitor:
    """
    Selenium-based monitoring system for gambling websites

    Opens each configured site in Chrome and, driven by single key presses,
    captures a screenshot, the page HTML and a JSON analysis of that HTML
    into sub-directories of `output_dir`.
    """
    def __init__(self, urls, output_dir='screenshots', capture_key='c'):
        """
        urls        - mapping of site name -> URL to open
        output_dir  - root directory for screenshots, HTML and analysis files
        capture_key - key name that triggers a capture of the current site
        """
        self.urls = urls
        self.output_dir = output_dir
        self.capture_key = capture_key
        self.setup_directories()
        self.setup_logging()
        self.setup_chrome_options()

    def setup_directories(self):
        """Create necessary output directories"""
        directories = [
            self.output_dir,
            f"{self.output_dir}/screenshots",
            f"{self.output_dir}/analysis",
            f"{self.output_dir}/raw_html"
        ]
        for directory in directories:
            os.makedirs(directory, exist_ok=True)

    def setup_logging(self):
        """Configure logging"""
        logging.basicConfig(
            filename=f"{self.output_dir}/monitoring.log",
            level=logging.INFO,
            format='%(asctime)s - %(levelname)s - %(message)s'
        )

    def setup_chrome_options(self):
        """Configure Chrome options"""
        self.chrome_options = Options()
        self.chrome_options.add_argument("--window-size=1920,1080")
        self.chrome_options.add_argument("--disable-gpu")
        self.chrome_options.add_argument("--no-sandbox")
        self.chrome_options.add_argument("--disable-dev-shm-usage")

    def capture_and_analyze(self, driver, site_name, url):
        """
        Capture screenshot and perform analysis

        Writes `<site_name>_<timestamp>` files (.png, .html, .json) under the
        output directory. `url` is accepted for interface compatibility and
        is not used. Returns True on success, False if any step failed (the
        error is logged).
        """
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

        try:
            # Take screenshot
            screenshot_path = f"{self.output_dir}/screenshots/{site_name}_{timestamp}.png"
            driver.save_screenshot(screenshot_path)

            # Read the page once so the saved HTML is exactly what gets analyzed
            page_source = driver.page_source

            # Save HTML
            html_path = f"{self.output_dir}/raw_html/{site_name}_{timestamp}.html"
            with open(html_path, 'w', encoding='utf-8') as f:
                f.write(page_source)

            # Perform analysis
            analyzer = OnlineGamblingScoring(page_source)
            analysis = analyzer.get_complete_analysis()

            # Save analysis
            analysis_path = f"{self.output_dir}/analysis/{site_name}_{timestamp}.json"
            with open(analysis_path, 'w', encoding='utf-8') as f:
                json.dump(analysis, f, indent=4)

            logging.info(f"Captured and analyzed {site_name}: {screenshot_path}")
            return True

        except Exception as e:
            # Best effort: a failed capture must not end the monitoring session
            logging.exception(f"Error capturing {site_name}: {str(e)}")
            return False

    def _show_site(self, driver, site_items, index, prefix=""):
        """Load the site at `index` in the browser and announce it on stdout."""
        site_name, url = site_items[index]
        driver.get(url)
        print(f"{prefix}Viewing: {site_name}")

    def monitor_sites(self):
        """
        Main monitoring loop

        Blocks reading key presses until 'q' is pressed: the capture key
        captures the current site, 'n' / 'p' move to the next / previous
        site (wrapping around). Returns immediately when no sites are
        configured. Errors are logged and the browser is always closed.
        """
        driver = None
        try:
            site_items = list(self.urls.items())
            if not site_items:
                # Nothing to show; avoid launching a browser just to fail on index 0
                logging.warning("No sites configured; nothing to monitor")
                print("No sites configured; nothing to monitor")
                return

            driver = webdriver.Chrome(options=self.chrome_options)
            current_site_index = 0
            steps = {'n': 1, 'p': -1}

            print("\n=== Gambling Site Monitor ===")
            print(f"Press '{self.capture_key}' to capture")
            print("'n' for next site")
            print("'p' for previous site")
            print("'q' to quit\n")

            # Load first site
            self._show_site(driver, site_items, current_site_index)

            while True:
                event = keyboard.read_event(suppress=True)
                if event.event_type != 'down':
                    continue

                if event.name == self.capture_key:
                    site_name, url = site_items[current_site_index]
                    if self.capture_and_analyze(driver, site_name, url):
                        print(f"\nCaptured: {site_name}")
                elif event.name in steps:
                    current_site_index = (current_site_index + steps[event.name]) % len(site_items)
                    self._show_site(driver, site_items, current_site_index, prefix="\n")
                elif event.name == 'q':
                    print("\nQuitting...")
                    break

        except Exception as e:
            logging.exception(f"Monitoring error: {str(e)}")
        finally:
            if driver:
                driver.quit()

# Example usage and implementation

# Gambling sites to monitor, keyed by the short name used in capture filenames.
gambling_sites = {
    "national_lottery": "https://www.national-lottery.co.uk/games/gamestore",
    "ladbrokes": "https://www.ladbrokes.com/en/games"
}

# Root directory shared by the monitoring session and the offline analyses.
DEFAULT_OUTPUT_DIR = 'gambling_monitoring'


def _format_score(score):
    """Render a score with two decimals, or 'N/A' when it is missing (None)."""
    if isinstance(score, (int, float)):
        return f"{score:.2f}"
    return 'N/A' if score is None else str(score)


def print_analysis_results(analysis):
    """Helper function to print analysis results in a readable format"""
    print("\n=== Analysis Results ===")
    for category, results in analysis.items():
        print(f"\n{category.replace('_', ' ').title()}:")
        if not isinstance(results, dict):
            continue
        if 'score' in results:
            score = results['score']
            # A category may be unscored (None) when it had nothing to analyze
            if isinstance(score, (int, float)):
                print(f"Score: {score:.2f}/10")
            else:
                print(f"Score: {_format_score(score)}")
        if 'findings' in results:
            print("Findings:")
            for finding in results['findings']:
                print(f"- {finding}")


def compare_sites(site1_html, site2_html, site1_name, site2_name):
    """Compare two gambling sites by printing their category scores side by side"""
    site1_analysis = OnlineGamblingScoring(site1_html).get_complete_analysis()
    site2_analysis = OnlineGamblingScoring(site2_html).get_complete_analysis()

    print(f"\n=== Comparison: {site1_name} vs {site2_name} ===")
    categories = [
        'mobile_optimization',
        'game_rules',
        'tc_accessibility',
        'responsible_gaming',
        'language_neutrality'
    ]

    print("\n{:<25} {:<15} {:<15}".format("Category", site1_name, site2_name))
    print("-" * 55)

    for category in categories:
        print("{:<25} {:<15} {:<15}".format(
            category.replace('_', ' ').title(),
            _format_score(site1_analysis[category].get('score')),
            _format_score(site2_analysis[category].get('score'))
        ))


def get_latest_capture(site_name, output_dir):
    """
    Get the most recent HTML capture for a site

    Returns the file contents, or None when the site has no capture.
    """
    html_files = glob.glob(f"{output_dir}/raw_html/{site_name}_*.html")
    if not html_files:
        return None
    # Names end in YYYYMMDD_HHMMSS, so the lexicographic maximum is the newest
    latest_file = max(html_files)
    with open(latest_file, 'r', encoding='utf-8') as f:
        return f.read()


# Example of running manual monitoring with analysis
def run_monitoring_session():
    """Run an interactive monitoring session, then print the analysis of each site's latest capture"""
    monitor = GamblingSiteMonitor(
        urls=gambling_sites,
        output_dir=DEFAULT_OUTPUT_DIR,
        capture_key='c'
    )

    try:
        print("\nStarting monitoring session...")
        monitor.monitor_sites()
    except KeyboardInterrupt:
        print("\nMonitoring session ended by user")

    # Analyze latest captures
    print("\nAnalyzing latest captures...")
    for site_name in gambling_sites:
        latest_html = get_latest_capture(site_name, monitor.output_dir)
        if latest_html:
            analysis = OnlineGamblingScoring(latest_html).get_complete_analysis()
            print(f"\nResults for {site_name}:")
            print_analysis_results(analysis)


# Example of batch analysis of existing captures
def analyze_existing_captures(output_dir=DEFAULT_OUTPUT_DIR):
    """
    Analyze all existing captures in the output directory

    Returns {site_name: [{'timestamp': str or None, 'analysis': dict}, ...]}
    with each site's captures in filename (chronological) order.
    """
    timestamp_pattern = re.compile(r'\d{8}_\d{6}')
    results = {}

    for site_name in gambling_sites:
        site_results = []
        html_files = sorted(glob.glob(f"{output_dir}/raw_html/{site_name}_*.html"))

        for html_file in html_files:
            with open(html_file, 'r', encoding='utf-8') as f:
                html = f.read()
            # Match on the file name only so digits in the directory path cannot interfere
            match = timestamp_pattern.search(os.path.basename(html_file))
            site_results.append({
                'timestamp': match.group() if match else None,
                'analysis': OnlineGamblingScoring(html).get_complete_analysis()
            })

        results[site_name] = site_results

    return results


def main():
    """
    Main function demonstrating the complete system usage

    Prompts for an operation mode and runs a monitoring session, a batch
    analysis of existing captures, or a comparison of the latest captures.
    """
    print("Choose operation mode:")
    print("1. Start new monitoring session")
    print("2. Analyze existing captures")
    print("3. Compare latest captures")

    choice = input("Enter choice (1-3): ")

    if choice == '1':
        run_monitoring_session()
    elif choice == '2':
        results = analyze_existing_captures()
        for site_name, analyses in results.items():
            print(f"\nResults for {site_name}:")
            for analysis in analyses:
                print(f"\nTimestamp: {analysis['timestamp']}")
                print_analysis_results(analysis['analysis'])
    elif choice == '3':
        latest_captures = {
            site_name: get_latest_capture(site_name, DEFAULT_OUTPUT_DIR)
            for site_name in gambling_sites
        }
        if all(latest_captures.values()):
            compare_sites(
                latest_captures['national_lottery'],
                latest_captures['ladbrokes'],
                'National Lottery',
                'Ladbrokes'
            )
        else:
            print("Missing captures for one or more sites")
    else:
        print("Invalid choice")


if __name__ == "__main__":
    main()