In [None]:
import json
import os
from datetime import datetime
from google.colab import drive

# Mount Google Drive so the /content/drive/... paths used below resolve.
# Only ordinary errors are caught: a bare `except:` would also swallow
# KeyboardInterrupt/SystemExit, making the cell impossible to interrupt.
try:
    drive.mount('/content/drive')
except Exception as exc:
    print("Drive already mounted")
    # Show the real cause as well; the fixed message above is only a guess
    # and would otherwise hide authentication or network failures.
    print(f"(drive.mount reported: {exc})")

# =====================================================
# INDUSTRY TEMPLATES LIBRARY
# =====================================================

class IndustryTemplates:
    """Library of ready-made configuration templates, one per industry.

    Each getter builds and returns a brand-new dict on every call, so callers
    may mutate the result without affecting later calls.
    """

    @staticmethod
    def get_academic_template():
        """Build the Academic/Education template (placeholder example).

        Returns:
            dict with the template name/description, five weighted categories
            (each carrying regex patterns and entity keywords), competitor
            names, compliance terms, priority thresholds and noise patterns.
        """
        categories = {}

        categories["academic_programs"] = {
            "weight": 0.25,
            "display_name": "Academic Programs",
            "description": "Degree programs, courses, curriculum offerings",
            "patterns": [
                r'\b(degree programs?|courses?|curriculum|majors?|minors?)\s+(offers?|includes?|provides?|features?)\s+([^.]{1,60})',
                r'\b(bachelor|master|doctoral|PhD|certificate)\s+(in|of|programs?)\s+([^.]{1,60})',
                r'\b(academic programs?|educational offerings?|study options?)\s+(include|encompasses?|covers?)\s+([^.]{1,60})'
            ],
            "entities": ["degree", "program", "curriculum", "courses", "major", "minor", "bachelor", "master", "PhD"]
        }

        categories["student_outcomes"] = {
            "weight": 0.30,
            "display_name": "Student Outcomes",
            "description": "Graduation rates, career placement, alumni success",
            "patterns": [
                r'\b(graduation rate|employment rate|job placement|career outcomes?)\s+(of|is|reaches?|achieves?)\s+([^.]{1,60})',
                r'\b(students?|graduates?|alumni)\s+(achieve|secure|obtain|find)\s+([^.]{1,60})',
                r'\b(success rate|placement rate|outcome statistics?)\s+(shows?|indicates?|demonstrates?)\s+([^.]{1,60})'
            ],
            "entities": ["graduation", "employment", "career", "alumni", "success", "placement", "outcomes"]
        }

        categories["accreditation"] = {
            "weight": 0.20,
            "display_name": "Accreditation & Quality",
            "description": "Accreditation status, certifications, quality indicators",
            "patterns": [
                r'\b(accredited by|certified by|approved by|recognized by)\s+([^.]{1,60})',
                r'\b(accreditation|certification|approval|recognition)\s+(from|by|ensures?|guarantees?)\s+([^.]{1,60})',
                r'\b(quality|standards?|excellence)\s+(maintained|upheld|demonstrated)\s+([^.]{1,60})'
            ],
            "entities": ["accredited", "certified", "approved", "recognition", "quality", "standards", "excellence"]
        }

        categories["student_support"] = {
            "weight": 0.15,
            "display_name": "Student Support",
            "description": "Services and support for students",
            "patterns": [
                r'\b(for students?|student services?|support services?)\s+(includes?|offers?|provides?)\s+([^.]{1,60})',
                r'\b(students?)\s+(receive|access|benefit from|enjoy)\s+([^.]{1,60})',
                r'\b(academic support|tutoring|counseling|advising)\s+(helps?|assists?|guides?)\s+([^.]{1,60})'
            ],
            "entities": ["students", "support", "services", "tutoring", "counseling", "advising", "assistance"]
        }

        categories["differentiators"] = {
            "weight": 0.10,
            "display_name": "Differentiators",
            "description": "Competitive advantages and unique features",
            "patterns": [
                r'\b(unlike other universities?|compared to other schools?|different from traditional)\s+([^.]{1,80})',
                r'\b(only|exclusively|unique|distinctive|innovative)\s+([^.]{1,60})',
                r'\b(our university|our institution|our approach)\s+(is the only|uniquely|exclusively)\s+([^.]{1,60})'
            ],
            "entities": ["unique", "innovative", "distinctive", "exclusive", "different", "special"]
        }

        # Key order is kept stable because the template is serialized to JSON.
        template = {
            "template_name": "Academic/Education",
            "description": "Template for universities, colleges, and educational institutions",
            "categories": categories,
            "competitors": ["harvard university", "stanford university", "mit", "yale university", "princeton"],
            "compliance_terms": ["accredited", "licensed", "approved", "certified", "recognized"],
            "priority_thresholds": {
                "high": 0.3,
                "medium": 0.6
            },
            "noise_patterns": [
                "student portal", "login", "apply now", "request information",
                "campus tour", "virtual tour", "contact admissions"
            ]
        }
        return template

    @staticmethod
    def get_healthcare_template():
        """Build the Healthcare/Medical template.

        Returns:
            dict with the template name/description, two weighted categories,
            competitor names, compliance terms and priority thresholds. This
            template defines no ``noise_patterns`` key.
        """
        medical_services = {
            "weight": 0.25,
            "display_name": "Medical Services",
            "patterns": [
                r'\b(treatments?|procedures?|surgeries?|therapies?)\s+(offers?|provides?|performs?)\s+([^.]{1,60})',
                r'\b(medical services?|healthcare services?)\s+(include|encompasses?)\s+([^.]{1,60})'
            ],
            "entities": ["treatment", "procedure", "surgery", "therapy", "medical", "healthcare"]
        }

        patient_outcomes = {
            "weight": 0.30,
            "display_name": "Patient Outcomes",
            "patterns": [
                r'\b(success rate|recovery rate|patient satisfaction)\s+(of|is|reaches?)\s+([^.]{1,60})',
                r'\b(patients?)\s+(recover|improve|heal|benefit)\s+([^.]{1,60})'
            ],
            "entities": ["success", "recovery", "satisfaction", "patients", "outcomes", "results"]
        }

        template = {
            "template_name": "Healthcare/Medical",
            "description": "Template for hospitals, clinics, and medical practices",
            "categories": {
                "medical_services": medical_services,
                "patient_outcomes": patient_outcomes
                # ... other healthcare categories
            },
            "competitors": ["mayo clinic", "cleveland clinic", "johns hopkins"],
            "compliance_terms": ["FDA approved", "board certified", "licensed", "accredited"],
            "priority_thresholds": {"high": 0.3, "medium": 0.6}
        }
        return template

    @staticmethod
    def get_available_templates():
        """Return every built-in template, keyed by its short name.

        Returns:
            dict mapping ``"academic"`` and ``"healthcare"`` to freshly built
            template dicts.
        """
        builders = (
            ("academic", IndustryTemplates.get_academic_template),
            ("healthcare", IndustryTemplates.get_healthcare_template),
            # Add more templates as needed
        )
        return {short_name: build() for short_name, build in builders}

# =====================================================
# CONFIGURATION MANAGER
# =====================================================

class ConfigurationManager:
    """Manage template and client configuration files under a Drive folder.

    The manager owns a base folder with three sub-folders (``Templates``,
    ``Configs`` and ``Analyses``) and reads/writes JSON files inside them.
    All folders are created when the manager is constructed.
    """

    # Suffix appended to a client name to form its configuration file name.
    _CONFIG_SUFFIX = '_config.json'

    def __init__(self, base_drive_path="SemanticTool_V2"):
        """Set up the folder layout.

        Args:
            base_drive_path: folder name (relative to ``/content/drive/MyDrive``)
                that holds all templates, configs and analyses.
        """
        self.base_path = f"/content/drive/MyDrive/{base_drive_path}"
        self.templates_path = f"{self.base_path}/Templates"
        self.configs_path = f"{self.base_path}/Configs"
        self.analyses_path = f"{self.base_path}/Analyses"

        # Create directory structure
        self._create_directory_structure()

    @staticmethod
    def _write_json(file_path, payload):
        """Serialize *payload* to *file_path* as indented JSON (UTF-8)."""
        with open(file_path, 'w', encoding='utf-8') as f:
            json.dump(payload, f, indent=2)

    @staticmethod
    def _read_json(file_path):
        """Parse and return the JSON document stored at *file_path* (UTF-8)."""
        with open(file_path, 'r', encoding='utf-8') as f:
            return json.load(f)

    def _create_directory_structure(self):
        """Create the base, templates, configs and analyses folders if missing."""
        directories = [
            self.base_path,
            self.templates_path,
            self.configs_path,
            self.analyses_path
        ]

        for directory in directories:
            os.makedirs(directory, exist_ok=True)

        print(f"‚úÖ Directory structure created in: {self.base_path}")

    def save_template(self, template_name, template_config):
        """Write *template_config* to ``<templates>/<template_name>.json``."""
        file_path = f"{self.templates_path}/{template_name}.json"
        self._write_json(file_path, template_config)

        print(f"‚úÖ Template saved: {template_name}.json")

    def load_template(self, template_name):
        """Load ``<templates>/<template_name>.json``.

        Returns:
            The parsed template dict, or ``None`` (after printing a message)
            when the file does not exist.
        """
        file_path = f"{self.templates_path}/{template_name}.json"

        try:
            return self._read_json(file_path)
        except FileNotFoundError:
            print(f"‚ùå Template not found: {template_name}.json")
            return None

    def create_client_config(self, client_name, template_name=None, custom_config=None):
        """Create and persist a client-specific configuration.

        The configuration source is chosen in this order: a non-empty
        *custom_config*; a built-in template named *template_name*; a template
        file of that name saved in the templates folder.

        Args:
            client_name: identifier used in the config file name and in the
                generated file-naming prefixes.
            template_name: name of a built-in or saved template.
            custom_config: ready-made configuration dict to use instead of a
                template.

        Returns:
            The merged client configuration dict, or ``None`` when no source
            was given or the named template could not be found.
        """
        if custom_config:
            config = custom_config
        elif template_name:
            # Build the built-in templates once instead of once per lookup.
            built_in = IndustryTemplates.get_available_templates()
            if template_name in built_in:
                config = built_in[template_name]
            else:
                config = self.load_template(template_name)
                if not config:
                    return None
        else:
            print("‚ùå Must provide either template_name or custom_config")
            return None

        # Take a single timestamp so the creation date and both naming tags
        # always agree, even when the call straddles a month boundary.
        created_at = datetime.now()
        period_tag = f"{client_name}_{created_at.strftime('%Y_%m')}"

        # Add client-specific metadata; keys from the source configuration are
        # spread last and therefore win on a name clash.
        client_config = {
            "client_name": client_name,
            "created_date": created_at.isoformat(),
            "template_used": template_name,
            "file_naming": {
                "analysis_prefix": period_tag,
                "folder_structure": period_tag
            },
            **config
        }

        # Save client config
        config_file = f"{self.configs_path}/{client_name}{self._CONFIG_SUFFIX}"
        self._write_json(config_file, client_config)

        print(f"‚úÖ Client configuration created: {client_name}_config.json")
        return client_config

    def load_client_config(self, client_name):
        """Load ``<configs>/<client_name>_config.json``.

        Returns:
            The parsed configuration dict, or ``None`` (after printing a
            message) when the file does not exist.
        """
        config_file = f"{self.configs_path}/{client_name}{self._CONFIG_SUFFIX}"

        try:
            return self._read_json(config_file)
        except FileNotFoundError:
            print(f"‚ùå Client config not found: {client_name}_config.json")
            return None

    def create_analysis_folder(self, client_config):
        """Create the per-client analysis folder and its standard sub-folders.

        Args:
            client_config: configuration dict containing
                ``file_naming.folder_structure``.

        Returns:
            Path of the analysis folder.
        """
        folder_name = client_config["file_naming"]["folder_structure"]
        analysis_folder = f"{self.analyses_path}/{folder_name}"

        # Create subfolders
        subfolders = ["input_files", "results", "visualizations", "exports"]

        for subfolder in subfolders:
            os.makedirs(f"{analysis_folder}/{subfolder}", exist_ok=True)

        print(f"‚úÖ Analysis folder created: {folder_name}")
        return analysis_folder

    def list_available_configs(self):
        """Return the client names that have a saved configuration file.

        Returns:
            List of client names (file name minus the ``_config.json``
            suffix), in directory-listing order; empty when the configs folder
            cannot be read.
        """
        suffix = self._CONFIG_SUFFIX
        try:
            entries = os.listdir(self.configs_path)
        except OSError:
            # Missing or unreadable folder: report "no configs" as before, but
            # without hiding unrelated errors the way a bare except would.
            return []

        # Slice the suffix off the end only; str.replace would also remove the
        # same text from the middle of a client name.
        return [name[:-len(suffix)] for name in entries if name.endswith(suffix)]

# =====================================================
# CONFIGURATION SETUP FUNCTIONS
# =====================================================

def setup_configuration_system():
    """Bootstrap the configuration system.

    Creates a ``ConfigurationManager`` (which creates its folders) and writes
    every built-in industry template to the templates folder.

    Returns:
        The initialized ``ConfigurationManager``.
    """
    print("üöÄ Setting up V2 Configuration System")

    manager = ConfigurationManager()

    # Persist each built-in template so it can later be loaded by name.
    for name, body in IndustryTemplates.get_available_templates().items():
        manager.save_template(name, body)

    print("‚úÖ Configuration system ready!")
    return manager

def create_new_client(client_name, industry_template="academic", config_manager=None):
    """Create a client configuration from a template plus its analysis folders.

    Args:
        client_name: identifier for the client; used in file and folder names.
        industry_template: name of the template to base the configuration on.
        config_manager: manager to use; a new ``ConfigurationManager`` is
            created when omitted.

    Returns:
        Tuple ``(client_config, analysis_folder, config_manager)``. When the
        configuration cannot be created (for example, the template name is
        unknown) the first two items are ``None`` and no folders are created.
    """
    if config_manager is None:
        config_manager = ConfigurationManager()

    print(f"üìã Creating client configuration: {client_name}")
    print(f"üìã Using template: {industry_template}")

    # Create client config
    client_config = config_manager.create_client_config(
        client_name=client_name,
        template_name=industry_template
    )

    # create_client_config signals failure by returning None; stop here rather
    # than letting create_analysis_folder fail on None["file_naming"].
    if client_config is None:
        print(f"Client setup aborted: no configuration could be created for {client_name}")
        return None, None, config_manager

    # Create analysis folders
    analysis_folder = config_manager.create_analysis_folder(client_config)

    print(f"‚úÖ Client setup complete: {client_name}")
    print(f"üìÅ Analysis folder: {analysis_folder}")

    return client_config, analysis_folder, config_manager

def modify_client_config(client_name, modifications, config_manager=None):
    """Overwrite top-level keys of a saved client configuration.

    Only keys that already exist in the stored configuration are updated; any
    other key in *modifications* is reported and ignored. The configuration
    file is rewritten afterwards.

    Args:
        client_name: client whose configuration should be changed.
        modifications: mapping of top-level key -> new value.
        config_manager: manager to use; a new ``ConfigurationManager`` is
            created when omitted.

    Returns:
        The updated configuration dict, or ``None`` when no configuration
        could be loaded for the client.
    """
    manager = config_manager if config_manager else ConfigurationManager()

    current = manager.load_client_config(client_name)
    if not current:
        return None

    for field, new_value in modifications.items():
        if field not in current:
            print(f"‚ö†Ô∏è Key '{field}' not found in config")
            continue
        current[field] = new_value

    # Persist the result next to the other client configs.
    target_path = f"{manager.configs_path}/{client_name}_config.json"
    with open(target_path, 'w') as handle:
        json.dump(current, handle, indent=2)

    print(f"‚úÖ Configuration updated for: {client_name}")
    return current

# =====================================================
# EXAMPLE USAGE & TESTING
# =====================================================

def demo_configuration_system():
    """Run an end-to-end demo using the academic template.

    Sets up the configuration system, creates the example client
    ``StateUniversity_VetSchool`` and prints a summary of its configuration
    and folder locations.

    Returns:
        Tuple ``(config_manager, client_config)``.
    """
    print("üéì DEMO: Academic Institution Configuration")
    print("=" * 50)

    # Bootstrap folders/templates, then create the example academic client.
    manager = setup_configuration_system()
    client_config, analysis_folder, manager = create_new_client(
        client_name="StateUniversity_VetSchool",
        industry_template="academic",
        config_manager=manager
    )

    # Configuration summary
    categories = client_config['categories']
    print("\nüìä Client Configuration Summary:")
    print(f"Client: {client_config['client_name']}")
    print(f"Template: {client_config['template_used']}")
    print(f"Categories: {len(categories)}")

    for category in categories.values():
        print(f"  - {category['display_name']}: {category['weight']}")

    # Where everything was written
    print(f"\nüìÅ File Structure:")
    print(f"Base Path: {manager.base_path}")
    print(f"Analysis Folder: {analysis_folder}")

    return manager, client_config

# =====================================================
# READY TO USE
# =====================================================

# Announce that the cell finished loading and show the quick-start steps.
for _banner_line in (
    "‚úÖ V2 Configuration System Loaded!",
    "\nQuick Start:",
    "1. config_manager, client_config = demo_configuration_system()",
    "2. Or create custom: config_manager = setup_configuration_system()",
    "3. Then: client_config, folder, manager = create_new_client('YourClient', 'academic')",
):
    print(_banner_line)

Cell 2: V2 Semantic Analyzer

- Configuration-Driven Analysis - Uses client config for categories, weights, patterns

- Auto Entity Detection - Uses entity data automatically when it is available and
gracefully falls back to text-only analysis when it is not

- Enhanced Content Cleaning - Robust cleaning system with configuration-aware filters

- Weighted Scoring - Category scores use configured weights

- Complete Drive Integration - All file handling through Google Drive

- Comprehensive Results - Detailed analysis with entity enhancement indicators

In [None]:
import pandas as pd
import numpy as np
import re
from collections import defaultdict, Counter
import warnings
warnings.filterwarnings('ignore')

# =====================================================
# V2 ENHANCED SEMANTIC ANALYZER
# =====================================================

class SemanticAnalyzerV2:
    """Version 2 semantic analyzer driven by a client configuration.

    The configuration supplies the categories (each with a weight, regex
    patterns and entity keywords), competitor names, compliance terms,
    priority thresholds and noise patterns. Entity data is optional: when it
    has been loaded with ``load_entity_data`` it is used to adjust triple
    confidence and to report entity quality; otherwise analysis is text-only.
    """

    def __init__(self, client_config):
        """Initialize the analyzer from a client configuration dict.

        Args:
            client_config: dict that must contain ``client_name``,
                ``categories`` and ``priority_thresholds``; ``competitors``,
                ``compliance_terms`` and ``noise_patterns`` default to empty
                lists.

        Raises:
            KeyError: if a required key is missing.
        """
        self.config = client_config
        self.client_name = client_config['client_name']
        self.categories = client_config['categories']
        self.competitors = client_config.get('competitors', [])
        self.compliance_terms = client_config.get('compliance_terms', [])
        self.priority_thresholds = client_config['priority_thresholds']
        self.noise_patterns = client_config.get('noise_patterns', [])

        # Entity analysis will be detected automatically
        self.entity_data = None
        self.entity_enhanced = False

        print(f"üéØ Analyzer initialized for: {self.client_name}")
        print(f"üìä Categories configured: {len(self.categories)}")
        print(f"üèÜ Entity enhancement: Will auto-detect")

    @staticmethod
    def _as_text(content):
        """Coerce a raw cell value to text.

        Strings pass through unchanged; ``None`` and missing-value scalars
        (such as NaN from an empty spreadsheet cell) become ``""``; anything
        else is converted with ``str``.
        """
        if isinstance(content, str):
            return content
        if pd.api.types.is_scalar(content) and pd.isna(content):
            return ""
        return str(content)

    @staticmethod
    def _entities_mentioning(page_entities, text):
        """Return the rows whose entity name contains *text* (case-insensitive).

        The match is a literal substring test: extracted text routinely
        contains characters such as ``(`` or ``+`` that would be misread, or
        rejected, as regular-expression syntax. Blank text matches nothing,
        because an empty substring would otherwise match every row.
        """
        needle = text.lower()
        if not needle:
            return page_entities.iloc[0:0]
        names = page_entities['Entity'].str.lower()
        return page_entities[names.str.contains(needle, na=False, regex=False)]

    def load_entity_data(self, entity_file_path):
        """Load and clean entity analysis data (optional).

        Reads the ``Entity Sentiment Data`` sheet, drops entities whose name is
        a known noise term and keeps those with salience above 0.01.

        Args:
            entity_file_path: path of the Excel workbook to read.

        Returns:
            The filtered DataFrame, or ``None`` when the data could not be
            loaded (the analyzer then runs in text-only mode).
        """
        try:
            print(f"üîç Attempting to load entity data from: {entity_file_path}")

            # Load entity data
            entity_df = pd.read_excel(entity_file_path, sheet_name='Entity Sentiment Data')

            # Clean entity data - remove noise entities
            base_noise_entities = ['cookies', 'site', 'privacy', 'policy', 'terms', 'website', 'page', 'javascript']
            config_noise_entities = [term.lower() for term in self.noise_patterns]
            all_noise_entities = base_noise_entities + config_noise_entities

            cleaned_entities = entity_df[~entity_df['Entity'].str.lower().isin(all_noise_entities)]

            # Filter by minimum salience
            meaningful_entities = cleaned_entities[cleaned_entities['Salience'] > 0.01]

            self.entity_data = meaningful_entities
            self.entity_enhanced = True

            print(f"‚úÖ Entity analysis loaded: {len(meaningful_entities)} meaningful entities")
            print(f"üéØ Entity enhancement: ENABLED")

            return meaningful_entities

        except Exception as e:
            # Deliberate best-effort: entity data is optional, so any failure
            # downgrades to text-only analysis instead of aborting the run.
            print(f"‚ö†Ô∏è Entity data not available: {str(e)}")
            print(f"üìù Proceeding with text-only analysis")
            # Drop any previously loaded data so the analyzer's state matches
            # the text-only mode it now reports.
            self.entity_data = None
            self.entity_enhanced = False
            return None

    def get_page_entities(self, url):
        """Return the loaded entity rows whose ``ID`` equals *url*.

        Returns an empty DataFrame when no entity data has been loaded.
        """
        if self.entity_data is None:
            return pd.DataFrame()

        # Filter entities for this specific URL
        page_entities = self.entity_data[self.entity_data['ID'] == url].copy()
        return page_entities

    def clean_content(self, content):
        """Strip boilerplate (cookie banners, navigation text, etc.) from content.

        Built-in noise patterns and one pattern per configured noise term are
        removed first, whitespace is collapsed, and finally a set of
        sentence-level UI patterns is removed.

        Args:
            content: raw page text; ``None``/NaN are treated as empty.

        Returns:
            The cleaned text (``""`` for empty input).
        """
        content = self._as_text(content)
        if not content:
            return ""

        # Base noise patterns
        base_patterns = [
            r'cookies?.*?privacy.*?policy',
            r'javascript.*?enabled',
            r'website.*?functionality',
            r'terms.*?conditions',
            r'privacy.*?policy',
            r'site.*?navigation',
            r'accept.*?cookies',
            r'cookie.*?settings',
            r'we use cookies',
            r'this website uses',
            r'by continuing to use'
        ]

        # Add configuration-specific noise patterns: each removes text from the
        # noise term up to the next sentence-ending punctuation mark.
        config_patterns = [
            rf'\b{re.escape(term.lower())}\b.*?[\.\!\?]'
            for term in self.noise_patterns
        ]

        all_patterns = base_patterns + config_patterns

        # Clean content
        cleaned = content
        for pattern in all_patterns:
            cleaned = re.sub(pattern, '', cleaned, flags=re.IGNORECASE | re.DOTALL)

        # Remove excessive whitespace
        cleaned = re.sub(r'\s+', ' ', cleaned).strip()

        # Enhanced UI element removal
        ui_patterns = [
            r'These cookies.*?understand how users interact with[^.]*\.',
            r'We use cookies.*?improve.*?performance[^.]*\.',
            r'By clicking.*?accept.*?cookies[^.]*\.',
            r'Footer.*?navigation.*?menu[^.]*\.',
            r'Header.*?logo.*?menu[^.]*\.',
            r'Menu.*?navigation.*?links[^.]*\.',
            r'Contact us.*?for more information[^.]*\.',
            r'Click here.*?to learn more[^.]*\.'
        ]

        for pattern in ui_patterns:
            cleaned = re.sub(pattern, '', cleaned, flags=re.IGNORECASE | re.DOTALL)

        return cleaned

    def extract_category_triples(self, content, category_name, page_entities=None):
        """Extract (subject, predicate, object) triples for one category.

        Each configured pattern is matched case-insensitively. A pattern with
        three or more capture groups yields subject/predicate/object from its
        first three groups. A pattern with exactly two groups yields
        subject/object with the predicate ``'relates_to'``. Patterns with
        fewer groups produce no triples.

        Args:
            content: text to search (normally already cleaned).
            category_name: key into the configured categories.
            page_entities: optional DataFrame of entities for the page; when
                non-empty it is used to adjust each triple's confidence.

        Returns:
            List of triple dicts with keys ``category``, ``subject``,
            ``predicate``, ``object``, ``confidence``, ``pattern_match`` and
            ``entity_enhanced``.
        """
        content = self._as_text(content)
        if not content:
            return []

        category_config = self.categories[category_name]
        use_entities = page_entities is not None and not page_entities.empty
        triples = []

        # Extract triples using configured patterns
        for pattern in category_config['patterns']:
            for match in re.finditer(pattern, content, re.IGNORECASE):
                groups = match.groups()
                if len(groups) >= 3:
                    subject, predicate, obj = groups[0], groups[1], groups[2]
                elif len(groups) == 2:
                    # Two-group patterns capture no verb; without this branch
                    # they would never contribute a triple.
                    subject, predicate, obj = groups[0], 'relates_to', groups[1]
                else:
                    continue

                # An optional group that did not participate is None.
                if subject is None or obj is None:
                    continue

                # Clean up the extracted parts
                subject = re.sub(r'\s+', ' ', subject.strip())
                predicate = (predicate or 'relates_to').strip()
                obj = re.sub(r'\s+', ' ', obj.strip())

                # Calculate base confidence
                base_confidence = self._calculate_triple_confidence(
                    subject, predicate, obj, category_name
                )

                # Keep at most 100 characters of the matched text for display.
                matched_text = match.group(0)
                if len(matched_text) > 100:
                    matched_text = matched_text[:100] + '...'

                triple = {
                    'category': category_name,
                    'subject': subject,
                    'predicate': predicate,
                    'object': obj,
                    'confidence': base_confidence,
                    'pattern_match': matched_text
                }

                # Enhance confidence with entity analysis if available
                if use_entities:
                    triple['confidence'] = self._enhance_triple_confidence_with_entities(
                        triple, page_entities
                    )
                    triple['entity_enhanced'] = True
                else:
                    triple['entity_enhanced'] = False

                triples.append(triple)

        return triples

    def _calculate_triple_confidence(self, subject, predicate, obj, category_name):
        """Score a triple from text alone.

        Starts at 0.5 and adds bonuses for a category entity keyword (+0.2), a
        strong predicate (+0.15), the client's base name in the subject
        (+0.15) and a compliance term (+0.1); objects shorter than 10 or
        longer than 80 characters lose 0.1.

        Returns:
            Confidence clamped to the range 0.1-1.0.
        """
        confidence = 0.5  # Base confidence
        subject_lower = subject.lower()
        obj_lower = obj.lower()

        # Bonus for category-specific entities
        category_entities = self.categories[category_name].get('entities', [])
        if any(entity.lower() in subject_lower or entity.lower() in obj_lower
               for entity in category_entities):
            confidence += 0.2

        # Bonus for strong predicates
        strong_predicates = ['provides', 'offers', 'helps', 'solves', 'eliminates', 'lasts', 'includes', 'achieves', 'delivers']
        if predicate.lower() in strong_predicates:
            confidence += 0.15

        # Bonus for client-specific terms. The base client name is the part
        # before the first underscore; an empty fragment is skipped because
        # "" is a substring of every subject.
        base_client_term = self.client_name.lower().split('_')[0]
        if base_client_term and base_client_term in subject_lower:
            confidence += 0.15

        # Bonus for compliance terms
        if any(term.lower() in subject_lower or term.lower() in obj_lower
               for term in self.compliance_terms):
            confidence += 0.1

        # Penalty for very long or very short objects
        if len(obj) < 10 or len(obj) > 80:
            confidence -= 0.1

        return min(1.0, max(0.1, confidence))

    def _enhance_triple_confidence_with_entities(self, triple, page_entities):
        """Adjust a triple's confidence using the page's entity data.

        Entities whose name contains the triple's subject add up to 0.2 (twice
        the highest salience) plus 0.1 when their mean sentiment is positive
        and the category is one of the recognized ones; entities whose name
        contains the object add up to 0.15 (1.5 times the highest salience).

        Returns:
            The adjusted confidence, capped at 1.0.
        """
        enhanced_confidence = triple['confidence']

        # Look for subject entity in entity analysis
        subject_entities = self._entities_mentioning(page_entities, triple['subject'])

        if not subject_entities.empty:
            # Boost confidence based on entity salience
            max_salience = subject_entities['Salience'].max()
            salience_boost = min(0.2, max_salience * 2)
            enhanced_confidence += salience_boost

            # Check sentiment alignment: every recognized category gets the
            # same bonus for positive sentiment.
            avg_sentiment = subject_entities['Sentiment Score'].mean()
            sentiment_categories = ('student_outcomes', 'accreditation', 'academic_programs', 'differentiators')
            if triple['category'] in sentiment_categories and avg_sentiment > 0:
                enhanced_confidence += 0.1

        # Look for object entity
        object_entities = self._entities_mentioning(page_entities, triple['object'])

        if not object_entities.empty:
            max_salience = object_entities['Salience'].max()
            salience_boost = min(0.15, max_salience * 1.5)
            enhanced_confidence += salience_boost

        return min(1.0, enhanced_confidence)

    def analyze_entity_quality(self, url, page_entities):
        """Summarize entity quality problems for one page.

        Args:
            url: page URL (not used in the computation).
            page_entities: DataFrame of the page's entities; may be empty.

        Returns:
            Dict with ``total_entities``, ``meaningful_entities`` (salience
            above 0.02), ``entity_issues`` (list of messages),
            ``entity_quality_score`` (meaningful / total) and
            ``top_entities`` (up to five rows with the highest salience).
        """
        if page_entities is None or page_entities.empty:
            return {
                'total_entities': 0,
                'meaningful_entities': 0,
                'entity_issues': [],
                'entity_quality_score': 0,
                'top_entities': []
            }

        issues = []

        # The noise vocabulary is the same for every row, so build it once.
        base_noise = ['cookie', 'site', 'privacy', 'policy']
        config_noise = [term.lower() for term in self.noise_patterns]
        all_noise = [noise for noise in base_noise + config_noise if noise]

        # Check for high-salience noise entities
        high_salience_entities = page_entities[page_entities['Salience'] > 0.1]
        for _, entity in high_salience_entities.iterrows():
            entity_name = str(entity['Entity']).lower()

            if any(noise in entity_name for noise in all_noise):
                issues.append(f"High-salience noise entity: '{entity['Entity']}' (salience: {entity['Salience']:.3f})")

        # Check for business entities classified as "OTHER". Empty fragments
        # (from doubled or leading underscores) are dropped because "" would
        # match every entity name.
        client_terms = [word.lower() for word in self.client_name.split('_') if word]
        other_entities = page_entities[page_entities['Type'] == 'OTHER']

        for _, entity in other_entities.iterrows():
            entity_name = str(entity['Entity']).lower()
            if any(term in entity_name for term in client_terms) and entity['Salience'] > 0.05:
                issues.append(f"Business entity classified as OTHER: '{entity['Entity']}' (should be ORGANIZATION)")

        # Calculate entity quality score
        total_entities = len(page_entities)
        meaningful_entities = len(page_entities[page_entities['Salience'] > 0.02])
        entity_quality_score = meaningful_entities / total_entities if total_entities > 0 else 0

        # Get top entities
        top_entities = page_entities.nlargest(5, 'Salience')[
            ['Entity', 'Type', 'Salience', 'Sentiment Score']
        ].to_dict('records')

        return {
            'total_entities': total_entities,
            'meaningful_entities': meaningful_entities,
            'entity_issues': issues,
            'entity_quality_score': entity_quality_score,
            'top_entities': top_entities
        }

    def analyze_page_comprehensively(self, url, content):
        """Run the full analysis for one page.

        Cleans the content, gathers the page's entities, extracts triples for
        every configured category, and derives per-category scores, a weighted
        quality score and a priority label.

        Args:
            url: page URL; also used to look up entity rows.
            content: raw page text; ``None``/NaN are treated as empty.

        Returns:
            Dict of page metrics, including ``category_scores``,
            ``semantic_triples``, ``entity_analysis``,
            ``semantic_quality_score`` and ``priority``.
        """
        # Spreadsheet cells without text arrive as NaN; normalize up front so
        # both cleaning and the length-based metrics work on a string.
        content = self._as_text(content)

        # Clean the content
        cleaned_content = self.clean_content(content)

        # Get entity data for this page if available
        page_entities = self.get_page_entities(url)

        # Analyze entity quality
        entity_analysis = self.analyze_entity_quality(url, page_entities)

        # Extract triples for each configured category
        all_triples = []
        category_scores = {}

        for category_name in self.categories:
            triples = self.extract_category_triples(cleaned_content, category_name, page_entities)
            all_triples.extend(triples)

            # Calculate category score based on triples found: three
            # high-confidence triples are enough for a full score.
            high_conf_triples = [t for t in triples if t['confidence'] > 0.6]
            category_scores[category_name] = {
                'total_triples': len(triples),
                'high_confidence_triples': len(high_conf_triples),
                'avg_confidence': np.mean([t['confidence'] for t in triples]) if triples else 0,
                'score': min(1.0, len(high_conf_triples) / 3.0),
                'weight': self.categories[category_name]['weight'],
                'entity_enhanced_triples': len([t for t in triples if t.get('entity_enhanced', False)])
            }

        # Calculate overall page metrics
        word_count = len(cleaned_content.split())

        metrics = {
            'url': url,
            'url_short': self._get_url_short(url),
            'client_name': self.client_name,
            'analysis_mode': 'Enhanced (Entity + Text)' if self.entity_enhanced else 'Text-Only',
            'original_content_length': len(content),
            'cleaned_content_length': len(cleaned_content),
            'noise_ratio': 1 - (len(cleaned_content) / len(content)) if content else 0,
            'word_count': word_count,
            'total_triples': len(all_triples),
            'high_confidence_triples': len([t for t in all_triples if t['confidence'] > 0.6]),
            'entity_enhanced_triples': len([t for t in all_triples if t.get('entity_enhanced', False)]),
            'category_coverage': len([cat for cat, scores in category_scores.items() if scores['total_triples'] > 0]),
            'category_scores': category_scores,
            'semantic_triples': all_triples,
            'entity_analysis': entity_analysis
        }

        # Calculate composite semantic quality score using configuration weights
        metrics['semantic_quality_score'] = self._calculate_weighted_quality_score(category_scores, entity_analysis)

        # Determine priority based on configuration thresholds
        metrics['priority'] = self._determine_priority(
            metrics['semantic_quality_score'],
            metrics['category_coverage']
        )

        return metrics

    def _get_url_short(self, url):
        """Return a short identifier for *url*: its last non-empty path segment.

        When the URL has more than three ``/``-separated parts, the last part
        is used, or the one before it if the URL ends with ``/``. Otherwise
        the last part is used, falling back to the whole URL when it is empty.
        """
        parts = url.split('/')
        if len(parts) > 3:
            return parts[-2] if parts[-1] == '' else parts[-1]
        return parts[-1] or url

    def _calculate_weighted_quality_score(self, category_scores, entity_analysis):
        """Combine category scores into one quality score.

        Sums ``weight * score`` over all categories and, when entity
        enhancement is active and the page has entities, adds 10% of the
        entity quality score.

        Returns:
            The combined score, capped at 1.0.
        """
        base_score = 0

        # Calculate weighted category scores
        for scores in category_scores.values():
            base_score += scores['weight'] * scores['score']

        # Add entity quality factor if entity analysis is available
        if self.entity_enhanced and entity_analysis['total_entities'] > 0:
            entity_quality_factor = entity_analysis['entity_quality_score'] * 0.1
            base_score += entity_quality_factor

        return min(1.0, base_score)

    def _determine_priority(self, quality_score, category_coverage):
        """Map a quality score and category coverage to a priority label.

        Returns ``'HIGH'`` when the score is below the configured ``high``
        threshold or fewer than 3 categories are covered, ``'MEDIUM'`` when
        the score is below the ``medium`` threshold or fewer than 4 categories
        are covered, and ``'LOW'`` otherwise.
        """
        high_threshold = self.priority_thresholds['high']
        medium_threshold = self.priority_thresholds['medium']

        if quality_score < high_threshold or category_coverage < 3:
            return 'HIGH'
        elif quality_score < medium_threshold or category_coverage < 4:
            return 'MEDIUM'
        else:
            return 'LOW'

# =====================================================
# V2 ANALYSIS EXECUTION FUNCTIONS
# =====================================================

def analyze_with_configuration(text_file_path, client_config, entity_file_path=None):
    """Analyze every page of a text spreadsheet using a client configuration.

    Args:
        text_file_path: Path of an Excel file readable by ``pd.read_excel``.
            Each row is one page; the ``URL`` and ``content`` columns are read.
        client_config: Client configuration dict. ``client_name`` and
            ``categories`` are read here (``template_used`` is optional); the
            whole dict is passed to ``SemanticAnalyzerV2``.
        entity_file_path: Optional path handed to
            ``analyzer.load_entity_data``; skipped when falsy.

    Returns:
        Tuple ``(results, results_df, analyzer)``: the list of per-page
        analysis dicts, a one-row-per-page summary DataFrame, and the analyzer
        instance. When the input file has no rows, ``results`` is empty and
        ``results_df`` is an empty DataFrame that still carries the summary
        columns.
    """

    print(f"üöÄ Starting V2 Semantic Analysis")
    print(f"üë§ Client: {client_config['client_name']}")
    print(f"üìã Template: {client_config.get('template_used', 'Custom')}")
    print("=" * 50)

    # Initialize analyzer with configuration
    analyzer = SemanticAnalyzerV2(client_config)

    # Load entity data if a path was supplied
    if entity_file_path:
        analyzer.load_entity_data(entity_file_path)

    # Load text data
    print(f"üìñ Loading text data from: {text_file_path}")
    text_df = pd.read_excel(text_file_path)
    total_pages = len(text_df)
    print(f"‚úÖ Loaded {total_pages} pages for analysis")

    # Analyze all pages
    results = []
    print("\nüîç Analyzing pages...")

    # enumerate() gives the row position; the DataFrame index label is not
    # guaranteed to be 0..n-1, which would skew the progress output.
    for position, (_, row) in enumerate(text_df.iterrows()):
        url = row['URL']
        content = row['content']

        if position % 5 == 0 or position == total_pages - 1:
            page_name = analyzer._get_url_short(url)
            print(f"   Processing page {position+1}/{total_pages}: {page_name}")

        results.append(analyzer.analyze_page_comprehensively(url, content))

    # Fields copied verbatim from each result, in summary-column order
    direct_fields = (
        'url_short',
        'client_name',
        'analysis_mode',
        'semantic_quality_score',
        'total_triples',
        'high_confidence_triples',
        'entity_enhanced_triples',
        'category_coverage',
        'noise_ratio',
        'priority',
    )
    category_names = list(client_config['categories'])

    if not results:
        # Nothing to summarize: results[0] and the column lookups below would
        # raise, so return an empty (but correctly shaped) summary instead.
        summary_columns = [
            *direct_fields,
            'entity_quality_score',
            'entity_issues_count',
            *(f"{cat}_score" for cat in category_names),
        ]
        print(f"No pages found in {text_file_path}; nothing to analyze.")
        return results, pd.DataFrame(columns=summary_columns), analyzer

    # Convert to DataFrame for easier analysis
    summary_rows = []
    for r in results:
        summary = {field: r[field] for field in direct_fields}
        summary['entity_quality_score'] = r['entity_analysis']['entity_quality_score']
        summary['entity_issues_count'] = len(r['entity_analysis']['entity_issues'])
        for cat in category_names:
            summary[f"{cat}_score"] = r['category_scores'][cat]['score']
        summary_rows.append(summary)
    results_df = pd.DataFrame(summary_rows)

    # Analysis summary
    print("\n" + "=" * 50)
    print("üìä ANALYSIS COMPLETE")
    print(f"‚úÖ Pages analyzed: {len(results_df)}")
    print(f"üìà Average quality score: {results_df['semantic_quality_score'].mean():.3f}")
    print(f"‚ö° Analysis mode: {results[0]['analysis_mode']}")
    print(f"üî• Entity enhancement: {'YES' if analyzer.entity_enhanced else 'NO'}")

    # Priority breakdown
    for priority, count in results_df['priority'].value_counts().items():
        print(f"üéØ {priority} priority pages: {count}")

    return results, results_df, analyzer

def save_analysis_results(results, results_df, client_config, analysis_folder):
    """Write the analysis results to Excel workbooks under ``analysis_folder``.

    Produces ``<analysis_folder>/results/<prefix>_analysis_results.xlsx`` with
    the sheets 'Summary Results', 'Category Analysis' and 'Semantic Triples'
    (the last one only when at least one triple exists), plus one overview
    workbook per page in ``<analysis_folder>/results/individual_reports``.

    Individual reports are named ``<url_short>_analysis.xlsx``. When several
    pages share the same ``url_short``, later ones get a numeric suffix
    (``<url_short>_2_analysis.xlsx`` ...) so no report overwrites another.

    Args:
        results: List of per-page analysis dicts.
        results_df: Summary DataFrame written to the 'Summary Results' sheet.
        client_config: Client configuration; ``file_naming.analysis_prefix``
            and ``categories`` are read.
        analysis_folder: Base folder of this analysis run.

    Returns:
        Path of the main results workbook.
    """

    print(f"\nüíæ Saving results to: {analysis_folder}")

    # Generate file names using client configuration
    file_prefix = client_config["file_naming"]["analysis_prefix"]

    results_folder = f"{analysis_folder}/results"
    url_reports_folder = f"{results_folder}/individual_reports"
    # Creating the nested folder first also guarantees that results/ exists
    # before the main workbook is opened for writing.
    os.makedirs(url_reports_folder, exist_ok=True)

    main_results_file = f"{results_folder}/{file_prefix}_analysis_results.xlsx"

    # Category breakdown: one row per (page, category)
    category_breakdown = [
        {
            'URL': result['url_short'],
            'Category': category.replace('_', ' ').title(),
            'Score': scores['score'],
            'Weight': scores['weight'],
            'Total Triples': scores['total_triples'],
            'High Confidence Triples': scores['high_confidence_triples']
        }
        for result in results
        for category, scores in result['category_scores'].items()
    ]

    # All semantic triples: one row per triple
    all_triples = [
        {
            'URL': result['url_short'],
            'Category': triple['category'].replace('_', ' ').title(),
            'Subject': triple['subject'],
            'Predicate': triple['predicate'],
            'Object': triple['object'],
            'Confidence': round(triple['confidence'], 3),
            'Entity Enhanced': 'Yes' if triple.get('entity_enhanced', False) else 'No'
        }
        for result in results
        for triple in result['semantic_triples']
    ]

    with pd.ExcelWriter(main_results_file, engine='openpyxl') as writer:
        results_df.to_excel(writer, sheet_name='Summary Results', index=False)
        pd.DataFrame(category_breakdown).to_excel(writer, sheet_name='Category Analysis', index=False)
        if all_triples:
            pd.DataFrame(all_triples).to_excel(writer, sheet_name='Semantic Triples', index=False)

    # Individual URL reports
    category_total = len(client_config['categories'])
    used_report_names = set()

    for result in results:
        # Different URLs can end in the same segment; pick a free name so an
        # earlier page's report is not silently overwritten.
        report_name = result['url_short']
        suffix = 1
        while report_name in used_report_names:
            suffix += 1
            report_name = f"{result['url_short']}_{suffix}"
        used_report_names.add(report_name)

        url_file = f"{url_reports_folder}/{report_name}_analysis.xlsx"

        with pd.ExcelWriter(url_file, engine='openpyxl') as writer:
            overview_df = pd.DataFrame({
                'Metric': ['URL', 'Quality Score', 'Priority', 'Analysis Mode', 'Category Coverage'],
                'Value': [
                    result['url'],
                    round(result['semantic_quality_score'], 3),
                    result['priority'],
                    result['analysis_mode'],
                    f"{result['category_coverage']}/{category_total}"
                ]
            })
            overview_df.to_excel(writer, sheet_name='Overview', index=False)

    print(f"‚úÖ Results saved successfully!")
    print(f"üìÅ Main file: {main_results_file}")
    print(f"üìÅ Individual reports: {url_reports_folder}")

    return main_results_file

# =====================================================
# COMPLETE WORKFLOW FUNCTION
# =====================================================

def run_complete_analysis_v2(client_name, text_file_drive_path, entity_file_drive_path=None, config_manager=None):
    """Run the end-to-end V2 workflow: load config, analyze pages, save results.

    File paths are given relative to ``/content/drive/MyDrive``. A
    ``ConfigurationManager`` is created when ``config_manager`` is falsy.

    Returns ``None`` when no configuration is found for ``client_name``;
    otherwise the tuple
    ``(results, results_df, analyzer, client_config, analysis_folder)``.
    """

    print("üöÄ SEMANTIC ANALYZER V2 - COMPLETE WORKFLOW")
    print("=" * 60)

    # Resolve the configuration for this client
    manager = config_manager or ConfigurationManager()
    client_config = manager.load_client_config(client_name)
    if not client_config:
        print(f"‚ùå Client configuration not found: {client_name}")
        print("Available clients:", manager.list_available_configs())
        return None

    # Folder that receives this run's output
    analysis_folder = manager.create_analysis_folder(client_config)

    # Turn Drive-relative paths into absolute ones
    drive_root = "/content/drive/MyDrive"
    text_file_path = f"{drive_root}/{text_file_drive_path}"
    if entity_file_drive_path:
        entity_file_path = f"{drive_root}/{entity_file_drive_path}"
    else:
        entity_file_path = None

    # Analyze, then persist
    results, results_df, analyzer = analyze_with_configuration(
        text_file_path, client_config, entity_file_path
    )
    save_analysis_results(results, results_df, client_config, analysis_folder)

    print("\nüéâ V2 ANALYSIS COMPLETE!")
    print(f"üìä Ready for manual review and visualization")

    return results, results_df, analyzer, client_config, analysis_folder

# =====================================================
# READY TO USE
# =====================================================

# Quick-start banner shown once the cell has finished loading
print("\n".join((
    "‚úÖ V2 Semantic Analyzer Loaded!",
    "\nQuick Start:",
    "1. First run Cell 1 to set up configuration",
    "2. Create client: client_config, folder, manager = create_new_client('ClientName', 'academic')",
    "3. Run analysis: results, results_df, analyzer, config, folder = run_complete_analysis_v2(",
    "   'ClientName', 'path/to/text_file.xlsx', 'path/to/entity_file.xlsx')",
)))

Cell 3: V2 Visualization Suite

- Configuration-Aware Visualizations - Uses client categories and display names
- Executive Dashboard - High-level metrics for stakeholders
- Category Performance Analysis - Detailed breakdown of configured categories
- Content Optimization Matrix - Interactive priority visualization
- Semantic Triple Analysis - Deep dive into relationships found
- Competitive Analysis - Competitor mentions and differentiation language
- Flexible Execution - Run all or specific visualizations

Key Features:

- Uses the category names and display names defined in the client configuration (for example, those of the academic template)
- Shows entity enhancement impact when available
- Adapts to any number of configured categories
- Interactive Plotly charts for detailed exploration
- Configuration-driven thresholds and competitor analysis

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np

# =====================================================
# V2 VISUALIZATION SUITE
# =====================================================

class VisualizationSuiteV2:
    """Configuration-aware visualization suite for V2 results.

    Holds the outputs of an analysis run (per-page result dicts, the summary
    DataFrame, the client configuration and the analyzer) and renders
    matplotlib / seaborn / plotly charts from them. Category names and their
    display names are taken from ``client_config['categories']``.
    """

    # Colour used for each priority level in every chart
    PRIORITY_COLORS = {'HIGH': 'red', 'MEDIUM': 'orange', 'LOW': 'green'}

    # Words treated as differentiation language; matched as substrings of the
    # lower-cased "subject predicate object" text of a triple.
    DIFFERENTIATION_KEYWORDS = (
        'unlike', 'different', 'better', 'superior', 'unique', 'only', 'exclusively'
    )

    def __init__(self, results, results_df, client_config, analyzer):
        """Store the analysis outputs and print a short initialization banner.

        Args:
            results: List of per-page analysis dicts (may be empty).
            results_df: Summary DataFrame with one row per page.
            client_config: Client configuration; ``client_name`` and
                ``categories`` (each with a ``display_name``) are read.
            analyzer: Analyzer used for the run. Only its ``entity_enhanced``
                attribute is read (by the triple analysis); ``None`` is
                treated as "not entity enhanced".
        """
        self.results = results
        self.results_df = results_df
        self.client_config = client_config
        self.analyzer = analyzer
        self.client_name = client_config['client_name']
        self.categories = list(client_config['categories'])
        self.category_display_names = {
            cat: config['display_name']
            for cat, config in client_config['categories'].items()
        }

        print(f"üé® Visualization Suite initialized for: {self.client_name}")
        print(f"üìä Analysis mode: {self._analysis_mode()}")
        print(f"üèÜ Categories: {len(self.categories)}")

    def _analysis_mode(self):
        """Return the run's analysis mode, or 'N/A' when there are no results."""
        # results[0] would raise IndexError for an empty run
        return self.results[0]['analysis_mode'] if self.results else 'N/A'

    def create_executive_dashboard(self):
        """Create high-level executive dashboard.

        Draws a 2x3 grid of bordered tiles showing the number of pages, the
        average quality score, the page count per priority level and the
        analysis mode.
        """

        print("üìà Creating Executive Dashboard...")

        # Calculate key metrics
        total_pages = len(self.results_df)
        avg_quality = self.results_df['semantic_quality_score'].mean()
        priorities = self.results_df['priority']
        high_priority = int((priorities == 'HIGH').sum())
        medium_priority = int((priorities == 'MEDIUM').sum())
        low_priority = int((priorities == 'LOW').sum())

        # Create executive metrics visualization
        fig, axes = plt.subplots(2, 3, figsize=(18, 10))

        # Key metrics boxes: (label, value, colour)
        metrics = [
            ('Pages Analyzed', total_pages, 'blue'),
            ('Avg Quality Score', f'{avg_quality:.3f}', 'green'),
            ('High Priority Pages', high_priority, 'red'),
            ('Medium Priority Pages', medium_priority, 'orange'),
            ('Low Priority Pages', low_priority, 'darkgreen'),
            ('Analysis Mode', self._analysis_mode(), 'purple')
        ]

        # axes.flat walks the grid row by row, matching the order of `metrics`
        for ax, (label, value, color) in zip(axes.flat, metrics):
            ax.text(0.5, 0.5, str(value), ha='center', va='center',
                    fontsize=24, fontweight='bold', color=color)
            ax.text(0.5, 0.2, label, ha='center', va='center',
                    fontsize=12, wrap=True)
            ax.set_xlim(0, 1)
            ax.set_ylim(0, 1)

            # Hide only the ticks. axis('off') would switch off the whole
            # axes frame, so the coloured border below would never be drawn.
            ax.set_xticks([])
            ax.set_yticks([])

            # Add border
            for spine in ['top', 'right', 'bottom', 'left']:
                ax.spines[spine].set_visible(True)
                ax.spines[spine].set_color(color)
                ax.spines[spine].set_linewidth(2)

        plt.suptitle(f'Semantic Analysis Executive Summary - {self.client_name}',
                    fontsize=16, fontweight='bold')
        plt.tight_layout()
        plt.show()

    def create_category_performance_analysis(self):
        """Create category-specific performance analysis.

        Renders four panels: a radar chart of average category scores, a
        ranked bar chart of the same averages, a heatmap of category scores
        per page, and a scatter of category coverage against quality score
        coloured by priority.
        """

        print("üìä Creating Category Performance Analysis...")

        # Prepare category data, renamed to display names
        category_columns = [f"{cat}_score" for cat in self.categories]
        display_columns = {
            f"{cat}_score": self.category_display_names.get(cat, cat.replace('_', ' ').title())
            for cat in self.categories
        }
        category_data = self.results_df[category_columns].rename(columns=display_columns)

        # Create visualizations
        fig, axes = plt.subplots(2, 2, figsize=(16, 12))

        # 1. Category Performance Radar Chart
        # A radar chart needs polar coordinates, so the top-left Cartesian
        # axes is replaced by a polar one in the same grid slot.
        axes[0, 0].remove()
        radar_ax = fig.add_subplot(2, 2, 1, projection='polar')

        categories_display = list(display_columns.values())
        category_averages = [category_data[col].mean() for col in categories_display]

        angles = np.linspace(0, 2*np.pi, len(categories_display), endpoint=False).tolist()
        category_averages += category_averages[:1]  # Complete the circle
        angles += angles[:1]

        radar_ax.plot(angles, category_averages, 'o-', linewidth=2, color='blue', label='Average Score')
        radar_ax.fill(angles, category_averages, alpha=0.25, color='blue')
        radar_ax.set_xticks(angles[:-1])
        radar_ax.set_xticklabels(categories_display, fontsize=10)
        radar_ax.set_ylim(0, 1)
        radar_ax.set_title('Category Performance Overview')
        radar_ax.grid(True)
        radar_ax.legend()

        # 2. Category Performance Bar Chart
        avg_scores = category_data.mean().sort_values(ascending=True)
        colors = ['red' if score < 0.3 else 'orange' if score < 0.6 else 'green' for score in avg_scores]

        axes[0,1].barh(range(len(avg_scores)), avg_scores.values, color=colors)
        axes[0,1].set_yticks(range(len(avg_scores)))
        axes[0,1].set_yticklabels(avg_scores.index)
        axes[0,1].set_xlabel('Average Score')
        axes[0,1].set_title('Category Ranking (Lowest to Highest)')
        axes[0,1].set_xlim(0, 1)

        # Add score labels on bars
        for i, score in enumerate(avg_scores.values):
            axes[0,1].text(score + 0.02, i, f'{score:.3f}', va='center')

        # 3. Category Distribution Heatmap
        heatmap_data = category_data.T
        sns.heatmap(heatmap_data, ax=axes[1,0], cmap='RdYlGn', center=0.5,
                   cbar_kws={'label': 'Category Score'}, xticklabels=False)
        axes[1,0].set_title('Category Scores by Page')
        axes[1,0].set_ylabel('Categories')

        # 4. Priority vs Category Coverage
        for priority in ['HIGH', 'MEDIUM', 'LOW']:
            priority_data = self.results_df[self.results_df['priority'] == priority]
            if not priority_data.empty:
                axes[1,1].scatter(priority_data['category_coverage'],
                                priority_data['semantic_quality_score'],
                                label=f'{priority} Priority',
                                color=self.PRIORITY_COLORS[priority],
                                alpha=0.7, s=50)

        # The maximum coverage is the number of configured categories, not a
        # fixed 6.
        axes[1,1].set_xlabel(f'Category Coverage (out of {len(self.categories)})')
        axes[1,1].set_ylabel('Semantic Quality Score')
        axes[1,1].set_title('Priority Classification Analysis')
        axes[1,1].legend()
        axes[1,1].grid(True, alpha=0.3)

        plt.suptitle(f'Category Performance Analysis - {self.client_name}', fontsize=14)
        plt.tight_layout()
        plt.show()

    def create_content_optimization_matrix(self):
        """Create content optimization priority matrix.

        Interactive plotly scatter of total triples (x) against quality score
        (y), one trace per priority level, marker size scaled by category
        coverage, with the configured high/medium thresholds as dashed lines.
        """

        print("üéØ Creating Content Optimization Matrix...")

        # Create interactive priority matrix
        fig = go.Figure()

        for priority in ['HIGH', 'MEDIUM', 'LOW']:
            priority_data = self.results_df[self.results_df['priority'] == priority]
            if priority_data.empty:
                continue

            fig.add_trace(go.Scatter(
                x=priority_data['total_triples'],
                y=priority_data['semantic_quality_score'],
                mode='markers+text',
                name=f'{priority} Priority',
                text=priority_data['url_short'],
                textposition='top center',
                marker=dict(
                    color=self.PRIORITY_COLORS[priority],
                    size=priority_data['category_coverage'] * 3 + 5,  # Size by category coverage
                    opacity=0.7,
                    line=dict(width=1, color='white')
                ),
                hovertemplate='<b>%{text}</b><br>' +
                            'Quality Score: %{y:.3f}<br>' +
                            'Total Triples: %{x}<br>' +
                            'Priority: ' + priority + '<extra></extra>'
            ))

        # Add threshold lines
        thresholds = self.client_config['priority_thresholds']
        fig.add_hline(y=thresholds['high'],
                     line_dash="dash", line_color="red",
                     annotation_text="High Priority Threshold")
        fig.add_hline(y=thresholds['medium'],
                     line_dash="dash", line_color="orange",
                     annotation_text="Medium Priority Threshold")

        fig.update_layout(
            title=f'Content Optimization Priority Matrix - {self.client_name}',
            xaxis_title='Total Semantic Triples Found',
            yaxis_title='Semantic Quality Score',
            height=600,
            showlegend=True
        )

        fig.show()

    def create_semantic_triple_analysis(self):
        """Analyze semantic triples found across all pages.

        Renders the triple distribution by category, the confidence
        histogram, the entity-enhancement split (or a text-only notice) and
        the ten most frequent predicates, then prints up to ten triples with
        confidence above 0.7. Returns early when no triples exist.
        """

        print("üîç Creating Semantic Triple Analysis...")

        # Prepare triple data
        all_triples = [
            {
                'URL': result['url_short'],
                'Category': self.category_display_names.get(
                    triple['category'],
                    triple['category'].replace('_', ' ').title()
                ),
                'Subject': triple['subject'],
                'Predicate': triple['predicate'],
                'Object': triple['object'],
                'Confidence': triple['confidence'],
                'Entity Enhanced': triple.get('entity_enhanced', False)
            }
            for result in self.results
            for triple in result['semantic_triples']
        ]

        if not all_triples:
            print("‚ö†Ô∏è No semantic triples found to analyze")
            return

        triples_df = pd.DataFrame(all_triples)

        # Create analysis visualizations
        fig, axes = plt.subplots(2, 2, figsize=(16, 12))

        # 1. Triples by Category
        category_counts = triples_df['Category'].value_counts()
        colors = plt.cm.Set3(np.linspace(0, 1, len(category_counts)))

        axes[0,0].pie(category_counts.values, labels=category_counts.index, autopct='%1.1f%%',
                     colors=colors, startangle=90)
        axes[0,0].set_title('Semantic Triples Distribution by Category')

        # 2. Confidence Score Distribution
        mean_confidence = triples_df['Confidence'].mean()
        axes[0,1].hist(triples_df['Confidence'], bins=20, alpha=0.7, color='skyblue', edgecolor='black')
        axes[0,1].axvline(mean_confidence, color='red', linestyle='--',
                         label=f'Mean: {mean_confidence:.3f}')
        axes[0,1].set_xlabel('Confidence Score')
        axes[0,1].set_ylabel('Number of Triples')
        axes[0,1].set_title('Semantic Triple Confidence Distribution')
        axes[0,1].legend()

        # 3. Entity Enhancement Impact
        # getattr keeps this view usable when no analyzer was supplied
        if getattr(self.analyzer, 'entity_enhanced', False):
            enhancement_counts = triples_df['Entity Enhanced'].value_counts()
            axes[1,0].bar(['Text Only', 'Entity Enhanced'],
                         [enhancement_counts.get(False, 0), enhancement_counts.get(True, 0)],
                         color=['lightblue', 'darkblue'])
            axes[1,0].set_title('Entity Enhancement Impact')
            axes[1,0].set_ylabel('Number of Triples')
        else:
            axes[1,0].text(0.5, 0.5, 'Text-Only Analysis\nNo Entity Enhancement',
                          ha='center', va='center', fontsize=14)
            axes[1,0].set_title('Analysis Mode')

        # 4. Top Predicates Used
        predicate_counts = triples_df['Predicate'].value_counts().head(10)
        axes[1,1].barh(range(len(predicate_counts)), predicate_counts.values, color='coral')
        axes[1,1].set_yticks(range(len(predicate_counts)))
        axes[1,1].set_yticklabels(predicate_counts.index)
        axes[1,1].set_xlabel('Frequency')
        axes[1,1].set_title('Most Common Relationship Types (Predicates)')

        plt.suptitle(f'Semantic Triple Analysis - {self.client_name}', fontsize=14)
        plt.tight_layout()
        plt.show()

        # Show sample high-confidence triples
        print("\nüèÜ High-Confidence Semantic Triples (Sample):")
        high_conf_triples = triples_df[triples_df['Confidence'] > 0.7].head(10)
        for _, triple in high_conf_triples.iterrows():
            enhancement = " üî•" if triple['Entity Enhanced'] else ""
            print(f"   [{triple['Category']}] {triple['Subject']} ‚Üí {triple['Predicate']} ‚Üí {triple['Object']}{enhancement}")
            print(f"   Confidence: {triple['Confidence']:.3f}")

    def create_competitive_analysis(self):
        """Analyze competitor mentions and differentiation.

        Scans every triple for the competitors listed in
        ``client_config['competitors']`` and for differentiation keywords
        (case-insensitive substring match), charts both and prints up to five
        samples of each. Returns early when no competitors are configured.
        """

        print("üèÜ Creating Competitive Analysis...")

        # Find competitor mentions in triples
        competitors = self.client_config.get('competitors', [])
        if not competitors:
            print("‚ö†Ô∏è No competitors configured for analysis")
            return

        competitor_mentions = []
        differentiation_triples = []

        for result in self.results:
            url_short = result['url_short']

            for triple in result['semantic_triples']:
                triple_text = f"{triple['subject']} {triple['predicate']} {triple['object']}"
                text_content = triple_text.lower()

                # Look for competitor mentions
                for competitor in competitors:
                    if competitor.lower() in text_content:
                        competitor_mentions.append({
                            'URL': url_short,
                            'Competitor': competitor,
                            'Context': triple_text,
                            'Category': triple['category'],
                            'Confidence': triple['confidence']
                        })

                # Look for differentiation language
                if any(keyword in text_content for keyword in self.DIFFERENTIATION_KEYWORDS):
                    differentiation_triples.append({
                        'URL': url_short,
                        'Triple': f"{triple['subject']} ‚Üí {triple['predicate']} ‚Üí {triple['object']}",
                        'Confidence': triple['confidence']
                    })

        # Create visualizations
        fig, axes = plt.subplots(1, 2, figsize=(16, 6))

        # 1. Competitor Mentions
        if competitor_mentions:
            comp_df = pd.DataFrame(competitor_mentions)
            comp_counts = comp_df['Competitor'].value_counts()

            axes[0].bar(range(len(comp_counts)), comp_counts.values, color='lightcoral')
            axes[0].set_xticks(range(len(comp_counts)))
            axes[0].set_xticklabels(comp_counts.index, rotation=45)
            axes[0].set_ylabel('Number of Mentions')
            axes[0].set_title('Competitor Mentions in Content')
        else:
            axes[0].text(0.5, 0.5, 'No Competitor\nMentions Found', ha='center', va='center', fontsize=14)
            axes[0].set_title('Competitor Analysis')

        # 2. Differentiation Statements
        if differentiation_triples:
            diff_by_url = pd.DataFrame(differentiation_triples)['URL'].value_counts().head(10)

            axes[1].barh(range(len(diff_by_url)), diff_by_url.values, color='lightgreen')
            axes[1].set_yticks(range(len(diff_by_url)))
            axes[1].set_yticklabels(diff_by_url.index)
            axes[1].set_xlabel('Number of Differentiation Statements')
            axes[1].set_title('Pages with Most Differentiation Language')
        else:
            axes[1].text(0.5, 0.5, 'No Differentiation\nLanguage Found', ha='center', va='center', fontsize=14)
            axes[1].set_title('Differentiation Analysis')

        plt.suptitle(f'Competitive & Differentiation Analysis - {self.client_name}', fontsize=14)
        plt.tight_layout()
        plt.show()

        # Print sample findings
        if competitor_mentions:
            print(f"\nüîç Competitor Mentions Found: {len(competitor_mentions)}")
            for mention in competitor_mentions[:5]:
                print(f"   {mention['URL']}: {mention['Context']}")

        if differentiation_triples:
            print(f"\nüí™ Differentiation Statements Found: {len(differentiation_triples)}")
            for diff in differentiation_triples[:5]:
                print(f"   {diff['URL']}: {diff['Triple']}")

# =====================================================
# MAIN VISUALIZATION EXECUTION FUNCTIONS
# =====================================================

def run_complete_visualization_suite(results, results_df, client_config, analyzer):
    """Render all five V2 visualizations in order and return the suite.

    Builds a ``VisualizationSuiteV2`` from the arguments, then prints a
    numbered heading before each chart method is invoked.
    """

    print("üé® STARTING V2 VISUALIZATION SUITE")
    print("=" * 50)

    # Initialize visualization suite
    viz_suite = VisualizationSuiteV2(results, results_df, client_config, analyzer)

    # (heading, suite method) pairs, executed top to bottom
    steps = (
        ("\n1. Executive Dashboard", "create_executive_dashboard"),
        ("\n2. Category Performance Analysis", "create_category_performance_analysis"),
        ("\n3. Content Optimization Matrix", "create_content_optimization_matrix"),
        ("\n4. Semantic Triple Analysis", "create_semantic_triple_analysis"),
        ("\n5. Competitive Analysis", "create_competitive_analysis"),
    )
    for heading, method_name in steps:
        print(heading)
        getattr(viz_suite, method_name)()

    print("\n" + "=" * 50)
    print("üéâ VISUALIZATION SUITE COMPLETE!")
    print("üìä All charts generated successfully")

    return viz_suite

def create_custom_visualization(results, results_df, client_config, viz_type="executive", analyzer=None):
    """Render a single visualization selected by ``viz_type``.

    Args:
        results: List of per-page analysis dicts.
        results_df: Summary DataFrame with one row per page.
        client_config: Client configuration dict.
        viz_type: One of ``"executive"``, ``"categories"``,
            ``"optimization"``, ``"triples"`` or ``"competitive"``. Any other
            value prints the list of available types.
        analyzer: Analyzer used for the run, passed on to
            ``VisualizationSuiteV2``. When omitted, a module-level variable
            named ``analyzer`` is used if one exists; otherwise ``None`` is
            passed.
    """

    if analyzer is None:
        # This function used to read an undeclared global named `analyzer`
        # (NameError when it did not exist). Keep honouring that global so
        # existing notebook calls behave as before.
        analyzer = globals().get('analyzer')

    viz_suite = VisualizationSuiteV2(results, results_df, client_config, analyzer)

    # viz_type -> name of the suite method that renders it
    renderers = {
        "executive": "create_executive_dashboard",
        "categories": "create_category_performance_analysis",
        "optimization": "create_content_optimization_matrix",
        "triples": "create_semantic_triple_analysis",
        "competitive": "create_competitive_analysis",
    }

    method_name = renderers.get(viz_type)
    if method_name is None:
        print(f"‚ùå Unknown visualization type: {viz_type}")
        print("Available types: executive, categories, optimization, triples, competitive")
        return

    getattr(viz_suite, method_name)()

# =====================================================
# READY TO USE
# =====================================================

# Usage banner shown once the visualization cell has finished loading
print("\n".join((
    "‚úÖ V2 Visualization Suite Loaded!",
    "\nUsage after running Cell 2 analysis:",
    "1. Complete suite: viz_suite = run_complete_visualization_suite(results, results_df, client_config, analyzer)",
    "2. Specific chart: create_custom_visualization(results, results_df, client_config, 'executive')",
    "\nVisualization types available:",
    "- executive: High-level dashboard",
    "- categories: Category performance analysis",
    "- optimization: Priority matrix for content optimization",
    "- triples: Semantic relationship analysis",
    "- competitive: Competitor and differentiation analysis",
)))