In [26]:
!pip install torch transformers accelerate pandas numpy nltk scikit-learn reportlab PyPDF2 pdfplumber



In [27]:
import os
import json
import pandas as pd
from typing import Dict, List, Any, Optional
try:
    from typing import Tuple
except ImportError:
    # For older Python versions
    Tuple = tuple
from dataclasses import dataclass, field
from datetime import datetime
import re
from collections import Counter, defaultdict
import numpy as np
import torch

# PDF generation
from reportlab.lib.pagesizes import letter, A4
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak, KeepTogether
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.units import inch
from reportlab.lib import colors
from reportlab.lib.enums import TA_CENTER, TA_LEFT, TA_JUSTIFY, TA_RIGHT

# Natural Language Processing
import nltk
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.corpus import stopwords
from nltk.sentiment import SentimentIntensityAnalyzer
from sklearn.feature_extraction.text import TfidfVectorizer

# PDF reading
import PyPDF2
import pdfplumber

# HuggingFace Transformers for LLM integration
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import warnings
warnings.filterwarnings("ignore")

In [28]:
# Download each NLTK resource only when it is not already installed.
# Each entry pairs the path that nltk.data.find() resolves with the package id
# that nltk.download() expects. The VADER lexicon lives under "sentiment/" as a
# zip archive; looking it up by the bare name 'vader_lexicon' always raises
# LookupError and therefore re-triggered the download on every run.
for resource_path, package_id in (
    ('tokenizers/punkt', 'punkt'),
    ('corpora/stopwords', 'stopwords'),
    ('sentiment/vader_lexicon.zip', 'vader_lexicon'),
):
    try:
        nltk.data.find(resource_path)
    except LookupError:
        nltk.download(package_id)


[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /usr/share/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [29]:
# ## 4. Define Enhanced Data Classes
@dataclass
class ModuleMapping:
    """Coverage record for one module.

    Attributes:
        module_name: Name of the module (the only mandatory field).
        module_code: Identifier for the module; empty string by default.
        required: Flag marking the module as required; defaults to False.
        available: Flag marking the module as available; defaults to False.
        coverage_percentage: Coverage figure; defaults to 0.0.
        status: "Available", "Partial", "Missing" or "In Development";
            defaults to "Unknown".
        notes: Free-form remarks; empty by default.
        client_priority: "High", "Medium" or "Low"; defaults to "Medium".
    """

    module_name: str
    module_code: str = ""
    required: bool = False
    available: bool = False
    coverage_percentage: float = 0.0
    status: str = "Unknown"
    notes: str = ""
    client_priority: str = "Medium"

@dataclass
class GapItem:
    """A single identified gap.

    Attributes:
        gap_type: "Technical", "Resource", "Compliance", "Process" or
            "Functional".
        description: Text describing the gap.
        severity: "Critical", "High", "Medium" or "Low".
        business_impact: Text describing the business impact.
        mitigation_strategy: Text describing how the gap could be mitigated.
        effort_required: Effort expressed in days/weeks/months.
        cost_estimate: Cost estimate as text.
        dependencies: List of dependency descriptions; a fresh empty list is
            created per instance.
        risk_if_unaddressed: Text describing the risk of leaving the gap
            open; empty by default.
    """

    gap_type: str
    description: str
    severity: str
    business_impact: str
    mitigation_strategy: str
    effort_required: str
    cost_estimate: str
    dependencies: List[str] = field(default_factory=list)
    risk_if_unaddressed: str = ""

@dataclass
class WinLossFactors:
    """One factor taken from win/loss analysis.

    Attributes:
        factor_type: "Win" or "Loss".
        category: "Technical", "Price", "Relationship", "Process" or
            "Competition".
        description: Text describing the factor.
        frequency: How often the factor appears.
        impact_level: "High", "Medium" or "Low".
        specific_examples: List of example strings; a fresh empty list is
            created per instance.
        lessons_learned: Free-form text; empty by default.
    """

    factor_type: str
    category: str
    description: str
    frequency: int
    impact_level: str
    specific_examples: List[str] = field(default_factory=list)
    lessons_learned: str = ""

@dataclass
class CustomerRequirement:
    """One customer requirement and the capability recorded against it.

    Attributes:
        requirement_id: Identifier of the requirement.
        requirement_type: "Functional", "Technical", "Compliance" or
            "Performance".
        description: Text of the requirement.
        priority: "Must Have", "Nice to Have" or "Optional".
        our_capability: "Full", "Partial", "None" or "In Development".
        gap_description: Text describing the gap; empty by default.
        effort_to_meet: Effort estimate as text; empty by default.
        notes: Free-form remarks; empty by default.
    """

    requirement_id: str
    requirement_type: str
    description: str
    priority: str
    our_capability: str
    gap_description: str = ""
    effort_to_meet: str = ""
    notes: str = ""

@dataclass
class CompetitiveAnalysis:
    """Positioning record for a single competitor.

    Every field is mandatory; none has a default.

    Attributes:
        competitor_name: Name of the competitor.
        strengths: List of strength descriptions.
        weaknesses: List of weakness descriptions.
        win_rate_against: Win rate against this competitor, as a float.
        typical_strategy: Text describing the typical strategy.
        our_advantages: List of advantage descriptions.
        our_disadvantages: List of disadvantage descriptions.
    """

    competitor_name: str
    strengths: List[str]
    weaknesses: List[str]
    win_rate_against: float
    typical_strategy: str
    our_advantages: List[str]
    our_disadvantages: List[str]

@dataclass
class RiskAssessment:
    """One risk attached to an RFP response.

    Attributes:
        risk_type: "Technical", "Commercial", "Delivery" or "Compliance".
        description: Text describing the risk.
        probability: "High", "Medium" or "Low".
        impact: "High", "Medium" or "Low".
        mitigation_plan: Text describing the mitigation plan.
        owner: Owner of the risk; defaults to "TBD".
        due_date: Due date as text; defaults to "TBD".
    """

    risk_type: str
    description: str
    probability: str
    impact: str
    mitigation_plan: str
    owner: str = "TBD"
    due_date: str = "TBD"

@dataclass
class RFPFeasibilityAssessment:
    """Overall feasibility verdict for responding to an RFP.

    Only ``can_respond``, ``confidence_score`` and ``win_probability`` are
    mandatory; every other field has a default. List fields get a fresh
    empty list per instance.

    Attributes:
        can_respond: Whether a response is considered possible.
        confidence_score: Confidence on a 0-100 scale.
        win_probability: Win probability on a 0-100 scale.
        total_modules / available_modules / partial_modules / missing_modules:
            Module counters, all defaulting to 0.
        critical_gaps: GapItem entries regarded as critical.
        all_gaps: Every GapItem entry.
        strengths / weaknesses / opportunities / threats: Lists of strings.
        required_actions: List of tuples laid out as
            (action, priority, timeline, owner).
        risks: RiskAssessment entries.
        investment_required / timeline_estimate / resource_requirements:
            Text fields, empty by default.
        executive_summary / detailed_rationale / competitive_positioning:
            Narrative text fields, empty by default.
    """

    # --- verdict -----------------------------------------------------------
    can_respond: bool
    confidence_score: float
    win_probability: float

    # --- module counters ---------------------------------------------------
    total_modules: int = 0
    available_modules: int = 0
    partial_modules: int = 0
    missing_modules: int = 0

    # --- gaps --------------------------------------------------------------
    critical_gaps: List[GapItem] = field(default_factory=list)
    all_gaps: List[GapItem] = field(default_factory=list)

    # --- strengths / weaknesses / opportunities / threats -------------------
    strengths: List[str] = field(default_factory=list)
    weaknesses: List[str] = field(default_factory=list)
    opportunities: List[str] = field(default_factory=list)
    threats: List[str] = field(default_factory=list)

    # --- follow-up work ----------------------------------------------------
    required_actions: List[tuple] = field(default_factory=list)
    risks: List[RiskAssessment] = field(default_factory=list)

    # --- estimates ---------------------------------------------------------
    investment_required: str = ""
    timeline_estimate: str = ""
    resource_requirements: str = ""

    # --- narrative ---------------------------------------------------------
    executive_summary: str = ""
    detailed_rationale: str = ""
    competitive_positioning: str = ""


In [34]:
# ## 5. Enhanced Document Processor
class DocumentProcessor:
    """Processes documents and extracts structured information"""
    
    def __init__(self):
        """Store NLTK's English stop-word list on the instance as a set."""
        english_words = stopwords.words('english')
        self.stop_words = set(english_words)
        
    def extract_text_from_pdf(self, file_path: str) -> str:
        """Extract text from PDF files with better handling"""
        text = ""
        
        # Try pdfplumber first (usually better for complex PDFs)
        try:
            with pdfplumber.open(file_path) as pdf:
                for page in pdf.pages:
                    # Extract text with better layout preservation
                    page_text = page.extract_text(x_tolerance=2, y_tolerance=3)
                    if page_text:
                        # Clean up common PDF extraction issues
                        page_text = page_text.replace('\n\n', '\n')
                        page_text = re.sub(r'(\w)-\n(\w)', r'\1\2', page_text)  # Fix hyphenated words
                        page_text = re.sub(r'\s+', ' ', page_text)  # Normalize whitespace
                        text += page_text + "\n\n"
            
            if text.strip():
                print(f"✓ Extracted {len(text)} characters using pdfplumber")
                return text
        except Exception as e:
            print(f"pdfplumber failed: {e}")
        
        # Fallback to PyPDF2
        try:
            with open(file_path, 'rb') as file:
                pdf_reader = PyPDF2.PdfReader(file)
                for page_num in range(len(pdf_reader.pages)):
                    page = pdf_reader.pages[page_num]
                    page_text = page.extract_text()
                    if page_text:
                        # Clean up text
                        page_text = re.sub(r'(\w)-\n(\w)', r'\1\2', page_text)
                        page_text = re.sub(r'\s+', ' ', page_text)
                        text += page_text + "\n\n"
            
            if text.strip():
                print(f"✓ Extracted {len(text)} characters using PyPDF2")
                return text
        except Exception as e:
            print(f"PyPDF2 failed: {e}")
        
        return ""
        
    def clean_text(self, text: str) -> str:
        """Clean and normalize text for better extraction"""
        # Fix common PDF extraction issues
        text = re.sub(r'(\w)-\n(\w)', r'\1\2', text)  # Fix hyphenated words at line breaks
        text = re.sub(r'(\w)-\s+(\w)', r'\1\2', text)  # Fix hyphenated words with spaces
        text = re.sub(r'\n+', '\n', text)  # Normalize line breaks
        text = re.sub(r'\s+', ' ', text)  # Normalize whitespace
        text = re.sub(r'[^\x00-\x7F]+', ' ', text)  # Remove non-ASCII characters
        
        # Fix common OCR/extraction errors
        text = text.replace('■', 'fi')  # Common ligature issue
        text = text.replace('●', '•')  # Normalize bullet points
        
        return text
    
    def extract_text_from_file(self, file_path: str) -> str:
        """Extract text from various file formats"""
        if not os.path.exists(file_path):
            return ""
            
        _, ext = os.path.splitext(file_path.lower())
        
        if ext == '.txt':
            with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                text = f.read()
                return self.clean_text(text)
        elif ext == '.csv':
            df = pd.read_csv(file_path)
            text = df.to_string()
            return self.clean_text(text)
        elif ext == '.json':
            with open(file_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
                text = json.dumps(data, indent=2)
                return self.clean_text(text)
        elif ext == '.pdf':
            text = self.extract_text_from_pdf(file_path)
            return self.clean_text(text)
        
        return ""
    
    def extract_key_information(self, text: str, info_type: str) -> Dict[str, Any]:
        """Extract specific types of information from text"""
        results = {
            'raw_extracts': [],
            'structured_data': [],
            'statistics': {}
        }
        
        # Define extraction patterns based on info type
        patterns = {
            'modules': {
                'patterns': [
                    r'(?:Module|module)\s+([A-Z0-9]+)(?:\s*[:-]?\s*)([^\.;\n]+)',
                    r'([A-Z][A-Za-z\s]+)\s+Module(?:\s*[:-]?\s*)([^\.;\n]+)',
                    r'(?:Available|Required|Missing)\s+modules?:?\s*([^\.;\n]+)',
                    r'(\w+)\s+(?:module|Module)\s*-\s*(Available|Partial|Missing|Required)',
                ],
                'keywords': ['module', 'component', 'system', 'functionality', 'feature']
            },
            'gaps': {
                'patterns': [
                    r'(?:Gap|gap|Missing|Lacking)\s*[:-]?\s*([^\.;\n]+)',
                    r'(?:Need|Require|Must have)\s+(?:to|for)?\s*([^\.;\n]+)',
                    r'(?:Cannot|Unable to|Lack of)\s+([^\.;\n]+)',
                    r'(?:Issue|Problem|Challenge)\s*[:-]?\s*([^\.;\n]+)',
                ],
                'keywords': ['gap', 'missing', 'lack', 'need', 'require', 'issue', 'problem']
            },
            'requirements': {
                'patterns': [
                    r'(?:Requirement|Required)\s*[:-]?\s*([^\.;\n]+)',
                    r'(?:Client|Customer)\s+(?:needs?|wants?|requires?)\s*[:-]?\s*([^\.;\n]+)',
                    r'(?:Must|Should|Shall)\s+(?:have|support|provide)\s*([^\.;\n]+)',
                    r'(?:Functional|Technical|Performance)\s+requirement\s*[:-]?\s*([^\.;\n]+)',
                ],
                'keywords': ['requirement', 'need', 'must have', 'should have', 'specification']
            },
            'wins_losses': {
                'patterns': [
                    r'(?:Won|Lost|Win|Loss)\s+(?:because|due to)\s*[:-]?\s*([^\.;\n]+)',
                    r'(?:Success|Failure)\s+factor\s*[:-]?\s*([^\.;\n]+)',
                    r'(?:Strength|Weakness)\s*[:-]?\s*([^\.;\n]+)',
                    r'(?:Competitive advantage|disadvantage)\s*[:-]?\s*([^\.;\n]+)',
                ],
                'keywords': ['won', 'lost', 'win', 'loss', 'success', 'failure', 'strength', 'weakness']
            }
        }
        
        if info_type in patterns:
            # Extract using patterns
            for pattern in patterns[info_type]['patterns']:
                matches = re.findall(pattern, text, re.IGNORECASE | re.MULTILINE)
                results['raw_extracts'].extend(matches)
            
            # Extract sections containing keywords
            sentences = sent_tokenize(text)
            for sentence in sentences:
                if any(keyword in sentence.lower() for keyword in patterns[info_type]['keywords']):
                    results['structured_data'].append(sentence.strip())
            
            # Calculate statistics
            results['statistics'] = {
                'total_matches': len(results['raw_extracts']),
                'unique_items': len(set(str(m) for m in results['raw_extracts'])),
                'keyword_sentences': len(results['structured_data'])
            }
        
        return results
    
    def parse_modules_enhanced(self, text: str) -> List[ModuleMapping]:
        """Enhanced module extraction with better text cleaning"""
        modules = []
        module_dict = {}  # To avoid duplicates
        
        # Clean the text first
        text = re.sub(r'\s+', ' ', text)  # Normalize whitespace
        text = re.sub(r'(\w)-\s+(\w)', r'\1\2', text)  # Fix split words
        
        # Extract module information using more specific patterns
        module_patterns = [
            # Pattern for "Module X: Name - Description"
            r'Module\s+([A-Z0-9]+):\s*([^-\n]+?)(?:\s*[-–]\s*([^\n]+))?',
            # Pattern for numbered modules
            r'(\d+)\.\s*([A-Za-z][A-Za-z0-9\s]+?)(?:\s*[-–:]\s*([^\n]+))?',
            # Pattern for module listings
            r'([A-Z][A-Za-z0-9\s]+?)\s+Module(?:\s*[-–:]\s*([^\n]+))?',
            # Pattern for availability statements
            r'([A-Za-z][A-Za-z0-9\s]+?)\s+(?:is\s+)?(Available|Partial|Missing|Required|In Development)',
        ]
        
        # First, try to find module sections in the text
        module_section_pattern = r'(?:Module|MODULES?|Available Modules?|Required Modules?)[:\s]*\n([^=]+?)(?:\n\n|$)'
        module_sections = re.findall(module_section_pattern, text, re.IGNORECASE | re.MULTILINE)
        
        # Process module sections if found
        for section in module_sections:
            lines = section.strip().split('\n')
            for line in lines:
                line = line.strip()
                if len(line) > 5 and not line.startswith(('Note:', 'Total:', 'Summary:')):
                    # Try to parse module info from line
                    # Remove common prefixes
                    clean_line = re.sub(r'^\d+\.\s*', '', line)
                    clean_line = re.sub(r'^[-•]\s*', '', clean_line)
                    
                    # Extract module name and details
                    parts = re.split(r'[-–:]', clean_line, 1)
                    if parts:
                        module_name = parts[0].strip()
                        module_desc = parts[1].strip() if len(parts) > 1 else ""
                        
                        # Clean module name
                        module_name = re.sub(r'\s+', ' ', module_name)
                        module_name = module_name.replace('Module', '').strip()
                        
                        if module_name and len(module_name) > 2 and module_name not in module_dict:
                            # Determine status from description
                            status = "Unknown"
                            available = False
                            if any(word in line.lower() for word in ['available', 'existing', 'implemented', 'have']):
                                status = "Available"
                                available = True
                            elif any(word in line.lower() for word in ['partial', 'limited']):
                                status = "Partial"
                                available = True
                            elif any(word in line.lower() for word in ['missing', 'gap', 'not available', 'need']):
                                status = "Missing"
                            elif any(word in line.lower() for word in ['development', 'developing', 'building']):
                                status = "In Development"
                            
                            module = ModuleMapping(
                                module_name=module_name,
                                module_code=f"MOD-{len(modules)+1:03d}",
                                required='required' in line.lower() or 'must' in line.lower(),
                                available=available,
                                coverage_percentage=100.0 if status == "Available" else 50.0 if status == "Partial" else 0.0,
                                status=status,
                                notes=module_desc[:200],
                                client_priority="High" if any(word in line.lower() for word in ['critical', 'essential', 'must']) else "Medium"
                            )
                            module_dict[module_name] = module
                            modules.append(module)
        
        # Also try pattern matching on full text
        for pattern in module_patterns:
            matches = re.findall(pattern, text, re.IGNORECASE | re.MULTILINE)
            for match in matches:
                if isinstance(match, tuple):
                    # Extract parts based on match groups
                    if len(match) >= 2:
                        module_name = match[1].strip() if len(match) > 1 and match[1] else match[0].strip()
                        module_desc = match[2].strip() if len(match) > 2 and match[2] else ""
                    else:
                        module_name = match[0].strip()
                        module_desc = ""
                else:
                    module_name = str(match).strip()
                    module_desc = ""
                
                # Clean module name
                module_name = re.sub(r'\s+', ' ', module_name)
                module_name = module_name.replace('Module', '').strip()
                
                # Skip if too short or already added
                if not module_name or len(module_name) < 3 or module_name in module_dict:
                    continue
                
                # Skip common false positives
                if module_name.lower() in ['the', 'and', 'for', 'with', 'that', 'this', 'from']:
                    continue
                
                # Create module entry
                module = ModuleMapping(
                    module_name=module_name,
                    module_code=f"MOD-{len(modules)+1:03d}",
                    status="Unknown",
                    notes=module_desc[:200] if module_desc else "Identified from document scan"
                )
                module_dict[module_name] = module
                modules.append(module)
        
        # If we found very few modules, try a more aggressive extraction
        if len(modules) < 10:
            # Look for lists of capabilities or features
            capability_patterns = [
                r'(?:Capability|Feature|Component|System|Function):\s*([A-Za-z][A-Za-z0-9\s]+)',
                r'(?:•|[-*])\s*([A-Za-z][A-Za-z0-9\s]+?)(?:\s*[-–:]\s*[^\n]+)?',
            ]
            
            for pattern in capability_patterns:
                matches = re.findall(pattern, text, re.IGNORECASE)
                for match in matches[:20]:  # Limit to avoid noise
                    module_name = match.strip()
                    module_name = re.sub(r'\s+', ' ', module_name)
                    
                    if module_name and len(module_name) > 5 and module_name not in module_dict:
                        module = ModuleMapping(
                            module_name=module_name,
                            module_code=f"MOD-{len(modules)+1:03d}",
                            status="Unknown",
                            notes="Potential module/capability identified"
                        )
                        module_dict[module_name] = module
                        modules.append(module)
        
        return modules
    
    def parse_gaps_enhanced(self, text: str) -> List[GapItem]:
        """Enhanced gap extraction with better text handling"""
        gaps = []
        seen_gaps = set()  # To avoid duplicates
        
        # Clean the text
        text = re.sub(r'\s+', ' ', text)
        text = re.sub(r'(\w)-\s+(\w)', r'\1\2', text)
        
        # Look for gap sections first
        gap_section_patterns = [
            r'(?:Gaps?|Missing|Lacking|Issues?)[:\s]*\n([^=]+?)(?:\n\n|$)',
            r'(?:Gap Analysis|Capability Gaps?)[:\s]*\n([^=]+?)(?:\n\n|$)',
            r'(?:What we need|Requirements? not met)[:\s]*\n([^=]+?)(?:\n\n|$)'
        ]
        
        for pattern in gap_section_patterns:
            sections = re.findall(pattern, text, re.IGNORECASE | re.MULTILINE)
            for section in sections:
                lines = section.strip().split('\n')
                for line in lines:
                    line = line.strip()
                    if len(line) > 10 and line not in seen_gaps:
                        seen_gaps.add(line)
                        
                        # Determine gap type and severity
                        gap_type = "Technical"
                        severity = "Medium"
                        
                        line_lower = line.lower()
                        if any(word in line_lower for word in ['compliance', 'regulation', 'standard', 'certification']):
                            gap_type = "Compliance"
                        elif any(word in line_lower for word in ['resource', 'staff', 'team', 'personnel']):
                            gap_type = "Resource"
                        elif any(word in line_lower for word in ['process', 'procedure', 'workflow', 'methodology']):
                            gap_type = "Process"
                        elif any(word in line_lower for word in ['function', 'feature', 'capability', 'module']):
                            gap_type = "Functional"
                        
                        if any(word in line_lower for word in ['critical', 'urgent', 'immediate', 'blocker']):
                            severity = "Critical"
                        elif any(word in line_lower for word in ['high', 'important', 'significant', 'major']):
                            severity = "High"
                        elif any(word in line_lower for word in ['low', 'minor', 'small', 'nice to have']):
                            severity = "Low"
                        
                        # Extract mitigation if mentioned
                        mitigation = "Develop action plan to address this gap"
                        if 'recommend' in line_lower or 'suggest' in line_lower or 'should' in line_lower:
                            # Try to extract recommendation
                            parts = re.split(r'(?:recommend|suggest|should)', line, flags=re.IGNORECASE)
                            if len(parts) > 1:
                                mitigation = parts[1].strip()[:100]
                        
                        gaps.append(GapItem(
                            gap_type=gap_type,
                            description=line[:200],
                            severity=severity,
                            business_impact=f"{severity} impact on RFP response capability",
                            mitigation_strategy=mitigation,
                            effort_required="1-2 weeks" if severity == "Critical" else "2-4 weeks" if severity == "High" else "1-2 months",
                            cost_estimate="TBD - requires detailed analysis",
                            risk_if_unaddressed=f"May result in RFP disqualification" if severity in ["Critical", "High"] else "May reduce competitiveness"
                        ))
        
        # Also look for specific gap patterns
        gap_patterns = [
            r'(?:Gap|Missing|Lack(?:ing)?|Need)\s*[:–]\s*([^.;\n]+)',
            r'(?:We do not have|Cannot provide|Unable to)\s+([^.;\n]+)',
            r'(?:Must develop|Need to implement|Should acquire)\s+([^.;\n]+)',
            r'(?:Compliance gap|Technical gap|Resource gap)\s*[:–]\s*([^.;\n]+)',
        ]
        
        for pattern in gap_patterns:
            matches = re.findall(pattern, text, re.IGNORECASE)
            for match in matches[:20]:  # Limit to avoid noise
                gap_desc = match.strip()
                if len(gap_desc) > 10 and gap_desc not in seen_gaps:
                    seen_gaps.add(gap_desc)
                    
                    gaps.append(GapItem(
                        gap_type="Technical",
                        description=gap_desc[:200],
                        severity="Medium",
                        business_impact="Medium impact on RFP response capability",
                        mitigation_strategy="Develop action plan to address this gap",
                        effort_required="2-4 weeks",
                        cost_estimate="TBD - requires detailed analysis",
                        risk_if_unaddressed="May reduce competitiveness"
                    ))
        
        return gaps
    
    def parse_requirements_enhanced(self, text: str) -> List[CustomerRequirement]:
        """Enhanced requirement extraction with better parsing"""
        requirements = []
        seen_requirements = set()
        
        # Clean the text
        text = re.sub(r'\s+', ' ', text)
        text = re.sub(r'(\w)-\s+(\w)', r'\1\2', text)
        
        # Look for requirements sections
        req_section_patterns = [
            r'(?:Requirements?|Customer Needs?|Client Requirements?)[:\s]*\n([^=]+?)(?:\n\n|$)',
            r'(?:Must have|Should have|Functional requirements?)[:\s]*\n([^=]+?)(?:\n\n|$)',
            r'(?:Technical specifications?|Performance requirements?)[:\s]*\n([^=]+?)(?:\n\n|$)'
        ]
        
        req_counter = 0
        for pattern in req_section_patterns:
            sections = re.findall(pattern, text, re.IGNORECASE | re.MULTILINE)
            for section in sections:
                lines = section.strip().split('\n')
                for line in lines:
                    line = line.strip()
                    # Remove common list markers
                    line = re.sub(r'^[\d\-•*]+\s*', '', line)
                    
                    if len(line) > 10 and line not in seen_requirements:
                        seen_requirements.add(line)
                        req_counter += 1
                        
                        # Determine requirement type
                        req_type = "Functional"
                        line_lower = line.lower()
                        if any(word in line_lower for word in ['technical', 'technology', 'system', 'architecture']):
                            req_type = "Technical"
                        elif any(word in line_lower for word in ['compliance', 'regulation', 'standard', 'certification']):
                            req_type = "Compliance"
                        elif any(word in line_lower for word in ['performance', 'speed', 'capacity', 'scalability']):
                            req_type = "Performance"
                        elif any(word in line_lower for word in ['business', 'process', 'workflow']):
                            req_type = "Business"
                        
                        # Determine priority
                        priority = "Nice to Have"
                        if any(word in line_lower for word in ['must', 'mandatory', 'required', 'critical', 'essential']):
                            priority = "Must Have"
                        elif any(word in line_lower for word in ['should', 'important', 'needed']):
                            priority = "Nice to Have"
                        elif any(word in line_lower for word in ['optional', 'desirable', 'preferred', 'could']):
                            priority = "Optional"
                        
                        # Assess our capability
                        capability = "Partial"
                        if any(word in line_lower for word in ['we have', 'available', 'existing', 'current', 'already']):
                            capability = "Full"
                        elif any(word in line_lower for word in ['developing', 'building', 'in progress']):
                            capability = "In Development"
                        elif any(word in line_lower for word in ['missing', 'gap', 'need', 'require', 'cannot']):
                            capability = "None"
                        
                        requirement = CustomerRequirement(
                            requirement_id=f"REQ-{req_counter:03d}",
                            requirement_type=req_type,
                            description=line[:200],
                            priority=priority,
                            our_capability=capability,
                            gap_description="Gap exists - needs development" if capability in ["None", "Partial"] else "",
                            effort_to_meet="1-2 weeks" if capability == "Partial" else "2-4 weeks" if capability == "None" else "Already met",
                            notes="Extracted from document analysis"
                        )
                        requirements.append(requirement)
        
        # Also look for specific requirement patterns
        req_patterns = [
            r'(?:The client requires?|Customer needs?|Must provide)\s+([^.;\n]+)',
            r'(?:System must|Solution should|Platform needs to)\s+([^.;\n]+)',
            r'(?:Requirement \d+\s*[:–]|REQ\d+\s*[:–])\s*([^.;\n]+)',
            r'(?:Functional requirement|Technical requirement)\s*[:–]\s*([^.;\n]+)',
        ]
        
        for pattern in req_patterns:
            matches = re.findall(pattern, text, re.IGNORECASE)
            for match in matches[:15]:
                req_desc = match.strip()
                if len(req_desc) > 10 and req_desc not in seen_requirements:
                    seen_requirements.add(req_desc)
                    req_counter += 1
                    
                    requirements.append(CustomerRequirement(
                        requirement_id=f"REQ-{req_counter:03d}",
                        requirement_type="Functional",
                        description=req_desc[:200],
                        priority="Nice to Have",
                        our_capability="Partial",
                        gap_description="To be assessed",
                        effort_to_meet="TBD",
                        notes="Identified from pattern matching"
                    ))
        
        return requirements
    
    def parse_win_loss_enhanced(self, text: str) -> List[WinLossFactors]:
        """Extract win and loss factors from free-form win/loss analysis text.

        Two passes are made over the text; both share one de-duplication set,
        so an identical string is only reported once.

        1. Section pass: a heading such as "Why we won" or "Weaknesses"
           followed by a line break opens a section that runs to the next
           blank line (or to the end of the text). Every line of the section
           longer than 10 characters becomes a "High" impact factor whose
           category is chosen by keyword ("Technical" when nothing matches).
        2. Phrase pass: inline phrases such as "Won because ..." or
           "We lack ..." become "Medium" impact factors in the "Technical"
           category, using at most the first 10 matches of each pattern.

        Args:
            text: Raw text of the win/loss document.

        Returns:
            List of WinLossFactors in the order: win sections, loss sections,
            win phrases, loss phrases. Descriptions are cut to 150 characters.
        """
        factors = []
        seen_factors = set()  # To avoid duplicates

        # The section patterns are anchored on line breaks, so that pass needs
        # a copy of the text whose line structure is intact. (Flattening all
        # whitespace first, as was done before, made those patterns unmatchable.)
        # Each line is squeezed and trimmed; whitespace-only lines become empty
        # so that "\n\n" reliably marks a blank line.
        structured_text = '\n'.join(
            re.sub(r'\s+', ' ', raw_line).strip() for raw_line in text.splitlines()
        )
        # Re-join words hyphenated across a wrap, without merging paragraphs.
        structured_text = re.sub(r'(\w)-[ \n](\w)', r'\1\2', structured_text)

        # The phrase patterns work on fully flattened text.
        flat_text = re.sub(r'\s+', ' ', text)
        flat_text = re.sub(r'(\w)-\s+(\w)', r'\1\2', flat_text)

        # Look for win/loss sections
        win_section_patterns = [
            r'(?:Why we won|Success factors?|Winning factors?|Strengths?)[:\s]*\n([^=]+?)(?:\n\n|$)',
            r'(?:Competitive advantages?|Key differentiators?)[:\s]*\n([^=]+?)(?:\n\n|$)'
        ]

        loss_section_patterns = [
            r'(?:Why we lost|Loss factors?|Failure reasons?|Weaknesses?)[:\s]*\n([^=]+?)(?:\n\n|$)',
            r'(?:Competitive disadvantages?|Areas for improvement)[:\s]*\n([^=]+?)(?:\n\n|$)'
        ]

        # Category keyword tables; the first category with a hit wins.
        win_category_keywords = [
            ("Price", ['price', 'cost', 'budget', 'pricing']),
            ("Relationship", ['relationship', 'trust', 'partnership', 'client']),
            ("Process", ['process', 'delivery', 'implementation', 'methodology']),
            ("Competition", ['competition', 'competitor', 'competitive']),
        ]
        loss_category_keywords = [
            ("Price", ['price', 'cost', 'budget', 'expensive']),
            ("Relationship", ['relationship', 'trust', 'communication']),
            ("Process", ['process', 'delivery', 'delay', 'timeline']),
            ("Competition", ['competition', 'competitor', 'lost to']),
        ]

        section_specs = [
            ("Win", win_section_patterns, win_category_keywords,
             "Leverage this strength in future RFPs"),
            ("Loss", loss_section_patterns, loss_category_keywords,
             "Address this weakness to improve win rate"),
        ]

        for factor_type, section_patterns, category_keywords, lesson in section_specs:
            for pattern in section_patterns:
                # No re.MULTILINE here: with it, "$" matches at every line end
                # and the lazy capture would stop after a section's first line.
                for section in re.findall(pattern, structured_text, re.IGNORECASE):
                    for line in section.strip().split('\n'):
                        line = line.strip()
                        if len(line) <= 10 or line in seen_factors:
                            continue
                        seen_factors.add(line)

                        # Determine category
                        line_lower = line.lower()
                        category = next(
                            (name for name, words in category_keywords
                             if any(word in line_lower for word in words)),
                            "Technical",
                        )

                        factors.append(WinLossFactors(
                            factor_type=factor_type,
                            category=category,
                            description=line[:150],
                            frequency=1,
                            impact_level="High",
                            lessons_learned=lesson
                        ))

        # Also look for specific patterns
        win_patterns = [
            r'(?:Won|Success|Strength)\s+(?:because|due to|factor)\s*[:–]?\s*([^.;\n]+)',
            r'(?:Client appreciated|Customer liked|Positive feedback on)\s+([^.;\n]+)',
            r'(?:Our advantage|We excel at|Strong in)\s+([^.;\n]+)',
        ]

        loss_patterns = [
            r'(?:Lost|Failed|Weakness)\s+(?:because|due to|factor)\s*[:–]?\s*([^.;\n]+)',
            r'(?:Client concerned about|Customer disliked|Negative feedback on)\s+([^.;\n]+)',
            r'(?:Our weakness|We lack|Need improvement in)\s+([^.;\n]+)',
        ]

        phrase_specs = [
            ("Win", win_patterns, "Continue to leverage this strength"),
            ("Loss", loss_patterns, "Develop strategy to address this issue"),
        ]

        for factor_type, phrase_patterns, lesson in phrase_specs:
            for pattern in phrase_patterns:
                matches = re.findall(pattern, flat_text, re.IGNORECASE)
                for match in matches[:10]:
                    factor_desc = match.strip()
                    if len(factor_desc) > 10 and factor_desc not in seen_factors:
                        seen_factors.add(factor_desc)
                        factors.append(WinLossFactors(
                            factor_type=factor_type,
                            category="Technical",
                            description=factor_desc[:150],
                            frequency=1,
                            impact_level="Medium",
                            lessons_learned=lesson
                        ))

        return factors

In [35]:
class RFPFeasibilityAnalyzer:
    """Main analyzer that determines if we can respond to the RFP.

    Typical use: call ``load_and_analyze_documents`` to parse the input
    reports into modules, gaps, win/loss factors and requirements, then call
    ``calculate_feasibility`` to score them and build the go/no-go assessment.
    """

    # NOTE(review): the next notebook cell defines a class with this same name;
    # when the cells run in order, that later definition is the one in effect.

    def __init__(self):
        """Create the document processor and empty containers for parsed data."""
        self.processor = DocumentProcessor()
        self.modules = []             # set from the 'module_matching' document
        self.gaps = []                # set from the 'gap_analysis' document
        self.win_loss_factors = []    # set from the 'win_loss' document
        self.requirements = []        # set from the 'customer_needs' document
        self.document_summaries = {}  # doc_type -> {'length', 'preview', 'sentences'}

    def load_and_analyze_documents(self, file_paths: Dict[str, str]) -> Dict[str, Any]:
        """Load documents and extract structured information.

        Args:
            file_paths: Maps a document type to the path of its file. The types
                'module_matching', 'gap_analysis', 'win_loss' and
                'customer_needs' are parsed into the matching attribute; any
                other type is only summarised.

        Returns:
            Dict with 'documents_processed' (files that yielded text),
            'total_content_length' (characters extracted in total) and
            'extraction_summary' (item counts keyed by 'modules', 'gaps',
            'win_loss_factors' and 'requirements').
        """
        results = {
            'documents_processed': 0,
            'total_content_length': 0,
            'extraction_summary': {}
        }

        for doc_type, file_path in file_paths.items():
            print(f"\n📄 Analyzing {doc_type}...")
            text = self.processor.extract_text_from_file(file_path)

            # A document with no extractable text is reported and skipped;
            # the attribute it would have filled keeps its previous value.
            if not text:
                print(f"  ⚠️  Warning: Could not extract text from {file_path}")
                continue

            print(f"  ✓ Extracted {len(text)} characters")
            results['documents_processed'] += 1
            results['total_content_length'] += len(text)

            # Store document summary
            self.document_summaries[doc_type] = {
                'length': len(text),
                'preview': text[:500],
                'sentences': len(sent_tokenize(text))
            }

            # Extract information based on document type
            if doc_type == 'module_matching':
                self.modules = self.processor.parse_modules_enhanced(text)
                results['extraction_summary']['modules'] = len(self.modules)
                print(f"  ✓ Extracted {len(self.modules)} modules")

            elif doc_type == 'gap_analysis':
                self.gaps = self.processor.parse_gaps_enhanced(text)
                results['extraction_summary']['gaps'] = len(self.gaps)
                print(f"  ✓ Extracted {len(self.gaps)} gaps")

            elif doc_type == 'win_loss':
                self.win_loss_factors = self.processor.parse_win_loss_enhanced(text)
                results['extraction_summary']['win_loss_factors'] = len(self.win_loss_factors)
                print(f"  ✓ Extracted {len(self.win_loss_factors)} win/loss factors")

            elif doc_type == 'customer_needs':
                self.requirements = self.processor.parse_requirements_enhanced(text)
                results['extraction_summary']['requirements'] = len(self.requirements)
                print(f"  ✓ Extracted {len(self.requirements)} requirements")

        return results

    def calculate_feasibility(self) -> RFPFeasibilityAssessment:
        """Calculate overall feasibility of responding to RFP.

        The confidence score is the sum of four components:

        - module coverage (percent) x 0.3
        - must-have requirement coverage (percent) x 0.3
        - gap component: max(0, 20 - penalty), where each Critical/High gap
          costs 10, each Medium gap 5 and each Low gap 2
        - win/loss component: 20, 15, 10 or 5 points depending on the ratio of
          win factors to (loss factors + 1)

        The decision is GO when the score is at least 60 and there are at most
        two Critical/High gaps. Win probability is the score scaled by 0.8 for
        GO and by 0.5 otherwise.

        Returns:
            RFPFeasibilityAssessment holding the decision, the scores, SWOT
            lists, required actions, risks, estimates and the report texts.
            Safe to call when no modules were parsed: all module percentages
            are then reported as 0.
        """

        # Module analysis
        total_modules = len(self.modules)
        # NOTE(review): "Partial" modules are counted as available here, so the
        # Available and Partial figures in the reports overlap - confirm intent.
        available_modules = len([m for m in self.modules if m.status in ["Available", "Partial"]])
        partial_modules = len([m for m in self.modules if m.status == "Partial"])
        missing_modules = len([m for m in self.modules if m.status in ["Missing", "Unknown"]])

        def module_share(count: int) -> float:
            """Return count as a percentage of all modules (0.0 when there are none)."""
            return (count / total_modules * 100) if total_modules > 0 else 0.0

        module_coverage = module_share(available_modules)

        # Gap analysis
        critical_gaps = [g for g in self.gaps if g.severity in ['Critical', 'High']]
        medium_gaps = [g for g in self.gaps if g.severity == 'Medium']
        low_gaps = [g for g in self.gaps if g.severity == 'Low']

        # Requirements analysis (no must-haves at all counts as fully covered)
        must_have_reqs = [r for r in self.requirements if r.priority == 'Must Have']
        met_must_haves = [r for r in must_have_reqs if r.our_capability in ['Full', 'Partial']]
        requirement_coverage = (len(met_must_haves) / len(must_have_reqs) * 100) if must_have_reqs else 100

        # Win/Loss analysis
        win_factors = [f for f in self.win_loss_factors if f.factor_type == 'Win']
        loss_factors = [f for f in self.win_loss_factors if f.factor_type == 'Loss']
        win_loss_ratio = len(win_factors) / (len(loss_factors) + 1)  # +1 to avoid division by zero

        # Calculate confidence score (0-100)
        confidence_score = 0

        # Module coverage (30% weight)
        confidence_score += module_coverage * 0.3

        # Requirement coverage (30% weight)
        confidence_score += requirement_coverage * 0.3

        # Gap severity (20% weight - inverse)
        gap_penalty = len(critical_gaps) * 10 + len(medium_gaps) * 5 + len(low_gaps) * 2
        confidence_score += max(0, 20 - gap_penalty)

        # Win/Loss history (20% weight)
        if win_loss_ratio > 2:
            confidence_score += 20
        elif win_loss_ratio > 1:
            confidence_score += 15
        elif win_loss_ratio > 0.5:
            confidence_score += 10
        else:
            confidence_score += 5

        # Determine if we can respond
        can_respond = confidence_score >= 60 and len(critical_gaps) <= 2

        # Calculate win probability (includes market factors)
        win_probability = confidence_score * 0.8 if can_respond else confidence_score * 0.5

        # Generate SWOT analysis
        strengths = [
            f"Strong win/loss ratio: {win_loss_ratio:.1f}" if win_loss_ratio > 1 else None,
            f"{available_modules} modules already available" if available_modules > 0 else None,
            f"{len(met_must_haves)} must-have requirements already met" if met_must_haves else None,
        ] + [f.description for f in win_factors[:3]]
        strengths = [s for s in strengths if s]  # Remove None values

        weaknesses = [
            f"{missing_modules} modules missing or unknown" if missing_modules > 0 else None,
            f"{len(critical_gaps)} critical gaps identified" if critical_gaps else None,
            f"Low requirement coverage: {requirement_coverage:.0f}%" if requirement_coverage < 70 else None,
        ] + [f.description for f in loss_factors[:3]]
        weaknesses = [w for w in weaknesses if w]  # Remove None values

        opportunities = [
            "Quick wins possible with partial modules" if partial_modules > 0 else None,
            "Experience from previous wins can be leveraged" if win_factors else None,
            "Gap mitigation strategies identified" if self.gaps else None,
        ]
        opportunities = [o for o in opportunities if o]

        threats = [
            "Critical gaps may disqualify response" if critical_gaps else None,
            "Previous loss factors still relevant" if loss_factors else None,
            "Significant development effort required" if missing_modules > total_modules * 0.3 else None,
        ]
        threats = [t for t in threats if t]

        # Generate required actions as (action, priority, effort, owner) tuples
        required_actions = []

        # Add actions for critical gaps
        for gap in critical_gaps[:3]:
            required_actions.append((
                f"Address {gap.gap_type} gap: {gap.description[:50]}...",
                gap.severity,
                gap.effort_required,
                "Gap Team"
            ))

        # Add actions for missing must-have requirements
        # NOTE(review): 'Partial' requirements count as unmet here and as met
        # above, so the two lists overlap - confirm intent.
        unmet_must_haves = [r for r in must_have_reqs if r.our_capability in ['None', 'Partial']]
        for req in unmet_must_haves[:2]:
            required_actions.append((
                f"Develop capability for {req.requirement_type}: {req.description[:40]}...",
                "High",
                req.effort_to_meet,
                "Development Team"
            ))

        # Add actions for missing modules
        critical_missing_modules = [m for m in self.modules if m.status == "Missing" and m.client_priority == "High"]
        for module in critical_missing_modules[:2]:
            required_actions.append((
                f"Acquire/develop module: {module.module_name}",
                "Critical",
                "2-4 weeks",
                "Technical Team"
            ))

        # Risk assessment
        risks = []
        if len(critical_gaps) > 0:
            risks.append(RiskAssessment(
                risk_type="Technical",
                description=f"{len(critical_gaps)} critical technical gaps may prevent successful delivery",
                probability="High" if len(critical_gaps) > 2 else "Medium",
                impact="High",
                mitigation_plan="Fast-track gap closure with dedicated resources"
            ))

        if module_coverage < 70:
            risks.append(RiskAssessment(
                risk_type="Delivery",
                description=f"Low module coverage ({module_coverage:.0f}%) risks delivery timeline",
                probability="High",
                impact="Medium",
                mitigation_plan="Partner or acquire missing modules"
            ))

        if requirement_coverage < 80:
            risks.append(RiskAssessment(
                risk_type="Commercial",
                description="Requirement gaps may impact client satisfaction",
                probability="Medium",
                impact="High",
                mitigation_plan="Clearly communicate development roadmap to client"
            ))

        # Investment and timeline estimates
        investment_parts = []
        timeline_parts = []

        if missing_modules > 0:
            investment_parts.append(f"${missing_modules * 50}K-${missing_modules * 100}K for module development")
            timeline_parts.append(f"{missing_modules * 2}-{missing_modules * 4} weeks for modules")

        if len(critical_gaps) > 0:
            investment_parts.append(f"${len(critical_gaps) * 25}K-${len(critical_gaps) * 50}K for gap mitigation")
            timeline_parts.append(f"{len(critical_gaps)}-{len(critical_gaps) * 2} weeks for critical gaps")

        investment_required = " + ".join(investment_parts) if investment_parts else "Minimal investment required"
        timeline_estimate = ", ".join(timeline_parts) if timeline_parts else "Can proceed immediately"

        # Resource requirements
        resource_needs = []
        if missing_modules > 5:
            resource_needs.append(f"{missing_modules // 2} developers")
        if len(critical_gaps) > 3:
            resource_needs.append(f"{len(critical_gaps) // 2} architects")
        if len(unmet_must_haves) > 0:
            resource_needs.append("1-2 business analysts")

        resource_requirements = ", ".join(resource_needs) if resource_needs else "Existing team sufficient"

        # Generate detailed rationale. Percentages go through module_share so
        # that an empty module list reports 0.0% instead of dividing by zero.
        detailed_rationale = f"""
## Feasibility Analysis Details

### Module Readiness
- Total Modules Analyzed: {total_modules}
- Available: {available_modules} ({module_share(available_modules):.1f}%)
- Partial: {partial_modules} ({module_share(partial_modules):.1f}%)
- Missing: {missing_modules} ({module_share(missing_modules):.1f}%)

### Requirements Coverage
- Must-Have Requirements: {len(must_have_reqs)}
- Currently Met: {len(met_must_haves)} ({requirement_coverage:.1f}%)
- Gap to Close: {len(unmet_must_haves)} requirements

### Risk Assessment
- Critical Gaps: {len(critical_gaps)}
- Total Risks Identified: {len(risks)}
- Highest Risk Area: {"Technical" if len(critical_gaps) > 2 else "Delivery" if module_coverage < 70 else "Commercial"}

### Historical Performance
- Win/Loss Ratio: {win_loss_ratio:.2f}
- Key Success Factors: {len(win_factors)}
- Known Challenges: {len(loss_factors)}

### Investment & Timeline
- Estimated Investment: {investment_required}
- Timeline to RFP-Ready: {timeline_estimate}
- Resources Needed: {resource_requirements}
        """.strip()

        # Competitive positioning
        competitive_positioning = f"""
Based on our analysis:
- Market Position: {"Strong" if win_loss_ratio > 1.5 else "Average" if win_loss_ratio > 0.7 else "Weak"}
- Key Differentiators: {', '.join([s[:30] + '...' for s in strengths[:3]]) if strengths else 'Limited differentiators identified'}
- Competitive Gaps: {', '.join([w[:30] + '...' for w in weaknesses[:3]]) if weaknesses else 'No major gaps identified'}
        """.strip()

        # Executive summary
        executive_summary = f"""
## RFP Response Feasibility Assessment

**Decision: {"GO - Proceed with Response" if can_respond else "NO-GO - Do Not Proceed"}**

### Key Metrics:
- Overall Confidence: {confidence_score:.1f}%
- Win Probability: {win_probability:.1f}%
- Module Coverage: {module_coverage:.1f}%
- Requirement Coverage: {requirement_coverage:.1f}%

### Summary:
Based on analysis of {len(self.document_summaries)} documents containing {sum(d['length'] for d in self.document_summaries.values())} characters of content:

- **Modules**: {available_modules}/{total_modules} available ({module_coverage:.0f}% coverage)
- **Requirements**: {len(met_must_haves)}/{len(must_have_reqs)} must-haves met ({requirement_coverage:.0f}% coverage)
- **Gaps**: {len(critical_gaps)} critical, {len(medium_gaps)} medium, {len(low_gaps)} low severity
- **Win/Loss**: Historical ratio of {win_loss_ratio:.1f} based on {len(self.win_loss_factors)} factors

### Recommendation:
{"✅ **Proceed with RFP response.** While there are gaps to address, our historical performance and current capabilities provide a solid foundation for a competitive response." if can_respond else "❌ **Do not proceed without significant preparation.** Critical gaps and low coverage indicate high risk of unsuccessful response or delivery failure."}

### Next Steps:
1. {required_actions[0][0] if required_actions else "Proceed with standard RFP response process"}
2. {required_actions[1][0] if len(required_actions) > 1 else "Allocate resources for response preparation"}
3. {required_actions[2][0] if len(required_actions) > 2 else "Schedule stakeholder review meeting"}
        """.strip()

        return RFPFeasibilityAssessment(
            can_respond=can_respond,
            confidence_score=confidence_score,
            win_probability=win_probability,
            total_modules=total_modules,
            available_modules=available_modules,
            partial_modules=partial_modules,
            missing_modules=missing_modules,
            critical_gaps=critical_gaps,
            all_gaps=self.gaps,
            strengths=strengths,
            weaknesses=weaknesses,
            opportunities=opportunities,
            threats=threats,
            required_actions=required_actions,
            risks=risks,
            investment_required=investment_required,
            timeline_estimate=timeline_estimate,
            resource_requirements=resource_requirements,
            executive_summary=executive_summary,
            detailed_rationale=detailed_rationale,
            competitive_positioning=competitive_positioning
        )

In [36]:
class RFPFeasibilityAnalyzer:
    """Main analyzer that determines if we can respond to the RFP.

    Typical use: call ``load_and_analyze_documents`` to parse the input
    reports into modules, gaps, win/loss factors and requirements, then call
    ``calculate_feasibility`` to score them and build the go/no-go assessment.
    """

    # NOTE(review): the previous notebook cell already defines a class with this
    # name; when the cells run in order, this definition replaces it.

    def __init__(self):
        """Create the document processor and empty containers for parsed data."""
        self.processor = DocumentProcessor()
        self.modules = []             # set from the 'module_matching' document
        self.gaps = []                # set from the 'gap_analysis' document
        self.win_loss_factors = []    # set from the 'win_loss' document
        self.requirements = []        # set from the 'customer_needs' document
        self.document_summaries = {}  # doc_type -> {'length', 'preview', 'sentences'}

    def load_and_analyze_documents(self, file_paths: Dict[str, str]) -> Dict[str, Any]:
        """Load documents and extract structured information.

        Args:
            file_paths: Maps a document type to the path of its file. The types
                'module_matching', 'gap_analysis', 'win_loss' and
                'customer_needs' are parsed into the matching attribute; any
                other type is only summarised.

        Returns:
            Dict with 'documents_processed' (files that yielded text),
            'total_content_length' (characters extracted in total) and
            'extraction_summary' (item counts keyed by 'modules', 'gaps',
            'win_loss_factors' and 'requirements').
        """
        results = {
            'documents_processed': 0,
            'total_content_length': 0,
            'extraction_summary': {}
        }

        for doc_type, file_path in file_paths.items():
            print(f"\n📄 Analyzing {doc_type}...")
            text = self.processor.extract_text_from_file(file_path)

            # A document with no extractable text is reported and skipped;
            # the attribute it would have filled keeps its previous value.
            if not text:
                print(f"  ⚠️  Warning: Could not extract text from {file_path}")
                continue

            print(f"  ✓ Extracted {len(text)} characters")
            results['documents_processed'] += 1
            results['total_content_length'] += len(text)

            # Store document summary
            self.document_summaries[doc_type] = {
                'length': len(text),
                'preview': text[:500],
                'sentences': len(sent_tokenize(text))
            }

            # Extract information based on document type
            if doc_type == 'module_matching':
                self.modules = self.processor.parse_modules_enhanced(text)
                results['extraction_summary']['modules'] = len(self.modules)
                print(f"  ✓ Extracted {len(self.modules)} modules")

            elif doc_type == 'gap_analysis':
                self.gaps = self.processor.parse_gaps_enhanced(text)
                results['extraction_summary']['gaps'] = len(self.gaps)
                print(f"  ✓ Extracted {len(self.gaps)} gaps")

            elif doc_type == 'win_loss':
                self.win_loss_factors = self.processor.parse_win_loss_enhanced(text)
                results['extraction_summary']['win_loss_factors'] = len(self.win_loss_factors)
                print(f"  ✓ Extracted {len(self.win_loss_factors)} win/loss factors")

            elif doc_type == 'customer_needs':
                self.requirements = self.processor.parse_requirements_enhanced(text)
                results['extraction_summary']['requirements'] = len(self.requirements)
                print(f"  ✓ Extracted {len(self.requirements)} requirements")

        return results

    def calculate_feasibility(self) -> RFPFeasibilityAssessment:
        """Calculate overall feasibility of responding to RFP.

        The confidence score is the sum of four components:

        - module coverage (percent) x 0.3
        - must-have requirement coverage (percent) x 0.3
        - gap component: max(0, 20 - penalty), where each Critical/High gap
          costs 10, each Medium gap 5 and each Low gap 2
        - win/loss component: 20, 15, 10 or 5 points depending on the ratio of
          win factors to (loss factors + 1)

        The decision is GO when the score is at least 60 and there are at most
        two Critical/High gaps. Win probability is the score scaled by 0.8 for
        GO and by 0.5 otherwise.

        Returns:
            RFPFeasibilityAssessment holding the decision, the scores, SWOT
            lists, required actions, risks, estimates and the report texts.
            Safe to call when no modules were parsed: all module percentages
            are then reported as 0.
        """

        # Module analysis
        total_modules = len(self.modules)
        # NOTE(review): "Partial" modules are counted as available here, so the
        # Available and Partial figures in the reports overlap - confirm intent.
        available_modules = len([m for m in self.modules if m.status in ["Available", "Partial"]])
        partial_modules = len([m for m in self.modules if m.status == "Partial"])
        missing_modules = len([m for m in self.modules if m.status in ["Missing", "Unknown"]])

        def module_share(count: int) -> float:
            """Return count as a percentage of all modules (0.0 when there are none)."""
            return (count / total_modules * 100) if total_modules > 0 else 0.0

        module_coverage = module_share(available_modules)

        # Gap analysis
        critical_gaps = [g for g in self.gaps if g.severity in ['Critical', 'High']]
        medium_gaps = [g for g in self.gaps if g.severity == 'Medium']
        low_gaps = [g for g in self.gaps if g.severity == 'Low']

        # Requirements analysis (no must-haves at all counts as fully covered)
        must_have_reqs = [r for r in self.requirements if r.priority == 'Must Have']
        met_must_haves = [r for r in must_have_reqs if r.our_capability in ['Full', 'Partial']]
        requirement_coverage = (len(met_must_haves) / len(must_have_reqs) * 100) if must_have_reqs else 100

        # Win/Loss analysis
        win_factors = [f for f in self.win_loss_factors if f.factor_type == 'Win']
        loss_factors = [f for f in self.win_loss_factors if f.factor_type == 'Loss']
        win_loss_ratio = len(win_factors) / (len(loss_factors) + 1)  # +1 to avoid division by zero

        # Calculate confidence score (0-100)
        confidence_score = 0

        # Module coverage (30% weight)
        confidence_score += module_coverage * 0.3

        # Requirement coverage (30% weight)
        confidence_score += requirement_coverage * 0.3

        # Gap severity (20% weight - inverse)
        gap_penalty = len(critical_gaps) * 10 + len(medium_gaps) * 5 + len(low_gaps) * 2
        confidence_score += max(0, 20 - gap_penalty)

        # Win/Loss history (20% weight)
        if win_loss_ratio > 2:
            confidence_score += 20
        elif win_loss_ratio > 1:
            confidence_score += 15
        elif win_loss_ratio > 0.5:
            confidence_score += 10
        else:
            confidence_score += 5

        # Determine if we can respond
        can_respond = confidence_score >= 60 and len(critical_gaps) <= 2

        # Calculate win probability (includes market factors)
        win_probability = confidence_score * 0.8 if can_respond else confidence_score * 0.5

        # Generate SWOT analysis
        strengths = [
            f"Strong win/loss ratio: {win_loss_ratio:.1f}" if win_loss_ratio > 1 else None,
            f"{available_modules} modules already available" if available_modules > 0 else None,
            f"{len(met_must_haves)} must-have requirements already met" if met_must_haves else None,
        ] + [f.description for f in win_factors[:3]]
        strengths = [s for s in strengths if s]  # Remove None values

        weaknesses = [
            f"{missing_modules} modules missing or unknown" if missing_modules > 0 else None,
            f"{len(critical_gaps)} critical gaps identified" if critical_gaps else None,
            f"Low requirement coverage: {requirement_coverage:.0f}%" if requirement_coverage < 70 else None,
        ] + [f.description for f in loss_factors[:3]]
        weaknesses = [w for w in weaknesses if w]  # Remove None values

        opportunities = [
            "Quick wins possible with partial modules" if partial_modules > 0 else None,
            "Experience from previous wins can be leveraged" if win_factors else None,
            "Gap mitigation strategies identified" if self.gaps else None,
        ]
        opportunities = [o for o in opportunities if o]

        threats = [
            "Critical gaps may disqualify response" if critical_gaps else None,
            "Previous loss factors still relevant" if loss_factors else None,
            "Significant development effort required" if missing_modules > total_modules * 0.3 else None,
        ]
        threats = [t for t in threats if t]

        # Generate required actions as (action, priority, effort, owner) tuples
        required_actions = []

        # Add actions for critical gaps
        for gap in critical_gaps[:3]:
            required_actions.append((
                f"Address {gap.gap_type} gap: {gap.description[:50]}...",
                gap.severity,
                gap.effort_required,
                "Gap Team"
            ))

        # Add actions for missing must-have requirements
        # NOTE(review): 'Partial' requirements count as unmet here and as met
        # above, so the two lists overlap - confirm intent.
        unmet_must_haves = [r for r in must_have_reqs if r.our_capability in ['None', 'Partial']]
        for req in unmet_must_haves[:2]:
            required_actions.append((
                f"Develop capability for {req.requirement_type}: {req.description[:40]}...",
                "High",
                req.effort_to_meet,
                "Development Team"
            ))

        # Add actions for missing modules
        critical_missing_modules = [m for m in self.modules if m.status == "Missing" and m.client_priority == "High"]
        for module in critical_missing_modules[:2]:
            required_actions.append((
                f"Acquire/develop module: {module.module_name}",
                "Critical",
                "2-4 weeks",
                "Technical Team"
            ))

        # Risk assessment
        risks = []
        if len(critical_gaps) > 0:
            risks.append(RiskAssessment(
                risk_type="Technical",
                description=f"{len(critical_gaps)} critical technical gaps may prevent successful delivery",
                probability="High" if len(critical_gaps) > 2 else "Medium",
                impact="High",
                mitigation_plan="Fast-track gap closure with dedicated resources"
            ))

        if module_coverage < 70:
            risks.append(RiskAssessment(
                risk_type="Delivery",
                description=f"Low module coverage ({module_coverage:.0f}%) risks delivery timeline",
                probability="High",
                impact="Medium",
                mitigation_plan="Partner or acquire missing modules"
            ))

        if requirement_coverage < 80:
            risks.append(RiskAssessment(
                risk_type="Commercial",
                description="Requirement gaps may impact client satisfaction",
                probability="Medium",
                impact="High",
                mitigation_plan="Clearly communicate development roadmap to client"
            ))

        # Investment and timeline estimates
        investment_parts = []
        timeline_parts = []

        if missing_modules > 0:
            investment_parts.append(f"${missing_modules * 50}K-${missing_modules * 100}K for module development")
            timeline_parts.append(f"{missing_modules * 2}-{missing_modules * 4} weeks for modules")

        if len(critical_gaps) > 0:
            investment_parts.append(f"${len(critical_gaps) * 25}K-${len(critical_gaps) * 50}K for gap mitigation")
            timeline_parts.append(f"{len(critical_gaps)}-{len(critical_gaps) * 2} weeks for critical gaps")

        investment_required = " + ".join(investment_parts) if investment_parts else "Minimal investment required"
        timeline_estimate = ", ".join(timeline_parts) if timeline_parts else "Can proceed immediately"

        # Resource requirements
        resource_needs = []
        if missing_modules > 5:
            resource_needs.append(f"{missing_modules // 2} developers")
        if len(critical_gaps) > 3:
            resource_needs.append(f"{len(critical_gaps) // 2} architects")
        if len(unmet_must_haves) > 0:
            resource_needs.append("1-2 business analysts")

        resource_requirements = ", ".join(resource_needs) if resource_needs else "Existing team sufficient"

        # Generate detailed rationale. Percentages go through module_share so
        # that an empty module list reports 0.0% instead of dividing by zero.
        detailed_rationale = f"""
## Feasibility Analysis Details

### Module Readiness
- Total Modules Analyzed: {total_modules}
- Available: {available_modules} ({module_share(available_modules):.1f}%)
- Partial: {partial_modules} ({module_share(partial_modules):.1f}%)
- Missing: {missing_modules} ({module_share(missing_modules):.1f}%)

### Requirements Coverage
- Must-Have Requirements: {len(must_have_reqs)}
- Currently Met: {len(met_must_haves)} ({requirement_coverage:.1f}%)
- Gap to Close: {len(unmet_must_haves)} requirements

### Risk Assessment
- Critical Gaps: {len(critical_gaps)}
- Total Risks Identified: {len(risks)}
- Highest Risk Area: {"Technical" if len(critical_gaps) > 2 else "Delivery" if module_coverage < 70 else "Commercial"}

### Historical Performance
- Win/Loss Ratio: {win_loss_ratio:.2f}
- Key Success Factors: {len(win_factors)}
- Known Challenges: {len(loss_factors)}

### Investment & Timeline
- Estimated Investment: {investment_required}
- Timeline to RFP-Ready: {timeline_estimate}
- Resources Needed: {resource_requirements}
        """.strip()

        # Competitive positioning
        competitive_positioning = f"""
Based on our analysis:
- Market Position: {"Strong" if win_loss_ratio > 1.5 else "Average" if win_loss_ratio > 0.7 else "Weak"}
- Key Differentiators: {', '.join([s[:30] + '...' for s in strengths[:3]]) if strengths else 'Limited differentiators identified'}
- Competitive Gaps: {', '.join([w[:30] + '...' for w in weaknesses[:3]]) if weaknesses else 'No major gaps identified'}
        """.strip()

        # Executive summary
        executive_summary = f"""
## RFP Response Feasibility Assessment

**Decision: {"GO - Proceed with Response" if can_respond else "NO-GO - Do Not Proceed"}**

### Key Metrics:
- Overall Confidence: {confidence_score:.1f}%
- Win Probability: {win_probability:.1f}%
- Module Coverage: {module_coverage:.1f}%
- Requirement Coverage: {requirement_coverage:.1f}%

### Summary:
Based on analysis of {len(self.document_summaries)} documents containing {sum(d['length'] for d in self.document_summaries.values())} characters of content:

- **Modules**: {available_modules}/{total_modules} available ({module_coverage:.0f}% coverage)
- **Requirements**: {len(met_must_haves)}/{len(must_have_reqs)} must-haves met ({requirement_coverage:.0f}% coverage)
- **Gaps**: {len(critical_gaps)} critical, {len(medium_gaps)} medium, {len(low_gaps)} low severity
- **Win/Loss**: Historical ratio of {win_loss_ratio:.1f} based on {len(self.win_loss_factors)} factors

### Recommendation:
{"✅ **Proceed with RFP response.** While there are gaps to address, our historical performance and current capabilities provide a solid foundation for a competitive response." if can_respond else "❌ **Do not proceed without significant preparation.** Critical gaps and low coverage indicate high risk of unsuccessful response or delivery failure."}

### Next Steps:
1. {required_actions[0][0] if required_actions else "Proceed with standard RFP response process"}
2. {required_actions[1][0] if len(required_actions) > 1 else "Allocate resources for response preparation"}
3. {required_actions[2][0] if len(required_actions) > 2 else "Schedule stakeholder review meeting"}
        """.strip()

        return RFPFeasibilityAssessment(
            can_respond=can_respond,
            confidence_score=confidence_score,
            win_probability=win_probability,
            total_modules=total_modules,
            available_modules=available_modules,
            partial_modules=partial_modules,
            missing_modules=missing_modules,
            critical_gaps=critical_gaps,
            all_gaps=self.gaps,
            strengths=strengths,
            weaknesses=weaknesses,
            opportunities=opportunities,
            threats=threats,
            required_actions=required_actions,
            risks=risks,
            investment_required=investment_required,
            timeline_estimate=timeline_estimate,
            resource_requirements=resource_requirements,
            executive_summary=executive_summary,
            detailed_rationale=detailed_rationale,
            competitive_positioning=competitive_positioning
        )

In [37]:
def main(file_paths: Optional[Dict[str, str]] = None,
         output_path: str = "/kaggle/working/rfp_feasibility_report_enhanced.pdf"):
    """Run the end-to-end RFP feasibility analysis and write the PDF report.

    The function prints a banner, loads the input documents through
    ``RFPFeasibilityAnalyzer.load_and_analyze_documents``, computes the
    assessment with ``calculate_feasibility``, prints a console summary and
    finally hands everything to ``RFPFeasibilityReportGenerator.generate_report``.

    Args:
        file_paths: Mapping of document type to PDF path. When ``None`` the
            four Kaggle input reports (module matching, win/loss, gap
            analysis, customer needs) are used.
        output_path: Destination of the generated PDF report.

    Returns:
        None. All results are printed; the report is written by the generator.

    Notes:
        Any exception raised during analysis or report generation is caught,
        reported with a traceback, and not re-raised, so a notebook run
        continues past a failed analysis.
    """
    # Start the clock up front so the reported duration covers the whole run.
    started_at = datetime.now()

    print("=" * 70)
    print("RFP RESPONSE FEASIBILITY ANALYZER v2.0")
    print("=" * 70)
    print("Comprehensive document analysis for RFP go/no-go decision making")
    print("-" * 70)

    # Initialize the analyzer
    analyzer = RFPFeasibilityAnalyzer()

    # Fall back to the default Kaggle dataset layout when no paths are given.
    # The dict is built here (not as a default argument) to avoid sharing a
    # mutable default between calls.
    if file_paths is None:
        file_paths = {
            'module_matching': '/kaggle/input/input-reports-fr-agent/module-matching-report.pdf',
            'win_loss': '/kaggle/input/input-reports-fr-agent/win_loss_analysis.pdf',
            'gap_analysis': '/kaggle/input/input-reports-fr-agent/gap_analysis_report.pdf',
            'customer_needs': '/kaggle/input/input-reports-fr-agent/customer_needs_report.pdf'
        }

    print("\n📁 Input Documents:")
    for doc_type, path in file_paths.items():
        print(f"   • {doc_type.replace('_', ' ').title()}: {os.path.basename(path)}")

    print("\n" + "-" * 70)
    print("STARTING ANALYSIS...")
    print("-" * 70)

    try:
        # Load and analyze documents
        results = analyzer.load_and_analyze_documents(file_paths)

        print("\n" + "=" * 70)
        print("EXTRACTION SUMMARY")
        print("=" * 70)
        print(f"Documents Processed: {results['documents_processed']}")
        print(f"Total Content Analyzed: {results['total_content_length']:,} characters")
        print("\nExtracted Items:")
        for item_type, count in results['extraction_summary'].items():
            print(f"   • {item_type.replace('_', ' ').title()}: {count}")

        # Calculate feasibility
        print("\n" + "-" * 70)
        print("CALCULATING FEASIBILITY...")
        print("-" * 70)

        assessment = analyzer.calculate_feasibility()

        # Guard against division by zero when no modules were extracted.
        if assessment.total_modules > 0:
            coverage_pct = assessment.available_modules / assessment.total_modules * 100
        else:
            coverage_pct = 0

        # Display results
        print("\n" + "=" * 70)
        print("FEASIBILITY ASSESSMENT RESULTS")
        print("=" * 70)
        print(f"\n{'✅ GO' if assessment.can_respond else '❌ NO-GO'} - {'Proceed with RFP Response' if assessment.can_respond else 'Do Not Proceed'}")
        print(f"\nConfidence Score: {assessment.confidence_score:.1f}%")
        print(f"Win Probability: {assessment.win_probability:.1f}%")
        print(f"\nModule Coverage: {assessment.available_modules}/{assessment.total_modules} ({coverage_pct:.0f}%)")
        print(f"Critical Gaps: {len(assessment.critical_gaps)}")
        print(f"Total Risks: {len(assessment.risks)}")

        # Investment summary
        print(f"\nInvestment Required: {assessment.investment_required}")
        print(f"Timeline Estimate: {assessment.timeline_estimate}")
        print(f"Resources Needed: {assessment.resource_requirements}")

        # Top actions: each entry is unpacked as (action, priority, timeline, owner)
        if assessment.required_actions:
            print("\n🎯 TOP PRIORITY ACTIONS:")
            for i, (action, priority, timeline, owner) in enumerate(assessment.required_actions[:3], 1):
                print(f"   {i}. [{priority}] {action}")
                print(f"      Timeline: {timeline} | Owner: {owner}")

        # Generate PDF report
        print("\n" + "-" * 70)
        print("GENERATING COMPREHENSIVE REPORT...")
        print("-" * 70)

        generator = RFPFeasibilityReportGenerator()
        generator.generate_report(analyzer, assessment, output_path)

        # Real elapsed time. The previous code printed strftime('%S'), i.e.
        # the seconds field of the current clock time, not a duration.
        elapsed_seconds = (datetime.now() - started_at).total_seconds()

        print("\n" + "=" * 70)
        print("✅ ANALYSIS COMPLETE!")
        print("=" * 70)
        print(f"📊 Detailed report saved to: {output_path}")
        print(f"📈 Total analysis time: ~{elapsed_seconds:.1f} seconds")

    except Exception as e:
        # Best-effort run: report the failure with full context but do not
        # re-raise, so the surrounding notebook keeps executing.
        print(f"\n❌ ERROR during analysis: {e}")
        import traceback
        traceback.print_exc()
        print("\nPlease check that all input files exist and are readable.")

# ## 9. Run the Analysis
# Entry point: run the full pipeline only when this code is executed directly
# (script or notebook cell, where __name__ is "__main__"), not when imported.
if __name__ == "__main__":
    main()

RFP RESPONSE FEASIBILITY ANALYZER v2.0
Comprehensive document analysis for RFP go/no-go decision making
----------------------------------------------------------------------

📁 Input Documents:
   • Module Matching: module-matching-report.pdf
   • Win Loss: win_loss_analysis.pdf
   • Gap Analysis: gap_analysis_report.pdf
   • Customer Needs: customer_needs_report.pdf

----------------------------------------------------------------------
STARTING ANALYSIS...
----------------------------------------------------------------------

📄 Analyzing module_matching...
✓ Extracted 17155 characters using pdfplumber
  ✓ Extracted 17141 characters
  ✓ Extracted 11 modules

📄 Analyzing win_loss...
✓ Extracted 11687 characters using pdfplumber
  ✓ Extracted 11669 characters
  ✓ Extracted 1 win/loss factors

📄 Analyzing gap_analysis...
✓ Extracted 3815 characters using pdfplumber
  ✓ Extracted 3813 characters
  ✓ Extracted 0 gaps

📄 Analyzing customer_needs...
✓ Extracted 2804 characters using pdfplu