In [1]:
# Notebook setup cell: fetch the 'punkt_tab' tokenizer data with the NLTK downloader.
import nltk
nltk.download('punkt_tab')

[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\15038\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping tokenizers\punkt_tab.zip.


True

In [None]:
import streamlit as st
import json
import re
from config.config import THEME_COLOR
import nltk
from nltk.corpus import stopwords
from collections import Counter

# Make sure the NLTK data used by the analysis helpers is installed.
# Each resource is looked up individually and downloaded only when it is
# missing, so repeated imports/Streamlit reruns do not invoke the downloader
# once the data is already present locally.
for _nltk_path, _nltk_package in (
    ('tokenizers/punkt_tab', 'punkt_tab'),
    ('tokenizers/punkt', 'punkt'),
    ('corpora/stopwords', 'stopwords'),
):
    try:
        nltk.data.find(_nltk_path)
    except LookupError:
        nltk.download(_nltk_package)

def render_resume_analysis():
    """Render the resume analysis component with adaptive UI.

    Reads ``st.session_state.resume_data`` and renders three tabs:

    * **Overview** - the raw data (dict sections, plain text, or JSON view).
    * **Detailed Analysis** - statistics, action verbs, skills, experience,
      education, contact information and summary extracted from the text.
    * **ATS Optimization** - ATS score, keyword match and recommendations for
      the industry chosen in the selectbox.

    If no resume data is present a warning is shown and nothing else is
    rendered. A string value is parsed as JSON when possible; otherwise it is
    treated as plain text. Returns ``None``.
    """
    import html  # local import: only needed to escape text placed in raw HTML

    if "resume_data" not in st.session_state or not st.session_state.resume_data:
        st.warning("Please upload and analyze your resume first!")
        return
    
    resume_data = st.session_state.resume_data

    def _skill_tags(items):
        """Return escaped ``skill-tag`` spans for every item in *items*."""
        return "".join(
            f'<span class="skill-tag">{html.escape(str(item))}</span>'
            for item in items
        )
    
    # Display header with animation
    st.markdown(f"""
    <style>
    .resume-header {{
        background: linear-gradient(90deg, {THEME_COLOR}10, {THEME_COLOR}30, {THEME_COLOR}10);
        background-size: 200% 100%;
        animation: gradient-animation 3s ease infinite;
        padding: 15px;
        border-radius: 10px;
        text-align: center;
        margin-bottom: 20px;
    }}
    @keyframes gradient-animation {{
        0% {{background-position: 0% 50%}}
        50% {{background-position: 100% 50%}}
        100% {{background-position: 0% 50%}}
    }}
    .section-card {{
        background-color: white;
        border-radius: 10px;
        padding: 15px;
        box-shadow: 0 4px 6px rgba(0,0,0,0.05);
        margin-bottom: 15px;
    }}
    .highlight-text {{
        color: {THEME_COLOR};
        font-weight: bold;
    }}
    .skill-tag {{
        display: inline-block;
        background-color: {THEME_COLOR}20;
        color: {THEME_COLOR};
        padding: 3px 10px;
        border-radius: 15px;
        margin: 5px 5px 5px 0px;
        font-size: 0.9em;
        transition: transform 0.2s;
    }}
    .skill-tag:hover {{
        transform: scale(1.05);
        background-color: {THEME_COLOR}30;
    }}
    .progress-container {{
        width: 100%;
        background-color: #e0e0e0;
        border-radius: 5px;
        margin: 5px 0;
    }}
    .progress-bar {{
        height: 10px;
        background-color: {THEME_COLOR};
        border-radius: 5px;
    }}
    .ats-meter {{
        width: 200px;
        height: 100px;
        position: relative;
        margin: 20px auto;
    }}
    .ats-meter-gauge {{
        width: 100%;
        height: 100%;
        position: relative;
        border-radius: 100px 100px 0 0;
        overflow: hidden;
        background-color: #f3f3f3;
    }}
    .ats-meter-fill {{
        position: absolute;
        bottom: 0;
        left: 0;
        width: 100%;
        border-radius: 0 0 100px 100px;
        transition: height 1s ease-out;
    }}
    .ats-meter-text {{
        position: absolute;
        bottom: -5px;
        left: 50%;
        transform: translateX(-50%);
        font-size: 24px;
        font-weight: bold;
    }}
    .industry-match-card {{
        border-left: 4px solid {THEME_COLOR};
        padding: 10px 15px;
        margin: 10px 0;
        background-color: {THEME_COLOR}10;
    }}
    </style>
    <div class="resume-header">
        <h2>✨ Resume Analysis Results ✨</h2>
    </div>
    """, unsafe_allow_html=True)
    
    # A string may carry JSON; parse it when possible, otherwise keep the text.
    if isinstance(resume_data, str):
        try:
            resume_data = json.loads(resume_data)
        except ValueError:
            # json.JSONDecodeError subclasses ValueError: not JSON, keep as string
            pass
    
    # Plain-text view of the resume shared by the analysis tabs
    resume_text = ""
    if isinstance(resume_data, str):
        resume_text = resume_data
    elif isinstance(resume_data, dict):
        # Convert dict to text for analysis
        resume_text = dict_to_text(resume_data)
    
    # Create tabs for navigation
    tab_titles = ["Overview", "Detailed Analysis", "ATS Optimization"]
    tabs = st.tabs([f"📋 {tab_titles[0]}", f"🔍 {tab_titles[1]}", f"🎯 {tab_titles[2]}"])
    
    # Tab 1: Overview
    with tabs[0]:
        if isinstance(resume_data, dict):
            # Handle dictionary data
            for key, value in resume_data.items():
                with st.expander(f"📄 {key.replace('_', ' ').title()}", expanded=True):
                    render_section(key, value)
        elif isinstance(resume_data, str):
            # Handle string data - try to identify sections
            st.markdown('<div class="section-card">', unsafe_allow_html=True)
            st.markdown(resume_data)
            st.markdown('</div>', unsafe_allow_html=True)
        else:
            # Handle any other data type
            st.json(resume_data)
    
    # Tab 2: Detailed Analysis - ENHANCED
    with tabs[1]:
        st.markdown("### Resume Analysis & Insights")
        
        if resume_text:
            # 1. Content statistics
            st.markdown("#### 📊 Content Statistics")
            words = len(re.findall(r'\b\w+\b', resume_text))
            # "+ 1" keeps both counts at >= 1, so the ratio below cannot divide by zero
            sentences = len(re.findall(r'[.!?]+', resume_text)) + 1
            paragraphs = len(re.findall(r'\n\s*\n', resume_text)) + 1
            
            col1, col2, col3, col4 = st.columns(4)
            col1.metric("Words", words)
            col2.metric("Sentences", sentences)
            col3.metric("Paragraphs", paragraphs)
            col4.metric("Readability", "Good" if 10 <= words/sentences <= 20 else "Review")
            
            # 2. Action Verbs Analysis
            st.markdown("#### 🚀 Action Verbs Analysis")
            
            # Extract action verbs dynamically instead of hardcoding
            action_verbs_found = extract_action_verbs(resume_text)
            
            # Display verb analysis
            if action_verbs_found:
                col1, col2 = st.columns([2, 1])
                
                with col1:
                    st.markdown("##### Action Verbs Used")
                    # Limit to avoid overwhelming
                    st.markdown(_skill_tags(action_verbs_found[:20]), unsafe_allow_html=True)
                
                with col2:
                    st.markdown("##### Top Verbs")
                    verb_count = Counter(action_verbs_found)
                    for verb, count in verb_count.most_common(5):
                        st.markdown(f"**{verb}**: {count} times")
                
                # Action verb diversity score: 5 points per distinct verb, capped at 100
                unique_verbs = len(set(action_verbs_found))
                diversity_score = min(unique_verbs * 5, 100)
                st.markdown(f"""
                <div class="progress-container">
                    <div class="progress-bar" style="width: {diversity_score}%"></div>
                </div>
                <p>Action Verb Diversity: {diversity_score}%</p>
                """, unsafe_allow_html=True)
                
                # Provide feedback based on verb usage
                if diversity_score < 40:
                    st.info("Consider using more diverse action verbs to strengthen your resume.")
                elif diversity_score >= 80:
                    st.success("Excellent use of diverse action verbs!")
            else:
                st.warning("No action verbs detected. Consider adding strong action verbs to your resume.")
            
            # 3. Skills Analysis
            st.markdown("#### 💡 Skills Analysis")
            
            # Extract skills dynamically from text
            all_skills = extract_skills_from_text(resume_text)
            tech_skills = [skill for skill, category in all_skills if category == 'technical']
            soft_skills = [skill for skill, category in all_skills if category == 'soft']
            
            col1, col2 = st.columns(2)
            
            with col1:
                st.markdown("##### Technical Skills")
                if tech_skills:
                    st.markdown(_skill_tags(tech_skills), unsafe_allow_html=True)
                else:
                    st.info("No specific technical skills detected.")
            
            with col2:
                st.markdown("##### Soft Skills")
                if soft_skills:
                    st.markdown(_skill_tags(soft_skills), unsafe_allow_html=True)
                else:
                    st.info("No specific soft skills detected.")
            
            # Skills balance analysis
            if tech_skills or soft_skills:
                tech_count = len(tech_skills)
                soft_count = len(soft_skills)
                total = tech_count + soft_count
                
                if total > 0:
                    tech_percent = (tech_count / total) * 100
                    soft_percent = (soft_count / total) * 100
                    
                    st.markdown("##### Skills Balance")
                    st.markdown(f"""
                    <div class="progress-container">
                        <div class="progress-bar" style="width: {tech_percent}%; background-color: {THEME_COLOR};"></div>
                        <div class="progress-bar" style="width: {soft_percent}%; background-color: #6c757d; margin-left: 2px;"></div>
                    </div>
                    <p>Technical: {tech_percent:.1f}% | Soft: {soft_percent:.1f}%</p>
                    """, unsafe_allow_html=True)
                    
                    # Provide balance feedback
                    if tech_percent > 80:
                        st.info("Your resume is heavily technical. Consider highlighting more soft skills if relevant to your target roles.")
                    elif soft_percent > 80:
                        st.info("Your resume emphasizes soft skills. Consider adding more technical skills if relevant to your target roles.")
                    else:
                        st.success("Good balance between technical and soft skills!")
            
            # 4. Experience Analysis
            st.markdown("#### 👔 Experience Analysis")
            
            # Extract years of experience
            experience_years, experience_details = extract_experience_details(resume_text)
            
            if experience_years:
                st.metric("Estimated Experience", f"{experience_years} years")
                
                # Experience level classification
                if experience_years < 2:
                    level = "Entry Level"
                elif experience_years < 5:
                    level = "Mid Level"
                elif experience_years < 10:
                    level = "Senior Level"
                else:
                    level = "Executive Level"
                    
                st.markdown(f"Based on your experience, your profile appears to be **{level}**.")
                
                # Show extracted experience details
                if experience_details:
                    st.markdown("##### Experience Timeline")
                    for period, details in experience_details.items():
                        st.markdown(f"**{period}**: {details}")
            else:
                st.info("Could not determine years of experience. Ensure your resume includes clear work duration details.")
            
            # 5. Education Analysis
            st.markdown("#### 🎓 Education Analysis")
            education_details = extract_education_dynamic(resume_text)
            
            if education_details:
                st.markdown(f"**Highest Education**: {education_details.get('highest_degree', 'Not specified')}")
                if 'institutions' in education_details and education_details['institutions']:
                    st.markdown("**Institutions:**")
                    for institution in education_details['institutions']:
                        st.markdown(f"- {institution}")
                if 'fields' in education_details and education_details['fields']:
                    st.markdown("**Fields of Study:**")
                    for field in education_details['fields']:
                        st.markdown(f"- {field}")
            else:
                st.info("Education details not clearly identified. Consider structuring your education section more clearly.")
            
            # 6. Contact Information Analysis
            st.markdown("#### 📞 Contact Information")
            contact_info = extract_contact_info(resume_text)
            
            if contact_info:
                for label, value in contact_info.items():
                    st.markdown(f"**{label}**: {value}")
                
                # Check completeness
                essential_contacts = ['Email', 'Phone', 'LinkedIn']
                missing = [item for item in essential_contacts if item not in contact_info]
                
                if missing:
                    st.warning(f"Missing recommended contact details: {', '.join(missing)}")
                else:
                    st.success("All essential contact information is present!")
            else:
                st.warning("No contact information detected. Make sure to include your email and phone number.")
            
            # 7. Resume Summary
            st.markdown("#### 📝 Resume Summary")
            summary = extract_summary(resume_text)
            
            if summary:
                st.markdown(f"**Summary**: {summary}")
            else:
                st.info("No clear summary section detected. A concise professional summary can help highlight your value proposition.")
            
    # Tab 3: ATS Optimization
    with tabs[2]:
        st.markdown("### 🎯 ATS Optimization Analysis")
        
        # Detect industry/field from resume
        detected_industry = detect_industry(resume_text)
        if detected_industry:
            detected_name = detected_industry['name']
            safe_name = html.escape(str(detected_name))
            st.markdown(f"""
            <div class="industry-match-card">
                <h4>Detected Field: {safe_name}</h4>
                <p>Your resume appears to be targeting the <b>{safe_name}</b> field.</p>
            </div>
            """, unsafe_allow_html=True)
            
            # Create field selector for comparison (detected field listed first)
            selected_industry = st.selectbox(
                "Compare with industry requirements:", 
                [detected_name] + get_common_industries(exclude=detected_name)
            )
        else:
            st.warning("Could not automatically detect your target field.")
            selected_industry = st.selectbox(
                "Select your target industry for analysis:", 
                get_common_industries()
            )
        
        # Everything below is computed for the industry chosen in the selectbox,
        # so changing the selection actually changes the comparison.
        ats_score, ats_components = calculate_ats_score(resume_text, selected_industry)
        
        # Display ATS score with a gauge
        score_color = "#28a745" if ats_score >= 80 else "#ffc107" if ats_score >= 60 else "#dc3545"
        st.markdown(f"""
        <div class="ats-meter">
            <div class="ats-meter-gauge">
                <div class="ats-meter-fill" style="height: {ats_score}%; background-color: {score_color};"></div>
            </div>
            <div class="ats-meter-text">{ats_score}%</div>
        </div>
        <h3 style="text-align: center;">ATS Compatibility Score</h3>
        """, unsafe_allow_html=True)
        
        # Display score breakdown, split across two columns
        st.markdown("#### Score Breakdown")
        col1, col2 = st.columns(2)
        component_items = list(ats_components.items())
        half = len(component_items) // 2
        
        with col1:
            for component, details in component_items[:half]:
                st.markdown(f"**{component}**: {details['score']}%")
                st.progress(details['score']/100)
        
        with col2:
            for component, details in component_items[half:]:
                st.markdown(f"**{component}**: {details['score']}%")
                st.progress(details['score']/100)
        
        # Industry match analysis
        st.markdown("#### Industry-Specific Analysis")
        industry_keywords = get_industry_keywords(selected_industry)
        
        # Calculate percentage of industry keywords present
        lowered_text = resume_text.lower()
        found_keywords = [
            keyword for keyword in industry_keywords
            if re.search(r'\b' + re.escape(keyword) + r'\b', lowered_text)
        ]
        
        keyword_match = len(found_keywords) / len(industry_keywords) * 100 if industry_keywords else 0
        
        st.markdown(f"""
        <div class="progress-container">
            <div class="progress-bar" style="width: {keyword_match}%;"></div>
        </div>
        <p>Industry Keyword Match: {keyword_match:.1f}%</p>
        """, unsafe_allow_html=True)
        
        # Show keywords found and missing
        col1, col2 = st.columns(2)
        
        with col1:
            st.markdown("##### ✅ Keywords Found")
            if found_keywords:
                st.markdown(_skill_tags(found_keywords), unsafe_allow_html=True)
            else:
                st.info("No industry-specific keywords found.")
        
        with col2:
            st.markdown("##### ❌ Missing Keywords")
            missing_keywords = [k for k in industry_keywords if k not in found_keywords]
            if missing_keywords:
                # Limit to top 10
                st.markdown(_skill_tags(missing_keywords[:10]), unsafe_allow_html=True)
                if len(missing_keywords) > 10:
                    st.info(f"... and {len(missing_keywords) - 10} more")
            else:
                st.success("Great job! Your resume includes all relevant industry keywords.")
        
        # Recommendations for improvement
        st.markdown("#### Recommendations for Improvement")
        recommendations = generate_ats_recommendations(resume_text, ats_components, selected_industry)
        
        for category, rec_list in recommendations.items():
            with st.expander(f"📝 {category}", expanded=True):
                for recommendation in rec_list:
                    st.markdown(f"- {recommendation}")

        # ATS Format Analysis
        st.markdown("#### 📄 ATS Format Analysis")
        format_issues = analyze_resume_format(resume_text)
        
        if not format_issues:
            st.success("Your resume format appears to be ATS-friendly!")
        else:
            st.warning("Detected format issues that might affect ATS compatibility:")
            for issue in format_issues:
                st.markdown(f"- {issue}")
        
        # File format recommendation
        st.info("**Pro Tip**: Save your resume as a .docx or .pdf file with simple formatting for best ATS compatibility.")
        
        # ATS Keyword Position Analysis
        st.markdown("#### 📊 Keyword Position Analysis")
        position_analysis = analyze_keyword_positions(resume_text, found_keywords)
        
        if position_analysis.get('score', 0) >= 70:
            st.success(f"Keyword positioning score: {position_analysis.get('score')}% - Good placement!")
        else:
            st.warning(f"Keyword positioning score: {position_analysis.get('score')}% - Could be improved")
            
        st.markdown(position_analysis.get('suggestion', ''))
    
    # Add action buttons at the bottom
    col1, col2 = st.columns(2)
    with col1:
        st.download_button(
            label="📥 Download Analysis",
            data=json.dumps(resume_data, indent=2) if isinstance(resume_data, dict) else str(resume_data),
            file_name="resume_analysis.txt",
            mime="text/plain"
        )
    
    with col2:
        if st.button("🔄 Regenerate Analysis", type="primary"):
            # Dropping the cached data only makes sense when a resume file is known
            if "resume_path" in st.session_state:
                st.session_state.pop("resume_data", None)
                st.rerun()

def render_section(key, value):
    """Render one resume section, choosing the layout from its key and value type.

    Dicts are shown as ``label: value`` lines. Lists are shown as skill tags,
    job expanders, education entries or a generic item list depending on
    ``key``. Any other value is passed straight to ``st.write``.
    """
    def pretty(name):
        # "work_history" -> "Work History"
        return name.replace('_', ' ').title()

    # Dictionary values: one bold label per entry
    if isinstance(value, dict):
        for field, content in value.items():
            st.markdown(f"**{pretty(field)}**: {content}")
        return

    # String/number/other values
    if not isinstance(value, list):
        st.write(value)
        return

    section = key.lower()

    if section in ("skills", "technologies", "competencies"):
        # Strings become tags; anything else is written out as encountered
        tag_parts = []
        for entry in value:
            if isinstance(entry, str):
                tag_parts.append(f'<span class="skill-tag">{entry}</span>')
            else:
                st.write(entry)
        tags_markup = "".join(tag_parts)
        if tags_markup:
            st.markdown(tags_markup, unsafe_allow_html=True)

    elif section in ("experience", "work", "employment", "jobs"):
        # One expander per job; only the first starts expanded
        heading_keys = ("company", "organization", "title", "position")
        for position, entry in enumerate(value):
            if not isinstance(entry, dict):
                st.write(entry)
                continue
            company = entry.get("company", entry.get("organization", "Company"))
            title = entry.get("title", entry.get("position", "Role"))
            with st.expander(f"{title} at {company}", expanded=position == 0):
                for field, content in entry.items():
                    if field in heading_keys:
                        continue
                    st.markdown(f"**{pretty(field)}**: {content}")

    elif section in ("education", "qualifications", "academic"):
        # Degree/institution headline followed by the remaining fields
        heading_keys = ("degree", "qualification", "institution", "school")
        for entry in value:
            if not isinstance(entry, dict):
                st.write(entry)
                continue
            degree = entry.get("degree", entry.get("qualification", "Degree"))
            institution = entry.get("institution", entry.get("school", "Institution"))
            st.markdown(f"**{degree}** - {institution}")
            for field, content in entry.items():
                if field in heading_keys:
                    continue
                st.markdown(f"*{pretty(field)}*: {content}")
            st.divider()

    else:
        # Generic list rendering
        for entry in value:
            st.write(entry)
            st.divider()

# Helper functions for detailed analysis
def dict_to_text(data_dict):
    """Flatten a resume dictionary into indented plain text for analysis.

    Each top-level key becomes a title-cased heading line followed by its
    content (nested dict fields, list items, or the bare value) and a blank
    line. Returns the assembled string.
    """
    def heading(name):
        # "contact_info" -> "Contact Info"
        return name.replace('_', ' ').title()

    def field_lines(mapping):
        return [f"  {heading(k)}: {v}\n" for k, v in mapping.items()]

    pieces = []
    for section, content in data_dict.items():
        pieces.append(f"{heading(section)}:\n")

        if isinstance(content, dict):
            pieces.extend(field_lines(content))
        elif isinstance(content, list):
            for entry in content:
                if isinstance(entry, dict):
                    pieces.extend(field_lines(entry))
                    pieces.append("\n")  # blank line between dict entries
                else:
                    pieces.append(f"  - {entry}\n")
        else:
            pieces.append(f"  {content}\n")

        pieces.append("\n")  # blank line after every section

    return "".join(pieces)

def extract_action_verbs(text):
    """Return the likely action verbs in *text*, in order of appearance.

    The text is lower-cased and tokenized with NLTK. A token is kept when it is
    alphabetic, is not an English stopword, and either appears in a list of
    common resume verbs or ends with one of a few verb-like suffixes.
    Duplicates are kept so callers can count occurrences.
    """
    # Tokenize sentence by sentence
    tokens = [
        token
        for sentence in nltk.sent_tokenize(text.lower())
        for token in nltk.word_tokenize(sentence)
    ]

    ignored = set(stopwords.words('english'))

    # Common action verb endings (tuple form so str.endswith can test them all)
    verb_suffixes = ('ed', 'ing', 'ize', 'ate', 'en', 'fy', 'ish', 'uce')

    # Common action verbs in resume context
    resume_verbs = {
        'manage', 'lead', 'develop', 'create', 'implement', 'analyze',
        'design', 'coordinate', 'establish', 'generate', 'launch', 'perform',
        'deliver', 'build', 'resolve', 'transform', 'improve', 'increase',
        'decrease', 'reduce', 'streamline', 'optimize', 'spearhead', 'pioneer',
    }

    return [
        token
        for token in tokens
        if token.isalpha()
        and token not in ignored
        and (token in resume_verbs or token.endswith(verb_suffixes))
    ]

def extract_skills_from_text(text):
    """Extract skills from resume text.

    Two passes are made over the lower-cased text:

    1. Every keyword in the built-in technical/soft skill lists is searched
       for as a whole term.
    2. Phrases following markers such as "proficient in" are split on commas,
       "and", ``&`` and bullets; each remaining item is added as a skill
       (classified as soft only if it mentions communication, leadership or
       teamwork, otherwise technical).

    Args:
        text: Resume text to scan.

    Returns:
        A list of ``(skill, domain)`` tuples, ``domain`` being ``'technical'``
        or ``'soft'``, in detection order.
    """
    text = text.lower()
    
    # Dictionary of skill domains and their relevant keywords
    skill_domains = {
        'technical': [
            # Programming and Development
            'python', 'java', 'javascript', 'html', 'css', 'sql', 'c++', 'c#', 'php', 'ruby',
            'react', 'angular', 'vue', 'node', 'django', 'flask', 'spring', 'express',
            'git', 'docker', 'kubernetes', 'aws', 'azure', 'gcp', 'cloud',
            'restful api', 'graphql', 'database', 'algorithm', 'data structure',
            
            # Data Science & Analytics
            'data analysis', 'machine learning', 'ai', 'artificial intelligence', 'neural network',
            'deep learning', 'nlp', 'natural language', 'tensorflow', 'pytorch', 'scikit',
            'data visualization', 'tableau', 'power bi', 'statistics', 'r programming',
            
            # Design
            'ui design', 'ux design', 'user interface', 'user experience', 'wireframing',
            'photoshop', 'illustrator', 'figma', 'sketch', 'indesign', 'typography',
            
            # Marketing & Business
            'seo', 'sem', 'google analytics', 'social media', 'content marketing',
            'email marketing', 'crm', 'salesforce', 'hubspot', 'market research',
            
            # Other Technical
            'excel', 'word', 'powerpoint', 'office suite', 'project management',
            'jira', 'trello', 'agile', 'scrum', 'kanban', 'waterfall',
            'linux', 'unix', 'windows', 'macos', 'ios', 'android'
        ],
        'soft': [
            'communication', 'teamwork', 'leadership', 'problem solving', 'critical thinking',
            'time management', 'organization', 'adaptability', 'flexibility', 'creativity',
            'emotional intelligence', 'conflict resolution', 'negotiation', 'persuasion',
            'presentation', 'public speaking', 'customer service', 'client relations',
            'mentoring', 'coaching', 'training', 'strategic thinking', 'analytical',
            'research', 'writing', 'editing', 'attention to detail', 'multitasking',
            'prioritization', 'resilience', 'stress management', 'work ethic',
            'collaboration', 'interpersonal', 'networking', 'self-motivation'
        ]
    }
    
    # Look for skill markers
    skill_markers = [
        'experience with', 'proficient in', 'skilled in', 'knowledge of', 'expertise in',
        'familiar with', 'background in', 'trained in', 'certified in', 'specializing in',
        'skills:', 'skill', 'proficiency', 'competency'
    ]
    
    detected_skills = []
    
    # Detect skills based on predefined domains
    for domain, keywords in skill_domains.items():
        for keyword in keywords:
            # A plain \b after a keyword ending in a symbol ("c++", "c#") only
            # matches when a word character follows, so such keywords were
            # missed in ordinary text. Require "not inside a word" on the left,
            # and on the right only when the keyword itself ends in a word char.
            tail = r'(?!\w)' if re.fullmatch(r'\w', keyword[-1]) else ''
            if re.search(r'(?<!\w)' + re.escape(keyword) + tail, text):
                detected_skills.append((keyword, domain))
    
    # Detect additional skills based on skill markers
    seen = set(detected_skills)
    stop_words = None  # loaded lazily: only needed once a marker actually matches
    for marker in skill_markers:
        marker_pattern = re.escape(marker) + r'\s+([\w\s,]+)(?:\.|,|\n)'
        for match in re.finditer(marker_pattern, text):
            if stop_words is None:
                stop_words = set(stopwords.words('english'))
            
            skill_text = match.group(1).strip()
            # Split on commas, "and", ampersands and bullet characters
            skill_items = re.split(r',|\s+and\s+|\s*[&\u2022]\s*', skill_text)
            
            for item in skill_items:
                item = item.strip().lower()
                if not item or item in stop_words or len(item) <= 2:
                    continue
                
                # Determine if technical or soft skill
                domain = 'technical'  # Default assumption
                if any(soft in item for soft in ['communication', 'leadership', 'teamwork']):
                    domain = 'soft'
                
                if (item, domain) not in seen:
                    seen.add((item, domain))
                    detected_skills.append((item, domain))
    
    return detected_skills

def extract_experience_details(text):
    """Extract detailed experience information from resume text.

    Explicit statements such as "5+ years of experience" take precedence; the
    largest number found is used. When there are none, the durations of all
    year ranges ("2015 - 2018", "2020 to present") are added together.

    Args:
        text: Resume text to scan.

    Returns:
        A ``(total_years, experience_details)`` tuple. ``total_years`` is an
        int (0 when nothing could be determined). ``experience_details`` maps
        ``"<start>-<end>"`` to the sentence around that range, or to a generic
        "Experience from ... to ..." label when no sentence could be isolated.
    """
    from datetime import date  # local import: only needed for the current year

    lowered = text.lower()

    # Look for patterns like "X years of experience" or "X+ years"
    year_patterns = [
        r'(\d+)(?:\+)?\s*(?:years|yrs)(?:\s*of)?\s*experience',
        r'experience\s*(?:of)?\s*(\d+)(?:\+)?\s*(?:years|yrs)',
        r'(?:worked|working)\s*(?:for)?\s*(\d+)(?:\+)?\s*(?:years|yrs)',
    ]
    
    explicit_years = []
    for pattern in year_patterns:
        explicit_years.extend(int(y) for y in re.findall(pattern, lowered) if y.isdigit())
    
    # Extract employment dates; separators: hyphen, "to", en dash, em dash
    date_ranges = re.findall(
        r'(20\d{2}|19\d{2})\s*(?:-|to|\u2013|\u2014)\s*(20\d{2}|19\d{2}|present|current|now)',
        lowered,
    )
    
    current_year = date.today().year
    experience_details = {}
    durations = []
    
    for start, end in date_ranges:
        start_year = int(start)
        # "present"/"current"/"now" mean the range is still open
        end_year = int(end) if end.isdigit() else current_year
        # Clamp so a reversed range cannot subtract from the total
        durations.append(max(end_year - start_year, 0))
        
        # Extract context around the date to identify the role
        context_pattern = r'([^\.]*?' + re.escape(start) + r'[^\.]*?' + re.escape(end) + r'[^\.]*?\.)'
        context_matches = re.findall(context_pattern, text, re.IGNORECASE)
        
        context = context_matches[0] if context_matches else f"Experience from {start} to {end}"
        experience_details[f"{start}-{end}"] = context
    
    if explicit_years:
        total_years = max(explicit_years)
    else:
        # No explicit statement: every date range contributes, not just the first
        total_years = sum(durations)
    
    return total_years, experience_details

def extract_education_dynamic(text):
    """Extract education details dynamically from resume text.

    The text between an education-related keyword and the next section heading
    is collected as the "education section". When no such span exists, the
    sentences (from ``nltk.sent_tokenize``) that mention a degree are used
    instead.

    Args:
        text: Raw resume text.

    Returns:
        ``None`` when nothing education-related is found; otherwise a dict with
        ``"highest_degree"`` (``"Not Specified"`` when no level matched),
        ``"institutions"`` and ``"fields"`` (title-cased, de-duplicated lists
        in first-seen order).
    """
    education_keywords = ['education', 'degree', 'university', 'college', 'school', 'academic',
                          'bachelor', 'master', 'phd', 'doctorate', 'diploma']
    # Abbreviations are anchored on both sides so that "ms"/"ba"/"ma" inside
    # ordinary words ("systems", "database", "management") do not count.
    degree_patterns = [
        r'\b(?:bachelor|master|phd|doctorate)',
        r'(?<![\w.])(?:b\.?s\.?|m\.?s\.?|b\.?a\.?|m\.?a\.?|m\.?b\.?a\.?|ph\.?d\.?)(?!\w)',
        r'\b(?:bachelor of|master of|doctor of)',
    ]
    section_end = (r'(?:experience|skills|work history|employment|references|projects'
                   r'|publications|certification|additional)')

    # Extract education section
    lowered = text.lower()
    education_section = ""
    for keyword in education_keywords:
        pattern = r'(?:' + re.escape(keyword) + r')(.+?)' + section_end
        found = re.search(pattern, lowered, re.DOTALL | re.IGNORECASE)
        if found:
            education_section += ' ' + found.group(1)

    if not education_section:
        # Fallback: keep any sentence containing degree-related words
        for sentence in nltk.sent_tokenize(text):
            sentence_lower = sentence.lower()
            if any(re.search(degree_pattern, sentence_lower) for degree_pattern in degree_patterns):
                education_section += ' ' + sentence

    if not education_section:
        return None

    section_lower = education_section.lower()

    # Extract degree level; levels are listed from highest to lowest, so the
    # first hit is the highest degree mentioned.
    highest_degree = "Not Specified"
    degree_levels = {
        "Doctorate": ["phd", "doctorate", "doctor of", "ph.d"],
        "Master's": ["master", "m.s.", "m.a.", "mba", "m.b.a.", "master of"],
        "Bachelor's": ["bachelor", "b.s.", "b.a.", "bachelor of"],
        "Associate's": ["associate", "a.s.", "a.a."],
        "High School": ["high school", "diploma", "secondary"]
    }

    for level, keywords in degree_levels.items():
        # Lookarounds instead of \b: a trailing \b can never match after the
        # final "." of "m.s." when whitespace follows.
        if any(re.search(r'(?<!\w)' + re.escape(kw) + r'(?!\w)', section_lower) for kw in keywords):
            highest_degree = level
            break

    # Extract institution names ("<word> university" or "university of <word> [<word>]")
    institutions = []
    common_institutions = ["university", "college", "institute", "school"]
    for word in common_institutions:
        pattern = (r'(\b\w+\s+' + re.escape(word) + r'\b|\b' + re.escape(word)
                   + r' of \w+(?:\s+\w+)?)')
        institutions.extend(m for m in re.findall(pattern, education_section, re.IGNORECASE) if m)

    # Extract fields of study
    common_fields = ["computer science", "engineering", "business", "administration", "finance", "economics",
                     "mathematics", "psychology", "biology", "chemistry", "physics", "literature", "history",
                     "communication", "marketing", "management", "accounting", "medicine", "law"]
    fields = [
        field for field in common_fields
        if re.search(r'\b' + re.escape(field) + r'\b', section_lower)
    ]

    # Clean and deduplicate; dict.fromkeys keeps first-seen order so the
    # result is stable between runs (a set would not be).
    institutions = list(dict.fromkeys(i.strip().title() for i in institutions))
    fields = list(dict.fromkeys(f.strip().title() for f in fields))

    return {
        "highest_degree": highest_degree,
        "institutions": institutions,
        "fields": fields
    }

def extract_contact_info(text):
    """Extract contact information from resume text.

    Args:
        text: Raw resume text.

    Returns:
        A dict holding whichever of ``'Email'``, ``'Phone'``, ``'LinkedIn'``,
        ``'Website'`` and ``'Location'`` could be found; the first occurrence
        of each wins. Missing items are simply absent from the dict.
    """
    contact_info = {}

    # Email (the TLD class must not contain "|", which is literal inside [])
    email_pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b'
    email = re.search(email_pattern, text)
    if email:
        contact_info['Email'] = email.group(0)

    # Phone: a lookbehind replaces the leading \b, which cannot match in front
    # of "(" or "+" and therefore cut off "(503" / "+1".
    phone_pattern = r'(?<!\w)(?:\+\d{1,3}[-\s]?)?\(?\d{3}\)?[-\s]?\d{3}[-\s]?\d{4}\b'
    phone = re.search(phone_pattern, text)
    if phone:
        contact_info['Phone'] = phone.group(0)

    # LinkedIn
    linkedin_pattern = r'(?:linkedin\.com/in/|linkedin:)([A-Za-z0-9_-]+)'
    linkedin = re.search(linkedin_pattern, text.lower())
    if linkedin:
        contact_info['LinkedIn'] = f"linkedin.com/in/{linkedin.group(1)}"

    # Website/Portfolio: no capturing group, so findall yields the whole URL
    # instead of only its last "label." fragment. Email addresses are blanked
    # first so their local part or domain is not reported as a site.
    website_pattern = r'(?:https?://)?(?:www\.)?(?:[A-Za-z0-9][-A-Za-z0-9]*\.)+[A-Za-z]{2,}'
    ignored_domains = ('gmail', 'yahoo', 'hotmail', 'linkedin', 'facebook', 'twitter')
    for site in re.findall(website_pattern, re.sub(email_pattern, ' ', text)):
        if not any(domain in site.lower() for domain in ignored_domains):
            contact_info['Website'] = site
            break

    # Location: labelled form first, then a "City, ST" fallback. The fallback
    # only allows spaces/tabs inside the city so it cannot swallow text from
    # the preceding line.
    location_patterns = [
        r'(?:location|address|city):\s*([^,\n]+(?:,\s*[A-Z]{2})?)',
        r'\b([A-Za-z \t]+,\s*[A-Z]{2})\b'
    ]

    for pattern in location_patterns:
        location = re.search(pattern, text)
        if location:
            contact_info['Location'] = location.group(1).strip()
            break

    return contact_info

def extract_summary(text):
    """Return a short summary / professional statement found in the resume.

    A heading such as "summary", "profile" or "objective" is tried first; if
    none is present the opening of the text is used. In both cases up to four
    period-separated chunks are captured and runs of whitespace are collapsed
    to single spaces. Returns ``None`` when neither pattern matches.
    """
    search_flags = re.IGNORECASE | re.DOTALL
    after_heading = r'(?:summary|profile|objective|about me|professional summary|professional profile)(?:[:\s]*)([^\.]+(?:\.[^\.]+){0,3})'
    document_opening = r'^([^\.]+(?:\.[^\.]+){0,3})'

    for candidate in (after_heading, document_opening):
        found = re.search(candidate, text, search_flags)
        if found is None:
            continue
        # Normalise line breaks and indentation inside the captured text
        return re.sub(r'\s+', ' ', found.group(1).strip())

    return None

def detect_industry(text):
    """Guess the industry a resume belongs to from keyword frequencies.

    Every whole-word occurrence of an industry's keywords (case-insensitive)
    adds one to that industry's tally. Returns ``{"name": ..., "score": ...}``
    for the industry with the highest tally (the earliest one listed wins a
    tie), or ``None`` when no keyword occurs at all.
    """
    keyword_table = {
        "Software Development": ["software", "developer", "programming", "coder", "engineer", "java", "python", "javascript", "web developer"],
        "Data Science": ["data science", "machine learning", "ai", "artificial intelligence", "data mining", "big data", "statistical", "data analysis"],
        "Finance": ["finance", "accounting", "financial", "analyst", "investment", "banking", "budget", "cash flow", "audit"],
        "Marketing": ["marketing", "seo", "social media", "brand", "content", "campaign", "digital marketing", "market research"],
        "Healthcare": ["healthcare", "medical", "clinical", "nurse", "doctor", "patient", "hospital", "pharma", "health"],
        "Education": ["education", "teacher", "professor", "academic", "curriculum", "teaching", "instructor", "school", "university"],
        "Design": ["design", "designer", "creative", "ui", "ux", "graphic", "visual", "artwork", "illustrator", "photoshop"],
        "Engineering": ["engineering", "mechanical", "electrical", "civil", "architect", "structural", "construction"],
        "Project Management": ["project manager", "project management", "agile", "scrum", "pmp", "program", "delivery", "sprint"],
        "Human Resources": ["hr", "human resources", "recruitment", "talent", "hiring", "onboarding", "benefits", "compensation"]
    }

    haystack = text.lower()

    # Total whole-word hits per industry
    tallies = {
        label: sum(
            len(re.findall(r'\b' + re.escape(term) + r'\b', haystack))
            for term in terms
        )
        for label, terms in keyword_table.items()
    }

    # max() keeps the first maximal key, i.e. the earliest industry on a tie
    leader = max(tallies, key=tallies.get)
    if tallies[leader] <= 0:
        return None

    return {"name": leader, "score": tallies[leader]}

def get_common_industries(exclude=None):
    """Return the industry names offered in the dropdown.

    When *exclude* is truthy, that name is left out of the returned list;
    otherwise the full list is returned.
    """
    catalog = (
        "Software Development", "Data Science", "Finance", "Marketing",
        "Healthcare", "Education", "Design", "Engineering",
        "Project Management", "Human Resources", "Sales", "Customer Service",
        "Consulting", "Operations", "Manufacturing", "Legal",
        "Content Writing", "Administrative", "Research",
    )

    if not exclude:
        return list(catalog)

    return [name for name in catalog if name != exclude]

def get_industry_keywords(industry):
    """Look up the keyword list associated with *industry*.

    Industries that are not in the table get a generic list of resume terms
    instead.
    """
    generic_terms = ["experience", "skills", "qualified", "professional", "proficient", "expertise", "knowledge", "background", "competent"]

    catalog = {
        "Software Development": [
            "software development", "programming", "coding", "java", "python", "javascript",
            "full stack", "backend", "frontend", "web development", "mobile development",
            "api", "git", "agile", "scrum", "devops", "testing", "debugging", "software engineer",
        ],
        "Data Science": [
            "data science", "machine learning", "ai", "artificial intelligence", "data mining",
            "big data", "statistical analysis", "data visualization", "predictive modeling",
            "data cleaning", "python", "r", "sql", "tableau", "tensorflow", "pytorch", "analytics",
        ],
        "Finance": [
            "financial analysis", "accounting", "budgeting", "forecasting", "investment",
            "financial planning", "portfolio management", "risk assessment", "financial reporting",
            "banking", "taxation", "audit", "compliance", "cpa", "financial statements",
        ],
        "Marketing": [
            "marketing strategy", "digital marketing", "social media", "content marketing",
            "seo", "sem", "analytics", "brand management", "market research", "campaign management",
            "email marketing", "customer acquisition", "marketing automation", "conversion rate",
        ],
        "Healthcare": [
            "patient care", "clinical", "medical", "healthcare", "diagnosis", "treatment",
            "medical records", "hipaa", "patient satisfaction", "healthcare management",
            "medical billing", "electronic health records", "hospital", "nursing", "pharmacy",
        ],
        "Education": [
            "curriculum development", "instruction", "teaching", "assessment", "education",
            "learning management", "student engagement", "classroom management", "pedagogy",
            "educational technology", "student progress", "lesson planning", "academic advising",
        ],
        "Design": [
            "design", "visual design", "ui design", "ux design", "graphic design", "user interface",
            "user experience", "wireframing", "prototyping", "typography", "illustration",
            "adobe creative suite", "photoshop", "illustrator", "indesign", "sketch", "figma",
        ],
        "Engineering": [
            "engineering", "mechanical", "electrical", "civil", "industrial", "product development",
            "cad", "technical specifications", "quality assurance", "testing", "manufacturing",
            "design review", "product lifecycle", "structural analysis", "prototyping", "schematics",
        ],
        "Project Management": [
            "project management", "program management", "agile", "scrum", "waterfall", "pmp",
            "sprint planning", "stakeholder management", "risk management", "timeline",
            "project planning", "resource allocation", "deliverables", "milestones", "kpis",
        ],
        "Human Resources": [
            "recruitment", "talent acquisition", "onboarding", "benefits administration",
            "employee relations", "performance management", "compensation", "training",
            "development", "hr policies", "compliance", "diversity", "inclusion", "employee engagement",
        ],
        "Sales": [
            "sales strategy", "business development", "account management", "client relationship",
            "lead generation", "negotiation", "closing deals", "sales funnel", "crm", "salesforce",
            "revenue growth", "pipeline management", "prospecting", "quota", "sales targets",
        ],
        "Customer Service": [
            "customer support", "client satisfaction", "problem resolution", "customer experience",
            "service level", "call center", "help desk", "ticketing system", "client communication",
            "customer retention", "conflict resolution", "service recovery", "customer feedback",
        ],
        "Consulting": [
            "consulting", "advisory", "client engagement", "solution development", "business analysis",
            "process improvement", "strategy consulting", "change management", "requirements gathering",
            "client presentations", "deliverables", "stakeholder management", "best practices",
        ],
        "Operations": [
            "operations management", "process optimization", "supply chain", "logistics",
            "inventory management", "quality control", "continuous improvement", "lean",
            "six sigma", "resource planning", "operational efficiency", "workflow optimization",
        ],
        "Manufacturing": [
            "manufacturing", "production", "quality control", "assembly", "fabrication",
            "inventory", "supply chain", "lean manufacturing", "quality assurance", "six sigma",
            "production planning", "operational efficiency", "machining", "plant operations",
        ],
        "Legal": [
            "legal counsel", "contract review", "compliance", "regulatory", "litigation",
            "legal research", "case management", "legal writing", "negotiation", "dispute resolution",
            "legal analysis", "corporate law", "intellectual property", "legal documentation",
        ],
        "Content Writing": [
            "content creation", "copywriting", "editing", "proofreading", "creative writing",
            "blogging", "seo writing", "technical writing", "content strategy", "storytelling",
            "content marketing", "article writing", "content management", "editorial", "publishing",
        ],
        "Administrative": [
            "administrative support", "office management", "scheduling", "coordination",
            "documentation", "record keeping", "filing", "data entry", "calendar management",
            "meeting coordination", "office procedures", "executive support", "clerical",
        ],
        "Research": [
            "research methodology", "data collection", "analysis", "literature review",
            "experimental design", "research and development", "hypothesis testing", "qualitative research",
            "quantitative research", "research proposal", "findings", "research paper", "publications",
        ],
    }

    # Known industry -> its own list; anything else -> the generic fallback
    if industry in catalog:
        return catalog[industry]
    return generic_terms

def calculate_ats_score(text, industry):
    """Calculate an ATS score for a resume from seven weighted components.

    Args:
        text: Raw resume text; it is lower-cased before any check runs.
        industry: Industry name passed to ``get_industry_keywords`` to pick
            the keyword list for the relevance component.

    Returns:
        A ``(final_score, components)`` tuple. ``components`` maps each
        component name to ``{"score": 0-100, "weight": int}`` and
        ``final_score`` is the rounded weighted average of those scores.
    """
    text = text.lower()

    # Define components to evaluate
    components = {
        "Format Quality": {"score": 0, "weight": 15},
        "Keyword Relevance": {"score": 0, "weight": 25},
        "Contact Information": {"score": 0, "weight": 10},
        "Skills Coverage": {"score": 0, "weight": 20},
        "Experience Clarity": {"score": 0, "weight": 15},
        "Education Details": {"score": 0, "weight": 10},
        "Summary/Objective": {"score": 0, "weight": 5}
    }

    # 1. Format Quality: 15 points off for each kind of issue found
    format_issues = []
    if re.search(r'[^\w\s,.\-():;@]', text):  # Check for special characters
        format_issues.append("Special characters")
    if re.search(r'\s{2,}', text):  # Check for multiple spaces
        format_issues.append("Multiple spaces")

    components["Format Quality"]["score"] = max(0, 100 - (len(format_issues) * 15))

    # 2. Keyword Relevance: share of the industry keywords present as whole words
    industry_keywords = get_industry_keywords(industry)
    keyword_matches = sum(
        1 for keyword in industry_keywords
        if re.search(r'\b' + re.escape(keyword) + r'\b', text)
    )
    # max(1, ...) guards against an empty keyword list
    keyword_score = min(100, (keyword_matches / max(1, len(industry_keywords)) * 100))
    components["Keyword Relevance"]["score"] = keyword_score

    # 3. Contact Information: only the three essential contact types count,
    # matching the list generate_ats_recommendations reports as missing.
    # Optional extras (website, location) must not compensate for a missing
    # email, phone number or LinkedIn profile.
    contact_info = extract_contact_info(text)
    essential_contacts = ('Email', 'Phone', 'LinkedIn')
    contacts_present = sum(1 for item in essential_contacts if item in contact_info)
    components["Contact Information"]["score"] = (contacts_present / len(essential_contacts)) * 100

    # 4. Skills Coverage
    all_skills = extract_skills_from_text(text)
    tech_skills = [skill for skill, category in all_skills if category == 'technical']
    soft_skills = [skill for skill, category in all_skills if category == 'soft']

    skill_score = 0
    if tech_skills:
        skill_score += 50  # 50% for technical skills presence
    if soft_skills:
        skill_score += 30  # 30% for soft skills presence
    if len(tech_skills) >= 5 and len(soft_skills) >= 3:
        skill_score += 20  # 20% for good balance and quantity

    components["Skills Coverage"]["score"] = skill_score

    # 5. Experience Clarity
    experience_years, experience_details = extract_experience_details(text)

    experience_score = 0
    if experience_years:
        experience_score += 50  # 50% for clear years
    if experience_details:
        experience_score += 50 * min(1, len(experience_details) / 2)  # Up to 50% based on details

    components["Experience Clarity"]["score"] = experience_score

    # 6. Education Details
    education_details = extract_education_dynamic(text)

    education_score = 0
    if education_details:
        if education_details.get("highest_degree", "Not Specified") != "Not Specified":
            education_score += 40  # 40% for degree specification
        if education_details.get("institutions", []):
            education_score += 30  # 30% for institution(s)
        if education_details.get("fields", []):
            education_score += 30  # 30% for field(s) of study

    components["Education Details"]["score"] = education_score

    # 7. Summary/Objective
    summary = extract_summary(text)

    summary_score = 0
    if summary:
        summary_score += 70  # 70% for having a summary
        if len(summary.split()) >= 20:
            summary_score += 30  # 30% for a substantial summary

    components["Summary/Objective"]["score"] = summary_score

    # Calculate weighted score
    weighted_total = sum(details["score"] * details["weight"] for details in components.values())
    total_weight = sum(details["weight"] for details in components.values())

    final_score = round(weighted_total / total_weight) if total_weight else 0

    return final_score, components

def generate_ats_recommendations(text, ats_components, industry):
    """Build improvement tips from a resume and its ATS component scores.

    *ats_components* is the per-component mapping with a ``"score"`` entry for
    each component name read below. The result groups the tips under
    "Format & Structure", "Keywords & Content" and "Missing Elements".
    """
    layout_tips = []
    content_tips = []
    gaps = []

    def score_of(component):
        return ats_components[component]["score"]

    # Format problems
    if score_of("Format Quality") < 80:
        layout_tips.extend([
            "Ensure your resume uses a clean, ATS-friendly format without tables, columns, or graphics.",
            "Use standard section headers (e.g., 'Experience', 'Education', 'Skills').",
            "Avoid special characters and symbols that may confuse ATS systems.",
        ])

    # Keyword problems: only advise when at least one industry keyword is absent
    if score_of("Keyword Relevance") < 70:
        lowered = text.lower()
        every_keyword_present = all(
            re.search(r'\b' + re.escape(term) + r'\b', lowered)
            for term in get_industry_keywords(industry)
        )
        if not every_keyword_present:
            content_tips.append(f"Add more industry-specific keywords relevant to {industry}.")
            content_tips.append("Tailor your resume for each job application by including keywords from the job description.")

    # Skill counts per category
    detected = extract_skills_from_text(text)
    technical_count = len([name for name, kind in detected if kind == 'technical'])
    soft_count = len([name for name, kind in detected if kind == 'soft'])

    if technical_count < 5:
        content_tips.append("Include more technical skills relevant to your target role.")
    if soft_count < 3:
        content_tips.append("Add more soft skills to demonstrate your well-rounded capabilities.")

    # Essential contact details that could not be found
    contacts = extract_contact_info(text)
    absent = [label for label in ('Email', 'Phone', 'LinkedIn') if label not in contacts]
    if absent:
        gaps.append(f"Add missing contact information: {', '.join(absent)}.")

    # Experience
    if score_of("Experience Clarity") < 70:
        layout_tips.append("Use clear date formats for your work experience (MM/YYYY - MM/YYYY).")
        content_tips.append("Quantify your achievements with metrics and results where possible.")

    # Education
    if score_of("Education Details") < 70:
        layout_tips.append("Clearly structure your education section with degree, institution, and graduation year.")

    # Summary
    if score_of("Summary/Objective") < 50:
        gaps.append("Add a concise professional summary that highlights your key qualifications.")

    # Always offer at least one content tip
    if not content_tips:
        content_tips.append("Use action verbs to describe your achievements and responsibilities.")

    return {
        "Format & Structure": layout_tips,
        "Keywords & Content": content_tips,
        "Missing Elements": gaps,
    }

def analyze_resume_format(text):
    """Analyze resume formatting for ATS compatibility.

    Args:
        text: Raw resume text.

    Returns:
        A list of human-readable issue descriptions, in the order the checks
        run (tables, special characters, spacing, headers/footers, images,
        file-extension mentions). The list is empty when nothing is flagged.
    """
    format_issues = []

    # Check for tables/columns indicators
    if re.search(r'\|\s*\|', text) or re.search(r'\+---+\+', text):
        format_issues.append("Possible table structure detected - tables can confuse ATS systems")

    # Check for fancy characters: smileys, card suits, bullets, arrows and
    # similar pictographs. The class is spelled with \uXXXX escapes so it
    # survives any re-encoding of this file; a mis-decoded literal would
    # instead match ordinary accented letters such as "ñ" or "ó".
    fancy_characters = (
        r'[\u263A\u263B\u2665\u2666\u2663\u2660\u2022\u25D8\u25CB\u25D9'
        r'\u2642\u2640\u266A\u266B\u263C\u25BA\u25C4\u2195\u203C\u00B6\u00A7'
        r'\u25AC\u21A8\u2191\u2193\u2192\u2190\u221F\u2194\u25B2\u25BC]'
    )
    if re.search(fancy_characters, text):
        format_issues.append("Special characters detected - replace with standard text characters")

    # Check for inconsistent spacing (two or more blank lines in a row)
    if re.search(r'\n\s*\n\s*\n', text):
        format_issues.append("Inconsistent spacing detected - maintain consistent formatting")

    # Check for potential header/footer content
    header_footer_patterns = [
        r'page \d+ of \d+',
        r'confidential',
        r'resume of',
        r'curriculum vitae of'
    ]
    if any(re.search(pattern, text, re.IGNORECASE) for pattern in header_footer_patterns):
        format_issues.append("Headers/footers detected - remove them for better ATS compatibility")

    # Check for image indicators
    image_indicators = [
        r'\[image\]',
        r'\[photo\]',
        r'\[logo\]',
        r'<img',
        r'data:image'
    ]
    if any(re.search(indicator, text, re.IGNORECASE) for indicator in image_indicators):
        format_issues.append("Image detected - remove images for better ATS compatibility")

    # Check file format implications: triggered by a file extension appearing
    # in the text itself
    if re.search(r'\.docx|\.pdf|\.doc|\.rtf', text, re.IGNORECASE):
        format_issues.append("Ensure you're using a standard .docx or .pdf format for ATS compatibility")

    return format_issues

def analyze_keyword_positions(text, keywords):
    """Score where the given keywords appear within the resume.

    Two measures are averaged: the share of keywords found in the first
    paragraph, and how many quarters of the document contain at least one
    keyword occurrence. Returns a dict with the rounded ``"score"``,
    ``"first_paragraph_score"``, ``"distribution_score"`` and a
    ``"suggestion"`` string.
    """
    def whole_word(term):
        return r'\b' + re.escape(term) + r'\b'

    lowered = text.lower()

    # Share of keywords present in the opening paragraph (summary/objective)
    opening = re.split(r'\n\s*\n', text)[0].lower()
    opening_hits = sum(1 for term in keywords if re.search(whole_word(term), opening))
    first_para_score = min(100, (opening_hits / max(1, len(keywords))) * 100)

    # Which quarters of the document contain a keyword occurrence
    sections = 4
    covered = set()
    for term in keywords:
        for found in re.finditer(whole_word(term), lowered):
            relative = found.start() / len(text)  # normalized position (0-1)
            covered.add(min(int(relative * sections), sections - 1))

    distribution_score = (len(covered) / sections) * 100 if covered else 0

    # Overall score is the mean of the two measures
    position_score = (first_para_score + distribution_score) / 2

    # The weaker aspect drives the advice; the opening paragraph is checked first
    if first_para_score < 50:
        suggestion = "Include more relevant keywords in your summary or objective section."
    elif distribution_score < 50:
        suggestion = "Distribute your keywords more evenly throughout your resume."
    else:
        suggestion = "Your keyword positioning looks good! Keywords are well distributed throughout your resume."

    return {
        "score": round(position_score),
        "first_paragraph_score": round(first_para_score),
        "distribution_score": round(distribution_score),
        "suggestion": suggestion
    }

# Basic dictionary-backed industry classifier
def get_industry_model():
    """Build the simple industry classification model.

    The model is a dict with two entries: ``"industry_keywords"`` (industry
    name -> keyword list) and ``"predict"``, a callable taking
    ``(text, keywords)`` and returning, per industry, how many of its
    keywords occur in the text as whole words (case-insensitive).
    """
    # Plain keyword lookup; a real application might use a more
    # sophisticated ML model here.
    keyword_sets = {
        "Software Development": ["software", "developer", "programming", "java", "python", "javascript"],
        "Data Science": ["data science", "machine learning", "ai", "statistics", "analytics"],
        "Finance": ["finance", "accounting", "financial", "investment", "banking"],
        "Marketing": ["marketing", "digital", "social media", "brand", "content"],
        "Healthcare": ["healthcare", "medical", "clinical", "patient", "health"],
        "Education": ["education", "teacher", "academic", "student", "teaching"],
        "Design": ["design", "ui", "ux", "graphic", "creative"],
        "Engineering": ["engineering", "mechanical", "electrical", "civil", "structural"],
        "Project Management": ["project manager", "agile", "scrum", "pmp", "delivery"],
        "Human Resources": ["hr", "human resources", "recruitment", "talent", "hiring"]
    }

    def count_keyword_hits(text, keywords):
        hits = {}
        for name, kws in keywords.items():
            present = [
                kw for kw in kws
                if re.search(r'\b' + re.escape(kw) + r'\b', text.lower())
            ]
            hits[name] = len(present)
        return hits

    return {
        "industry_keywords": keyword_sets,
        "predict": count_keyword_hits,
    }

def classify_industry(text):
    """Name the industry whose keywords best match the resume text.

    Uses the model from ``get_industry_model``; returns ``"General"`` when no
    industry keyword is found.
    """
    model = get_industry_model()
    predict = model["predict"]
    scores = predict(text, model["industry_keywords"])

    if not scores:
        return "General"

    # Highest-scoring industry; the first one listed wins a tie
    best_name, best_hits = max(scores.items(), key=lambda pair: pair[1])
    return best_name if best_hits > 0 else "General"