In [30]:
import json
import os
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import torch
import spacy
import google.generativeai as genai
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    pipeline
)
from langchain_google_genai import ChatGoogleGenerativeAI
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

class AdvancedTechResumeAnalyzer:
    def __init__(self, api_key: str):
        # Advanced Tech Skill Priorities (multi-dimensional weighted algorithm)
        self.tech_skill_hierarchy = {
            "core_technical_skills": {
                "programming_languages": {
                    "primary": ["Python", "Java", "C++", "Rust"],
                    "secondary": ["JavaScript", "TypeScript", "Go"],
                    "emerging": ["Kotlin", "Swift", "Julia"]
                },
                "frameworks": {
                    "web": ["React", "Vue", "Angular", "Next.js"],
                    "backend": ["Django", "Spring", "FastAPI", "Express"],
                    "ml": ["TensorFlow", "PyTorch", "Keras"]
                }
            },
            "cloud_and_devops": {
                "cloud_platforms": ["AWS", "GCP", "Azure", "Oracle Cloud"],
                "containerization": ["Docker", "Kubernetes", "Helm"],
                "ci_cd": ["Jenkins", "GitLab CI", "GitHub Actions"]
            },
            "emerging_technologies": {
                "ai_ml": ["Machine Learning", "Deep Learning", "NLP", "Computer Vision"],
                "blockchain": ["Solidity", "Ethereum", "Smart Contracts"],
                "quantum_computing": ["Qiskit", "Quantum Algorithms"]
            }
        }

        # Initialize pre-trained models
        self._load_pretrained_models()

        # Initialize Gemini LLM
        genai.configure(api_key=api_key)
        self.llm = ChatGoogleGenerativeAI(
            model="gemini-pro",
            google_api_key=api_key,
            temperature=0.3
        )

    def _load_pretrained_models(self):
        """
        Load multiple pre-trained models for comprehensive analysis
        """
        # Skill Recommendation Model
        self.skill_classifier = pipeline("zero-shot-classification")

        # Sentiment and Tone Analysis
        self.sentiment_analyzer = pipeline("sentiment-analysis")

        # Named Entity Recognition
        try:
            self.nlp = spacy.load("en_core_web_lg")
        except OSError:
            spacy.cli.download("en_core_web_lg")
            self.nlp = spacy.load("en_core_web_lg")

    def analyze_resume(self, resume_data: dict) -> dict:
        """
        Comprehensive resume analysis with multiple model insights
        """
        # Convert resume to text for NLP processing
        resume_text = self._convert_resume_to_text(resume_data)

        # Core Analysis Components
        skill_analysis = self._advanced_skill_analysis(resume_data)
        project_insights = self._analyze_projects(resume_data)
        soft_skills_assessment = self._assess_soft_skills(resume_text)

        # Generate Comprehensive Recommendations
        recommendations = self._generate_holistic_recommendations(
            resume_data,
            skill_analysis,
            project_insights,
            soft_skills_assessment
        )

        # Visualizations
        self._create_skill_radar_chart(skill_analysis)
        self._create_project_complexity_chart(project_insights)

        return {
            "skill_analysis": skill_analysis,
            "project_insights": project_insights,
            "soft_skills": soft_skills_assessment,
            "recommendations": recommendations,
            "overall_potential_score": self._calculate_potential_score(
                skill_analysis,
                project_insights,
                soft_skills_assessment
            )
        }

    def _advanced_skill_analysis(self, resume_data: dict) -> dict:
        """
        Multi-dimensional skill analysis with hierarchical evaluation
        """
        skills = resume_data.get('skills', {})
        skill_analysis = {
            "technical_depth": self._evaluate_skill_depth(skills),
            "technology_breadth": self._assess_technology_breadth(skills),
            "skill_alignment": self._check_skill_alignment(skills)
        }
        return skill_analysis

    def _evaluate_skill_depth(self, skills: dict) -> dict:

      depth_scores = {}
      for category, skill_list in skills.items():
          matching_skills = []
          for tech_category, tech_groups in self.tech_skill_hierarchy.items():
              for group_name, group_skills in tech_groups.items():
                  # Check if group_skills is a dictionary or a list
                  if isinstance(group_skills, dict):
                      for skill_level, skill_set in group_skills.items():
                          matches = [s for s in skill_list if
                                    any(tech.lower() in s.lower() for tech in skill_set)]
                          matching_skills.extend(matches)
                  elif isinstance(group_skills, list):
                      matches = [s for s in skill_list if
                                any(tech.lower() in s.lower() for tech in group_skills)]
                      matching_skills.extend(matches)

          depth_scores[category] = {
              "unique_skills": len(set(matching_skills)),
              "skill_diversity_score": len(set(matching_skills)) * 10
          }

      return depth_scores

    def _assess_technology_breadth(self, skills: dict) -> float:
        """
        Calculate technology breadth score
        """
        total_skills = sum(len(skill_list) for skill_list in skills.values())
        return min(total_skills * 5, 100)  # Cap at 100

    def _check_skill_alignment(self, skills: dict) -> dict:
        """
        Check skill alignment with industry trends
        """
        alignment_score = {}
        for category, skill_list in skills.items():
            # Zero-shot classification for skill relevance
            classification_results = self.skill_classifier(
                skill_list[0] if skill_list else "",
                candidate_labels=[
                    "highly relevant",
                    "moderately relevant",
                    "low relevance"
                ]
            )
            alignment_score[category] = classification_results

        return alignment_score

    def _analyze_projects(self, resume_data: dict) -> dict:
        """
        Comprehensive project analysis
        """
        projects = resume_data.get('projects', [])

        project_insights = {
            "total_projects": len(projects),
            "technology_diversity": self._calculate_project_tech_diversity(projects),
            "complexity_analysis": self._assess_project_complexity(projects),
            "hackathon_impact": self._evaluate_hackathon_contributions(projects)
        }

        return project_insights

    def _calculate_project_tech_diversity(self, projects: list) -> int:
        """
        Calculate technology diversity across projects
        """
        all_technologies = set()
        for project in projects:
            all_technologies.update(project.get('technologies_used', []))
        return len(all_technologies)

    def _assess_project_complexity(self, projects: list) -> dict:
        """
        Assess project complexity and impact
        """
        complexity_scores = []
        for project in projects:
            score = 0
            score += len(project.get('technologies_used', [])) * 2
            score += 10 if 'Hackathon' in project.get('description', '') else 0
            complexity_scores.append(score)

        return {
            "avg_complexity": np.mean(complexity_scores) if complexity_scores else 0,
            "max_complexity": max(complexity_scores) if complexity_scores else 0
        }

    def _evaluate_hackathon_contributions(self, projects: list) -> dict:
        """
        Evaluate hackathon-related project contributions
        """
        hackathon_projects = [p for p in projects if 'Hackathon' in p.get('description', '')]
        return {
            "total_hackathons": len(hackathon_projects),
            "achievements": sum(1 for p in hackathon_projects if 'Place' in p.get('description', ''))
        }

    def _assess_soft_skills(self, resume_text: str) -> dict:
        """
        Assess soft skills using NLP and sentiment analysis
        """
        # Sentiment analysis
        sentiment = self.sentiment_analyzer(resume_text[:512])[0]

        # Named entity extraction for potential leadership indicators
        doc = self.nlp(resume_text)
        leadership_indicators = [ent.text for ent in doc.ents if ent.label_ in ['ORG', 'PERSON']]

        return {
            "communication_tone": sentiment,
            "leadership_potential": len(leadership_indicators) > 3,
            "collaborative_indicators": len(leadership_indicators)
        }

    def _generate_holistic_recommendations(
        self,
        resume_data: dict,
        skill_analysis: dict,
        project_insights: dict,
        soft_skills: dict
    ) -> dict:
        """
        Generate comprehensive, actionable recommendations
        """
        recommendations = {
            "technical_skills": [
                f"Expand skills in emerging technologies like {', '.join(self.tech_skill_hierarchy['emerging_technologies']['ai_ml'])}",
                "Consider certifications in cloud platforms and DevOps",
                f"Focus on mastering {', '.join(self.tech_skill_hierarchy['core_technical_skills']['programming_languages']['emerging'])} languages"
            ],
            "project_development": [
                f"Increase project diversity. Currently {project_insights['total_projects']} projects",
                "Aim to contribute to open-source projects",
                "Participate in more hackathons and innovation challenges"
            ],
            "soft_skills": [
                "Develop stronger communication and collaboration skills",
                "Seek leadership roles in academic or professional projects",
                "Create a portfolio showcasing problem-solving abilities"
            ],
            "participation": [
                "Join technical communities and professional networks",
                "Attend industry conferences and workshops",
                "Contribute to technical blogs or create technical content"
            ]
        }

        return recommendations

    def _calculate_potential_score(
        self,
        skill_analysis: dict,
        project_insights: dict,
        soft_skills: dict
    ) -> float:
        """
        Calculate overall potential score
        """
        technical_score = np.mean([
            skill_analysis['technology_breadth'],
            project_insights['complexity_analysis']['avg_complexity']
        ])

        soft_skill_multiplier = 1.2 if soft_skills['leadership_potential'] else 1.0

        return min(technical_score * soft_skill_multiplier, 100)

    def _convert_resume_to_text(self, resume_data: dict) -> str:
        """
        Convert resume to comprehensive text
        """
        text_parts = []
        for key, value in resume_data.items():
            if isinstance(value, dict):
                text_parts.append(f"{key.replace('_', ' ').title()}: {json.dumps(value)}")
            elif isinstance(value, list):
                text_parts.append(f"{key.replace('_', ' ').title()}: {', '.join(str(v) for v in value)}")

        return " ".join(text_parts)

    def _create_skill_radar_chart(self, skill_analysis: dict):
        """
        Create radar chart for skill analysis
        """
        categories = list(skill_analysis.keys())
        values = [
            skill_analysis[cat]['technology_breadth']
            if isinstance(skill_analysis[cat], dict) and 'technology_breadth' in skill_analysis[cat]
            else 50
            for cat in categories
        ]

        plt.figure(figsize=(8, 8))
        angles = np.linspace(0, 2*np.pi, len(categories), endpoint=False)
        values = np.concatenate((values, [values[0]]))
        angles = np.concatenate((angles, [angles[0]]))

        plt.polar(angles, values, 'o-', linewidth=2)
        plt.fill(angles, values, alpha=0.25)
        plt.xticks(angles[:-1], categories)
        plt.title('Skill Analysis Radar', fontsize=15)
        plt.tight_layout()
        plt.savefig('skill_radar.png')
        plt.close()

    def _create_project_complexity_chart(self, project_insights: dict):
        """
        Create bar chart for project complexity
        """
        plt.figure(figsize=(10, 6))
        metrics = ['total_projects', 'technology_diversity']
        values = [
            project_insights['total_projects'],
            project_insights.get('technology_diversity', 0)
        ]

        plt.bar(metrics, values, color=['#1E90FF', '#2E8B57'])
        plt.title('Project Insights', fontsize=15)
        plt.ylabel('Count', fontsize=12)

        for i, v in enumerate(values):
            plt.text(i, v+0.1, str(v), ha='center', fontsize=10)

        plt.tight_layout()
        plt.savefig('project_complexity.png')
        plt.close()

def main():
    # Load API key from environment
    api_key = "AIzaSyCjMNWVDFBNU6F0k5UC_6jt5OY2gJCLPSo"

    # Sample resume data (your provided resume)
    resume_data = {
    "personal_details": {
      "name": "Ninad Sachin Maadhavi",
      "email": "ninad.maadhavi22spit.ac.in",
      "phone_number": "+91 8291101302",
      "location": "Mumbai, Maharashtra, India",
      "linkedin_url": "LinkedIn",
      "github_url": "GitHub"
    },
    "education": [
      {
        "institution": "Sardar Patel Institute of Technology, Mumbai",
        "degree": "B.Tech - Computer Science and Engineering (AIML)",
        "graduation_date": "2026",
        "cgpa": "8.61",
        "location": "Mumbai, Maharashtra, India"
      },
      {
        "institution": "Matoshree Prabhodhinee Jr. College of Science, Thane",
        "degree": "Maharashtra State Board (HSC)",
        "graduation_date": "2022",
        "percentage": "82.5",
        "location": "Mumbai, Maharashtra, India"
      }
    ],
    "research_experiences": [],
    "projects": [
      {
        "title": "Study Sync",
        "description": "Developed a web-based platform to support students' overall growth by promoting socialization based on similar academic interests. Secured 2nd place in the SE Hackathon.",
        "technologies_used": [
          "MongoDB",
          "Express",
          "React",
          "Node (MERN)"
        ],
        "date_range": "Apr 2024"
      },
      {
        "title": "IPL Auction Website",
        "description": "Assisted in the frontend development of a website to simulate the experience of an actual IPL Auction. Deployed the website on Vercel and Render.",
        "technologies_used": [
          "MongoDB",
          "Express",
          "React",
          "Node (MERN)"
        ],
        "date_range": "Dec 2023 - Mar 2024"
      },
      {
        "title": "Travel Smart",
        "description": "Developed an AI-powered chatbot with advanced image recognition facilities to deliver personalized travel recommendations. Secured 4th place in Recursion 5.0 Hackathon.",
        "technologies_used": [
          "MongoDB",
          "Express",
          "React",
          "Node (MERN)"
        ],
        "date_range": "Mar 2024"
      }
    ],
    "skills": {
      "programming_languages": [
        "C",
        "C++",
        "Java",
        "Python"
      ],
      "web_development": [
        "HTML",
        "CSS",
        "JavaScript",
        "TypeScript",
        "React",
        "Node",
        "Express",
        "Tailwind"
      ],
      "databases": [
        "MySQL",
        "MongoDB"
      ],
      "developer_tools": [
        "VS Code",
        "GitHub"
      ]
    },
    "achievements": [
      {
        "title": "SE Hackathon",
        "description": "2nd Place",
        "awarding_organization": "Sardar Patel Institute of Technology, Mumbai"
      },
      {
        "title": "Recursion 5.0 Hackathon",
        "description": "4th Place",
        "awarding_organization": "Ragiv Gandhi Institute of Technology, Mumbai"
      }
    ],
    "leadership_experience": [
      {
        "title": "Extracurricular Technical Head",
        "description": "Led a team of 5 in developing a website for the IPL Auction event. Coordinated offline and online slots, managed participant needs, and ensured a seamless user experience.",
        "organization": "Oculus - IPL Auction",
        "date_range": "Dec 2023 - Mar 2024"
      }
    ],
    "certifications": []
  }

    # Initialize analyzer
    analyzer = AdvancedTechResumeAnalyzer(api_key)

    # Analyze resume
    analysis_result = analyzer.analyze_resume(resume_data)

    # Print results
    print(json.dumps(analysis_result, indent=2))

if __name__ == "__main__":
    main()

# Requirements
"""
google-generativeai
langchain
python-dotenv
langchain-google-genai
matplotlib
seaborn
pandas
numpy
transformers
spacy
torch
scikit-learn
"""

No model was supplied, defaulted to facebook/bart-large-mnli and revision d7645e1 (https://huggingface.co/facebook/bart-large-mnli).
Using a pipeline without specifying a model name and revision in production is not recommended.
No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


{
  "skill_analysis": {
    "technical_depth": {
      "programming_languages": {
        "unique_skills": 3,
        "skill_diversity_score": 30
      },
      "web_development": {
        "unique_skills": 4,
        "skill_diversity_score": 40
      },
      "databases": {
        "unique_skills": 1,
        "skill_diversity_score": 10
      },
      "developer_tools": {
        "unique_skills": 0,
        "skill_diversity_score": 0
      }
    },
    "technology_breadth": 80,
    "skill_alignment": {
      "programming_languages": {
        "sequence": "C",
        "labels": [
          "moderately relevant",
          "highly relevant",
          "low relevance"
        ],
        "scores": [
          0.5579227805137634,
          0.37629997730255127,
          0.0657772347331047
        ]
      },
      "web_development": {
        "sequence": "HTML",
        "labels": [
          "moderately relevant",
          "highly relevant",
          "low relevance"
        ],
        "sc

'\ngoogle-generativeai\nlangchain\npython-dotenv\nlangchain-google-genai\nmatplotlib\nseaborn\npandas\nnumpy\ntransformers\nspacy\ntorch\nscikit-learn\n'