In [44]:
!pip install -q langgraph langchain langchain-google-genai langchain-community
!pip install -q chromadb sentence-transformers tavily-python
!pip install -q PyPDF2 requests beautifulsoup4 python-dotenv
!pip install -q langchain-core typing-extensions
!pip install -q streamlit pyngrok
!ngrok config add-authtoken 2ya3EMOR6COew8KQqMsDFjlgT1y_4J5KknWjtEuVZ3JMQw1gp

/bin/bash: line 1: ngrok: command not found


In [None]:
import os
import json
import re
from typing import Dict, List, Any, Optional, TypedDict
from dataclasses import dataclass, asdict
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')
import streamlit as st
from pyngrok import ngrok

# Core imports
import google.generativeai as genai
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.output_parsers import JsonOutputParser

# LangGraph imports
from langgraph.graph import StateGraph, END

# Vector store and embeddings
import chromadb
from chromadb.utils import embedding_functions
from sentence_transformers import SentenceTransformer

# API and utilities
from tavily import TavilyClient
import PyPDF2
import requests
from bs4 import BeautifulSoup

In [46]:

GOOGLE_API_KEY = "AIzaSyCidwla48YGqA2EzQBH_cVp9pA9XxvxO-s"
TAVILY_API_KEY = "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"


os.environ["GOOGLE_API_KEY"] = GOOGLE_API_KEY
genai.configure(api_key=GOOGLE_API_KEY)


llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    temperature=0.1,
    google_api_key=GOOGLE_API_KEY
)


tavily_client = TavilyClient(api_key=TAVILY_API_KEY)


embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
chroma_client = chromadb.Client()


In [47]:
@dataclass
class StudentProfile:
    """Student profile data structure"""
    name: str
    languages: List[str]
    frameworks: List[str]
    domains: List[str]
    projects: List[str]
    models: List[str]
    skills: List[str]
    experience_years: int
    education: str

@dataclass
class JobRequirements:
    """Job requirements data structure"""
    role: str
    required_skills: List[str]
    preferred_skills: List[str]
    tools: List[str]
    frameworks: List[str]
    experience_required: str
    deliverables: List[str]
    salary_range: str

@dataclass
class GapAnalysis:
    """Gap analysis result structure"""
    student_name: str
    target_role: str
    skill_match_percentage: float
    matched_skills: List[str]
    missing_skills: List[str]
    recommendations: List[str]
    priority_areas: List[str]

In [48]:
class StudentAnalyzerAgent:
    """Agent to analyze student profile and extract structured information"""

    def __init__(self, llm):
        self.llm = llm
        self.name = "Student Analyzer"

    def parse_resume_text(self, resume_text: str) -> StudentProfile:
        """Parse resume text and extract structured information"""

        # Fixed prompt template - escaped curly braces properly
        prompt = ChatPromptTemplate.from_messages([
            ("system", """You are an expert resume analyzer. Extract structured information from the resume text.
            Focus on technical skills, programming languages, frameworks, AI/ML models used, domains, and projects.

            Return a JSON object with the following structure:
            {{
                "name": "student name",
                "languages": ["Python", "JavaScript"],
                "frameworks": ["TensorFlow", "React"],
                "domains": ["Computer Vision", "NLP"],
                "projects": ["project descriptions"],
                "models": ["CNN", "LSTM", "BERT"],
                "skills": ["technical skills"],
                "experience_years": 0,
                "education": "degree information"
            }}
            """),
            ("human", "Resume text:\n{resume_text}")
        ])

        chain = prompt | self.llm | JsonOutputParser()
        result = chain.invoke({"resume_text": resume_text})

        return StudentProfile(**result)

    def analyze_github_profile(self, github_url: str) -> Dict[str, Any]:
        """Analyze GitHub profile (simplified version)"""
        # In a real implementation, you would use GitHub API
        # This is a simplified version

        prompt = ChatPromptTemplate.from_messages([
            ("system", """Based on the GitHub URL provided, infer likely technologies and skills.
            Return a JSON object with inferred skills, languages, and project types."""),
            ("human", "GitHub URL: {github_url}")
        ])

        chain = prompt | self.llm | JsonOutputParser()
        result = chain.invoke({"github_url": github_url})

        return result

    def extract_project_skills(self, project_description: str) -> Dict[str, Any]:
        """Extract skills and technologies from project description"""

        prompt = ChatPromptTemplate.from_messages([
            ("system", """Analyze the project description and extract:
            - Technologies used
            - AI/ML models implemented
            - Domain/industry focus
            - Technical complexity level

            Return as JSON."""),
            ("human", "Project description:\n{project_description}")
        ])

        chain = prompt | self.llm | JsonOutputParser()
        result = chain.invoke({"project_description": project_description})

        return result

In [49]:
class JobRoleEvaluatorAgent:
    """Agent to evaluate job requirements using Tavily API and RAG"""

    def __init__(self, llm, tavily_client):
        self.llm = llm
        self.tavily_client = tavily_client
        self.name = "Job Role Evaluator"
        self.setup_vector_store()

    def setup_vector_store(self):
        """Setup vector store for job descriptions"""
        try:
            self.collection = chroma_client.get_collection("job_descriptions")
        except:
            self.collection = chroma_client.create_collection(
                name="job_descriptions",
                embedding_function=embedding_functions.SentenceTransformerEmbeddingFunction(
                    model_name="all-MiniLM-L6-v2"
                )
            )

    def search_job_descriptions(self, role: str, count: int = 5) -> List[Dict]:
        """Search for job descriptions using Tavily API"""

        query = f"{role} job description requirements skills 2024"

        try:
            search_result = self.tavily_client.search(
                query=query,
                search_depth="advanced",
                max_results=count
            )

            job_descriptions = []
            for result in search_result.get('results', []):
                job_descriptions.append({
                    'title': result.get('title', ''),
                    'content': result.get('content', ''),
                    'url': result.get('url', ''),
                    'score': result.get('score', 0)
                })

            return job_descriptions

        except Exception as e:
            print(f"Error searching jobs: {e}")
            return []

    def extract_job_requirements(self, job_descriptions: List[Dict]) -> JobRequirements:
        """Extract structured requirements from job descriptions"""

        combined_content = "\n\n".join([jd['content'] for jd in job_descriptions])

        prompt = ChatPromptTemplate.from_messages([
            ("system", """Analyze multiple job descriptions and extract consolidated requirements.
            Focus on the most commonly mentioned skills, tools, and requirements.

            Return a JSON object with:
            {
                "role": "job role title",
                "required_skills": ["skill1", "skill2", ...],
                "preferred_skills": ["skill1", "skill2", ...],
                "tools": ["tool1", "tool2", ...],
                "frameworks": ["framework1", "framework2", ...],
                "experience_required": "X years",
                "deliverables": ["deliverable1", "deliverable2", ...],
                "salary_range": "salary information if available"
            }
            """),
            ("human", "Job descriptions:\n{job_content}")
        ])

        chain = prompt | self.llm | JsonOutputParser()
        result = chain.invoke({"job_content": combined_content})

        return JobRequirements(**result)

    def store_job_descriptions(self, job_descriptions: List[Dict], role: str):
        """Store job descriptions in vector store for RAG"""

        documents = []
        metadatas = []
        ids = []

        for i, jd in enumerate(job_descriptions):
            documents.append(jd['content'])
            metadatas.append({
                'title': jd['title'],
                'url': jd['url'],
                'role': role,
                'timestamp': datetime.now().isoformat()
            })
            ids.append(f"{role}_{i}_{datetime.now().timestamp()}")

        self.collection.add(
            documents=documents,
            metadatas=metadatas,
            ids=ids
        )


In [50]:
class GapAnalyzerAgent:
    """Agent to analyze gaps between student profile and job requirements"""

    def __init__(self, llm):
        self.llm = llm
        self.name = "Gap Analyzer"

    def calculate_skill_match(self, student: StudentProfile, job: JobRequirements) -> float:
        """Calculate skill match percentage"""

        student_skills = set(
            student.languages + student.frameworks +
            student.skills + student.models + student.domains
        )

        job_skills = set(
            job.required_skills + job.preferred_skills +
            job.tools + job.frameworks
        )

        # Normalize skills (convert to lowercase for comparison)
        student_skills_normalized = {skill.lower().strip() for skill in student_skills}
        job_skills_normalized = {skill.lower().strip() for skill in job_skills}

        matches = student_skills_normalized.intersection(job_skills_normalized)

        if len(job_skills_normalized) == 0:
            return 0.0

        match_percentage = (len(matches) / len(job_skills_normalized)) * 100
        return round(match_percentage, 2)

    def identify_skill_gaps(self, student: StudentProfile, job: JobRequirements) -> Dict[str, List[str]]:
        """Identify matched and missing skills"""

        student_skills = set(
            student.languages + student.frameworks +
            student.skills + student.models + student.domains
        )

        job_skills = set(
            job.required_skills + job.preferred_skills +
            job.tools + job.frameworks
        )

        student_skills_normalized = {skill.lower().strip() for skill in student_skills}
        job_skills_normalized = {skill.lower().strip() for skill in job_skills}

        matched = list(student_skills_normalized.intersection(job_skills_normalized))
        missing = list(job_skills_normalized - student_skills_normalized)

        return {
            "matched": matched,
            "missing": missing
        }

    def generate_recommendations(self, student: StudentProfile, job: JobRequirements, gap_analysis: Dict) -> List[str]:
        """Generate personalized recommendations"""

        missing_skills = gap_analysis["missing"]

        prompt = ChatPromptTemplate.from_messages([
            ("system", """You are a career advisor. Based on the student's current profile and missing skills for their target role,
            provide specific, actionable recommendations. Focus on:
            1. Courses or certifications to take
            2. Projects to build
            3. Tools to learn
            4. Skills to develop

            Make recommendations practical and prioritized."""),
            ("human", """
            Student Profile: {student_profile}
            Target Role: {target_role}
            Missing Skills: {missing_skills}
            Matched Skills: {matched_skills}

            Provide 5-7 specific recommendations.
            """)
        ])

        chain = prompt | self.llm
        result = chain.invoke({
            "student_profile": json.dumps(asdict(student), indent=2),
            "target_role": job.role,
            "missing_skills": missing_skills,
            "matched_skills": gap_analysis["matched"]
        })

        # Extract recommendations from the response
        recommendations = result.content.split('\n')
        recommendations = [rec.strip() for rec in recommendations if rec.strip() and len(rec.strip()) > 10]

        return recommendations[:7]  # Return top 7 recommendations

In [51]:
class StudentEvaluationSystem:
    """Main system orchestrating all agents using LangGraph"""

    def __init__(self):
        self.student_analyzer = StudentAnalyzerAgent(llm)
        self.job_evaluator = JobRoleEvaluatorAgent(llm, tavily_client)
        self.gap_analyzer = GapAnalyzerAgent(llm)
        self.setup_graph()

    def setup_graph(self):
        """Setup LangGraph workflow"""

        # Define the state structure
        class AgentState(TypedDict):
            student_data: Dict[str, Any]
            job_data: Dict[str, Any]
            gap_analysis: Dict[str, Any]
            final_report: Dict[str, Any]
            target_role: str
            current_step: str
            messages: List[str]

        # Create the graph
        workflow = StateGraph(AgentState)

        # Add nodes
        workflow.add_node("analyze_student", self.analyze_student_node)
        workflow.add_node("evaluate_job", self.evaluate_job_node)
        workflow.add_node("analyze_gaps", self.analyze_gaps_node)
        workflow.add_node("generate_report", self.generate_report_node)

        # Set entry point and add edges
        workflow.set_entry_point("analyze_student")
        workflow.add_edge("analyze_student", "evaluate_job")
        workflow.add_edge("evaluate_job", "analyze_gaps")
        workflow.add_edge("analyze_gaps", "generate_report")
        workflow.add_edge("generate_report", END)

        # Compile the graph
        self.app = workflow.compile()

    def analyze_student_node(self, state: Dict[str, Any]) -> Dict[str, Any]:
        """Node to analyze student profile"""
        print("🔍 Analyzing student profile...")

        # Initialize messages if not exists
        if "messages" not in state:
            state["messages"] = []

        state["messages"].append("Starting student analysis...")

        resume_text = state["student_data"]["resume_text"]
        student_profile = self.student_analyzer.parse_resume_text(resume_text)

        # If GitHub URL is provided, analyze it
        if "github_url" in state["student_data"]:
            try:
                github_analysis = self.student_analyzer.analyze_github_profile(
                    state["student_data"]["github_url"]
                )
                # Merge GitHub insights with student profile
                if "languages" in github_analysis:
                    student_profile.languages.extend(github_analysis["languages"])
                if "skills" in github_analysis:
                    student_profile.skills.extend(github_analysis["skills"])
            except Exception as e:
                state["messages"].append(f"GitHub analysis failed: {str(e)}")

        # If project description is provided, extract skills
        if "project_description" in state["student_data"]:
            try:
                project_analysis = self.student_analyzer.extract_project_skills(
                    state["student_data"]["project_description"]
                )
                if "technologies" in project_analysis:
                    student_profile.skills.extend(project_analysis["technologies"])
            except Exception as e:
                state["messages"].append(f"Project analysis failed: {str(e)}")

        state["student_data"]["profile"] = asdict(student_profile)
        state["current_step"] = "student_analyzed"
        state["messages"].append("Student analysis completed")

        return state

    def evaluate_job_node(self, state: Dict[str, Any]) -> Dict[str, Any]:
        """Node to evaluate job requirements"""
        print("💼 Evaluating job requirements...")

        state["messages"].append("Starting job evaluation...")

        target_role = state["target_role"]

        try:
            # Search for job descriptions
            job_descriptions = self.job_evaluator.search_job_descriptions(target_role)

            if job_descriptions:
                # Store in vector store for future RAG
                self.job_evaluator.store_job_descriptions(job_descriptions, target_role)

                # Extract structured requirements
                job_requirements = self.job_evaluator.extract_job_requirements(job_descriptions)
                state["job_data"] = {
                    "requirements": asdict(job_requirements),
                    "raw_descriptions": job_descriptions
                }
                state["messages"].append(f"Found {len(job_descriptions)} job descriptions")
            else:
                # Fallback: generate generic requirements
                state["job_data"] = {
                    "requirements": {
                        "role": target_role,
                        "required_skills": ["Python", "Machine Learning", "Data Analysis"],
                        "preferred_skills": ["TensorFlow", "PyTorch"],
                        "tools": ["Git", "Docker"],
                        "frameworks": ["Pandas", "NumPy"],
                        "experience_required": "1-3 years",
                        "deliverables": ["Model development", "Data pipeline"],
                        "salary_range": "Not specified"
                    }
                }
                state["messages"].append("Using fallback job requirements")

        except Exception as e:
            state["messages"].append(f"Job evaluation error: {str(e)}")
            # Use fallback requirements
            state["job_data"] = {
                "requirements": {
                    "role": target_role,
                    "required_skills": ["Python", "Problem Solving"],
                    "preferred_skills": ["Machine Learning"],
                    "tools": ["Git"],
                    "frameworks": ["Basic Programming"],
                    "experience_required": "Entry level",
                    "deliverables": ["Software development"],
                    "salary_range": "Not specified"
                }
            }

        state["current_step"] = "job_evaluated"
        state["messages"].append("Job evaluation completed")
        return state

    def analyze_gaps_node(self, state: Dict[str, Any]) -> Dict[str, Any]:
        """Node to analyze gaps"""
        print("📊 Analyzing skill gaps...")

        state["messages"].append("Starting gap analysis...")

        try:
            student_profile = StudentProfile(**state["student_data"]["profile"])
            job_requirements = JobRequirements(**state["job_data"]["requirements"])

            # Calculate skill match
            skill_match = self.gap_analyzer.calculate_skill_match(student_profile, job_requirements)

            # Identify skill gaps
            skill_gaps = self.gap_analyzer.identify_skill_gaps(student_profile, job_requirements)

            # Generate recommendations
            recommendations = self.gap_analyzer.generate_recommendations(
                student_profile, job_requirements, skill_gaps
            )

            state["gap_analysis"] = {
                "skill_match_percentage": skill_match,
                "matched_skills": skill_gaps["matched"],
                "missing_skills": skill_gaps["missing"],
                "recommendations": recommendations
            }

            state["messages"].append(f"Gap analysis completed - {skill_match}% match")

        except Exception as e:
            state["messages"].append(f"Gap analysis error: {str(e)}")
            # Provide fallback analysis
            state["gap_analysis"] = {
                "skill_match_percentage": 50.0,
                "matched_skills": ["python", "programming"],
                "missing_skills": ["advanced skills needed"],
                "recommendations": ["Continue learning and practicing"]
            }

        state["current_step"] = "gaps_analyzed"
        return state

    def generate_report_node(self, state: Dict[str, Any]) -> Dict[str, Any]:
        """Node to generate final report"""
        print("📋 Generating final report...")

        state["messages"].append("Generating final report...")

        try:
            student_name = state["student_data"]["profile"]["name"]
            target_role = state["target_role"]
            gap_analysis = state["gap_analysis"]

            # Create gap analysis object
            gap_report = GapAnalysis(
                student_name=student_name,
                target_role=target_role,
                skill_match_percentage=gap_analysis["skill_match_percentage"],
                matched_skills=gap_analysis["matched_skills"],
                missing_skills=gap_analysis["missing_skills"],
                recommendations=gap_analysis["recommendations"],
                priority_areas=gap_analysis["missing_skills"][:5]  # Top 5 priority areas
            )

            state["final_report"] = asdict(gap_report)
            state["messages"].append("Report generated successfully")

        except Exception as e:
            state["messages"].append(f"Report generation error: {str(e)}")
            # Provide fallback report
            state["final_report"] = {
                "student_name": "Student",
                "target_role": state["target_role"],
                "skill_match_percentage": 0.0,
                "matched_skills": [],
                "missing_skills": [],
                "recommendations": ["Please check your input data"],
                "priority_areas": []
            }

        state["current_step"] = "report_generated"
        return state

    def evaluate_student(self, student_data: Dict[str, Any], target_role: str) -> GapAnalysis:
        """Main method to evaluate student readiness"""

        initial_state = {
            "student_data": student_data,
            "job_data": {},
            "gap_analysis": {},
            "final_report": {},
            "target_role": target_role,
            "current_step": "initialized",
            "messages": []
        }

        # Run the workflow
        final_state = self.app.invoke(initial_state)

        return GapAnalysis(**final_state["final_report"])

In [None]:
def extract_text_from_pdf(pdf_path: str) -> str:
    """Extract text from PDF file"""
    try:
        with open(pdf_path, 'rb') as file:
            pdf_reader = PyPDF2.PdfReader(file)
            text = ""
            for page in pdf_reader.pages:
                text += page.extract_text()
        return text
    except Exception as e:
        print(f"Error reading PDF: {e}")
        return ""

def format_report(gap_analysis: GapAnalysis) -> str:
    """Format the gap analysis report"""

    report = f"""
🎓 **Student Readiness Evaluation Report**
===============================================

👨‍🎓 **Student:** {gap_analysis.student_name}
🎯 **Target Role:** {gap_analysis.target_role}
📊 **Skill Match Score:** {gap_analysis.skill_match_percentage}%

✅ **Matched Skills ({len(gap_analysis.matched_skills)}):**
{chr(10).join([f"   • {skill.title()}" for skill in gap_analysis.matched_skills[:10]])}

❌ **Missing Skills ({len(gap_analysis.missing_skills)}):**
{chr(10).join([f"   • {skill.title()}" for skill in gap_analysis.missing_skills[:10]])}

📈 **Recommendations:**
{chr(10).join([f"   {i+1}. {rec}" for i, rec in enumerate(gap_analysis.recommendations)])}

🎯 **Priority Areas to Focus:**
{chr(10).join([f"   • {area.title()}" for area in gap_analysis.priority_areas])}

===============================================
Generated on: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}
"""

    return report

# Example usage and testing
def main():
    """Main function to demonstrate the system"""

    print("🚀 Starting Student Readiness Evaluation System...")

    # Initialize the system
    evaluation_system = StudentEvaluationSystem()

    # Sample student data
    sample_student_data = {
        "resume_text": """
        John Doe
        Computer Science Graduate

        Skills: Python, TensorFlow, Machine Learning, Computer Vision, OpenCV
        Experience:
        - Developed object detection system using YOLOv5
        - Built a recommendation system using collaborative filtering
        - Created a web application using Flask and React

        Projects:
        - Autonomous Vehicle Lane Detection using CNN
        - E-commerce Recommendation Engine
        - Stock Price Prediction using LSTM

        Education: B.Tech in Computer Science
        """,
        "github_url": "https://github.com/johndoe",
        "project_description": """
        Developed an autonomous vehicle lane detection system using Convolutional Neural Networks.
        Used OpenCV for image processing, TensorFlow for model training, and deployed the model
        using Flask API. The system achieved 94% accuracy on test dataset.
        """
    }

    target_role = "Computer Vision Engineer"

    try:
        # Run evaluation
        result = evaluation_system.evaluate_student(sample_student_data, target_role)

        # Display results
        print(format_report(result))

        return result

    except Exception as e:
        print(f"Error during evaluation: {e}")
        return None

# Interactive function for Google Colab
def evaluate_student_interactive():
    """Interactive function for easy testing in Google Colab"""

    print("🎓 Student Readiness Evaluation System")
    print("=" * 50)

    # Get user input
    student_name = input("Enter student name: ")
    resume_text = input("Enter resume/profile text: ")
    github_url = input("Enter GitHub URL (optional): ")
    project_description = input("Enter project description (optional): ")
    target_role = input("Enter target job role: ")

    # Prepare student data
    student_data = {
        "resume_text": f"{student_name}\n{resume_text}"
    }

    if github_url.strip():
        student_data["github_url"] = github_url.strip()

    if project_description.strip():
        student_data["project_description"] = project_description.strip()

    # Initialize and run evaluation
    evaluation_system = StudentEvaluationSystem()

    try:
        result = evaluation_system.evaluate_student(student_data, target_role)
        print("\n" + "="*80)
        print(format_report(result))
        return result
    except Exception as e:
        print(f"Error: {e}")
        return None



if __name__ == "__main__":
    print("🎯 Student Readiness Evaluation System Loaded!")
    print("\nAvailable functions:")
    print("1. main() - Run with sample data")
    print("2. evaluate_student_interactive() - Interactive evaluation")
    print("3. run_tests() - Run automated tests")
    print("\nDon't forget to set your API keys:")
    print("- GOOGLE_API_KEY for Gemini")
    print("- TAVILY_API_KEY for Tavily search")

    !streamlit run app.py &> /dev/null &
    url = ngrok.connect(8501)
    print("🔗 Click this link to open your app:", url)

    main()
    evaluate_student_interactive()

🎯 Student Readiness Evaluation System Loaded!

Available functions:
1. main() - Run with sample data
2. evaluate_student_interactive() - Interactive evaluation
3. run_tests() - Run automated tests

Don't forget to set your API keys:
- GOOGLE_API_KEY for Gemini
- TAVILY_API_KEY for Tavily search
🚀 Starting Student Readiness Evaluation System...
🔍 Analyzing student profile...
💼 Evaluating job requirements...
📊 Analyzing skill gaps...
📋 Generating final report...

🎓 **Student Readiness Evaluation Report**

👨‍🎓 **Student:** John Doe
🎯 **Target Role:** Computer Vision Engineer
📊 **Skill Match Score:** 40.0%

✅ **Matched Skills (2):**
   • Python
   • Machine Learning

❌ **Missing Skills (3):**
   • Basic Programming
   • Problem Solving
   • Git

📈 **Recommendations:**
   1. John's profile shows a strong foundation in Computer Vision, but lacks some fundamental and crucial professional skills.  Here's a prioritized plan to bridge the gap:
   2. **1. Solidify Foundational Programming Skills: