# In [None]:  (notebook cell marker left by the export; commented out so the file parses as Python)
import os
import logging
import json
import re
import PyPDF2
from typing import Dict, Any
from transformers import pipeline
from langchain.agents import Tool
from langchain.utilities import DuckDuckGoSearchAPIWrapper
from langchain.llms import OpenAI
from langchain.agents import load_tools

from crewai import Agent, Task, Crew, Process

from langchain.tools import HumanInputRun

# Emit INFO-level (and above) log records for this script.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Supply your OpenAI API key through the environment (e.g. `export OPENAI_API_KEY=...`).
# `setdefault` keeps a key that is already exported instead of overwriting it with the
# empty placeholder; the placeholder only guarantees that the variable exists.
os.environ.setdefault("OPENAI_API_KEY", "")
if not os.environ["OPENAI_API_KEY"]:
    logger.warning("OPENAI_API_KEY is empty; OpenAI-backed tools will fail until it is set.")

# DuckDuckGo web search wrapped as a LangChain tool so that agents can call it.
search_tool = Tool(
    name="DuckDuckGo Search",
    func=DuckDuckGoSearchAPIWrapper().run,
    description="A tool for searching the web using DuckDuckGo."
)

# Smoke-test the search tool once at start-up.
search_query = "AI Developer job posting"
search_tool_input = {"query": search_query}

# The call needs network access and the search backend may refuse the request. A failed
# smoke test must not abort everything defined below, so the error is caught and logged;
# on success the result is logged rather than silently discarded.
try:
    logger.info("Search tool smoke test result: %s", search_tool.run(search_tool_input))
except Exception as e:
    logger.warning("Search tool smoke test failed: %s", e)

# Create the LLM before the tool that wraps it, so the file reads top to bottom.
llm = OpenAI()

# Expose the LLM as a tool. LangChain LLM objects are called through `invoke`
# (they provide no `run` method). The lambda looks `llm` up at call time, so
# rebinding the module-level `llm` later also changes what the tool uses.
openai_tool = Tool(
    name="OpenAI",
    func=lambda query: llm.invoke(query),
    description="A tool for generating text using OpenAI's language model."
)

# Lets an agent pause and ask the person running the script for input.
human_tool = Tool(
    name="Human Input",
    func=HumanInputRun().run,
    description="A tool that interacts with humans to get input during the process."
)

# Resume parsing functions
def extract_text_from_pdf(pdf_path: str) -> str:
    """Return the concatenated text of every page of a PDF file.

    Args:
        pdf_path: Filesystem path of the PDF to read.

    Returns:
        The text of all pages joined in page order, or ``""`` when the file
        cannot be opened or parsed (the error is logged, not raised).
    """
    try:
        with open(pdf_path, "rb") as file:
            reader = PyPDF2.PdfReader(file)
            # `or ""` guards against a page whose extraction yields a falsy/None
            # result, so a single such page cannot discard the text of the others.
            return "".join(page.extract_text() or "" for page in reader.pages)
    except Exception as e:
        # Best-effort by design: callers test the returned string for emptiness.
        logger.error("Error extracting text from PDF: %s", e)
        return ""

# Named-entity-recognition pipeline, built once at import time and reused by parse_resume.
ner_pipeline = pipeline(
    task="ner",
    model="dbmdz/bert-large-cased-finetuned-conll03-english",
)

def parse_resume(resume_text: str) -> Dict[str, Any]:
    """Extract structured fields from raw resume text.

    Person and organisation names come from the module-level ``ner_pipeline``;
    e-mail addresses and skills are matched directly against the text.

    Args:
        resume_text: Plain text of the resume. Empty or whitespace-only text
            skips the NER model call.

    Returns:
        A dict with the keys "Name", "Job Title", "Company", "Email", "Skills",
        "Education" and "Experience". Values are de-duplicated in order of
        first appearance. "Skills" is always a list. Every other field is a
        list, except that a field with exactly one value holds that value
        itself. "Job Title", "Education" and "Experience" are not populated by
        this function and stay empty lists.
    """
    resume_text = resume_text or ""

    # NER label -> output field. Assumes CoNLL-03 style labels (PER/ORG/LOC/MISC),
    # as the model name suggests -- TODO confirm against the model card.
    # Labels without a matching field are ignored.
    label_to_field = {"PER": "Name", "ORG": "Company"}

    parsed_data: Dict[str, Any] = {
        "Name": [],
        "Job Title": [],
        "Company": [],
        "Email": [],
        "Skills": [],
        "Education": [],
        "Experience": []
    }

    entities = ner_pipeline(resume_text) if resume_text.strip() else []

    spans = []          # [label, text] pairs in document order
    span_open = False   # True while the last span may still be extended
    prev_index = None   # token position of the previous entity, when provided

    for entity in entities:
        prefix, _, label = entity['entity'].partition('-')
        word = entity['word']
        index = entity.get('index')

        # Anything that is not a B-/I- tag (e.g. "O") ends the current span.
        if prefix not in ('B', 'I') or not label:
            span_open = False
            prev_index = index
            continue

        # Untagged tokens may be absent from the pipeline output, so two
        # consecutive list items are not necessarily neighbours in the text.
        # When token positions are available, require them to be consecutive.
        adjacent = prev_index is None or index is None or index == prev_index + 1
        prev_index = index

        # "##" marks a word-piece continuing the previous token.
        is_subword = word.startswith('##')
        if is_subword:
            word = word[2:]

        if span_open and adjacent and is_subword:
            spans[-1][1] += word
        elif span_open and adjacent and prefix == 'I' and spans[-1][0] == label:
            spans[-1][1] += " " + word
        else:
            # A B- tag, a label change, a gap, or an I- tag with nothing to
            # continue all start a new span.
            spans.append([label, word])
            span_open = True

    # Every span is stored here, after the loop, so the last one is not lost.
    for label, text in spans:
        field = label_to_field.get(label)
        text = text.strip()
        if field and text:
            parsed_data[field].append(text)

    # E-mail addresses are matched on the raw text rather than on NER tokens.
    parsed_data["Email"].extend(
        re.findall(r'[\w.+-]+@[\w-]+(?:\.[\w-]+)+', resume_text)
    )

    # De-duplicate while keeping first-seen order; a single value is unwrapped.
    for key, values in parsed_data.items():
        unique = list(dict.fromkeys(values))
        parsed_data[key] = unique[0] if len(unique) == 1 else unique

    # Look-arounds replace `\b` so that "C++" matches even when it is followed
    # by whitespace or punctuation (there is no word boundary after "+").
    skills = re.findall(
        r'(?<!\w)(?:Python|Java|C\+\+|JavaScript|Machine Learning|NLP|AI|Pyspark|LLM|GenAI)(?!\w)',
        resume_text,
    )
    parsed_data["Skills"] = list(dict.fromkeys(skills))

    return parsed_data


# Agent 1: reads job postings; may search the web and query the LLM tool.
tech_job_researcher = Agent(
    role="Tech Job Researcher",
    goal="Analyze job postings to extract key requirements and skills",
    backstory="You are an AI specialized in analyzing tech job postings. Your expertise lies in identifying crucial skills, qualifications, and requirements for various tech positions.",
    tools=[search_tool, openai_tool],  # web search plus the wrapped OpenAI tool
    allow_delegation=False,
    verbose=True,
)

# Agent 2: profiles the candidate; may query the LLM tool and ask a human for input.
personal_profiler = Agent(
    role="Personal Profiler",
    goal="Examine candidate profiles and personal statements to extract key information",
    backstory="You are an AI expert in understanding and profiling individuals based on their resumes, GitHub profiles, and personal statements. You excel at identifying unique skills and experiences.",
    tools=[openai_tool, human_tool],  # wrapped OpenAI tool plus the human-input tool
    allow_delegation=False,
    verbose=True,
)

# Agent 3: writes the tailored resume; the only agent with delegation enabled.
resume_strategist = Agent(
    role="Resume Strategist",
    goal="Align candidate experiences with job requirements to create tailored resumes",
    backstory="You are an AI specialized in resume optimization. Your skill lies in matching candidate profiles with job requirements to create highly effective, tailored resumes.",
    tools=[openai_tool],
    allow_delegation=True,
    verbose=True,
)

# Agent 4: produces interview questions and preparation material.
interview_preparer = Agent(
    role="Interview Preparer",
    goal="Generate targeted interview materials based on job requirements and candidate profiles",
    backstory="You are an AI expert in preparing candidates for interviews. You excel at creating targeted questions and materials that align with both the job requirements and the candidate's profile.",
    tools=[openai_tool],
    allow_delegation=False,
    verbose=True,
)

# Define Tasks
# The `{placeholder}` fields in the descriptions are filled from the `inputs` dict given
# to `Crew.kickoff`; without them the run's inputs would never reach the agents.
analyze_job_posting = Task(
    description="Analyze the job posting at {job_posting_url} and extract key requirements and skills.",
    agent=tech_job_researcher,
    expected_output="A string containing the job requirements, skills, location, job title, and company."
)

profile_candidate = Task(
    description=(
        "Examine the candidate's resume, GitHub profile, and personal statement to create a comprehensive profile.\n"
        "Parsed resume: {parsed_resume}\n"
        "GitHub profile: {github_profile}\n"
        "Personal statement: {personal_statement}"
    ),
    agent=personal_profiler,
    expected_output="A string containing the candidate's profile, including name, job title, skills, education, experience, and email."
)

# The two tasks below need the output of BOTH tasks above, so it is passed explicitly
# as context instead of relying on whatever the crew forwards by default.
create_tailored_resume = Task(
    description="Using the job requirements and candidate profile, create a tailored resume.",
    agent=resume_strategist,
    context=[analyze_job_posting, profile_candidate],
    expected_output="A string containing the tailored resume for the candidate."
)

prepare_interview_materials = Task(
    description="Generate interview questions and preparation materials based on the job requirements and candidate profile.",
    agent=interview_preparer,
    context=[analyze_job_posting, profile_candidate],
    expected_output="A string containing interview questions and preparation materials for the candidate."
)


# Assemble the crew: all four agents and their tasks, run with the sequential process.
resume_parsing_crew = Crew(
    process=Process.sequential,
    agents=[
        tech_job_researcher,
        personal_profiler,
        resume_strategist,
        interview_preparer,
    ],
    tasks=[
        analyze_job_posting,
        profile_candidate,
        create_tailored_resume,
        prepare_interview_materials,
    ],
    verbose=True,  # detailed logging
)

# Main execution
if __name__ == "__main__":
    # Example inputs
    job_posting_url = "https://www.lockheedmartinjobs.com/job/king-of-prussia/ai-machine-learning-engineer-stf/694/74728384000?utm_campaign=google_jobs_apply&utm_source=google_jobs_apply&utm_medium=organic"
    resume_pdf_path = "/content/test/resume.pdf"  # Replace with actual path
    github_profile = "https://github.com/xyz"  # Replace with actual profile URL
    personal_statement = "I am a passionate Data scientist with 10 years of experience..."

    # Extract resume text; an empty string means extraction failed.
    resume_text = extract_text_from_pdf(resume_pdf_path)

    if resume_text:
        # Parse resume
        parsed_resume = parse_resume(resume_text)
        print("Parsed Resume Data:")
        print(json.dumps(parsed_resume, indent=2))

        # Search for job posting, reusing the module-level search tool.
        search_query = "AI Machine learning engineer job posting"
        search_input = {"query": search_query}

        # The search is informational only; a failure is reported and the run continues.
        try:
            job_posting_result = search_tool.run(search_input)
            print(f"Job Posting Result: {job_posting_result}")
        except Exception as e:
            print(f"Error during job posting search: {e}")

        # Run the crew
        result = resume_parsing_crew.kickoff(
            inputs={
                "job_posting_url": job_posting_url,
                "parsed_resume": parsed_resume,
                "github_profile": github_profile,
                "personal_statement": personal_statement
            }
        )

        print("\nCrew Execution Result:")
        print(result)
    else:
        print("Failed to extract text from the PDF.")
