In [1]:
import os
import streamlit as st
import PyPDF2
import re
from langchain_openai import OpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain_core.pydantic_v1 import BaseModel, Field
from typing import List
from langchain.output_parsers import PydanticOutputParser
from langchain_core.caches import InMemoryCache
import langchain
from langchain_openai import ChatOpenAI
from langchain_core.globals import set_llm_cache


# The OpenAI client picks up OPENAI_API_KEY from the process environment on its
# own, so there is nothing to copy here. Re-assigning os.environ from os.getenv
# was a no-op when the variable existed and raised an opaque
# "TypeError: str expected, not NoneType" when it did not. Fail fast with a
# readable message instead.
if os.getenv("OPENAI_API_KEY") is None:
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Export it in the environment before "
        "starting the notebook kernel."
    )

In [21]:
# Skills returned when the resume has no extractable text or the model reply
# contains no usable lines.
_FALLBACK_SKILLS = ("C++", "Python", "Java")

# Leading list markers the model may put in front of a skill: "-", "*" or a
# bullet character, or "1." / "1)" style numbering.
_LIST_MARKER = re.compile(r"^(?:\d+[.)]\s+|[-*\u2022\s]+)")


def _read_pdf_text(pdf_file) -> str:
    """Return the concatenated text of every page of ``pdf_file``."""
    reader = PyPDF2.PdfReader(pdf_file)
    # `or ""` guards against a page yielding None/empty text (presumably
    # image-only pages); joining with a newline keeps the last word of one
    # page from fusing with the first word of the next.
    return "\n".join(page.extract_text() or "" for page in reader.pages)


def _parse_skills(reply: str, limit: int) -> List[str]:
    """Turn the model reply into a de-duplicated, order-preserving skill list."""
    parts = reply.split("Skills:")
    # If the model left out the "Skills:" header, treat the whole reply as the
    # list instead of failing with IndexError.
    body = parts[1] if len(parts) > 1 else reply

    skills = []
    for line in body.splitlines():
        # Strip the marker first and filter afterwards, so a bare "-" line
        # does not turn into an empty skill.
        skill = _LIST_MARKER.sub("", line.strip()).strip()
        if skill:
            skills.append(skill)

    # dict.fromkeys removes duplicates while keeping the order in which the
    # model ranked the skills; truncate only after de-duplicating so repeats
    # do not eat into the limit.
    return list(dict.fromkeys(skills))[:limit]


def extract_info_from_pdf_new(pdf_file) -> List[str]:
    """Extract up to 10 skills from a PDF resume using an OpenAI chat model.

    Args:
        pdf_file: Whatever ``PyPDF2.PdfReader`` accepts (the notebook passes a
            file path string).

    Returns:
        A list of unique skill strings in the order the model listed them,
        at most 10 long. ``["C++", "Python", "Java"]`` is returned when the
        PDF has no extractable text or the reply contains no usable skills.
    """
    resume_text = _read_pdf_text(pdf_file)

    # Nothing to analyse (e.g. no text layer): skip the API call rather than
    # asking the model to extract skills from an empty resume.
    if not resume_text.strip():
        return list(_FALLBACK_SKILLS)

    # NOTE(review): temperature=0.7 makes repeated runs return different
    # lists; consider 0 if reproducible output matters.
    openai_llm = ChatOpenAI(model="gpt-4o-mini", max_tokens=1000, temperature=0.7)

    prompt_template_resume = PromptTemplate(
        input_variables=["resume_text"],
        template="""
        Extract only the skills section from the given resume. If a skills section is not clearly defined, extract the most relevant information related to skills.
        Provide a maximum of 10 most relevant and important skills.

        Resume:
        {resume_text}

        Please provide the extracted information in the following format:

        Skills:
        [List of up to 10 skills, one per line]
        """,
    )

    # Set up LLMChain for the skills extraction
    extraction_chain = LLMChain(llm=openai_llm, prompt=prompt_template_resume)

    # invoke() replaces the deprecated run(); it returns a dict and LLMChain
    # stores the generated string under its default output key, "text".
    extracted_skills = extraction_chain.invoke({"resume_text": resume_text})["text"]

    # Strip list markers, de-duplicate, and keep at most 10 skills.
    skills_list = _parse_skills(extracted_skills, limit=10)

    # Fall back to a default list when nothing usable came back.
    return skills_list or list(_FALLBACK_SKILLS)

In [22]:
# Run the extractor on the sample resume; the path is relative to the
# notebook's working directory.
skill_list = extract_info_from_pdf_new("Ashish_Resume_ATS.pdf")

In [23]:
# Display the extracted skills as this cell's output.
skill_list

['Python',
 'Generative AI',
 'MongoDB',
 'OpenAI',
 'FastAPI',
 'Docker',
 'API Integration',
 'AWS',
 'SQL',
 'Flask']