In [1]:
!pip install openai
!pip install streamlit
!pip install python-docx
!pip install pypdf

Collecting openai
  Downloading openai-1.44.0-py3-none-any.whl.metadata (22 kB)
Collecting httpx<1,>=0.23.0 (from openai)
  Downloading httpx-0.27.2-py3-none-any.whl.metadata (7.1 kB)
Collecting jiter<1,>=0.4.0 (from openai)
  Downloading jiter-0.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.6 kB)
Collecting httpcore==1.* (from httpx<1,>=0.23.0->openai)
  Downloading httpcore-1.0.5-py3-none-any.whl.metadata (20 kB)
Collecting h11<0.15,>=0.13 (from httpcore==1.*->httpx<1,>=0.23.0->openai)
  Downloading h11-0.14.0-py3-none-any.whl.metadata (8.2 kB)
Downloading openai-1.44.0-py3-none-any.whl (367 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m367.8/367.8 kB[0m [31m13.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading httpx-0.27.2-py3-none-any.whl (76 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.4/76.4 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading httpcore-1.0.5-py3-none-any.whl (77 kB)
[2K   [90m━

In [5]:
import openai
from openai import AuthenticationError
import streamlit as st
import os

In [11]:
# Do not paste the key into the notebook (it would be saved and shared with it).
# Keep a key that is already present in the environment; otherwise ask for it
# interactively without echoing the input.
if not os.environ.get('OPENAI_API_KEY'):
    from getpass import getpass
    os.environ['OPENAI_API_KEY'] = getpass("Enter your OpenAI API key: ")

In [8]:
def read_document(file):
    """
    Read a resume file and return its text content.

    The format is selected from the file extension, compared
    case-insensitively (so ``resume.PDF`` and ``resume.pdf`` are treated
    alike). Supported formats:

    * ``.pdf``  - text of every page, extracted with pypdf and concatenated
    * ``.docx`` - paragraph texts, read with python-docx and joined by newlines
    * ``.txt``  - file contents decoded as UTF-8

    Args:
        file: Path to the resume file.

    Returns:
        The text content of the file, or None (after printing a message)
        when the extension is not one of the supported formats.
    """

    # Only the extension is needed; lower-case it so '.PDF' / '.Docx' match too.
    extension = os.path.splitext(file)[1].lower()

    # PDF file loader using pypdf
    if extension == '.pdf':
        from pypdf import PdfReader
        reader = PdfReader(file)
        # `or ""` is defensive: tolerate a page whose extraction yields nothing.
        data = "".join(page.extract_text() or "" for page in reader.pages)

    # DOCX file loader using python-docx
    elif extension == '.docx':
        from docx import Document
        doc = Document(file)
        data = "\n".join(para.text for para in doc.paragraphs)

    # TXT file loader using built-in open
    elif extension == '.txt':
        with open(file, 'r', encoding='utf-8') as f:
            data = f.read()

    else:
        print('Document format not supported!')
        return None

    return data

In [9]:
def extractor(resume_data):
    """
    Extract structured information from resume text using 'gpt-4o-mini'.

    The resume text is sent as the user message together with a system prompt
    that asks the model for name, contact information, professional summary,
    work experience, education, skills and certifications in JSON format.

    Args:
        resume_data (str): The text content of the resume.

    Returns:
        str: The content of the model's first response choice, returned as-is.
        The prompt requests JSON (optionally accompanied by a plain-text
        message), but the reply is not parsed or validated here. For blank
        input the prompt's fallback message is returned without calling the
        API.

    Raises:
        TypeError: If resume_data is not a string (for example None).
    """

    # Fail fast, before any client is created or request is sent, instead of
    # letting a non-text payload surface as an opaque API error.
    if not isinstance(resume_data, str):
        raise TypeError(
            f"resume_data must be a string of resume text, got {type(resume_data).__name__}"
        )

    # Blank text cannot contain any resume section; answer with the same
    # fallback message the system prompt defines for that case and skip the
    # API call.
    if not resume_data.strip():
        return "Sorry, it seems this is not a resume. Try uploading a different file."

    prompt = '''
    You are an AI bot designed to act as a professional for parsing resumes. You are given a resume and your job is to extract the following information from the resume:
    1. Name
    2. Contact Information (Email, Phone Number)
    3. Professional Summary
    4. Work Experience
    5. Education
    6. Skills
    7. Certifications

    Provide the extracted information in JSON format.

    If the file does not contain some of the sections, then do not request further information for that and apart from the information return a message: "It seem some of the information is missing from the resume"

    If the file does not contain any valid sections, return the fallback message: "Sorry, it seems this is not a resume. Try uploading a different file.".
    REMEMBER YOU ARE NOT CAPABLE OF SOLVING ANY OTHER QUERIES AND MESSAGES ARE NOT PART OF JSON FORMAT."
    '''

    # The client is created without arguments, so it relies on the API key
    # being configured outside this function (e.g. OPENAI_API_KEY).
    client = openai.OpenAI()
    messages = [
        {"role": "system", "content": prompt},
        {"role": "user", "content": resume_data}
    ]

    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=messages,
        temperature=0.0,  # lowest temperature setting
        max_tokens=1500   # upper bound on the length of the reply
    )

    # Extract content from the response
    extracted_data = response.choices[0].message.content

    return extracted_data

In [12]:
# Path to the resume to parse; replace the placeholder with a real file.
RESUME_PATH = 'YOUR RESUME FILE'

data = read_document(RESUME_PATH)

# read_document returns None for unsupported extensions; stop here with a clear
# message instead of sending None to the model.
if data is None:
    raise ValueError(
        f"Could not read {RESUME_PATH!r}: expected a .pdf, .docx or .txt file"
    )

extracted_data = extractor(data)

In [None]:
# extracted_data is the raw text returned by extractor(); print() shows it with
# real line breaks rather than the escaped repr a bare expression would display.
print(extracted_data)