In [1]:
import os
from typing import Literal, List
from pydantic import BaseModel
import tiktoken
import PyPDF2
from docx import Document
import re
from openai import OpenAI
import json


In [13]:
os.environ["OPENAI_API_KEY"] = "add_your_key"

In [3]:
class CriterionAnalysis(BaseModel):
    criterion: str
    achievements: List[str]
    explanation: str

class OutputSchema(BaseModel):
    criteria_analysis: List[CriterionAnalysis]
    overall_summary: str
    rating: Literal['low', 'medium', 'high']
    
class CandidateCV(BaseModel):
    cv:str

In [36]:
# helper functions
def read_cv(file_path: str) -> str:
    _, file_extension = os.path.splitext(file_path)
    
    if file_extension.lower() == '.pdf':
        with open(file_path, 'rb') as file:
            reader = PyPDF2.PdfReader(file)
            text = ' '.join(page.extract_text() for page in reader.pages)
    
    elif file_extension.lower() in ['.docx', '.doc']:
        doc = Document(file_path)
        text = ' '.join(paragraph.text for paragraph in doc.paragraphs)
    
    else:
        raise ValueError("Unsupported file format")
    
    # Clean up the text
    text = re.sub(r'\s+', ' ', text)  # Replace multiple spaces with single space
    text = re.sub(r'\n+', '\n', text)  # Replace multiple newlines with single newline
    text = text.strip()  # Remove leading/trailing whitespace
    
    return text



def num_tokens_from_string(string: str, encoding_name: str = "cl100k_base") -> int:
    """Returns the number of tokens in a text string."""
    encoding = tiktoken.get_encoding(encoding_name)
    num_tokens = len(encoding.encode(string))
    return num_tokens

def parse_and_format_output(output):
    if isinstance(output, dict):
        # Already structured JSON
        data = output
    else:
        # Try to extract JSON from string
        json_match = re.search(r'```json\n(.*?)\n```', output, re.DOTALL)
        if json_match:
            try:
                data = json.loads(json_match.group(1))
            except json.JSONDecodeError:
                print("Error: Unable to parse JSON content.")
                print("Raw content:")
                print(output)
                return
        else:
            print("No JSON content found. Raw output:")
            print(output)
            return

    # Format and print the output
    try:
        print("Criteria Analysis:")
        for analysis in data['criteria_analysis']:
            print(f"\n{analysis['criterion']}:")
            if analysis['achievements']:
                print("  Achievements:")
                for achievement in analysis['achievements']:
                    print(f"    - {achievement}")
            else:
                print("  No specific achievements listed.")
            print(f"  Explanation: {analysis['explanation']}")
        
        print(f"\nOverall Summary: {data['overall_summary']}")
        print(f"Rating: {data['rating'].capitalize()}")
    except KeyError as e:
        print(f"Error: Missing expected key in data structure: {e}")
        print("Raw content:")
        print(json.dumps(data, indent=2))


In [5]:
file_path = "/Users/bnayak/Downloads/CV_BN_08.pdf"  # or "path/to/your/cv.docx"
cv_text = read_cv(file_path)
candidate_cv = CandidateCV(cv=cv_text)

In [7]:
prompt = f"""
You are an immigration expert evaluating candidates for O-1A Visa eligibility. An O-1A visa is granted to individuals with extraordinary ability in sciences, education, business, or athletics (excluding arts, motion pictures, or television industry).

Given a candidate's CV and 8 criteria, your tasks are:
1) Match the candidate's achievements to each of the 8 O-1A criteria.
2) Rate the candidate's overall chances of O-1A visa qualification.

For each criterion, provide:
- Relevant achievements from the CV
- A brief explanation of how these achievements relate to the criterion

Use the following rating scale:
- Low: Meets 0-2 criteria convincingly
- Medium: Meets 3-4 criteria convincingly
- High: Meets 5 or more criteria convincingly

Output your analysis in this JSON structure:
{OutputSchema.model_dump_json(mode='json_schema') if hasattr(OutputSchema, 'model_dump_json') else OutputSchema.schema_json()}

The 8 criteria are:
<Criterions>
<Awards> Documentation of the beneficiary’s receipt of nationally or internationally recognized prizes or awards for excellence in the field of endeavor.</Awards>
<Membership> Documentation of the beneficiary’s membership in associations in the field for which classification is sought, which require outstanding achievements of their members, as judged by recognized national or international experts in their disciplines or fields.</Membership>
<Press> Published material in professional or major trade publications or major media about the beneficiary, relating to the beneficiary's work in the field for which classification is sought. This evidence must include the title, date, and author of such published material and any necessary translation.</Press>
<Judging> Evidence of the beneficiary's participation on a panel, or individually, as a judge of the work of others in the same or in an allied field of specialization for which classification is sought. </Judging>
<Original Contribution> Evidence of the beneficiary's original scientific, scholarly, or business-related contributions of major significance in the field.</Original Contribution>
<Scholarly Articles> Evidence of the beneficiary's authorship of scholarly articles in the field, in professional journals, or other major media.</Scholarly Articles>
<Critical Employment> Evidence that the beneficiary has been employed in a critical or essential capacity for organizations and establishments that have a distinguished reputation. </Critical Employment>
<High Remuneration>	Evidence that the beneficiary has either commanded a high salary or will command a high salary or other remuneration for services as evidenced by contracts or other reliable evidence.</High Remuneration>
</Criterions>

Candidate's CV:
<candidate_cv>
{candidate_cv.cv}
</candidate_cv>

Analyze the CV thoroughly and provide your evaluation based on these criteria.

""".strip()

In [8]:
token_count = num_tokens_from_string(prompt)
print(f"The prompt contains {token_count} tokens.")

The prompt contains 2482 tokens.


In [None]:
print(prompt)

In [14]:
# Set your OpenAI API key
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))


In [30]:
def call_openai_api(prompt: str, model: str = "gpt-4o-mini") -> dict:
    try:
        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": "You are an expert immigration consultant."},
                {"role": "user", "content": prompt}
            ],
            temperature=0.7,
            max_tokens=2000
        )
        content = response.choices[0].message.content
        
        try:
            return json.loads(content)
        except json.JSONDecodeError:
            print("Warning: The API response was not valid JSON. Returning raw content.")
            return content
    except Exception as e:
        print(f"An error occurred: {str(e)}")
        return None


In [31]:
# Use the function
result = call_openai_api(prompt)



In [37]:
if result:
    parse_and_format_output(result)

Criteria Analysis:

Awards:
  Achievements:
    - AI Patent 'WO2021237019A1' - Environmental Adjustment using AI (Published)
    - Gartner Data & Analytics Excellence Awards
  Explanation: The candidate has received a patent for an AI invention and has also been recognized with a prestigious award in the field of Data & Analytics, indicating excellence and innovation.

Membership:
  No specific achievements listed.
  Explanation: There is no documentation provided in the CV indicating membership in associations requiring outstanding achievements.

Press:
  No specific achievements listed.
  Explanation: The CV does not mention any published material or media coverage relating to the candidate's work in the field.

Judging:
  No specific achievements listed.
  Explanation: There is no evidence of the candidate participating as a judge in any relevant panels or competitions.

Original Contribution:
  Achievements:
    - Developed and designed an in-house Gen AI Agentic framework for crea