In [1]:
from langchain.document_loaders import UnstructuredPDFLoader
from langchain.indexes import VectorstoreIndexCreator
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI
from langchain.callbacks import get_openai_callback
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from helicone.openai_proxy import openai
from langchain.llms import AzureOpenAI
import os
import pandas as pd


In [2]:
from langchain.llms import OpenAI
from langchain.chains.summarize import load_summarize_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter



In [3]:
import openai
os.environ["LANGCHAIN_TRACING"] = "LANGCHAIN_TRACING"
os.environ["LANGCHAIN_SESSION"] = "LANGCHAIN_SESSION"
openai.api_type  = "openai.api_type"
openai.api_version  = "openai.api_version"
os.environ["OPENAI_API_KEY"]  = "OPENAI_API_KEY"
openai.api_base = "openai.api_base"

In [4]:
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate, ChatPromptTemplate, HumanMessagePromptTemplate
from langchain.llms import OpenAI
from langchain.output_parsers import CommaSeparatedListOutputParser
from langchain.chat_models import ChatOpenAI
output_parser = CommaSeparatedListOutputParser()
format_instructions = output_parser.get_format_instructions()


In [5]:
llm = AzureOpenAI( temperature=0, 
    engine="GPT4-8k",
    model_name="gpt-4",
    headers={
      "Helicone-Auth": "",
      "Helicone-User-Id": ""
    })



In [6]:
job_description = """
We're seeking talented Junior Python Engineers to become part of our team!! Our Junior Python Developer wears multiple hats on their projects and is responsible for building and maintaining Python applications and interacting with other developers, designers, and stakeholders on their projects.

You must have the skills to write a plethora of unit tests for your code to guarantee all functionality is being achieved. You will need good collaboration and communication skills as you will be spending a lot of time with experienced engineers and understanding how they do their jobs. Moreover, should have in-depth knowledge of object-relational mapping, familiarity with server-side logic, and above-average Python programming skills.
Roles and Responsibilities
‍

Python application development, testing, and maintenance
Working with other developers, designers, and stakeholders to satisfy project needs.
Troubleshooting and code debugging
Developing and maintaining code and application documentation
Participating in code reviews and contributing to best practices for the team
Maintaining current knowledge of emerging technologies, programming languages, and software development techniques.
Using server-side logic to integrate user-facing aspects
Ensuring client feature requests are being evaluated and prioritized.
Blending data storage options together.
Coordinating with front-end developers.
Optimising existing databases reprogrammed to increase functionality.
Creating digital technologies to track web traffic.
Improve responsiveness and overall performance by developing back-end components.
Enhance the functionality of existing systems.
Deploy data security and protection solutions.
‍
Requirements and Skills
‍
Expertise in Python and associated frameworks such as Django and Flask.
Comprehensive knowledge of Python's multi-process architecture and threading restrictions.
Knowledge of server-side templating languages such as Jinja 2 and Mako.
Understanding of integration of many data sources into a single system.
Ability to work collaboratively on projects as well as independently when necessary.
Quick learner with the capacity to modify approaches dependent on the situation.
Awareness of how to test Python programs successfully.
Excellent communication and teamwork abilities.
Knowledge of front-end technology (like JavaScript and HTML5)
Excellent problem-solving abilities
Bachelor of Science in Computer Science, Engineering, or a related discipline.
Understanding front-end technologies such as JavaScript, HTML5, and CSS3 is required.
Knowledge of some ORM (Object Relational Mapper) libraries
Capable of combining many data sources and databases into a single system
User authentication and authorization knowledge are required across many systems, servers, and environments.

"""


In [7]:
def process_llm_response(llm_response):
    print(llm_response['result'])
    print('\n\nSources:')
    for source in llm_response["source_documents"]:
        print(source.metadata['source'])

In [53]:
def get_string_after_colon(string):
  """Returns the string after the colon character in the given string."""
  colon_index = string.find(":")
  if colon_index == -1:
    return ""
  else:
    return string[colon_index + 1:]


In [9]:
llm = AzureOpenAI( temperature=0, 
    engine="GPT4-8k",
    model_name="gpt-4",
    headers={
      "Helicone-Auth": "",
      "Helicone-User-Id": ""
    })

In [10]:
with get_openai_callback() as cb:
    prompt = f"""
               You are an experienced HR recruiter, your role involves receiving a ```{job_description}``` for a specific position. 
               Your primary responsibility is to compile a list of essential requirements and skills mentioned in the job description. 
               Kindly provide the main points in the form of a string list.
              """
    response = llm(prompt)
    print(response[0])
    print(response)
# Split the text into separate lines based on the bullet pattern
    lines = response.split('\n')
# Remove empty lines (if any)
    lines = [line.strip() for line in lines if line.strip()]
# Create a list by splitting each line on the numbering and discarding the numbering itself
    expertise_list = [line.split('- ')[1] for line in lines]
    print(len(expertise_list))
    print(expertise_list[0])
    print(cb)



-
- Python application development, testing, and maintenance
- Expertise in Python and associated frameworks such as Django and Flask
- Comprehensive knowledge of Python's multi-process architecture and threading restrictions
- Knowledge of server-side templating languages such as Jinja 2 and Mako
- Understanding of integration of many data sources into a single system
- Ability to work collaboratively on projects as well as independently when necessary
- Quick learner with the capacity to modify approaches dependent on the situation
- Awareness of how to test Python programs successfully
- Excellent communication and teamwork abilities
- Knowledge of front-end technology (like JavaScript and HTML5)
- Excellent problem-solving abilities
- Bachelor of Science in Computer Science, Engineering, or a related discipline
- Understanding front-end technologies such as JavaScript, HTML5, and CSS3
- Knowledge of some ORM (Object Relational Mapper) libraries
- Capable of combining many data sour

In [11]:
with get_openai_callback() as cb:
    pdf_folder_path = "data/"
    loaders = [UnstructuredPDFLoader(os.path.join(pdf_folder_path, fn)) for fn in os.listdir(pdf_folder_path)]

    print(cb)

Tokens Used: 0
	Prompt Tokens: 0
	Completion Tokens: 0
Successful Requests: 0
Total Cost (USD): $0.0


In [12]:
text_splitter = CharacterTextSplitter(        
    separator = "\n\n",
    chunk_size = 1000,
    chunk_overlap  = 200,
    length_function = len,
)

In [36]:
persist_directory = 'db'
import ast
import pandas as pd
convert_string_into_list_array=[]
for document in loaders:
    data = document.load()
    texts = text_splitter.split_documents(data)
    with get_openai_callback() as cb:
        query = f'''
             Your task is to extract answer from ```{texts}```.
             Question : Can you give me the answer What is the Name of Candidate,their Gender(if gender is present then give gender as Male/Female otherwise pridict the gender as per the name) and their address/location give this one by one in list?
                        Can you provide me with the answer whether the candidate possesses each of the skills mentioned in the list ```{expertise_list}```?
              
             Example: ['Ramkrishan Pal', 'Male/Female','Bhapal,Mandideep', "```{expertise_list[0]}``` : Yes(add a small comment with it if the skill is present)/No(add a small comment with it if the skill is present)", 'Skill Matched Percentage :50%', ```{data[0].metadata['source']}``` ]
             Please provide the answers in the form of 'Yes' (with a small comment if the skill is present) or 'No' (if the candidate does not have the skill). Check each candidate one by one, and proceed to the next candidate thereafter. The responses should be presented in the form of a list of strings. I want answer in list only.
             '''
        llm_response = llm(query)
        convert_string_into_list=ast.literal_eval(llm_response)
        convert_string_into_list_array.append(convert_string_into_list)
        print(cb)





Tokens Used: 1630
	Prompt Tokens: 1288
	Completion Tokens: 342
Successful Requests: 1
Total Cost (USD): $0.059160000000000004




Tokens Used: 1594
	Prompt Tokens: 1215
	Completion Tokens: 379
Successful Requests: 1
Total Cost (USD): $0.059190000000000006




Tokens Used: 1653
	Prompt Tokens: 1277
	Completion Tokens: 376
Successful Requests: 1
Total Cost (USD): $0.060869999999999994




Tokens Used: 1608
	Prompt Tokens: 1299
	Completion Tokens: 309
Successful Requests: 1
Total Cost (USD): $0.05750999999999999


In [50]:
for detail in convert_string_into_list_array:
    print(detail[0])
    print(detail[1])
    print(detail[2])
    print(detail[3])
    print(detail[4])
    print(detail[5])
    print(detail[6])
    print(detail[7])
    print(detail[8])
    print(detail[9])
    print(detail[10])
    print(detail[11])
    print(detail[12])
    print(detail[13])
    print(detail[14])
    print(detail[15])
    print(detail[16])
    print(detail[17])
    print(detail[18])
    print(detail[19])
    print(detail[20])


Jane Doe
Male
Plot no. 122 Udaypur, UP
Python application development, testing, and maintenance : No
Expertise in Python and associated frameworks such as Django and Flask : No
Comprehensive knowledge of Python's multi-process architecture and threading restrictions : No
Knowledge of server-side templating languages such as Jinja 2 and Mako : No
Understanding of integration of many data sources into a single system : No
Ability to work collaboratively on projects as well as independently when necessary : Yes (Excellent Teamwork and Collaboration)
Quick learner with the capacity to modify approaches dependent on the situation : Yes (Demonstrated ability to learn new technologies quickly and adapt to changing project requirements)
Awareness of how to test Python programs successfully : No
Excellent communication and teamwork abilities : Yes (Strong Written and Verbal Communication)
Knowledge of front-end technology (like JavaScript and HTML5) : Yes (HTML5, CSS3, and JavaScript)
Excellent

In [37]:
data_list = []
for detail in convert_string_into_list_array:
    data = {
        "Name": detail[0],
        "Gender": detail[1],
        "Location": detail[2],
        "Percentage": detail[-2].split(':')[1].strip(),
        "Path": detail[-1].strip(),
    }
    for i, exp in enumerate(expertise_list):
        expertise, value = detail[i + 3].split(': ')
        data[exp] = value.strip()

    data_list.append(data)

# Create the pandas DataFrame
df = pd.DataFrame(data_list)

# Display the DataFrame
display(df)

Unnamed: 0,Name,Gender,Location,Percentage,Path,"Python application development, testing, and maintenance",Expertise in Python and associated frameworks such as Django and Flask,Comprehensive knowledge of Python's multi-process architecture and threading restrictions,Knowledge of server-side templating languages such as Jinja 2 and Mako,Understanding of integration of many data sources into a single system,...,Quick learner with the capacity to modify approaches dependent on the situation,Awareness of how to test Python programs successfully,Excellent communication and teamwork abilities,Knowledge of front-end technology (like JavaScript and HTML5),Excellent problem-solving abilities,"Bachelor of Science in Computer Science, Engineering, or a related discipline","Understanding front-end technologies such as JavaScript, HTML5, and CSS3",Knowledge of some ORM (Object Relational Mapper) libraries,Capable of combining many data sources and databases into a single system,"User authentication and authorization knowledge across many systems, servers, and environments"
0,Jane Doe,Male,"Plot no. 122 Udaypur, UP",47%,Path: data/JaneDoeResume.pdf,No,No,No,No,No,...,Yes (Demonstrated ability to learn new technol...,No,Yes (Strong Written and Verbal Communication),"Yes (HTML5, CSS3, and JavaScript)",Yes (Problem-solving Abilities),Yes (Bachelor of Science in Computer Science),"Yes (HTML5, CSS3, and JavaScript)",Yes (Hibernate ORM),No,No
1,John Smith,Male,"125 Main Street, Bhopal, MP",73.33%,Path: data/JohnSmithResume.pdf,Yes (Junior Python Engineer),Yes (Utilized Python expertise and frameworks ...,No,No,Yes (Combined data sources and databases using...,...,Yes (Quick Learner),Yes (Python Program Testing),Yes (Teamwork and Collaboration),Yes (JavaScript and HTML5),Yes (Problem-solving Abilities),Yes (Bachelor of Science in Computer Science),"Yes (JavaScript, HTML5, and CSS3)",Yes (ORM Libraries),Yes (Data Sources and Databases Combining),Yes (User Authentication and Authorization Kno...
2,Raspreet Singh Pasrija,Male,"Than Khamharia, Bemetara, Chhattisgarh - 491338",33%,Path: data/Raspreet_Singh_Pasrija_Resume_E7D70...,No,No (Familiar with Python and Django),No,No,No,...,Yes (Learned and explored various technologies...,No,Yes (Worked in teams during internships),"Yes (Skills include HTML, JavaScript, and Reac...",No,Yes (Bachelor of Engineering in CSE),"Yes (Skills include HTML, JavaScript, CSS, and...",No,No,No
3,Vijay Koppadi,Male,"Hyderabad, India",25%,Path: data/vijay's_Resume_A2F18103E1.pdf,Yes (Python mentioned),No,No,No,No,...,No,No,No,Yes (HTML5 and JavaScript mentioned),No,Yes (Bachelor in Engineering),"Yes (HTML5, CSS3, and JavaScript mentioned)",No,No,No


In [41]:
from langchain.agents import create_pandas_dataframe_agent
from langchain.chat_models import ChatOpenAI
from langchain.agents.agent_types import AgentType

In [42]:
agent = create_pandas_dataframe_agent(llm, df, verbose=True)

In [46]:
agent.run("Identify 2 candidates with B.E. degree. Give their path also.")





[1m> Entering new  chain...[0m




[32;1m[1;3mThought: I need to filter the dataframe to find candidates with B.E. degree and then extract their names and paths.
Action: python_repl_ast
Action Input: df[df['Bachelor of Science in Computer Science, Engineering, or a related discipline'].str.contains("B.E.")][0m
Observation: [36;1m[1;3mEmpty DataFrame
Columns: [Name, Gender, Location, Percentage, Path, Python application development, testing, and maintenance, Expertise in Python and associated frameworks such as Django and Flask, Comprehensive knowledge of Python's multi-process architecture and threading restrictions, Knowledge of server-side templating languages such as Jinja 2 and Mako, Understanding of integration of many data sources into a single system, Ability to work collaboratively on projects as well as independently when necessary, Quick learner with the capacity to modify approaches dependent on the situation, Awareness of how to test Python programs successfully, Excellent communication and teamwork abi



[32;1m[1;3mIt seems that there are no candidates with a B.E. degree in the dataframe. I should check for other variations of the degree name, such as "Bachelor of Engineering".
Action: python_repl_ast
Action Input: df[df['Bachelor of Science in Computer Science, Engineering, or a related discipline'].str.contains("Bachelor of Engineering")][0m
Observation: [36;1m[1;3m                     Name Gender  \
2  Raspreet Singh Pasrija   Male   

                                          Location Percentage  \
2  Than Khamharia, Bemetara, Chhattisgarh - 491338        33%   

                                                Path  \
2  Path: data/Raspreet_Singh_Pasrija_Resume_E7D70...   

  Python application development, testing, and maintenance  \
2                                                 No         

  Expertise in Python and associated frameworks such as Django and Flask  \
2               No (Familiar with Python and Django)                       

  Comprehensive knowledge of P



[32;1m[1;3mI found one candidate with a "Bachelor of Engineering" degree. I should check for other variations of the degree name, such as "B.Eng".
Action: python_repl_ast
Action Input: df[df['Bachelor of Science in Computer Science, Engineering, or a related discipline'].str.contains("B.Eng")][0m
Observation: [36;1m[1;3mEmpty DataFrame
Columns: [Name, Gender, Location, Percentage, Path, Python application development, testing, and maintenance, Expertise in Python and associated frameworks such as Django and Flask, Comprehensive knowledge of Python's multi-process architecture and threading restrictions, Knowledge of server-side templating languages such as Jinja 2 and Mako, Understanding of integration of many data sources into a single system, Ability to work collaboratively on projects as well as independently when necessary, Quick learner with the capacity to modify approaches dependent on the situation, Awareness of how to test Python programs successfully, Excellent communica



'Raspreet Singh Pasrija, Path: data/Raspreet_Singh_Pasrija_Resume_E7D708CA20.pdf'

In [47]:
agent.run("Identify 2 candidates with more than 2 years of experience with AWS. Give their path also.")





[1m> Entering new  chain...[0m




[32;1m[1;3mThought: To find candidates with more than 2 years of experience with AWS, I need to check the dataframe for any mention of AWS experience. However, the provided dataframe does not have a column for AWS experience. I will first check the columns of the dataframe to confirm this.

Action: python_repl_ast
Action Input: list(df.columns)[0m
Observation: [36;1m[1;3m['Name', 'Gender', 'Location', 'Percentage', 'Path', 'Python application development, testing, and maintenance', 'Expertise in Python and associated frameworks such as Django and Flask', "Comprehensive knowledge of Python's multi-process architecture and threading restrictions", 'Knowledge of server-side templating languages such as Jinja 2 and Mako', 'Understanding of integration of many data sources into a single system', 'Ability to work collaboratively on projects as well as independently when necessary', 'Quick learner with the capacity to modify approaches dependent on the situation', 'Awareness of how to te



'It is not possible to identify candidates with more than 2 years of experience with AWS using the provided dataframe.'

In [51]:
agent.run("Identify candidates those lived in M.P. Give their path also.")





[1m> Entering new  chain...[0m




[32;1m[1;3mThought: I need to filter the dataframe to get the rows where the Location contains "MP" and then get the Name and Path columns for those rows.

Action: python_repl_ast
Action Input: df[df['Location'].str.contains("MP")][['Name', 'Path']][0m
Observation: [36;1m[1;3m         Name                            Path
1  John Smith  Path: data/JohnSmithResume.pdf[0m
Thought:[32;1m[1;3mI now know the final answer.
Final Answer: The candidate who lives in M.P. is John Smith, and their resume path is "Path: data/JohnSmithResume.pdf".[0m

[1m> Finished chain.[0m




'The candidate who lives in M.P. is John Smith, and their resume path is "Path: data/JohnSmithResume.pdf".'

In [52]:
with get_openai_callback() as cb:
    agent.run("Identify 2 candidates their resume higly Matched with job requirments. Give their path also.")
    print(cb)





[1m> Entering new  chain...[0m




[32;1m[1;3mThought: To find the candidates with the highest match percentage, I need to sort the dataframe by the 'Percentage' column in descending order and then select the top 2 candidates.
Action: python_repl_ast
Action Input: df.sort_values(by='Percentage', ascending=False).head(2)[0m
Observation: [36;1m[1;3m         Name Gender                     Location Percentage  \
1  John Smith   Male  125 Main Street, Bhopal, MP     73.33%   
0    Jane Doe   Male     Plot no. 122 Udaypur, UP        47%   

                             Path  \
1  Path: data/JohnSmithResume.pdf   
0    Path: data/JaneDoeResume.pdf   

  Python application development, testing, and maintenance  \
1                       Yes (Junior Python Engineer)         
0                                                 No         

  Expertise in Python and associated frameworks such as Django and Flask  \
1  Yes (Utilized Python expertise and frameworks ...                       
0                                    



Tokens Used: 3170
	Prompt Tokens: 3053
	Completion Tokens: 117
Successful Requests: 2
Total Cost (USD): $0.09861
