In [1]:
import os
from openai import OpenAI

# OpenAI client authenticated with the key stored in the OPENAI_TEST_KEY environment variable
client = OpenAI(api_key=os.environ.get("OPENAI_TEST_KEY"))


In [2]:
# List the entries of the current working directory
os.listdir(".")

['chroma', 'data', 'test_openAI_API.ipynb']

In [3]:
# Load the CV PDF; `pages` holds the Document objects returned by the loader
from langchain.document_loaders import PyPDFLoader

loader = PyPDFLoader(file_path="./data/CV_AntonioOchotorena_092024.pdf")
pages = loader.load()
pages

[Document(metadata={'source': './data/CV_AntonioOchotorena_092024.pdf', 'page': 0}, page_content='ANTONIO OCHOTORENA  LAYNEZ  \n+34 636 426 538  • antonioochotorena@gmail.com  • LinkedIn  • GitHub  \n \nSummary   \n \nActively seeking a full -time opportunity in data and AI in Amsterdam.  My background in consulting \nand data science has equipped me with the skills to excel in a position where effective \ncommunication and technical expertise are essential . What sets me apart is my strong drive to learn, \nproactive approach to solving complex problems, and commitment to fostering a collaborative team \nenvironment where we support and help each other to achieve shared goals.  \n \nExperience   \n \nInfosys  Instep – Global Internship Program  Bangalore, India  \nAI Software Engineer  06/2024 – Now  \n \n• Developed a proprietary information retrieval module utilizing Large Language Models \n(LLMs) and OCR technologies to extract healthcare forms, achieving substantial cost savings. 

In [4]:
# Configure the splitter: break on newlines into chunks of up to 500
# characters (measured with len), overlapping by 150 characters
from langchain.text_splitter import CharacterTextSplitter

text_splitter = CharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=150,
    length_function=len,
    separator="\n",
)

In [5]:
# Apply the splitter to the loaded PDF pages
docs = text_splitter.split_documents(documents=pages)

In [6]:
# Display the chunks produced by the text splitter
docs

[Document(metadata={'source': './data/CV_AntonioOchotorena_092024.pdf', 'page': 0}, page_content='ANTONIO OCHOTORENA  LAYNEZ  \n+34 636 426 538  • antonioochotorena@gmail.com  • LinkedIn  • GitHub  \n \nSummary   \n \nActively seeking a full -time opportunity in data and AI in Amsterdam.  My background in consulting \nand data science has equipped me with the skills to excel in a position where effective \ncommunication and technical expertise are essential . What sets me apart is my strong drive to learn,'),
 Document(metadata={'source': './data/CV_AntonioOchotorena_092024.pdf', 'page': 0}, page_content='communication and technical expertise are essential . What sets me apart is my strong drive to learn, \nproactive approach to solving complex problems, and commitment to fostering a collaborative team \nenvironment where we support and help each other to achieve shared goals.  \n \nExperience   \n \nInfosys  Instep – Global Internship Program  Bangalore, India  \nAI Software Engineer 

# Store it in Chroma DB

In [7]:
# Embedding model used to vectorise the chunks before they go into ChromaDB
from langchain.embeddings.openai import OpenAIEmbeddings

embedding = OpenAIEmbeddings(api_key=os.environ.get("OPENAI_TEST_KEY"))

  embedding = OpenAIEmbeddings(api_key = os.getenv("OPENAI_TEST_KEY"))


In [8]:
from langchain.vectorstores import Chroma

# Directory passed to Chroma as its persist location
PERSIST_DIR = "./chroma/"

In [10]:
# Embed the chunks and build the vector store, persisted under PERSIST_DIR.
# NOTE(review): re-running this cell presumably adds the same chunks to the
# persisted store again — confirm, and guard if duplicates show up in retrieval.
vectordb = Chroma.from_documents(
    docs,
    embedding,
    persist_directory=PERSIST_DIR,
)

# Prompt

### TODO: create a summariser of job requirements

In [20]:
# Job-offer requirements, one tagged entry per line; interpolated into the
# cover-letter question
offer_requirements = (
    "\n"
    "<req1> AI experience </req1>\n"
    "<req2> MLOps </req2>\n"
    "<req3> Backend developer </req3>\n"
)


# Question

In [23]:
# Cover-letter instruction sent as the chain's query. Written as explicit
# pieces so the trailing spaces that are part of the prompt text stay visible.
question = (
    "\n"
    "Use the following pieces of context to create a cover letter for the following job offer. \n"
    f"{offer_requirements}\n"
    "If the applicant has knowledge gaps from the job offer: \n"
    "    1. Compare it to similar skills he has.\n"
    "    2. If there are no similar exclude them from the cover letter and metion them at the end.\n"
    "\n"
    "Don't try to make up an answer. \n"
    "Use a letter format with three paragraphs maximum. \n"
    "Keep the answer clear, concise and semi-formal as possible.\n"
    "Do not over extend with adjectives.\n"
)

In [24]:
from langchain.prompts import PromptTemplate

# Prompt skeleton: {context} and {question} are the template's placeholders;
# the two trailing lines tell the model which sections to produce.
template = (
    "Use the following pieces of context to answer the question\n"
    "{context}\n"
    "Question: {question}\n"
    "Cover letter: Fill in the letter here\n"
    "Knowledge Gaps: Add Knowledge Gaps if Any\n"
)
QA_CHAIN_PROMPT = PromptTemplate.from_template(template)

In [26]:
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI

# Chat model with temperature 0, authenticated via OPENAI_TEST_KEY
llm = ChatOpenAI(
    model_name="gpt-3.5-turbo-0125",
    temperature=0,
    api_key=os.environ.get("OPENAI_TEST_KEY"),
)

# Retrieval QA chain over the Chroma store, using the custom cover-letter
# prompt and returning the source documents alongside the answer
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
    return_source_documents=True,
    retriever=vectordb.as_retriever(),
)

In [27]:
# Run the retrieval chain on the cover-letter question. `.invoke` replaces the
# deprecated direct call `qa_chain({...})`, which emitted a warning when run;
# the input dict is unchanged.
result = qa_chain.invoke({"query": question})

  result = qa_chain({"query": question})


In [29]:
# Print only the generated answer text from the chain output
print(result["result"])

Dear Hiring Manager,

I am writing to express my interest in the AI, MLOps, and Backend Developer position at your company. With my experience as an AI Software Engineer at Infosys, where I developed proprietary information retrieval modules using Large Language Models, I believe I have the technical expertise required for this role. My proactive approach to problem-solving and commitment to fostering a collaborative team environment align well with the values of your organization.

In my role at Infosys, I successfully implemented a RAG model for HR platforms during a hackathon, showcasing my ability to work on complex projects and deliver innovative solutions. Additionally, my certification as a Professional SCRUM master and completion of Stanford Machine Learning and Deeplearning.ai Deep Learning Specialization courses demonstrate my dedication to continuous learning and growth in the field of AI.

While I do not have direct experience in MLOps, I am confident in my ability to quick

In [30]:
# Display the full chain output dict (includes the 'query' and 'result' keys)
result

{'query': "\nUse the following pieces of context to create a cover letter for the following job offer. \n\n<req1> AI experience </req1>\n<req2> MLOps </req2>\n<req3> Backend developer </req3>\n\nIf the applicant has knowledge gaps from the job offer: \n    1. Compare it to similar skills he has.\n    2. If there are no similar exclude them from the cover letter and metion them at the end.\n\nDon't try to make up an answer. \nUse a letter format with three paragraphs maximum. \nKeep the answer clear, concise and semi-formal as possible.\nDo not over extend with adjectives.\n",
 'result': 'Dear Hiring Manager,\n\nI am writing to express my interest in the AI, MLOps, and Backend Developer position at your company. With my experience as an AI Software Engineer at Infosys, where I developed proprietary information retrieval modules using Large Language Models, I believe I have the technical expertise required for this role. My proactive approach to problem-solving and commitment to fosterin

# Query

In [12]:
def get_completion(prompt, client):
    """Send `prompt` to the chat completions endpoint and return the response.

    This helper is called by a later cell, so it must be a live definition;
    leaving it commented out makes that cell fail with a NameError on a fresh
    kernel.

    Args:
        prompt: Text sent as the single user message.
        client: Object exposing `chat.completions.create(...)`, e.g. the
            `OpenAI` client created at the top of the notebook.

    Returns:
        Whatever `client.chat.completions.create` returns, unmodified.
    """
    completion = client.chat.completions.create(
        model="gpt-3.5-turbo-0125",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt},
        ],
        max_tokens=1000,
        # temperature 0 keeps sampling as deterministic as the API allows
        temperature=0,
    )
    return completion

In [13]:
# Text of the first loaded CV page; embedded in the prompt between backticks
text = pages[0].page_content

# Ask for at most three roles, each in a fixed
# "<role> <reasons> <areas of improvement>" layout
prompt = (
    "\n"
    "Please follow the following format:\n"
    "<role 1> <reasons based on experience> <areas of improvement towards role 1>\n"
    "<role 2> <reasons based on experience> <areas of improvement towards role 2>\n"
    "<role 3> <reasons based on experience> <areas of improvement towards role 3>\n"
    "\n"
    "Do not do more than 3 roles\n"
    f"```{text}```\n"
)
result = get_completion(prompt, client)


In [14]:
# Print the message text of the first choice in the completion
print(result.choices[0].message.content)

Data Engineer at Infosys Instep in Amsterdam
Antonio has a strong background in data engineering, as evidenced by his successful DWH platform migration for Beam Suntory Spain and the development of a Support Decision System using Deep Learning at Ramón Y Cajal Hospital. He has experience in leading projects, implementing reporting tools, and designing NLP pipelines for biomedical document classification. Antonio's technical skills in Python, R, SQL, and various data tools make him well-suited for a Data Engineer role.

Areas of improvement:
- Antonio could focus on enhancing his knowledge of cloud platforms like AWS and Azure to stay updated with the latest technologies in data engineering.
- Developing expertise in big data technologies like Spark and Hadoop would further strengthen his profile for data engineering roles.

AI Software Engineer in Amsterdam
Antonio's experience in developing an information retrieval module using Large Language Models and OCR technologies at Infosys Ins