# Database test

In [1]:
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import MongoDBAtlasVectorSearch
from pymongo.mongo_client import MongoClient
from pymongo.server_api import ServerApi
from langchain.prompts import PromptTemplate
from dotenv import load_dotenv
import os

# Load variables from a local .env file into the process environment;
# MONGO_URI is read from the environment in the configuration cell.
load_dotenv()

True

In [2]:
# Atlas target: database name, collection name, and the vector search index name.
DB_NAME = "jobFinder"
COLLECTION_NAME = "cv_mehdi"
ATLAS_VECTOR_SEARCH_INDEX_NAME = "vector_search_index"

# Connection string comes from the environment (None when MONGO_URI is unset).
MONGODB_ATLAS_CLUSTER_URI = os.environ.get("MONGO_URI")

In [3]:
# Fail fast when MONGO_URI is missing: MongoClient(None) would otherwise fall
# back to its default host (localhost) instead of the Atlas cluster, and the
# mistake would only surface later as an empty or unreachable collection.
if not MONGODB_ATLAS_CLUSTER_URI:
    raise RuntimeError(
        "MONGO_URI is not set; define it in the environment or in the .env file."
    )

# ServerApi('1') requests version 1 of MongoDB's Stable API for this client.
mongo_client = MongoClient(MONGODB_ATLAS_CLUSTER_URI, server_api=ServerApi('1'))

# Handle on the collection that stores the CV chunks and their embeddings.
collection = mongo_client[DB_NAME][COLLECTION_NAME]

# NOTE(review): this repr includes the cluster host names; consider clearing
# the cell output before sharing the notebook.
print(collection)

Collection(Database(MongoClient(host=['ac-wzyenof-shard-00-01.byabgoe.mongodb.net:27017', 'ac-wzyenof-shard-00-02.byabgoe.mongodb.net:27017', 'ac-wzyenof-shard-00-00.byabgoe.mongodb.net:27017'], document_class=dict, tz_aware=False, connect=True, retrywrites=True, w='majority', authsource='admin', replicaset='atlas-u5o2v0-shard-0', tls=True, server_api=<pymongo.server_api.ServerApi object at 0x0000017BB761A9A0>), 'jobFinder'), 'cv_mehdi')


In [4]:
# Embedding model used both for the stored CV chunks and for incoming queries.
embeddings = OpenAIEmbeddings()

# Vector store wrapper around the Atlas collection; vectors are stored under
# the "embedding" field of each document.
vector_search = MongoDBAtlasVectorSearch(
    collection,
    embedding=embeddings,
    index_name=ATLAS_VECTOR_SEARCH_INDEX_NAME,
    embedding_key="embedding",
)

In [5]:
# Connectivity check: a failed ping is reported, not raised, so the notebook
# keeps running either way.
try:
    mongo_client.admin.command("ping")
except Exception as ping_error:
    print(ping_error)
else:
    print("Pinged your deployment. You successfully connected to MongoDB!")

Pinged your deployment. You successfully connected to MongoDB!


## Vector search

In [7]:
# Smoke-test the vector index: fetch the three closest chunks with their scores.
# NOTE(review): the returned metadata carries the full embedding vector, which
# makes the displayed output very large.
query = "certifications"

vector_search.similarity_search_with_score(query=query, k=3)

[(Document(page_content='\n- French TCF C2\n- English TOIEC C1*\n- Arabic (native language)\n- German A1\n', metadata={'_id': ObjectId('6583185e743f9eb02c03f4a3'), 'embedding': [-0.0016725025049321046, 0.007311486701334954, 0.007683707466578165, -0.006759801471419182, -0.026919564461432656, 0.025776313837293945, -0.04727208282069419, -0.0024343927132484747, -0.012382999167971278, -0.02173505555717872, 0.018478119903179657, 0.028873725108774027, -0.0030292817649378682, -0.002780026547066479, -0.003006018025317829, 0.012083892813393353, 0.01984736340255359, -0.015460470822959068, 0.004350334442939298, 0.00812904357380924, -0.0029229328750838175, 0.029804278418865925, -0.006979146100398907, 0.0019325588528698835, -0.0004561370626744816, 0.0004931099289172458, 0.01431722019882036, -0.034829262940600875, -0.007883112013404308, -0.02582948863146694, 0.034909025131860366, 0.023755684697236334, -0.03097411644009113, -0.01354619033992486, -0.009066243733172764, 0.0011523899254718695, -0.0147426

In [8]:
# Instructions for the QA chain. {context} receives the retrieved CV chunks and
# {question} the user's query; the wording is runtime text and is kept verbatim.
prompt_template = """
    You are a smart agent that can answer questions about CVs.
    if the question is not related to CVs, you should answer "I don't know please refer to Mehdi for more information".
    if the question is related to CVs, you should answer the question.
    If the question was not clear enough, you should ask for more information.
    if the question was in French, you should answer in French.
    use the following information to answer the question:
    {context}
    question: {question}
"""

prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

In [9]:
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI

# Retriever over the CV collection: the five nearest chunks by vector similarity.
# The former `return_vector=False` argument was removed: it is not a retriever
# option, so it was silently ignored and only suggested filtering that never
# happened.
qa_retriever = vector_search.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 5},
)

# Completion model for the QA chain; answers are capped at 200 tokens.
llm = OpenAI(temperature=0.3, max_tokens=200, top_p=0.9)

# "stuff" chain: the retrieved chunks are inserted into the prompt's {context}
# slot and the model answers in one call. Source documents are returned as well
# so the retrieval step can be inspected.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=qa_retriever,
    return_source_documents=True,
    chain_type_kwargs={"prompt": prompt},
)

# Off-topic probe: the prompt instructs the model to decline questions that
# are not about the CV.
docs = qa({"query": "gpt-4 compute requirements"})

# `docs` is the chain's result mapping: the answer text plus the retrieved chunks.
print(docs["result"])
print(docs["source_documents"])


I don't know please refer to Mehdi for more information.
[Document(page_content='\nProgramming Languages:\n- Python: Proficient for 6 years, used in data analysis, machine learning, and web development.\n- C++ and C: Proficient for 6 years, applied in software development and algorithm design.\n- Matlab: Experience of 4 years, primarily used for scientific computing, numerical analysis, and algorithm prototyping.\n- JavaScript: Proficiency of 3 years, employed in web development for frontend interactivity and functionality.\n- SQL for database management (SQLite, MySQL, PostgreSQL)\nFrameworks:\n- Frontend: Proficient in React and Svelte, used for developing dynamic and responsive user interfaces.\n- Backend: Proficient in Flask, Streamlit, and FastAPI, employed for creating backend services, APIs, and web applications.\n- Machine Learning: Competent in TensorFlow and PyTorch, utilized for implementing and deploying machine learning models in various projects.\n- Version Control: Prof

In [10]:
# CV-related question for the same QA chain; only the answer text is printed,
# the retrieved source documents are not shown this time.
docs = qa({"query": "how old is Mehdi?"})
print(docs["result"])

Answer: Mehdi is 21 years old, born on January 1st, 2000.


# Agent section

In [11]:
from langchain.agents import AgentType, initialize_agent
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI
from langchain.agents import AgentType, initialize_agent, load_tools
from langchain.tools import tool
from datetime import datetime

In [16]:
@tool("current_date", return_direct=False)
def current_date(input: str) -> str:
    """Used to know the current date"""
    # The docstring is kept verbatim because the @tool decorator uses it as the
    # tool description. `input` is accepted but not used; the result is the
    # local date and time rendered as text.
    now = datetime.now()
    return str(now)

@tool("mehdi_cv_data", return_direct=False)
def mehdi_cv_data(input: str) -> str:
    """Used as bank of data for Mehdi CV the input should be a question related to the task to perform"""
    # The docstring is kept verbatim because the @tool decorator uses it as the
    # tool description.
    # Retrieve the five CV chunks closest to the question; the similarity
    # scores are not needed, only the chunk text.
    scored_hits = vector_search.similarity_search_with_score(input, k=5)
    # One chunk per line, in the order returned by the search.
    return "\n".join(document.page_content for document, _score in scored_hits)

@tool("meaning_search", return_direct=False)
def meaning_search(input: str) -> str:
    """Used when you look for the meaning of a word"""
    # The docstring is kept verbatim because the @tool decorator uses it as the
    # tool description.
    # A separate completion model with a short (50-token) budget is created on
    # every call and asked directly about the term.
    definition_llm = OpenAI(temperature=0.3, max_tokens=50, top_p=0.9)
    question = f"What is {input}?"
    return definition_llm(question)

@tool("no_answer_found", return_direct=True)
def no_answer_found(input: str) -> str:
    """Used when the question provided by the user was not found in the data bank example : when did Mehdi started working at Google? => no answer found"""
    # The docstring is kept verbatim because the @tool decorator uses it as the
    # tool description.
    # `input` is not used: the model is only asked to rephrase a fixed
    # "I don't know" message (high temperature so the wording varies).
    llm = OpenAI(temperature=0.8, max_tokens=50, top_p=0.9)
    reply = llm("answer with something that has the following meaning: I don't know or the input provided does not match any of my database information please refer to Mehdi for more information")
    # return_direct=True hands this text straight to the user, so strip the
    # leading/trailing whitespace that raw completions tend to start with
    # (previously the answer began with "\n\n").
    return str(reply).strip()
  

@tool("out_of_context", return_direct=True)
def out_of_context(input: str) -> str:
    """Used when the question provided by the user is not related to the task to perform example : what is the weather today?"""
    # The docstring is kept verbatim because the @tool decorator uses it as the
    # tool description.
    # Fixed refusal returned directly to the user; `input` is ignored.
    return "I don't know please refer to Mehdi for more information"


In [17]:
# Chat model that drives the agent (note: this rebinds the notebook-level `llm`).
llm = ChatOpenAI(model='gpt-4', temperature=0.5)

# Toolbox: the stock "llm-math" tool first, then the custom tools defined in
# this notebook, in the same order as before.
tools = [
    *load_tools(["llm-math"], llm=llm),
    current_date,
    mehdi_cv_data,
    meaning_search,
    no_answer_found,
    out_of_context,
]

In [18]:
# Zero-shot ReAct agent over the toolbox; verbose=True prints the
# thought/action/observation trace seen in the cell outputs.
agent = initialize_agent(
    tools=tools,
    llm=llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,
)

In [19]:
# Contact lookup phrased with a pronoun ("his") instead of naming Mehdi.
agent.run("Give me his linkedin url?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI need to find Mehdi's LinkedIn URL from his CV data.
Action: mehdi_cv_data
Action Input: What is Mehdi's LinkedIn URL?[0m
Observation: [38;5;200m[1;3m - Phone: +33 7 52 07 57 72 - Email: mehdi.hayani_mechkouri@ensam.eu - LinkedIn: https://www.linkedin.com/in/mehdi-hayani-mechkouri-9ba3161bb/


Hello, I am Mehdi HAYANI MECHKOURI, born on January 1st, 2000. I am of Moroccan nationality. Currently in Paris, France. I am currently pursuing a dual degree in Arts et Métiers ParisTech specializing in Digital Engineering, Data Science, and Artificial Intelligence. I am looking for a 6-month internship starting from February 2023 in the field of AI and data science. I am a curious person with a strong thirst for knowledge. I am someone who seeks to solve problems, especially those related to climate change, food security, and energy security. But I am ready to face any challenge. My preferred sector is programming, and I am fascin

"Mehdi's LinkedIn URL is https://www.linkedin.com/in/mehdi-hayani-mechkouri-9ba3161bb/."

In [20]:
# Ask for Mehdi's age; the trace below shows the agent looking up the birth
# date in the CV data to work it out.
agent.run("How old is Mehdi?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI need to check Mehdi's birthdate in his CV data to calculate his age.
Action: mehdi_cv_data
Action Input: "What is Mehdi's birthdate?"[0m
Observation: [38;5;200m[1;3m
Hello, I am Mehdi HAYANI MECHKOURI, born on January 1st, 2000. I am of Moroccan nationality. Currently in Paris, France. I am currently pursuing a dual degree in Arts et Métiers ParisTech specializing in Digital Engineering, Data Science, and Artificial Intelligence. I am looking for a 6-month internship starting from February 2023 in the field of AI and data science. I am a curious person with a strong thirst for knowledge. I am someone who seeks to solve problems, especially those related to climate change, food security, and energy security. But I am ready to face any challenge. My preferred sector is programming, and I am fascinated by the potential of AI.

 - Phone: +33 7 52 07 57 72 - Email: mehdi.hayani_mechkouri@ensam.eu - LinkedIn: https://www.linke

'Mehdi is 23 years old.'

In [21]:
# Probe about an employer (Google), the same scenario that the
# no_answer_found tool description uses as its example.
agent.run("Did Mehdi work at Google?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI need to check Mehdi's CV data to see if he has worked at Google.
Action: mehdi_cv_data
Action Input: Did Mehdi work at Google?[0m
Observation: [38;5;200m[1;3m
Hello, I am Mehdi HAYANI MECHKOURI, born on January 1st, 2000. I am of Moroccan nationality. Currently in Paris, France. I am currently pursuing a dual degree in Arts et Métiers ParisTech specializing in Digital Engineering, Data Science, and Artificial Intelligence. I am looking for a 6-month internship starting from February 2023 in the field of AI and data science. I am a curious person with a strong thirst for knowledge. I am someone who seeks to solve problems, especially those related to climate change, food security, and energy security. But I am ready to face any challenge. My preferred sector is programming, and I am fascinated by the potential of AI.

 - Phone: +33 7 52 07 57 72 - Email: mehdi.hayani_mechkouri@ensam.eu - LinkedIn: https://www.linkedin.com

'No, Mehdi did not work at Google.'

In [22]:
# Two-part question: a count plus an enumeration of the certifications.
agent.run("How many certifications does Mehdi have? and what are they?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThe question is asking about Mehdi's certifications, which is information that would be found in his CV. I should use the mehdi_cv_data function to get this information.
Action: mehdi_cv_data
Action Input: How many certifications does Mehdi have? and what are they?[0m
Observation: [38;5;200m[1;3m
Hello, I am Mehdi HAYANI MECHKOURI, born on January 1st, 2000. I am of Moroccan nationality. Currently in Paris, France. I am currently pursuing a dual degree in Arts et Métiers ParisTech specializing in Digital Engineering, Data Science, and Artificial Intelligence. I am looking for a 6-month internship starting from February 2023 in the field of AI and data science. I am a curious person with a strong thirst for knowledge. I am someone who seeks to solve problems, especially those related to climate change, food security, and energy security. But I am ready to face any challenge. My preferred sector is programming, and I am fasc

"\n\nI'm sorry, I don't have that information, please contact Mehdi for more details."

In [23]:
# Question that does not name Mehdi and contains a misspelling ("wich");
# the input string is kept exactly as written.
agent.run("wich one is the oldest internship?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI need to look at Mehdi's CV data to find out which internship is the oldest.
Action: mehdi_cv_data
Action Input: "What is Mehdi's oldest internship?"[0m
Observation: [38;5;200m[1;3m
Hello, I am Mehdi HAYANI MECHKOURI, born on January 1st, 2000. I am of Moroccan nationality. Currently in Paris, France. I am currently pursuing a dual degree in Arts et Métiers ParisTech specializing in Digital Engineering, Data Science, and Artificial Intelligence. I am looking for a 6-month internship starting from February 2023 in the field of AI and data science. I am a curious person with a strong thirst for knowledge. I am someone who seeks to solve problems, especially those related to climate change, food security, and energy security. But I am ready to face any challenge. My preferred sector is programming, and I am fascinated by the potential of AI.

 - Phone: +33 7 52 07 57 72 - Email: mehdi.hayani_mechkouri@ensam.eu - LinkedIn: ht

'The oldest internship is the one at Groupe Renault which occurred between August 2021 and October 2021.'

In [24]:
# Role lookup for a specific employer; the input keeps its original spelling
# ("occupie").
agent.run("what position did mehdi occupie at GE grid solutions?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThe question is asking for specific information from Mehdi's CV.
Action: mehdi_cv_data
Action Input: What position did Mehdi occupy at GE Grid Solutions?[0m
Observation: [38;5;200m[1;3m
1. Full Stack Developer at GE Grid Solutions - Type: `Internship` between [June - September 2023], Role: [Intern], Title: [Web Developer]. Tasks and Achievements: Digitized the quality control process by creating an ETL system. Automated the data pipeline using RPA. Standardized the database (SQL) and developed a modular user interface. Created a dashboard for Failure Mode and Effects Analysis (DFMEA) using Power BI.
2. Data Scientist at AI-Inside - Type: `Internship` between [August 2022 - October 2022], Role: [Intern], Title: [Data Scientist]. Tasks and Achievements: Developed and deployed an application using AI and data scraping to predict car sales prices and identify the most advantageous offers (Flask, MongoDB, TensorFlow, Selenium, 

'Mehdi occupied the position of Full Stack Developer at GE Grid Solutions.'

In [25]:
# Ask for the full list of internships; the input keeps its original spelling
# ("inernships").
agent.run("What are the inernships that mehdi did?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI need to access Mehdi's CV data to answer this question.
Action: mehdi_cv_data
Action Input: What internships did Mehdi do?[0m
Observation: [38;5;200m[1;3m
Hello, I am Mehdi HAYANI MECHKOURI, born on January 1st, 2000. I am of Moroccan nationality. Currently in Paris, France. I am currently pursuing a dual degree in Arts et Métiers ParisTech specializing in Digital Engineering, Data Science, and Artificial Intelligence. I am looking for a 6-month internship starting from February 2023 in the field of AI and data science. I am a curious person with a strong thirst for knowledge. I am someone who seeks to solve problems, especially those related to climate change, food security, and energy security. But I am ready to face any challenge. My preferred sector is programming, and I am fascinated by the potential of AI.

 - Phone: +33 7 52 07 57 72 - Email: mehdi.hayani_mechkouri@ensam.eu - LinkedIn: https://www.linkedin.com/in/

'Mehdi has done the following internships:\n1. Full Stack Developer at GE Grid Solutions between June and September 2023. During this internship, he digitized the quality control process by creating an ETL system, automated the data pipeline using RPA, standardized the database (SQL), developed a modular user interface, and created a dashboard for Failure Mode and Effects Analysis (DFMEA) using Power BI.\n2. Data Scientist at AI-Inside between August and October 2022. He developed and deployed an application using AI and data scraping to predict car sales prices and identify the most advantageous offers (Flask, MongoDB, TensorFlow, Selenium, Pandas) during this internship.\n3. Industrial Engineer at Groupe Renault between August and October 2021. He actively collaborated on a production line extension project and conducted in-depth studies on various lifting means during this internship.'

In [27]:
# Keep the answer in `output` so the next cell can print it; because the
# result is assigned, only the verbose trace appears under this cell.
output = agent.run("How can I contact Mehdi?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI need to look up Mehdi's contact information.
Action: mehdi_cv_data
Action Input: What is Mehdi's contact information?[0m
Observation: [38;5;200m[1;3m - Phone: +33 7 52 07 57 72 - Email: mehdi.hayani_mechkouri@ensam.eu - LinkedIn: https://www.linkedin.com/in/mehdi-hayani-mechkouri-9ba3161bb/


Hello, I am Mehdi HAYANI MECHKOURI, born on January 1st, 2000. I am of Moroccan nationality. Currently in Paris, France. I am currently pursuing a dual degree in Arts et Métiers ParisTech specializing in Digital Engineering, Data Science, and Artificial Intelligence. I am looking for a 6-month internship starting from February 2023 in the field of AI and data science. I am a curious person with a strong thirst for knowledge. I am someone who seeks to solve problems, especially those related to climate change, food security, and energy security. But I am ready to face any challenge. My preferred sector is programming, and I am fascin

In [28]:
# print() renders the line breaks in the answer instead of a quoted repr.
print(output)

You can contact Mehdi through the following methods: 
- Phone: +33 7 52 07 57 72 
- Email: mehdi.hayani_mechkouri@ensam.eu 
- LinkedIn: https://www.linkedin.com/in/mehdi-hayani-mechkouri-9ba3161bb/


In [29]:
# French input ("How many languages does Mehdi speak?") to see how the agent
# handles a non-English question.
langues = agent.run("Combien de langues parle Mehdi ?")
print(langues)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mLa question concerne les informations sur le CV de Mehdi. Je devrais utiliser l'outil mehdi_cv_data pour obtenir cette information.
Action: mehdi_cv_data
Action Input: Combien de langues parle Mehdi ?[0m
Observation: [38;5;200m[1;3m
Hello, I am Mehdi HAYANI MECHKOURI, born on January 1st, 2000. I am of Moroccan nationality. Currently in Paris, France. I am currently pursuing a dual degree in Arts et Métiers ParisTech specializing in Digital Engineering, Data Science, and Artificial Intelligence. I am looking for a 6-month internship starting from February 2023 in the field of AI and data science. I am a curious person with a strong thirst for knowledge. I am someone who seeks to solve problems, especially those related to climate change, food security, and energy security. But I am ready to face any challenge. My preferred sector is programming, and I am fascinated by the potential of AI.

 - Phone: +33 7 52 07 57 72 - Ema

In [30]:
# French input with a misspelling ("es-ce que"), asking whether Mehdi has
# data-science experience; the string is kept exactly as written.
output = agent.run("es-ce que Mehdi a une experience en data science?")
print(output)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThe question is asking if Mehdi has experience in data science. This is a question about Mehdi's CV.
Action: mehdi_cv_data
Action Input: "Does Mehdi have experience in data science?"[0m
Observation: [38;5;200m[1;3m
Hello, I am Mehdi HAYANI MECHKOURI, born on January 1st, 2000. I am of Moroccan nationality. Currently in Paris, France. I am currently pursuing a dual degree in Arts et Métiers ParisTech specializing in Digital Engineering, Data Science, and Artificial Intelligence. I am looking for a 6-month internship starting from February 2023 in the field of AI and data science. I am a curious person with a strong thirst for knowledge. I am someone who seeks to solve problems, especially those related to climate change, food security, and energy security. But I am ready to face any challenge. My preferred sector is programming, and I am fascinated by the potential of AI.

 - Phone: +33 7 52 07 57 72 - Email: mehdi.hayani_m

In [31]:
# Counting question about the internships; the answer is stored and printed.
output = agent.run("How many internships did Mehdi do?")
print(output)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI need to retrieve this information from Mehdi's CV.
Action: mehdi_cv_data
Action Input: How many internships did Mehdi do?[0m
Observation: [38;5;200m[1;3m
Hello, I am Mehdi HAYANI MECHKOURI, born on January 1st, 2000. I am of Moroccan nationality. Currently in Paris, France. I am currently pursuing a dual degree in Arts et Métiers ParisTech specializing in Digital Engineering, Data Science, and Artificial Intelligence. I am looking for a 6-month internship starting from February 2023 in the field of AI and data science. I am a curious person with a strong thirst for knowledge. I am someone who seeks to solve problems, especially those related to climate change, food security, and energy security. But I am ready to face any challenge. My preferred sector is programming, and I am fascinated by the potential of AI.

 - Phone: +33 7 52 07 57 72 - Email: mehdi.hayani_mechkouri@ensam.eu - LinkedIn: https://www.linkedin.com/in/m

## Conclusion

The agent gives good answers overall, but it needs to use GPT-4 in order to give accurate answers.