In [2]:
import os

PATH = "F:\Program Files\\transformers-cache"

os.environ['HF_HOME'] = PATH
os.environ['HF_DATASETS_CACHE'] = PATH

In [3]:
from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM, AutoModelForQuestionAnswering
from transformers import pipeline
from transformers import BitsAndBytesConfig
from langchain.llms.huggingface_pipeline import HuggingFacePipeline
from langchain.prompts import ChatPromptTemplate
from langchain_community.vectorstores import FAISS
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough

from operator import itemgetter

import torch

In [4]:
nf4_config = BitsAndBytesConfig(
   load_in_4bit=True,
   bnb_4bit_quant_type="nf4",
   bnb_4bit_use_double_quant=True,
   bnb_4bit_compute_dtype=torch.bfloat16
)

In [5]:
model_name = "F:\Program Files\\transformers-cache\models--meta-llama--Llama-2-7b-chat-hf\snapshots\c1b0db933684edbfe29a06fa47eb19cc48025e93"
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [6]:
model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=nf4_config)

bin f:\Program Files\Python\Lib\site-packages\bitsandbytes\libbitsandbytes_cuda118.dll


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [7]:
question_answerer = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512
)

llm = HuggingFacePipeline(
    pipeline=question_answerer,
    model_kwargs={"temperature": 0.1},
)

In [8]:
doc1 = """
Applicant number 1: 
Areas of Interest Deep Learning, Control System Design, Programming in-Python, Electric Machinery, Web Development, Analytics Technical Activities Hindustan Aeronautics Limited, Bangalore 
- For 4 weeks under the guidance of Mr. Satish, Senior Engineer in the hangar of Mirage 2000 fighter aircraft Technical Skills Programming Matlab, Python and Java, LabView, Python WebFrameWork-Django, Flask, LTSPICE-intermediate Languages and and MIPOWER-intermediate, Github (GitBash), Jupyter Notebook, Xampp, MySQL-Basics, Python Software Packages Interpreters-Anaconda, Python2, Python3, Pycharm, Java IDE-Eclipse Operating Systems Windows, Ubuntu, Debian-Kali Linux Education Details 
January 2019 B.Tech. Electrical and Electronics Engineering  Manipal Institute of Technology

DATA SCIENCE AND ELECTRICAL ENTHUSIAST
Skill Details 
Data Analysis- Exprience - Less than 1 year months
Sql- Exprience - Less than 1 year monthsCompany Details 
company - THEMATHCOMPANY
description - I am currently working with a Casino based operator(name not to be disclosed) in Macau.I need to segment the customers who visit their property based on the value the patrons bring into the company.Basically prove that the segmentation can be done in much better way than the current system which they have with proper numbers to back it up.Henceforth they can implement target marketing strategy to attract their customers who add value to the business.
"""

doc2 = """Applicant number 2: 
Education Details 
 BCA   Vinayaka Missions University
Operations Manager 

Operations Manager
Skill Details 
Operations Management- Exprience - Less than 1 year monthsCompany Details 
company - BNY Mellon International Operations (India) Pvt. Ltd
description - Organization: BNY Mellon International Operations (India) Pvt. Ltd.: Sept 2009
BNY Mellon provides a broad range of products and services in the areas of Investment Management, Investment Services and Treasury Services. Our customers are institutions, corporations, and high-net-worth individuals, as well as consultants and advisors who work with them.

The responsibilities included but were not limited to:
- Understanding expectations, technology, documentation, assessing capacity & skillset
- Executing transition plan (process timelines, SLAs, Escalation Matrix etc.)
- Ensuring smooth go live and regular review meetings post transition.
- Conducted analysis on projects to determine ROI and viability of project undertaking before presenting it to the Senior Operations Directors.
"""

doc3 ="""Applicant number 3:
Personal Skills: Ability to quickly grasp technical aspects and willingness to learn High energy levels & Result oriented. Education Details 

January 2018 Master of Engineering Madhya Pradesh Truba Institute of Engineering & Information Technology
January 2003 M.tech Thesis Detail BMCH School in Ganj basoda
Data science 

I have 10 years experience in Data Science. 
Key Skills: - Experience in Machine Learning, Deep Leaning, NLP, Python, SQL, Web Scraping Good knowledge in computer subjects and ability to update
Experience in Machine Learning, Deep Learning, NLP, Python, SQL, Web Crawling, HTML,CSS.- Exprience - Less than 1 year monthsCompany Details 
company - RNT.AI Technology Solution
description - Text classification using Machine learning Algorithms with python. 
Develop custom data models and algorithms to apply to dataset
Experience with Python packages like Pandas, Scikit-learn, Tensor Flow, Numpy, Matplotliv, NLTK.
"""

template = """
<<SYS>>
You are an assistant that helps hiring manager to make decisions in the screening process. You have access to the resumes of two applicants:
{context}
<</SYS>>

[INST]
{query}
[/INST]
"""

prompt = ChatPromptTemplate.from_template(template)

In [9]:
from langchain.embeddings import HuggingFaceEmbeddings

embeddings_model_name = "sentence-transformers/all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(model_name=embeddings_model_name)

In [10]:
vectorstore = FAISS.from_texts(
    [doc1, doc2], embedding=embeddings
)
retriever = vectorstore.as_retriever()

In [15]:
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

query = """
Find the candidates that has more than 8 years of experience.
"""

rag_chain = (
    {"context": retriever | format_docs, "query": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [16]:
response = rag_chain.invoke(query)
print(response)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Based on the information provided in the resumes, the following candidates have more than 8 years of experience:

1. Applicant number 1: The candidate has experience in Deep Learning, Control System Design, Programming in Python, Electric Machinery, Web Development, Analytics, and Technical Activities for 4 years. Additionally, they have experience in Data Analysis and SQL for less than 1 year. Therefore, their total experience is more than 8 years.
2. Applicant number 2: The candidate has experience as an Operations Manager for less than 1 year. However, they have experience in Operations Management for more than 8 years, as they mention that they have been working in the field for over 10 years.

Therefore, the two candidates with more than 8 years of experience are:

1. Applicant number 1
2. Applicant number 2
