In [10]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS

from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain.prompts import PromptTemplate

from langchain.chains import RetrievalQA

In [11]:
# Read the pdfs from the folder
loader= PyPDFDirectoryLoader('./data')
documents=loader.load()
text_splitter=RecursiveCharacterTextSplitter(chunk_size=1000,chunk_overlap=200)
final_documents=text_splitter.split_documents(documents)
final_documents[0]

Document(metadata={'source': 'data\\data.pdf', 'page': 0}, page_content='EDUCATION                                         ASHWAND  NARAYANAN S  \nPhone: 9787385300  | Email : ashwandnarayanan.s2022ai -ds@sece.ac.in | GITHUB  | \n         Sri Eshwar  College  of Engineering           B. TECH  (AI & DS )             CGPA : 7.85 (Up to 3rd  Semester)    \n           Veveaham Hr Sec School                          HSC                  89.83%                                  \nSt.Paul’s Matric School                              SSLC                                   81.00%                              \n         \nINTERNSHIP  \n \nDjango intern – RVTechlearn                                                                                                                                           2024  \nSignificant contributions were made to the development of web platform, utilizing HTML, CSS, Django, and SQLite. I \nworked on a on a Movie Recommendation System project, integrating machine

In [3]:
len(final_documents)

7

In [6]:
huggingface_embeddings=HuggingFaceBgeEmbeddings(
    model_name="BAAI/bge-small-en-v1.5",
    model_kwargs={'device':'cpu'},
    encode_kwargs={'normalize_embeddings':True}
)

  from tqdm.autonotebook import tqdm, trange


In [9]:
import numpy as np

np.array(huggingface_embeddings.embed_query(final_documents[0].page_content)).shape

(384,)

In [12]:

## VectorStore Creation
vectorstore=FAISS.from_documents(final_documents[:120],huggingface_embeddings)

In [14]:

## Query using Similarity Search
query="WHAT are the skills"
relevant_docments=vectorstore.similarity_search(query)

print(relevant_docments[0].page_content)

SKILLS  
 
           Programming  Language           Basic  C C++ Java R | Python . 
           Web  Technologies                       HTML |  CSS |  Basic JavaScript  | Django | Streamlit . 
           VCS  Manager                            Git | GitHub  | Anaconda  | PyCharm  | Dataspell | VS Code . 
           Frameworks                              Tensorflow | Keras | Langchain | Basic of ROS.  
           Core                                              Data  Analysis  | Machine  Learning | Deep Learning .  
2022-2026  
2020-2022  
2019-2020   LINKEDIN


In [15]:
retriever=vectorstore.as_retriever(search_type="similarity",search_kwargs={"k":3})
print(retriever)

tags=['FAISS', 'HuggingFaceBgeEmbeddings'] vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x00000295948DF130> search_kwargs={'k': 3}


In [18]:
import os
os.environ['HUGGINGFACEHUB_API_TOKEN']="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"

In [20]:
from langchain_community.llms import HuggingFaceHub

hf=HuggingFaceHub(
    repo_id="mistralai/Mistral-7B-v0.1",
    model_kwargs={"temperature":0.1,"max_length":500},
)
query="What are skills mensioned"
hf.invoke(query)

'What are skills mensioned in the job description?\n\nThe skills mentioned in the job description are the skills that the employer is looking for in the candidate. These skills can be technical skills, soft skills, or a combination of both.\n\nWhat are the skills that are required for the job?\n\nThe skills that are required for the job are the skills that the employer is looking for in the candidate. These skills can be technical skills, soft skills, or a combination of both.\n\nWhat are the'

In [23]:
from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline

hf = HuggingFacePipeline.from_model_id(
    model_id="mistralai/Mistral-7B-v0.1",
    task="text-generation",
    pipeline_kwargs={"temperature": 0, "max_new_tokens": 300},
    use_auth_token=True  # This will use the token from your CLI login
)

llm = hf 
llm.invoke(query)

OSError: You are trying to access a gated repo.
Make sure to have access to it at https://huggingface.co/mistralai/Mistral-7B-v0.1.
401 Client Error. (Request ID: Root=1-6714b1eb-5937ceb544228b260f0ebb0c;c5afdcc1-9836-4e1b-9896-b5376168b6d5)

Cannot access gated repo for url https://huggingface.co/mistralai/Mistral-7B-v0.1/resolve/main/config.json.
Access to model mistralai/Mistral-7B-v0.1 is restricted. You must have access to it and be authenticated to access it. Please log in.