In [None]:
# Environment setup: upgrade bitsandbytes, accelerate and transformers, which the
# quantised model-loading cells further down rely on.
# The plain langchain install is left disabled, as in the original.
# !pip install langchain
! pip install -U bitsandbytes
! pip install -U accelerate transformers

In [None]:
# Installed ahead of the langchain.* imports in the next cell -- presumably this
# package supplies the integrations (LLM wrapper, loaders, vector store) they use.
! pip install langchain_community


In [None]:
import os
import sys

import torch
import transformers
from torch import cuda, bfloat16
from transformers import AutoTokenizer
from langchain.llms import HuggingFacePipeline
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from langchain.vectorstores import Chroma

In [None]:
# Report the visible GPUs and choose the compute device for the session.
print("Number of GPU: ", torch.cuda.device_count())
# get_device_name() raises when no CUDA device is usable, so only query it on a
# GPU runtime; otherwise the CPU fallback below would never be reached.
if torch.cuda.is_available():
    print("GPU Name: ", torch.cuda.get_device_name())
else:
    print("GPU Name: ", "none (CUDA is not available)")


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)

Number of GPU:  1
GPU Name:  Tesla T4
Using device: cuda


In [None]:
# Authenticate this session with the Hugging Face Hub.
# login() is called without arguments, so it presumably asks for the access token
# interactively -- confirm against the huggingface_hub documentation.
from huggingface_hub import login
login()

In [None]:
# Hub checkpoint shared by the tokenizer and model cells.
model_id = 'google/gemma-2b-it'
# Do not paste an access token into the notebook. Read it from the HF_TOKEN
# environment variable; when that is unset or empty the value is None, which lets
# the Hub client fall back to the credentials stored by login().
token = os.environ.get("HF_TOKEN") or None
tokenizer = AutoTokenizer.from_pretrained(model_id, token=token)
tokenizer.pad_token = tokenizer.eos_token  # works around the padding error

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [None]:
# Quantisation settings handed to from_pretrained() when the model is loaded:
# 4-bit weights, "nf4" quantisation type, double quantisation enabled, and
# bfloat16 as the compute dtype.
bnb_config = transformers.BitsAndBytesConfig(
    **{
        "bnb_4bit_compute_dtype": torch.bfloat16,
        "bnb_4bit_use_double_quant": True,
        "bnb_4bit_quant_type": "nf4",
        "load_in_4bit": True,
    }
)

In [None]:
# Fetch the configuration for `model_id`, then load the causal-LM weights with the
# quantisation settings in `bnb_config`. Both calls reuse the `token` variable
# defined in the tokenizer cell, so every Hub request is authenticated the same
# way instead of repeating a literal credential here.
# NOTE(review): max_new_tokens looks like a generation setting rather than a model
# configuration field; it is kept as in the original -- confirm it has any effect.
# NOTE(review): trust_remote_code=True allows code shipped with the checkpoint to
# run locally -- confirm it is actually required for this model.
model_config = transformers.AutoConfig.from_pretrained(
    model_id,
    trust_remote_code=True,
    max_new_tokens=1024,
    token=token,
)
model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    config=model_config,
    quantization_config=bnb_config,
    device_map='auto',
    token=token,
)

In [None]:
# Smoke test of the raw model: tokenise one prompt, move the encoded batch to the
# model's device, generate up to 100 new tokens and print the decoded text.
prompt = "tell  me a scary  story"
inputs = tokenizer(prompt, return_tensors="pt")
inputs = inputs.to(model.device)
outputs = model.generate(max_new_tokens=100, **inputs)
print(
    tokenizer.decode(
        outputs[0],
        skip_special_tokens=True,
    )
)

tell  me a scary  story about a haunted house.

The old Victorian house, known as Blackwood Manor, stood alone on a hill overlooking the town. Its peeling paint and crooked chimney sent shivers down the spine of every resident. The locals whispered about the house's dark history, but no one dared to set foot inside.

One stormy night, a group of teenagers decided to explore the manor. They braved the creaking doors and unlocked the front door. The air grew thick with a musty smell, and


In [None]:
# Show the encoded prompt next to its Python type.
(inputs, type(inputs))

({'input_ids': tensor([[    2,  4361,   139,   504,   476, 32864,   139, 15732]],
        device='cuda:0'), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1]], device='cuda:0')},
 transformers.tokenization_utils_base.BatchEncoding)

In [None]:
# Text-generation pipeline wrapping the already-loaded `model` and `tokenizer`.
# NOTE(review): max_length=1024 presumably caps prompt + generated tokens together,
# so long prompts may leave little room for new text -- confirm.
query_pipeline = transformers.pipeline(
    task="text-generation",
    tokenizer=tokenizer,
    model=model,
    device_map="auto",
    max_length=1024,
    torch_dtype=torch.float16,
)


Device set to use cuda:0


In [None]:
def test_model(tokenizer, pipeline, message):
    sequences = pipeline(
        message,
        do_sample=True,
        top_k=10,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
        max_length=200,)


    question = sequences[0]['generated_text'][:len(message)]
    answer = sequences[0]['generated_text'][len(message):]

    return f"Question: {question}    nAnswer: {answer}   {sequences}"

In [None]:
# Exercise the pipeline helper with a sample prompt and keep the formatted result.
response = test_model(
    tokenizer=tokenizer,
    pipeline=query_pipeline,
    message="tell  me a scary  story",
)

In [None]:
# Display the string returned by test_model in the previous cell.
response

In [None]:
# Dedicated pipeline for the LangChain wrapper. The pipeline used so far echoes
# the prompt back in its output, which is why the recorded RetrievalQA results
# begin with the prompt template instead of an answer. return_full_text=False
# returns only the newly generated text, and max_new_tokens budgets the answer
# independently of the prompt length (256 is a starting point; tune as needed).
# The model weights are shared with query_pipeline, so nothing is loaded twice.
llm_pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=256,
    return_full_text=False,
)
llm = HuggingFacePipeline(pipeline=llm_pipeline)

question =  "tell  me a scary  story"
# invoke() is the supported entry point; calling the LLM object directly as
# llm(prompt=...) is deprecated in LangChain.
response = llm.invoke(question)
response

  llm = HuggingFacePipeline(pipeline=query_pipeline)
  response = llm(prompt=question)


'tell  me a scary  story about a young woman who was trapped in a cage in the basement.\n\nThe door to her room was locked, but she managed to break free and escape through a small hole in the floorboards. She ran down the hallway and into the unknown, where she encountered a chilling sight that sent shivers down her spine.\n\nA single, withered rose lay at the dead end of the hallway, its petals twisted and gnarled beyond recognition. It was a symbol of the darkness that lurked in the shadows, a stark reminder of her confinement.\n\nAs she ventured deeper into the basement, she heard a faint rustling sound coming from the shadows. It was a sound that sent chills down her spine, a sound that seemed to come from everywhere and nowhere at once. It was the sound of something moving, something that promised to end her nightmare.\n\nShe hesitated, her heart pounding in her chest. She knew that she should leave, that the darkness she had escaped was too dangerous. But she was trapped, a capt

In [None]:
# Installed right before the PyPDFLoader cell -- presumably the PDF parsing
# backend that loader needs.
! pip install pypdf

In [None]:
# Read the PDF into LangChain documents.
# NOTE(review): "/content/Abstract.pdf" is an absolute path (it looks like the
# Colab working area), so the file must exist there before this cell runs --
# confirm, or make the location configurable.
loader = PyPDFLoader("/content/Abstract.pdf")
documents = loader.load()

In [None]:
# Cut the loaded documents into chunks (chunk_size=1024, chunk_overlap=128) for
# embedding and retrieval.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=128,
    chunk_size=1024,
)
all_splits = text_splitter.split_documents(documents)

In [None]:
# Sentence-embedding model used to index and query the document chunks.
model_name = 'sentence-transformers/all-MiniLM-L6-v2'
# Use the GPU when one is available and fall back to CPU otherwise; a hard-coded
# "cuda" fails on a CPU-only runtime.
model_kwargs = {"device": "cuda" if torch.cuda.is_available() else "cpu"}


embeddings = HuggingFaceEmbeddings(model_name=model_name, model_kwargs=model_kwargs)


In [None]:
# Installed right before the Chroma vector store is built in the next cell.
! pip install chromadb

In [None]:
# Embed every chunk and index it in a Chroma store kept under "chroma_db".
# NOTE(review): re-running this cell against an existing "chroma_db" directory may
# add the same chunks again -- confirm, and clear the directory if so.
vectordb = Chroma.from_documents(
    persist_directory="chroma_db",
    embedding=embeddings,
    documents=all_splits,
)

In [None]:
# Expose the vector store as a retriever and wire it, together with the LLM, into
# a RetrievalQA chain of type "stuff" with verbose logging switched on.
retriever = vectordb.as_retriever()

qa = RetrievalQA.from_chain_type(
    verbose=True,
    retriever=retriever,
    chain_type="stuff",
    llm=llm,
)

In [None]:
def test_rag(qa, query):

    response = qa.run(query)
    full_response =  f"Question: {query}\nAnswer: {response}\nTotal time"
    return response

In [None]:
# Ask the retrieval chain for a plain-language description of the document.
query = "what is the category about say it simple"
test_rag(qa=qa, query=query)



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


"Question: what is the category about say it simple\nAnswer: Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.\n\nAbstract: Recommendation System is an information filtering system which seeks to predict the \n“liking” of a user for an item, with the aim to suggest the user those items which he/she is most \nlikely to select/buy. The focus of this paper is on rating prediction whose main objective is to \npredict the ratings the current user is going to give to the items which are yet to be \nrated/viewed by him/her. This paper uses a collaborative filtering based approach for generating \nrecommendation, and the model used is a clustering-based model. In this approach all the \nexisting users are clustered using whale optimization technique, instead of traditional clustering \napproaches like k-means, EM algorithm, etc. The appropriate cluster is then identified for the \na

In [None]:
# Ask the retrieval chain for a one-word topic.
query = "what is the topic just say one word"
test_rag(qa=qa, query=query)



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


"Question: what is the topic just say one word\nAnswer: Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.\n\nAbstract: Recommendation System is an information filtering system which seeks to predict the \n“liking” of a user for an item, with the aim to suggest the user those items which he/she is most \nlikely to select/buy. The focus of this paper is on rating prediction whose main objective is to \npredict the ratings the current user is going to give to the items which are yet to be \nrated/viewed by him/her. This paper uses a collaborative filtering based approach for generating \nrecommendation, and the model used is a clustering-based model. In this approach all the \nexisting users are clustered using whale optimization technique, instead of traditional clustering \napproaches like k-means, EM algorithm, etc. The appropriate cluster is then identified for the \nactive

In [None]:
# Minimal instruction-only query sent through the retrieval chain.
query = "just say one word"
test_rag(qa=qa, query=query)



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


"Question: just say one word\nAnswer: Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.\n\nAbstract: Recommendation System is an information filtering system which seeks to predict the \n“liking” of a user for an item, with the aim to suggest the user those items which he/she is most \nlikely to select/buy. The focus of this paper is on rating prediction whose main objective is to \npredict the ratings the current user is going to give to the items which are yet to be \nrated/viewed by him/her. This paper uses a collaborative filtering based approach for generating \nrecommendation, and the model used is a clustering-based model. In this approach all the \nexisting users are clustered using whale optimization technique, instead of traditional clustering \napproaches like k-means, EM algorithm, etc. The appropriate cluster is then identified for the \nactive user, and the rat

In [None]:
# Open-ended question about the document's subject.
query = "what is this about"
test_rag(qa=qa, query=query)



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


"Question: what is this about\nAnswer: Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.\n\nAbstract: Recommendation System is an information filtering system which seeks to predict the \n“liking” of a user for an item, with the aim to suggest the user those items which he/she is most \nlikely to select/buy. The focus of this paper is on rating prediction whose main objective is to \npredict the ratings the current user is going to give to the items which are yet to be \nrated/viewed by him/her. This paper uses a collaborative filtering based approach for generating \nrecommendation, and the model used is a clustering-based model. In this approach all the \nexisting users are clustered using whale optimization technique, instead of traditional clustering \napproaches like k-means, EM algorithm, etc. The appropriate cluster is then identified for the \nactive user, and the ra

In [None]:
# Same open-ended question as the preceding cell, run once more.
query = "what is this about"
test_rag(qa=qa, query=query)

You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset




[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


"Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.\n\nAbstract: Recommendation System is an information filtering system which seeks to predict the \n“liking” of a user for an item, with the aim to suggest the user those items which he/she is most \nlikely to select/buy. The focus of this paper is on rating prediction whose main objective is to \npredict the ratings the current user is going to give to the items which are yet to be \nrated/viewed by him/her. This paper uses a collaborative filtering based approach for generating \nrecommendation, and the model used is a clustering-based model. In this approach all the \nexisting users are clustered using whale optimization technique, instead of traditional clustering \napproaches like k-means, EM algorithm, etc. The appropriate cluster is then identified for the \nactive user, and the ratings of the active user are predicted