# **Create a Database**

In [None]:
# Install the notebook's dependencies into the runtime.
!pip install datasets
!pip install transformers
!pip install sentence_transformers
!pip install langchain huggingface-hub
# chromadb is the only package pinned to an exact version in this cell.
!pip install --upgrade chromadb==0.4.14
!pip install -U langchain-community
!pip install bitsandbytes
# NOTE(review): langchain was already installed a few lines up; this second pass only upgrades it.
!pip install langchain --upgrade



In [None]:
# peft supplies the PeftModel / PeftConfig classes used later to load the fine-tuned adapter.
!pip install peft



In [None]:
# NOTE(review): nothing in the visible notebook imports llama-index — confirm it is still needed.
!pip install llama-index



In [None]:
import os
import shutil
from transformers import AutoModelForCausalLM, BitsAndBytesConfig, AutoTokenizer
import torch
from langchain.chains import RetrievalQA
from langchain.document_loaders import DirectoryLoader, TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema import Document
from langchain.embeddings import HuggingFaceEmbeddings  # For Hugging Face embeddings
from langchain.vectorstores import Chroma  # For vector storage using Chroma
from peft import PeftModelForCausalLM

In [None]:
# Mount Google Drive at /content/drive; the source text file and the Chroma
# store used by this notebook both live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Directory in which the Chroma vector store is written.
CHROMA_PATH: str = "/content/drive/My Drive/chromadb"
# Plain-text file that is loaded, split into chunks and embedded.
DATA_PATH: str = "/content/drive/My Drive/RAG_info.txt"

In [None]:
# DATA_PATH points at a single text file, so it is read with TextLoader.
# (A DirectoryLoader expects a directory to glob; building one on this path
# was dead code because the loader was reassigned before being used.)
loader = TextLoader(DATA_PATH)
documents = loader.load()

def split_text(documents: list[Document], preview_index: int = 10):
    """Split documents into overlapping chunks and print one chunk as a sanity check.

    Args:
        documents: Documents to split.
        preview_index: Non-negative position of the chunk to print as a
            preview. It is clamped to the last available chunk, and the
            preview is skipped when no chunks were produced, so short or
            empty inputs no longer raise IndexError.

    Returns:
        The list of chunks returned by the splitter's ``split_documents``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=300,        # measured by length_function, i.e. len()
        chunk_overlap=100,     # consecutive chunks share up to this much text
        length_function=len,
        add_start_index=True,  # adds 'start_index' to each chunk's metadata
    )
    chunks = text_splitter.split_documents(documents)
    print(f"Split {len(documents)} documents into {len(chunks)} chunks.")

    # Preview a single chunk without assuming at least 11 chunks exist.
    if chunks:
        sample = chunks[min(preview_index, len(chunks) - 1)]
        print(sample.page_content)
        print(sample.metadata)

    return chunks

# Chunk the loaded documents (split_text also prints a sample chunk).
chunks = split_text(documents)

# Start from a clean slate: remove any store left on disk by an earlier run.
if os.path.exists(CHROMA_PATH):
    shutil.rmtree(CHROMA_PATH)

# Embedding model used to vectorise every chunk.
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Embed the chunks into a new "langchain" collection stored under CHROMA_PATH.
db = Chroma.from_documents(
    documents=chunks,
    embedding=embeddings,
    ids=None,
    collection_name="langchain",
    persist_directory=CHROMA_PATH,
)

# Ask the store to persist itself, then report how much was saved.
# NOTE(review): persist() may be redundant with chromadb 0.4.x — confirm.
db.persist()
print(f"Saved {len(chunks)} chunks to {CHROMA_PATH}.")

Split 1 documents into 150 chunks.
needed for growth and development. However, achieving a healthy diet and regular exercise in childhood can be challenging, given the prevalence of unhealthy food consumption and sedentary lifestyles characterized by long periods of screen time. Some of the first large-scale studies associating
{'source': '/content/drive/My Drive/RAG_info.txt', 'start_index': 2064}




Saved 150 chunks to /content/drive/My Drive/chromadb.


In [None]:
# Sanity-check the store: ask for the single closest chunk to a sample question.
query = "Can you recommend effective ab exercises?"
docs = db.similarity_search(query, k=1)

# Show the text of the best match.
best_match = docs[0]
print(best_match.page_content)

So how do we do that? We’ve chosen our favorite ab exercises to challenge every part of your core. All you need to do is pick three exercises from below, and perform 10-15 reps of each, for 3 sets – there are bodyweight options if you’re working out from home, or weighted options if you’re hitting


In [None]:
# Expose the vector store through the retriever interface; no search options
# are passed, so the library defaults apply.
retriever = db.as_retriever()



In [None]:
query = "Can you recommend effective ab exercises?"

# Retrieve the most relevant chunks for the query. `invoke` is the supported
# retriever entry point; `get_relevant_documents` is deprecated in current
# LangChain releases.
retrieved_docs = retriever.invoke(query)
print(type(retrieved_docs))

# Print each retrieved document (page content plus metadata).
for doc in retrieved_docs:
    print(doc)

<class 'list'>
page_content='So how do we do that? We’ve chosen our favorite ab exercises to challenge every part of your core. All you need to do is pick three exercises from below, and perform 10-15 reps of each, for 3 sets – there are bodyweight options if you’re working out from home, or weighted options if you’re hitting' metadata={'source': '/content/drive/My Drive/RAG_info.txt', 'start_index': 29153}
page_content='Our 8 Best Ab Exercises To Build A Stronger Core:

Crunches

Bird Dogs

Leg Raises

Side Plank

Russian Twists

Cable Woodchopper

Cable Crunch

Med Ball V Ups' metadata={'source': '/content/drive/My Drive/RAG_info.txt', 'start_index': 29462}
page_content='You might think all you need to do is 100 crunches a day to build your abs, but there’s a bit more to it than that. Not only is it really boring, but crunches only train your abs in one motion, which is the back and forward flexion of the spine. We want to hit the abs from different angles, to' metadata={'source': '/

In [None]:
# Authenticate with the Hugging Face Hub through the interactive notebook widget.
# NOTE(review): presumably needed to download the model repos loaded below — confirm.
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:

from transformers import AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel, PeftConfig

# Load the PEFT config of the fine-tuned adapter.
# NOTE(review): `config` is not used below and the base checkpoint name is
# hard-coded — confirm it matches config.base_model_name_or_path.
config = PeftConfig.from_pretrained("Sahaj10/my_fine_tuned_llama2")

# Load the base model quantized to 4-bit. The quantization settings are passed
# through `quantization_config`, because the bare `load_in_4bit=` keyword is
# deprecated in transformers.
base_model = AutoModelForCausalLM.from_pretrained(
    "genaitraining/llama-2-7b-domain-tuned",
    quantization_config=BitsAndBytesConfig(load_in_4bit=True),
    device_map='auto',  # let the library place the weights on the available device(s)
)

# Attach the fine-tuned adapter weights on top of the base model.
# NOTE(review): for LoRA-style adapters PEFT modifies `base_model` in place, so
# `base_model` on its own no longer behaves like the untouched checkpoint — confirm.
model = PeftModel.from_pretrained(base_model, "Sahaj10/my_fine_tuned_llama2")



The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [None]:
# Load the tokenizer from the fine-tuned adapter repository.
# NOTE(review): trust_remote_code=True allows code shipped in that repo to run at load time — confirm it is required.
tokenizer = AutoTokenizer.from_pretrained("Sahaj10/my_fine_tuned_llama2", trust_remote_code = True)

In [None]:
# Use the first CUDA device when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
def prepare_input_for_inference(question, retrieved_docs=None):
    """Build the "Question / Context / Answer" prompt that is fed to the model.

    Args:
        question: The user question, inserted verbatim after the
            ``Question:`` header.
        retrieved_docs: Optional sequence of objects exposing a
            ``page_content`` attribute. Their texts are joined with newlines
            under a ``Context:`` header. When it is ``None`` or empty the
            context section is left out entirely, so no dangling
            ``Context:`` header is sent to the model.

    Returns:
        The prompt string, which always ends with the ``Answer:`` header so
        the model continues from there.
    """
    start_prompt = 'Question:\n'
    end_prompt = '\n\nAnswer:\n '

    if retrieved_docs:
        # Join the retrieved documents into a single string for context.
        context = "\nContext: " + "\n".join(doc.page_content for doc in retrieved_docs)
    else:
        context = ""

    return f"{start_prompt}{question}{context}{end_prompt}"

question = query

prepared_input = prepare_input_for_inference(question, retrieved_docs)
print(prepared_input)

# Keep the whole tokenizer output so the attention mask can be passed to
# generate() alongside the input ids.
inputs = tokenizer(prepared_input, return_tensors="pt", truncation=True).to(device)
input_ids = inputs.input_ids

# `temperature` only takes effect when sampling is enabled, so request it
# explicitly instead of relying on the model's default generation settings.
outputs = model.generate(
    input_ids=input_ids,
    attention_mask=inputs.attention_mask,
    max_new_tokens=200,
    do_sample=True,
    temperature=0.6,
)


Question:
Can you recommend effective ab exercises?
Context: So how do we do that? We’ve chosen our favorite ab exercises to challenge every part of your core. All you need to do is pick three exercises from below, and perform 10-15 reps of each, for 3 sets – there are bodyweight options if you’re working out from home, or weighted options if you’re hitting
Our 8 Best Ab Exercises To Build A Stronger Core:

Crunches

Bird Dogs

Leg Raises

Side Plank

Russian Twists

Cable Woodchopper

Cable Crunch

Med Ball V Ups
You might think all you need to do is 100 crunches a day to build your abs, but there’s a bit more to it than that. Not only is it really boring, but crunches only train your abs in one motion, which is the back and forward flexion of the spine. We want to hit the abs from different angles, to
there are bodyweight options if you’re working out from home, or weighted options if you’re hitting the gym.

Answer:
 


In [None]:
# Decode the first generated sequence back to text, dropping special tokens.
result = tokenizer.decode(outputs[0], skip_special_tokens=True)

# **With RAG**

In [None]:
# Show the decoded text produced for the context-augmented prompt.
print(result)

Question:
Can you recommend effective ab exercises?
Context: So how do we do that? We’ve chosen our favorite ab exercises to challenge every part of your core. All you need to do is pick three exercises from below, and perform 10-15 reps of each, for 3 sets – there are bodyweight options if you’re working out from home, or weighted options if you’re hitting
Our 8 Best Ab Exercises To Build A Stronger Core:

Crunches

Bird Dogs

Leg Raises

Side Plank

Russian Twists

Cable Woodchopper

Cable Crunch

Med Ball V Ups
You might think all you need to do is 100 crunches a day to build your abs, but there’s a bit more to it than that. Not only is it really boring, but crunches only train your abs in one motion, which is the back and forward flexion of the spine. We want to hit the abs from different angles, to
there are bodyweight options if you’re working out from home, or weighted options if you’re hitting the gym.

Answer:
 1. Crunches: Lie on the floor with your hands behind your back and

# **Base Model**

In [None]:


# Example question
question = query

# Tokenize the raw question (no prompt template is applied for this run).
inputs = tokenizer(question, return_tensors="pt", truncation=True).to(device)
input_ids = inputs.input_ids

# Generate the response from the base model.
# PeftModel.from_pretrained injects the adapter layers into `base_model`
# itself, so calling base_model.generate() directly would still run the
# fine-tuned weights. Disabling the adapter for this call gives a genuine
# base-model answer.
# NOTE(review): assumes a LoRA-style adapter that supports disable_adapter() — confirm.
with model.disable_adapter():
    outputs = model.generate(
        input_ids=input_ids,
        attention_mask=inputs.attention_mask,
        max_new_tokens=150,
        do_sample=True,  # temperature is ignored unless sampling is enabled
        temperature=0.6,
    )

# Decode the generated tokens back to text
generated_answer = tokenizer.decode(outputs[0], skip_special_tokens=True)

# Print the generated answer
print(generated_answer)

Can you recommend effective ab exercises?
бранись, I'm glad you're interested! Here are some effective ab exercises:

1. Plank: Hold a full plank position for 30-60 seconds every day. Gradually increase the duration.
2. Bicycle Crunches: Lie on a mat, bring your knees together, and pedal with your feet.
3. Russian Twists: Sit on a mat, hold a weighted ball or dumbbells, and twist your torso from side to side.
4. Leg Raises: Lie on a mat, raise your legs straight in the air, and lower them back down.
5. B burpees:


# **Model without RAG**

In [None]:
def prepare_input_for_inference2(question):
    """Wrap a question in the context-free "Question / Answer" prompt template.

    Args:
        question: The question text placed under the ``Question:`` header.

    Returns:
        The prompt string, ending with the ``Answer:`` header.
    """
    return f"Question:\n{question}\n\nAnswer:\n "
# Example question
question = query
p_question = prepare_input_for_inference2(question)

# Tokenize the templated question, keeping the attention mask for generate().
inputs = tokenizer(p_question, return_tensors="pt", truncation=True).to(device)
input_ids = inputs.input_ids

# Generate the response from the fine-tuned (PEFT) model, without retrieved context.
outputs = model.generate(
    input_ids=input_ids,
    attention_mask=inputs.attention_mask,
    max_new_tokens=150,
    do_sample=True,  # temperature is ignored unless sampling is enabled
    temperature=0.6,
)

# Decode the generated tokens back to text
generated_answer = tokenizer.decode(outputs[0], skip_special_tokens=True)

# Print the generated answer
print(generated_answer)

Question:
Can you recommend effective ab exercises?

Answer:
 1. Plank: This is a classic core exercise that targets your abs. Hold a plank position for 30 seconds to 2 minutes. To make it more challenging, add in side bends or obliques.

2. Bicycle Crunches: Lay on your back with your hands behind your head. Tighten your ab muscles and pedal like a bicycle.

3. Leg Raises: Lie on your back and raise your legs straight in the air. Lower your legs back down and repeat.

4. Wall Sits: Stand with your back against a wall. Slowly sink into a seated position and keep your legs bent at the Wall
