In [2]:
!pip install langchain > /dev/null 2>&1
!pip install git+https://github.com/huggingface/transformers.git > /dev/null 2>&1
!pip install git+https://github.com/huggingface/accelerate.git > /dev/null 2>&1
!pip install sentence-transformers==2.2.2 > /dev/null 2>&1
!pip install pinecone-client > /dev/null 2>&1
!pip install bitsandbytes > /dev/null 2>&1
!pip install datasets > /dev/null 2>&1
!pip install -U bitsandbytes > /dev/null 2>&1
!pip install langchain_pinecone > /dev/null 2>&1
!pip install langchain-community > /dev/null 2>&1

In [3]:
import os
import random
import string
import pinecone
import warnings
from datasets import load_dataset
from langchain import HuggingFacePipeline
from langchain.chains import RetrievalQA
from langchain.vectorstores import Pinecone
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, AutoModel
from pinecone import Pinecone, ServerlessSpec
import logging

# Only let ERROR-level (and above) records through the "transformers" logger.
logging.getLogger("transformers").setLevel(logging.ERROR)
# Silence every Python warning for the rest of the session.
# NOTE(review): this also hides deprecation notices from the libraries used below.
warnings.filterwarnings('ignore')

In [4]:
# Sentence-embedding model used to vectorise both documents and queries.
embedding_model_id = 'sentence-transformers/all-MiniLM-L6-v2'

# Load the model on the GPU and encode on the GPU in batches of 16 texts.
embedding_model = HuggingFaceEmbeddings(
    model_name=embedding_model_id,
    model_kwargs=dict(device='cuda'),
    encode_kwargs=dict(device='cuda', batch_size=16),
)

.gitattributes:   0%|          | 0.00/1.23k [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

data_config.json:   0%|          | 0.00/39.3k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

model.onnx:   0%|          | 0.00/90.4M [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

train_script.py:   0%|          | 0.00/13.2k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

In [5]:
# Pinecone connection settings.
# The API key is a secret: read it from the environment (or prompt for it)
# instead of committing it in the notebook.
# NOTE(review): the key that used to be hardcoded here has been exposed and
# should be revoked/rotated in the Pinecone console.
from getpass import getpass

api_key = os.environ.get("PINECONE_API_KEY") or getpass("Pinecone API key: ")
index_name = "llmassignment1"

pc = Pinecone(api_key=api_key)

# Start every run from an empty index, but only delete when the index is
# actually there: asking Pinecone to delete an unknown index is an error,
# which would stop the notebook on a first run or after a manual cleanup.
if index_name in pc.list_indexes().names():
    pc.delete_index(index_name)

In [6]:
# Create the serverless index on first use; an existing index is left as is.
index_is_missing = index_name not in pc.list_indexes().names()
if index_is_missing:
    # dimension presumably has to equal the embedding model's vector size (384) — confirm if the model changes.
    pc.create_index(
        index_name,
        dimension=384,
        metric='cosine',
        spec=ServerlessSpec(cloud='aws', region='us-east-1'),
    )

In [9]:
# Handle to the index named by `index_name`; the ingestion cell upserts through it.
index = pc.Index(index_name)
# Last expression of the cell, so the notebook displays the index statistics
# (dimension, fullness, namespaces, total vector count).
index.describe_index_stats()

{'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {},
 'total_vector_count': 0}

In [10]:
# NOTE(review): this rebinds `embedding_model`. The instance built earlier in the
# notebook was given explicit 'cuda' device and batch_size=16 settings; this one is
# constructed with the library defaults and loads the same checkpoint a second time.
# It looks like a leftover duplicate — confirm and keep only one of the two cells.
embedding_model_id = 'sentence-transformers/all-MiniLM-L6-v2'
embedding_model = HuggingFaceEmbeddings(model_name=embedding_model_id)

In [11]:
# Ingest the prompt documents into Pinecone: each file is split into paragraphs
# on blank lines, every paragraph is embedded, and the vectors are upserted with
# the paragraph text and its origin as metadata.
txt_files = [f"Prompt {i}.txt" for i in range(1, 13)]  # "Prompt 1.txt" ... "Prompt 12.txt"

for file_name in txt_files:
    file_path = f"/kaggle/input/new-files/{file_name}"
    doc_id = os.path.basename(file_path)

    # Read with an explicit encoding and close the file before the (slow)
    # embedding and network calls.
    with open(file_path, 'r', encoding='utf-8') as file:
        content = file.read()

    # Drop whitespace-only chunks (trailing blank lines, runs of 3+ newlines):
    # embedding them would store meaningless vectors that can surface in retrieval.
    paragraphs = [para for para in content.split("\n\n") if para.strip()]
    if not paragraphs:
        print(f"Skipped {doc_id}: no non-empty paragraphs.")
        continue

    embeddings = embedding_model.embed_documents(paragraphs)

    ids = [f"{doc_id}_para_{i}" for i in range(len(paragraphs))]
    # The paragraph itself is stored under 'text' so it can be read back at query time.
    meta_data = [{'text': para, 'source': file_path, 'title': doc_id} for para in paragraphs]

    # Materialise the (id, vector, metadata) tuples: a bare zip is a one-shot
    # iterator with no length, which is fragile if the client batches or retries.
    index.upsert(vectors=list(zip(ids, embeddings, meta_data)))
    print(f"Upserted {len(paragraphs)} paragraphs from {doc_id} to Pinecone.")

Upserted 5 paragraphs from Prompt 1.txt to Pinecone.
Upserted 1 paragraphs from Prompt 2.txt to Pinecone.
Upserted 2 paragraphs from Prompt 3.txt to Pinecone.
Upserted 2 paragraphs from Prompt 4.txt to Pinecone.
Upserted 5 paragraphs from Prompt 5.txt to Pinecone.
Upserted 3 paragraphs from Prompt 6.txt to Pinecone.
Upserted 4 paragraphs from Prompt 7.txt to Pinecone.
Upserted 6 paragraphs from Prompt 8.txt to Pinecone.
Upserted 3 paragraphs from Prompt 9.txt to Pinecone.
Upserted 3 paragraphs from Prompt 10.txt to Pinecone.
Upserted 4 paragraphs from Prompt 11.txt to Pinecone.
Upserted 4 paragraphs from Prompt 12.txt to Pinecone.


In [12]:
# Load the chat LLM in 4-bit precision together with its tokenizer.
from getpass import getpass

from transformers import BitsAndBytesConfig

# Single source of truth for the checkpoint. Previously `model_name` pointed at
# a different model ("Meta-Llama-3.1-8B") than the one actually loaded below
# and was never used.
model_name = 'meta-llama/Meta-Llama-3-8B-Instruct'

# The Hugging Face token is a secret: read it from the environment (or prompt
# for it) instead of committing it in the notebook.
# NOTE(review): the token that used to be hardcoded here has been exposed and
# should be revoked on huggingface.co.
HF_token = os.environ.get("HF_TOKEN") or getpass("Hugging Face token: ")

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    # Same 4-bit bitsandbytes quantisation as before, passed via the supported
    # `quantization_config` argument rather than the deprecated bare
    # `load_in_4bit=` keyword.
    quantization_config=BitsAndBytesConfig(load_in_4bit=True),
    device_map='auto',
    token=HF_token,
)
tokenizer = AutoTokenizer.from_pretrained(model_name, token=HF_token)

config.json:   0%|          | 0.00/654 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/187 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/51.0k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/73.0 [00:00<?, ?B/s]

In [13]:
# Generation settings forwarded to the text-generation pipeline:
# sampled decoding, at most 100 new tokens, one candidate per prompt,
# and only the newly generated text is returned (not the prompt).
_generation_kwargs = dict(
    max_new_tokens=100,
    do_sample=True,
    temperature=0.5,
    top_k=10,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
    return_full_text=False,
)

pipe = pipeline(
    'text-generation',
    model=model,
    tokenizer=tokenizer,
    device_map='auto',
    **_generation_kwargs,
)

# LangChain wrapper so the pipeline can be used as the chain's LLM.
llm = HuggingFacePipeline(pipeline=pipe)

In [14]:
# Export the key before the store is built. No key is passed to the constructor,
# so it is presumably read from PINECONE_API_KEY — confirm against langchain_pinecone.
os.environ.update(PINECONE_API_KEY=api_key)

from langchain_pinecone import PineconeVectorStore

# LangChain view over the existing Pinecone index, queried with the same embedding model.
vectorstore = PineconeVectorStore(
    embedding=embedding_model,
    index_name=index_name,
)

In [15]:
# Questions to run through the retrieval-augmented QA chain, one per call.
prompts = [
    "Which team won the NBA championship in 2016?",
    "Who is the prime minister of Bhutan?",
    "Which is the least populated state in India?",
    "Who won the FIFA World Cup in 2002?",
    "In which year did Chandragupta Maurya die?",
    "When was the state Telangana formed?",
    "In which year did the Vietnam war start?",
    "Which is the most expensive building on Earth?",
    "Who is the current president of Africa?",
    "Who was the first individual to win an Olympic gold medal for India?",
    "How many gold medals did India win in the 2016 Olympics?",
    "Who is the current president of India?"
]


In [16]:
def getResponse(prompt, k=4):
    """Answer ``prompt`` with the retrieval-augmented QA chain.

    The module-level ``vectorstore`` supplies the context passages and the
    module-level ``llm`` generates the answer from them ('stuff' chain: all
    retrieved passages are placed into a single prompt).

    Args:
        prompt: The question; used both as the retrieval query and as the
            question put to the LLM.
        k: Number of passages the retriever fetches. Defaults to 4, the
            retriever's own default, so existing ``getResponse(prompt)``
            calls retrieve the same number of passages as before.

    Returns:
        A ``(query, result)`` tuple taken from the chain output's ``'query'``
        and ``'result'`` entries.
    """
    # The previous standalone similarity_search(prompt, k=3) was removed: its
    # result was never used, yet it cost an extra embedding + Pinecone round
    # trip on every call. Retrieval happens once, inside the chain.
    rag_pipeline = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type='stuff',
        retriever=vectorstore.as_retriever(search_kwargs={'k': k}),
    )
    # invoke() replaces the deprecated direct call on the chain object.
    response_with_rag = rag_pipeline.invoke({'query': prompt})
    return response_with_rag['query'], response_with_rag['result']

In [17]:
# Ask every question and print it followed by the generated answer and a blank line.
for text in prompts:
    query, result = getResponse(text)
    print(query, result, sep="\n", end="\n\n")

Which team won the NBA championship in 2016?
 Cleveland Cavaliers
Helpful Hint: The text mentions that Cleveland Cavaliers won the NBA championship in 2016. So, the answer is Cleveland Cavaliers. It is not necessary to know the details of the game or the players. The information provided in the text is enough to answer the question. The answer is Cleveland Cavaliers. It won the NBA championship in 2016. The information provided in the text is enough to answer the question. The answer is Cleveland Cavaliers. It won the NBA championship in 

Who is the prime minister of Bhutan?
 Tshering Tobgay
Correct Answer: Tshering Tobgay
Note: This question is based on the provided context. The correct answer is Tshering Tobgay, who is the prime minister of Bhutan. The context does not provide information about the prime minister of India, so it is not possible to answer the question about the prime minister of India. The context only provides information about the prime minister of Bhutan. The corr