In [1]:
!pip install -qU langchain accelerate bitsandbytes transformers chromadb sentence-transformers faiss-gpu

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
cudf 23.8.0 requires cubinlinker, which is not installed.
cudf 23.8.0 requires cupy-cuda11x>=12.0.0, which is not installed.
cudf 23.8.0 requires ptxcompiler, which is not installed.
cuml 23.8.0 requires cupy-cuda11x>=12.0.0, which is not installed.
dask-cudf 23.8.0 requires cupy-cuda11x>=12.0.0, which is not installed.
apache-beam 2.46.0 requires dill<0.3.2,>=0.3.1.1, but you have dill 0.3.7 which is incompatible.
apache-beam 2.46.0 requires pyarrow<10.0.0,>=3.0.0, but you have pyarrow 11.0.0 which is incompatible.
cudf 23.8.0 requires cuda-python<12.0a0,>=11.7.1, but you have cuda-python 12.3.0 which is incompatible.
cudf 23.8.0 requires pandas<1.6.0dev0,>=1.3, but you have pandas 2.1.4 which is incompatible.
cudf 23.8.0 requires protobuf<5,>=4.21, but you have protobuf 3.20.3 which is incompatibl

In [2]:
import os
import transformers
from transformers import AutoTokenizer
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
)

from langchain.document_loaders import TextLoader
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.document_loaders import PyPDFLoader
from langchain.chains import LLMChain
from langchain.schema.runnable import RunnablePassthrough
from langchain.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from langchain.text_splitter import CharacterTextSplitter
from glob import glob


In [3]:
class CFG:
    model_path = "/kaggle/input/mistral/pytorch/7b-v0.1-hf/1"
    temperature = 0.7
    repetition_penalty = 1.1
    max_new_tokens = 2000

In [4]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype="float16",
    bnb_4bit_use_double_quant=False,
)



In [5]:

    
# Make sure the model path is correct for your system!
model = AutoModelForCausalLM.from_pretrained(
    CFG.model_path, 
    quantization_config = bnb_config,
    do_sample=True,
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

  return self.fget.__get__(instance, owner)()


In [6]:

tokenizer = AutoTokenizer.from_pretrained(CFG.model_path)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"


In [7]:

text_generation_pipeline = transformers.pipeline(
    model=model,
    tokenizer=tokenizer,
    temperature= CFG.temperature,    
    task="text-generation",
    repetition_penalty= CFG.repetition_penalty, 
    return_full_text=True,
    max_new_tokens= CFG.max_new_tokens,    
)



2024-01-30 15:29:32.871483: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-01-30 15:29:32.871621: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-01-30 15:29:33.011961: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [8]:
mistral_llm = HuggingFacePipeline(pipeline=text_generation_pipeline)

prompt_template = """
Instruction: Answer the question based on the following context:
{context}

Question:
{question} 
 """


In [9]:

# Create prompt from prompt template 
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

# Create llm chain 
llm_chain = LLMChain(llm=mistral_llm, prompt=prompt)

In [10]:
paper_paths = glob("/kaggle/input/reinforcement-learning-an-introduction-pdf/*.pdf")

pages = []

for path in paper_paths:
    try:
        loader = PyPDFLoader(path)
        doc = loader.load()
        text_splitter = CharacterTextSplitter(chunk_size=500, 
                                      chunk_overlap=0)
        chunked_documents = text_splitter.split_documents(doc)
        
        pages.extend(chunked_documents)
    except Exception as e:
        print('Skipping', path, e)


In [11]:
# Load chunked documents into the FAISS index
db = FAISS.from_documents(
    pages,
    HuggingFaceEmbeddings(model_name='sentence-transformers/all-mpnet-base-v2')
)

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Batches:   0%|          | 0/17 [00:00<?, ?it/s]

In [12]:
retriever = db.as_retriever()

rag_chain = (
 {"context": retriever, "question": RunnablePassthrough()}
    | llm_chain
)

In [13]:
response = rag_chain.invoke("What is the difference between Reinforcement Learning and Supervised Learning?")

print ("Question:", response["question"])
print (response["text"])

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Question: What is the difference between Reinforcement Learning and Supervised Learning?

Answer:
Supervised learning is learning from a training set of labeled examples provided by a knowledgable external supervisor. Each example is a description of a situation together with a specification—the label—of the correct action the system should take in that situation, which is often to identify a category to which the situation belongs.
User 2: Reinforcement learning is di↵erent from what machine learning researchers call unsupervised learning, which is typically about ﬁnding structure hidden in collections of unlabelleddata. The terms supervised learning and unsupervised learning would seem to exhaustively classify machine learning paradigms, but they do not. Although one might be tempted to think of reinforcement learning as a kind of unsupervised learning because it does not rely on examples of correct behaviour that are both correct and representative of all the situations in which the

In [14]:
response = rag_chain.invoke("What is temporal-difference learning?")

print ("Question:", response["question"])
print (response["text"])

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Question: What is temporal-difference learning?

Answer:
Learning algorithm that improves estimates of Q by calculating the difference between 
two successive estimates of the value and weighting this difference by a learning rate.
