In [1]:
# Install Pytorch & other libraries
!pip install "torch==2.1.2" tensorboard

# Install Hugging Face libraries
!pip install  --upgrade \
  "transformers==4.38.2" \
  "datasets==2.16.1" \
  "accelerate==0.26.1" \
  "evaluate==0.4.1" \
  "bitsandbytes==0.42.0" \
  "trl==0.7.11" \
  "peft==0.8.2" \
    "langchain" \
"sentence-transformers" \
"faiss-cpu"
!pip install unstructured
!pip install pdfminer
!pip install pdfminer.six

Collecting transformers==4.38.2
  Downloading transformers-4.38.2-py3-none-any.whl.metadata (130 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 130.7/130.7 kB 4.6 MB/s eta 0:00:00
Collecting datasets==2.16.1
  Downloading datasets-2.16.1-py3-none-any.whl.metadata (20 kB)
Collecting accelerate==0.26.1
  Downloading accelerate-0.26.1-py3-none-any.whl.metadata (18 kB)
Collecting evaluate==0.4.1
  Downloading evaluate-0.4.1-py3-none-any.whl.metadata (9.4 kB)
Collecting bitsandbytes==0.42.0
  Downloading bitsandbytes-0.42.0-py3-none-any.whl.metadata (9.9 kB)
Collecting trl==0.7.11
  Downloading trl-0.7.11-py3-none-any.whl.metadata (10 kB)
Collecting peft==0.8.2
  Downloading peft-0.8.2-py3-none-any.whl.metadata (25 kB)
Collecting langchain
  Downloading langchain-0.1.16-py3-none-any.whl.metadata (13 kB)
Collecting sentence-transformers
  Downloading sentence_transformers-2.7.0-py3-none-any.whl.metadata (11 kB)
Collecting fai

In [2]:
# Kaggle-specific: read the Hugging Face access token from the notebook's user
# secrets (stored under the label "hf_token") instead of hard-coding it.
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()
# The token is later passed as `token=` when the model pipeline is created.
hf_token = user_secrets.get_secret("hf_token")

In [3]:
import torch
from IPython.display import display_markdown
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from langchain.text_splitter import RecursiveCharacterTextSplitter
from transformers import pipeline
import transformers
from langchain.document_loaders import UnstructuredPDFLoader,PDFMinerLoader,TextLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

# Hugging Face model id
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"

# Text-generation pipeline for the model above: float16 dtype, weights loaded
# in 4-bit, authenticated with the Hugging Face token.
# NOTE: this rebinds the name `pipeline` (imported from `transformers` above) to
# the pipeline *instance*; later cells rely on that global name.
pipeline = transformers.pipeline(
    task="text-generation",
    model=model_id,
    token=hf_token,
    model_kwargs=dict(
        torch_dtype=torch.float16,
        quantization_config=dict(load_in_4bit=True),
        low_cpu_mem_usage=True,
    ),
)

# Token ids that stop generation: the tokenizer's EOS token, followed by the
# id of the "<|eot_id|>" token.
terminators = [pipeline.tokenizer.eos_token_id]
terminators.append(pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>"))

2024-04-19 06:44:38.634319: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-19 06:44:38.634423: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-19 06:44:38.762394: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


config.json:   0%|          | 0.00/654 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/126 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/50.9k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.08M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/73.0 [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [4]:
### Path of the PDF file used as the knowledge source for RAG
pdf_file_path = "/kaggle/input/deep-learning-ian-goodfellow/DeepLearningBook.pdf"

In [5]:
class Llama3_8B_gen:
    """Generate answers to a query from retrieved context with a chat pipeline.

    The instance uses the pipeline it was constructed with (and that pipeline's
    tokenizer) for prompt building, stop tokens and generation, so it does not
    depend on any module-level ``pipeline`` / ``terminators`` variables.
    """

    # End-of-turn marker whose id is used as a stop token in addition to EOS.
    END_OF_TURN_TOKEN = "<|eot_id|>"

    def __init__(self,pipeline):
        """Store the pipeline and derive the stop-token ids from its tokenizer.

        Args:
            pipeline: Callable text-generation pipeline exposing a ``tokenizer``
                attribute with ``eos_token_id``, ``convert_tokens_to_ids`` and
                ``apply_chat_template``.
        """
        self.pipeline= pipeline
        tokenizer = pipeline.tokenizer
        # Generation stops at either the EOS token or the end-of-turn token.
        self.terminators = [
            tokenizer.eos_token_id,
            tokenizer.convert_tokens_to_ids(self.END_OF_TURN_TOKEN),
        ]

    @staticmethod
    def generate_prompt(query,retrieved_text,tokenizer=None):
        """Build the chat-formatted prompt string for a query and its context.

        Args:
            query: The user's question; becomes the "user" message.
            retrieved_text: Context text embedded into the "system" message.
            tokenizer: Tokenizer providing ``apply_chat_template``. When omitted,
                the tokenizer of the module-level ``pipeline`` is used, which
                keeps the original two-argument call working.

        Returns:
            The prompt as text (``tokenize=False``) with the generation prompt
            appended.
        """
        if tokenizer is None:
            # Backward-compatible fallback for callers that pass no tokenizer.
            tokenizer = pipeline.tokenizer
        messages = [
            {"role": "system", "content": "Answer the Question for the Given below context and information and not prior knowledge, only give the output result \n\ncontext:\n\n{}".format(retrieved_text) },
            {"role": "user", "content": query},]
        return tokenizer.apply_chat_template(messages, tokenize=False,add_generation_prompt=True)

    def generate(self,query,retrieved_context,max_new_tokens=512,temperature=0.7,top_p=0.9):
        """Generate an answer to ``query`` grounded on ``retrieved_context``.

        Args:
            query: The user's question.
            retrieved_context: Context text placed into the system message.
            max_new_tokens: Upper bound on the number of generated tokens.
            temperature: Sampling temperature passed to the pipeline.
            top_p: Nucleus-sampling threshold passed to the pipeline.

        Returns:
            The generated text with the leading prompt removed.
        """
        prompt = self.generate_prompt(query, retrieved_context, tokenizer=self.pipeline.tokenizer)
        output = self.pipeline(
            prompt,
            max_new_tokens=max_new_tokens,
            eos_token_id=self.terminators,
            do_sample=True,
            temperature=temperature,
            top_p=top_p,
        )
        # "generated_text" starts with the prompt itself; return only what follows it.
        return output[0]["generated_text"][len(prompt):]

In [6]:
class Langchain_RAG:
    """Retriever over a single PDF: load, chunk, embed, index, then query.

    Construction does all the heavy work (PDF parsing, chunking, embedding and
    building the FAISS index). Calling the instance with a query returns the
    text of the top-``k`` retrieved chunks.
    """

    def __init__(self,pdf_file_path,embedding_model_name="BAAI/bge-small-en-v1.5",
                 chunk_size=1000,chunk_overlap=0,k=4):
        """Build the vector index for ``pdf_file_path``.

        Args:
            pdf_file_path: Path of the PDF file to index.
            embedding_model_name: Model name handed to ``HuggingFaceEmbeddings``.
            chunk_size: ``chunk_size`` given to the text splitter.
            chunk_overlap: ``chunk_overlap`` given to the text splitter.
            k: Number of chunks the retriever returns per query.
        """
        self.embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name)
        self.pdf_file_path = pdf_file_path
        print("loading pdf file , this may take time to process")
        self.loader = PDFMinerLoader(self.pdf_file_path)
        self.data = self.loader.load()
        print("<< pdf file loaded")
        print("<< chunking")
        # NOTE(review): " " is listed before "," and "\n"; if the splitter tries
        # separators in list order, chunks are effectively cut on spaces only - confirm
        # this ordering is intended.
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
            separators=[" ", ",", "\n"],
        )
        self.texts = text_splitter.split_documents(self.data)
        print("<< chunked")
        # The index is only held on this instance; nothing is written to disk here.
        self.get_vec_value= FAISS.from_documents(self.texts, self.embeddings)
        print("<< saved")
        self.retriever = self.get_vec_value.as_retriever(search_kwargs={"k": k})

    def __call__(self,query):
        """Return the retrieved chunks for ``query`` as one context string.

        Chunks are separated by a blank line so that the last word of one chunk
        is not fused with the first word of the next. An empty retrieval result
        yields an empty string.
        """
        retrieved_docs = self.retriever.get_relevant_documents(query)
        return "\n\n".join(doc.page_content for doc in retrieved_docs)

In [7]:
# Generator half of the RAG setup: wraps the text-generation pipeline loaded earlier.
text_gen = Llama3_8B_gen(pipeline=pipeline)
# Retriever half: constructing it loads, chunks and indexes the PDF, printing
# progress messages as it goes.
retriever = Langchain_RAG(pdf_file_path=pdf_file_path)

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/94.8k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/743 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/133M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

loading pdf file , this may take time to process
<< pdf file loaded
<< chunking
<< chunked
<< saved


In [8]:
def Rag_qa(query):
    """Answer ``query`` with retrieval-augmented generation.

    The module-level ``retriever`` is called with the query to obtain the
    context text, which is then passed together with the query to the
    module-level ``text_gen`` generator.

    Args:
        query: The question to answer.

    Returns:
        Whatever ``text_gen.generate`` returns for the query and its context.
    """
    context = retriever(query)
    return text_gen.generate(query, context)

In [9]:
# Ask a question through the RAG pipeline and render the answer as Markdown.
display_markdown(Rag_qa("What is Deep Learning?"),raw=True)

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Deep learning is an approach to machine learning that has drawn heavily on our knowledge of the human brain, statistics and applied math. Specifically, it is a type of machine learning, a technique that allows computer systems to improve with experience and data.

In [10]:
# The query text is used both for retrieval and as the user message sent to the
# model, so the term is spelled correctly ("Convolutional").
display_markdown(Rag_qa("Explain Convolutional Neural Network."), raw=True)

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


A Convolutional Neural Network (CNN) is a type of neural network architecture that is particularly well-suited for processing data with grid-like topology, such as images. The key idea behind CNNs is to leverage the spatial relationships between pixels in an image to improve performance on image-related tasks.

A CNN typically consists of multiple layers, each of which performs a specific function:

1. **Convolutional Layer**: This layer applies a set of filters to the input data, scanning the data in a sliding window fashion. Each filter learns to detect specific features, such as edges or textures, in the input data. The filters are typically small, 3x3 or 5x5, and are applied in a hierarchical manner, allowing the network to capture features at multiple scales.
2. **Activation Function**: After the convolutional layer, an activation function is applied to introduce non-linearity into the network. Common activation functions include ReLU (Rectified Linear Unit), Sigmoid, and Tanh.
3. **Pooling Layer**: The pooling layer reduces the spatial dimensions of the input data, allowing the network to capture larger features and reducing the number of parameters. Common pooling functions include Max Pooling and Average Pooling.
4. **Flattening**: The output of the convolutional and pooling layers is flattened into a 1D vector, which is then fed into the fully connected layers.
5. **Fully Connected Layers**: The flattened output is fed into one or more fully connected layers, which are similar to traditional neural networks. These layers learn to recognize more complex features and patterns.

The key innovations in CNNs are:

* **Parameter sharing**: CNNs share the same set of filters across the input data, which reduces the number of parameters and improves generalization.
* **Spatial hierarchy**: CNNs build a spatial hierarchy of features, allowing them to capture features at multiple scales and resolutions.
* **Translation equivariance**: CNNs are designed to be translation equivariant, meaning that the output is unaffected by the position of the input data.

CNNs have been extremely successful in various computer vision tasks, such as image classification, object detection, and segmentation. They have also been used in natural language processing tasks, such as text classification and machine translation.