In [1]:
!pip install transformers peft langchain 
!pip install -U langchain-community
!pip install sentence-transformers
!pip install faiss-gpu
!pip install py7zr

Collecting peft
  Downloading peft-0.11.1-py3-none-any.whl.metadata (13 kB)
Collecting langchain
  Downloading langchain-0.2.6-py3-none-any.whl.metadata (7.0 kB)
Collecting langchain-core<0.3.0,>=0.2.10 (from langchain)
  Downloading langchain_core-0.2.10-py3-none-any.whl.metadata (6.0 kB)
Collecting langchain-text-splitters<0.3.0,>=0.2.0 (from langchain)
  Downloading langchain_text_splitters-0.2.2-py3-none-any.whl.metadata (2.1 kB)
Collecting langsmith<0.2.0,>=0.1.17 (from langchain)
  Downloading langsmith-0.1.82-py3-none-any.whl.metadata (13 kB)
Collecting packaging>=20.0 (from transformers)
  Downloading packaging-24.1-py3-none-any.whl.metadata (3.2 kB)
Collecting orjson<4.0.0,>=3.9.14 (from langsmith<0.2.0,>=0.1.17->langchain)
  Downloading orjson-3.10.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (49 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m49.7/49.7 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
Downloading peft-0.11.1-py3-none-

In [2]:
import os
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:512'
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
from datasets import concatenate_datasets, load_dataset
import gc
# import gradio as gr
from peft import PeftModel, PeftConfig
from langchain.chains import RetrievalQA
from langchain_community.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings
from langchain.docstore.document import Document
from langchain.llms import HuggingFacePipeline
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains.question_answering import load_qa_chain
from langchain.prompts import PromptTemplate
import torch
import random
from langchain.document_loaders import WebBaseLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.memory import ConversationBufferMemory
import requests
import re
from langchain.text_splitter import SpacyTextSplitter

# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

2024-07-01 08:49:44.748607: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-07-01 08:49:44.748717: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-07-01 08:49:44.885604: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [None]:
# Load the SAMSum train and validation splits and join them into one dataset
# (used for generating questions).
train_dataset = load_dataset("samsum", split='train', trust_remote_code=True)
val_dataset = load_dataset("samsum", split='validation', trust_remote_code=True)
samsum_dataset = concatenate_datasets([train_dataset, val_dataset])

model_name = "google/flan-t5-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Reference copy of the pretrained checkpoint; nothing below modifies it.
base_model = AutoModelForSeq2SeqLM.from_pretrained(model_name, torch_dtype=torch.bfloat16).to(device)

rlhf_model_path = "/kaggle/input/models/final_model"
config = PeftConfig.from_pretrained(rlhf_model_path)

# PeftModel.from_pretrained injects the adapter layers into the model object it
# is handed, and merge_and_unload() then folds the adapter weights into that same
# object (for LoRA-style adapters). Passing `base_model` here would therefore
# leave `base_model` holding the merged RLHF weights and aliasing `merged_model`.
# The adapter gets its own freshly loaded copy of the checkpoint instead.
ppo_model = PeftModel.from_pretrained(
    AutoModelForSeq2SeqLM.from_pretrained(model_name, torch_dtype=torch.bfloat16),
    rlhf_model_path,
).to(device)
# NOTE: after this call `ppo_model` wraps the same (now merged) network that
# `merged_model` refers to.
merged_model = ppo_model.merge_and_unload().to(device)

# Switch all three models to inference mode.
base_model.eval()
ppo_model.eval()
merged_model.eval();  # trailing ';' keeps the cell from printing the full model repr

In [None]:
# Load the DialogSum dataset; no split is requested, so the result is indexed
# by split name further down.
dialogsum_dataset = load_dataset(
    path="knkarthick/dialogsum",
    trust_remote_code=True,
)

def format_dialogsum_as_document(example):
    """Wrap one dataset record in a ``Document``.

    Args:
        example: Mapping that provides ``'dialogue'`` and ``'summary'`` entries.

    Returns:
        A ``Document`` whose ``page_content`` holds the dialogue followed by
        its summary, laid out as ``Dialogue: ... Summary: ...``.
    """
    dialogue_text = example['dialogue']
    summary_text = example['summary']
    content = f"Dialogue:\n {dialogue_text}\n\nSummary: {summary_text}"
    return Document(page_content=content)

# Convert every record of the three DialogSum splits into a Document
documents = []
for split in ('train', 'validation', 'test'):
    documents += map(format_dialogsum_as_document, dialogsum_dataset[split])

# Chunk the documents (chunk_size=5200, no overlap between chunks)
text_splitter = CharacterTextSplitter(chunk_overlap=0, chunk_size=5200)
docs = text_splitter.split_documents(documents)

# Embedding model for the DialogSum chunks; placed on the GPU when CUDA is available
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs=dict(device="cuda" if torch.cuda.is_available() else "cpu"),
    encode_kwargs=dict(batch_size=32),
)

# Build the FAISS vector store from the chunks
vector_store = FAISS.from_documents(docs, embeddings)

# Retriever over the vector store, configured with k=1
retriever = vector_store.as_retriever(search_kwargs=dict(k=1))

# Prompt layout: the retrieved example fills {context}, the dialogue to
# summarize fills {question}.
prompt_template = """
Concisely summarize the dialogue in the end, like the example provided -

Example -
{context}

Dialogue to be summarized:
{question}

Summary:"""

PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

In [None]:
# Build a Hugging Face summarization pipeline around the merged model.
# Generation is deterministic (do_sample=False) and bounded by min_length/max_length.
summarization_pipeline = pipeline(
    "summarization",
    model=merged_model,
    tokenizer=tokenizer,
    max_length=150,
    min_length=20,
    do_sample=False,
)

# Wrap the pipeline in a LangChain LLM
llm = HuggingFacePipeline(pipeline=summarization_pipeline)

# RetrievalQA comes from the notebook's import cell; it is not re-imported here.
# The chain fills PROMPT's {context} from the retriever and {question} from the query.
qa_chain = RetrievalQA.from_chain_type(
    llm, retriever=retriever, chain_type_kwargs={"prompt": PROMPT}
)

# Sample dialogue to summarize
question = """
Amanda: I baked cookies. Do you want some?
Jerry: Sure!
Amanda: I'll bring you tomorrow :-)
"""

# Calling the chain object directly (Chain.__call__) is deprecated in LangChain;
# invoke() accepts the same input dict and returns the same output dict.
result = qa_chain.invoke({"query": question})
result["result"]