In [None]:
! pip install -q accelerate bitsandbytes safetensors langchain  datasets sentence-transformers chromadb youtube-transcript-api
! pip install -q  git+https://github.com/huggingface/transformers

In [2]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from transformers import AutoModelForCausalLM, AutoTokenizer
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.document_loaders import YoutubeLoader

from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFacePipeline
from transformers import  pipeline
import torch
import transformers
from torch import cuda

In [3]:
# Hub identifier of the checkpoint; the same id is used below to load both the
# model weights and the matching tokenizer.
model_name = "bn22/Mistral-7B-Instruct-v0.1-sharded"

In [None]:
# 4-bit NF4 quantization with double quantization; matmuls are computed in bfloat16.
bnb_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

# Quantization is requested ONLY through `quantization_config`. Passing
# `load_in_4bit=True` as a separate keyword at the same time is rejected by
# recent transformers releases (it raises a ValueError), and this notebook
# installs transformers from the main branch.
# NOTE(review): `trust_remote_code=True` allows code shipped in the model repo
# to execute locally -- confirm it is actually needed for this checkpoint.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
    quantization_config=bnb_config,
    device_map='auto',
    trust_remote_code=True
)

# Keep the special tokens the tokenizer ships with. BOS must not be remapped to
# id 0: in the Mistral vocabulary id 0 is `<unk>`, while `<s>` (BOS) is id 1.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Deterministic (greedy) decoding is requested explicitly with `do_sample=False`.
# `temperature=0` is not a valid sampling temperature and, like `top_p`, only
# applies when sampling is enabled, so neither is passed here.
# `max_length` bounds prompt + generated tokens together.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=2048,
    do_sample=False,
    repetition_penalty=1.2
)

# Wrap the transformers pipeline so LangChain chains can use it as an LLM.
local_llm = HuggingFacePipeline(pipeline=pipe)

In [None]:
# Run the embedding model on the currently selected CUDA device when one is
# available; otherwise fall back to the CPU.
if cuda.is_available():
    device = f'cuda:{cuda.current_device()}'
else:
    device = 'cpu'

embed_model_id = 'sentence-transformers/all-MiniLM-L6-v2'

# The same device is handed to both the model constructor and the encode call;
# texts are embedded in batches of 32.
embed_model = HuggingFaceEmbeddings(
    model_name=embed_model_id,
    model_kwargs=dict(device=device),
    encode_kwargs=dict(device=device, batch_size=32),
)

In [6]:
# Build a loader for one YouTube video and load it into LangChain documents.
# NOTE(review): "0phRae42lqY" is presumably the video id whose transcript is
# fetched -- confirm, and consider lifting it into a named constant.
loader = YoutubeLoader("0phRae42lqY")
documents = loader.load()

In [7]:
# Break the loaded documents into chunks of at most 1000 characters, no overlap.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
documents = text_splitter.split_documents(documents)

# The store is persisted to the "DB" directory, so it outlives this kernel.
# Without explicit ids every re-run of this cell would append the same chunks
# again under fresh random ids and retrieval would start returning duplicates.
# Deterministic ids (document source + chunk position) make a re-run overwrite
# the existing entries instead.
chunk_ids = [
    f"{doc.metadata.get('source', 'doc')}-{position}"
    for position, doc in enumerate(documents)
]

vectordb = Chroma.from_documents(
    documents,
    embedding=embed_model,
    ids=chunk_ids,
    persist_directory="DB"
)

In [None]:
# Display the chunks produced by the splitter in the previous cell.
# NOTE(review): this dumps the whole list; a slice such as documents[:3] keeps
# the saved notebook output small.
documents

In [9]:
# Retrieval-augmented QA: the retriever pulls the single best-matching chunk
# (k=1) from the vector store and the "stuff" chain type places it directly
# into the prompt sent to the local LLM. Only the answer text is returned.
qa_chain = RetrievalQA.from_chain_type(
    llm=local_llm,
    chain_type="stuff",
    retriever=vectordb.as_retriever(search_kwargs=dict(k=1)),
    verbose=False,
    return_source_documents=False,
)

In [None]:
# Same question that is sent to the QA chain, without stray trailing whitespace,
# so this preview reflects exactly what the chain's retriever will look up.
query = 'what is the problem we want to solve ?'

# Preview the retrieval step on its own, before the LLM is involved.
vectordb.similarity_search(
    query,  # the search query
    k=1  # return only the single most relevant chunk (same k as the QA retriever)
)

In [None]:
# Run the full chain: retrieve the best-matching chunk, then let the local LLM
# answer the question from it. print() is used so that newlines in the answer
# are rendered rather than shown escaped.
response = qa_chain.run("what is the problem we want to solve ?")
print(response)