In [None]:
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import GPT4AllEmbeddings
from langchain.vectorstores import Chroma
from langchain import HuggingFacePipeline, PromptTemplate
from transformers import AutoTokenizer, pipeline, AutoModelForCausalLM, BitsAndBytesConfig, LlamaTokenizer
from langchain.chains import RetrievalQA
import torch
import warnings
from transformers import set_seed
from IPython.display import clear_output, display, Markdown
from dotenv import load_dotenv

In [None]:
# Fix the random seed through transformers' helper so repeated runs are comparable.
set_seed(42)

# Silence every warning for the rest of the session to keep cell output readable.
warnings.filterwarnings('ignore')

# Prefer the GPU when one is visible to torch, otherwise fall back to the CPU.
if torch.cuda.is_available():
  device = torch.device('cuda')
else:
  device = torch.device('cpu')

In [None]:
# --- Notebook configuration -------------------------------------------------
# Hugging Face repo id of the chat model used by the cells below.
model_name = "meta-llama/Llama-2-7b-chat-hf"
# Folder that is scanned for the PDF files to index.
directory = "./pdf"
# Token budget used by the standalone generation example.
MAX_NEW_TOKENS = 128

In [None]:
def file2doc(directory:str) -> list:
  """Load every file matching ``*.pdf`` in ``directory`` with ``PyPDFLoader``.

  Args:
    directory: Folder handed to ``DirectoryLoader``.

  Returns:
    Whatever ``DirectoryLoader.load()`` yields for the matched PDFs.
  """
  pdf_loader = DirectoryLoader(directory, glob="*.pdf", loader_cls=PyPDFLoader)
  loaded_docs = pdf_loader.load()
  return loaded_docs

In [None]:
def split_text(docs:list, chunk_size:int=1000, chunk_overlap:int=200) -> list:
  """Split loaded documents into overlapping chunks for embedding.

  Args:
    docs: Documents to split (e.g. the output of ``file2doc``).
    chunk_size: Target chunk size passed to ``RecursiveCharacterTextSplitter``.
      Defaults to the value this notebook has always used (1000).
    chunk_overlap: Overlap between consecutive chunks. Defaults to 200.

  Returns:
    The chunked documents produced by ``split_documents``.
  """
  splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
  return splitter.split_documents(docs)

In [None]:

import os

# Fail fast when the Gemini key is missing, but never echo the secret: a bare
# `os.environ['GOOGLE_API_KEY']` as the last expression of a cell prints the key
# into the saved notebook output.
# NOTE(review): `load_dotenv` is imported at the top of the notebook but is not
# called in any visible cell - confirm how the key is meant to reach the environment.
if 'GOOGLE_API_KEY' not in os.environ:
  raise KeyError("GOOGLE_API_KEY is not set; export it before running this notebook")

In [None]:
from langchain.embeddings import HuggingFaceEmbeddings

# Checkpoint id plus the option dictionaries forwarded to HuggingFaceEmbeddings.
modelPath = "sentence-transformers/all-MiniLM-l6-v2"
model_kwargs = dict(device='cpu')                  # run the embedding model on the CPU
encode_kwargs = dict(normalize_embeddings=False)   # keep raw (un-normalised) vectors

# Embedding model object built from the settings above.
embeddings = HuggingFaceEmbeddings(
    encode_kwargs=encode_kwargs,
    model_kwargs=model_kwargs,
    model_name=modelPath,
)

In [None]:
# embedding=GPT4AllEmbeddings(model_kwargs={"device": "cuda"})
def vector_storing(splitted_text):
  """Embed the given chunks and store them in a Chroma collection persisted under ``db``.

  The chunks are embedded with a ``GPT4AllEmbeddings`` instance configured for
  the ``cuda`` device.

  Args:
    splitted_text: Chunked documents, e.g. the output of ``split_text``.

  Returns:
    The vector store returned by ``Chroma.from_documents``.
  """
  gpt4all_embedder = GPT4AllEmbeddings(model_kwargs={"device": "cuda"})
  store = Chroma.from_documents(
      documents=splitted_text,
      embedding=gpt4all_embedder,
      persist_directory='db',
  )
  return store

In [None]:
# Full ingestion pipeline: load the PDFs from `directory`, split them into
# chunks, then embed and store the chunks. The resulting store is reused by the
# retrieval cells further down.
vectorstore = vector_storing(split_text(file2doc(directory)))

In [None]:
def query_vectorstore(prompt,vectorstore):
  """Return the documents most similar to ``prompt`` from ``vectorstore``.

  Args:
    prompt: Query text.
    vectorstore: Object exposing ``as_retriever``; a similarity retriever
      with ``k=6`` is requested from it.

  Returns:
    The result of the retriever's ``get_relevant_documents(prompt)``.
  """
  similarity_retriever = vectorstore.as_retriever(
      search_type="similarity",
      search_kwargs={"k": 6},
  )
  hits = similarity_retriever.get_relevant_documents(prompt)
  return hits

In [None]:
# Retrieval smoke test ("Güneş nedir?" = "What is the sun?").
# NOTE(review): this is not the last expression of its cell, so the returned
# documents are discarded rather than displayed - confirm whether that is intended.
query_vectorstore("Güneş nedir?",vectorstore)

# Tokenizer for the chat model, cached under ./model.
tokenizer = AutoTokenizer.from_pretrained(model_name, padding=True, truncation=True, max_length=512, cache_dir="./model",)

# Llama-2-chat is a causal language model and LangChain's HuggingFacePipeline
# only drives generation-style tasks, so the pipeline task must be
# "text-generation" rather than the extractive "question-answering".
# `cache_dir` is a `from_pretrained` option, so it is forwarded via `model_kwargs`
# instead of being passed as a pipeline init argument.
question_answerer = pipeline(
    "text-generation",
    model=model_name,
    tokenizer=tokenizer,
    model_kwargs={"cache_dir": "./model"},
    device_map="auto"
)

# Wrap the transformers pipeline so LangChain chains can call it.
# NOTE(review): `llm` is rebound by later statements in this notebook; confirm
# which backend is meant to feed the QA chain.
# NOTE(review): confirm these `model_kwargs` are actually applied at generation
# time when a ready-made pipeline object is passed in.
llm = HuggingFacePipeline(
    pipeline=question_answerer,
    model_kwargs={"temperature": 0.3, "max_length": 512},
)

# Per-GPU memory budget: free memory reported for the current CUDA device,
# minus 2 GiB of headroom, applied to every visible GPU index.
# Without CUDA the query would raise, so fall back to an empty mapping.
n_gpus = torch.cuda.device_count()
if torch.cuda.is_available() and n_gpus > 0:
  free_bytes = torch.cuda.mem_get_info()[0]
  per_gpu_budget = f'{int(free_bytes/1024**3)-2}GB'
  max_memory = {i: per_gpu_budget for i in range(n_gpus)}
else:
  max_memory = {}

# Load the chat model with 4-bit bitsandbytes quantisation, letting
# `device_map="auto"` decide the placement.
# NOTE(review): the `max_memory` mapping computed just above is not passed
# here - confirm whether it was meant to be.
quantization_config = BitsAndBytesConfig(load_in_4bit=True)
llm = AutoModelForCausalLM.from_pretrained(
  model_name,
  cache_dir="./model",
  device_map="auto",
  quantization_config=quantization_config
  )
# Use `model_name` (not a repeated literal) so model and tokenizer cannot drift apart.
tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir="./model")

# Load the chat model without a quantisation config.
# NOTE(review): this rebinds `llm`, replacing whatever model an earlier
# statement bound to that name - confirm that only one of the loading variants
# is meant to run, since each one loads the full checkpoint.
llm = AutoModelForCausalLM.from_pretrained(
  model_name,
  cache_dir="./model",
  device_map="auto"
  )
# Use `model_name` (not a repeated literal) so model and tokenizer cannot drift apart.
tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir="./model")

In [None]:
# Authenticate this session against the Hugging Face Hub.
# NOTE(review): presumably prompts interactively for an access token, which
# would block an unattended "Run All" - confirm.
from huggingface_hub import login
login()

In [None]:
from langchain_community.llms import HuggingFaceHub

# The token used to be read from the misspelled variable HUGGIGFACEHUB_API_KEY.
# Prefer the standard HUGGINGFACEHUB_API_TOKEN name and keep the old spelling
# as a fallback so existing environments continue to work.
hf_api_token = os.environ.get("HUGGINGFACEHUB_API_TOKEN") or os.environ.get("HUGGIGFACEHUB_API_KEY")
if not hf_api_token:
  raise KeyError("Set HUGGINGFACEHUB_API_TOKEN (the legacy HUGGIGFACEHUB_API_KEY is still accepted)")

# Hosted-inference LLM wrapper for the configured chat model.
llm=HuggingFaceHub(
  repo_id=model_name,
  model_kwargs={"temperature":0.7},
  huggingfacehub_api_token=hf_api_token
)

def chat_with_llama(prompt):
    """Generate a reply to ``prompt`` with the module-level ``llm`` and ``tokenizer``.

    Uses beam search (4 beams), forbids repeated bigrams, and caps the total
    sequence length at 256 tokens.

    NOTE(review): this calls ``llm.generate`` with a tensor of token ids, so it
    expects ``llm`` to be a transformers model. The statement just above this
    function binds ``llm`` to a ``HuggingFaceHub`` wrapper - confirm which
    object is intended.

    Args:
        prompt: User text to send to the model.

    Returns:
        The decoded first generated sequence, with special tokens stripped.
    """
    input_ids = tokenizer.encode(prompt, return_tensors="pt")
    # Follow the model's own device instead of assuming CUDA, so the function
    # also works on CPU-only machines.
    input_ids = input_ids.to(llm.device)
    output = llm.generate(input_ids, max_length=256, num_beams=4, no_repeat_ngram_size=2)
    response = tokenizer.decode(output[0], skip_special_tokens=True)
    return response

# Interactive chat loop. An empty line ends the session; without this exit the
# loop never terminates and nothing after it in the notebook can run.
while True:
    prompt = input("You: ")
    if prompt == '':
        break
    response = chat_with_llama(prompt)
    print("Llama:", response)

# One-shot generation example.
text = 'Hamburg is in which country?\n'
tokenizer = LlamaTokenizer.from_pretrained(model_name)
# Put the prompt tensor on the same device as the model before generating.
input_ids = tokenizer(text, return_tensors="pt").input_ids.to(llm.device)

# MAX_NEW_TOKENS is a budget for *generated* tokens; `max_length` would also
# count the prompt tokens against it.
generated_ids = llm.generate(input_ids, max_new_tokens=MAX_NEW_TOKENS)
print(tokenizer.decode(generated_ids[0], skip_special_tokens=True))

In [None]:
# Turkish-language instruction prompt for the QA chain. It tells the model to
# answer clearly and in detail from the supplied context, to draw a conclusion
# for comparison questions, and to reply "Metinde bilgi bulunmamaktadır" or
# "Bilmiyorum" when the documents do not contain the information.
# `{context}` and `{question}` are the placeholders filled in by the template.
# The string is not raw, so each `\n` becomes a real newline; the `?` after
# `{context}` is sent to the model literally.
prompt_template = """
  Soruyu verilen bağlama göre en anlaşılır ve detaylı şekilde cevapla.
  Gelen sorular karşılaştırma sorusu, genel sorular veya direk bilgi istenen sorular olabilir.
  Karşılaştırma sorularına bağlamdan anlamlı bir sonuç çıkararak cevap vereceksin.
  Soruları yanıtlarken sadece Türklerin bakış açısından cevapla.
  Sana sağlanan dokümanlarda bilgisi bulunmayan bir bağlama yanıt olarak "Metinde bilgi bulunmamaktadır" veya "Bilmiyorum" cevabını vereceksin.

  Context:\n {context}?\n
  Question: \n{question}\n
  
  Answer:
"""

# Compile the raw string into a LangChain prompt template.
QA_CHAIN_PROMPT = PromptTemplate.from_template(prompt_template)
# Similarity retriever over the vector store (six hits per query).
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 6},
)

# Disabled Gemini alternative, kept as an inert string literal.
"""import os
os.environ['GOOGLE_API_KEY'] =  GOOGLE_API_KEY
from langchain_google_genai import ChatGoogleGenerativeAI
llm = ChatGoogleGenerativeAI(model="gemini-pro")"""

# "stuff" chain: retrieved chunks are placed into QA_CHAIN_PROMPT and sent to
# `llm`; source documents are not included in the result.
qa_chain = RetrievalQA.from_chain_type(
    chain_type="stuff",
    llm=llm,
    retriever=retriever,
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
    return_source_documents=False,
)

In [None]:
import textwrap
def to_markdown(text):
  """Wrap ``text`` as an IPython ``Markdown`` block quote.

  Bullet characters (``•``) are replaced with ``*`` and every line, blank ones
  included, is prefixed with ``>``.

  Args:
    text: Raw text to render.

  Returns:
    A ``Markdown`` display object holding the quoted text.
  """
  starred = text.replace('•','*')
  quoted = textwrap.indent(starred, '>', predicate=lambda _line: True)
  return Markdown(quoted)

In [None]:
# Ask the LLM directly, bypassing retrieval ("Güneş nedir?" = "What is the sun?").
# Chat models return a message object exposing `.content`, while plain LLM
# wrappers return a str; accept both so this cell works with either backend.
response = llm.invoke("Güneş nedir?")
display(to_markdown(getattr(response, "content", response)))

In [None]:
def ask(question):
    """Run ``question`` through ``qa_chain`` and print the ``'result'`` field.

    Args:
        question: Query text placed under the ``"query"`` key of the chain input.

    Returns:
        None. The answer is only printed.
    """
    chain_output = qa_chain({"query": question})
    answer = chain_output['result']
    print(answer)

In [None]:
# Interactive Q&A loop: an empty question ends the session.
while True:
  question = input("Please ask a question: ")
  if question == '':
    break
  clear_output(wait=True)
  # `ask` prints the answer itself and returns None, so wrapping its result in
  # `Markdown(...)` built an object that was never displayed; call it directly.
  ask(question)