https://medium.com/@nimritakoul01/chat-with-llama-2-7b-from-huggingface-llama-2-7b-chat-hf-d0f5735abfcf<br>
https://github.com/Lightning-AI/lit-gpt/blob/main/tutorials/download_llama_2.md<br>
https://ai.meta.com/blog/5-steps-to-getting-started-with-llama-2/<br>
https://github.com/thisserand/llama2_local<br>
https://www.youtube.com/watch?v=WzCS8z9GqHw<br>
https://medium.com/@martin-thissen/llama2-chat-on-your-local-computer-free-gpt-4-alternative-41b7638033a8<br>
https://www.youtube.com/watch?v=AOzMbitpb00<br>
YouTube search query: Llama-2-7b-chat


In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import GPT4AllEmbeddings
from langchain.vectorstores import Chroma
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.schema import StrOutputParser
from langchain_core.messages import HumanMessage, AIMessage
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables import RunnablePassthrough
from langchain_community.llms import HuggingFaceHub
import os
import warnings
from dotenv import load_dotenv
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from IPython.display import display, Markdown, clear_output
import textwrap

In [None]:
# Silence every warning category for the rest of the session so library
# notices do not clutter the notebook output.
warnings.filterwarnings(action='ignore')

In [None]:
# Path of the PDF that is loaded, split and indexed for retrieval.
file_path = "./sample_01.pdf"

In [None]:
def to_markdown(text):
    """Wrap *text* in an IPython ``Markdown`` object rendered as a block quote.

    Bullet characters ('•') become Markdown list markers ('*'), and every
    line, blank lines included, is prefixed with '>'.
    """
    bulleted = text.replace('•', '*')
    # The always-true predicate makes textwrap.indent prefix blank lines too.
    quoted = textwrap.indent(bulleted, '>', predicate=lambda _line: True)
    return Markdown(quoted)

In [None]:
def pdf2docs(file_path):
    """Load the PDF at *file_path* with ``PyPDFLoader`` and return its documents.

    Image extraction is enabled on the loader (``extract_images=True``).
    """
    loader = PyPDFLoader(file_path, extract_images=True)
    return loader.load()

In [None]:
def format_docs(docs):
    """Join the ``page_content`` of every document in *docs* with blank lines.

    Returns an empty string when *docs* is empty.
    """
    contents = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(contents)

In [None]:
def text_split(docs):
    """Split *docs* into chunks with a ``RecursiveCharacterTextSplitter``.

    The splitter is configured with ``chunk_size=500`` and no overlap between
    consecutive chunks.
    """
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
    return splitter.split_documents(docs)

In [None]:
def vector_store(splits):
    """Build a Chroma vector store from *splits* using GPT4All embeddings."""
    embedding_model = GPT4AllEmbeddings()
    return Chroma.from_documents(documents=splits, embedding=embedding_model)

In [None]:
def retriever(vector_index):
    """Return a similarity-search retriever over *vector_index*.

    The retriever is requested with ``k=6`` in its search kwargs.
    """
    search_options = {"k": 6}
    return vector_index.as_retriever(
        search_type="similarity",
        search_kwargs=search_options,
    )

In [None]:
def default_model(prompt):
    """Send *prompt* to the "gemini-pro" model and display the reply as Markdown.

    Nothing is returned; the response text is rendered through
    ``to_markdown`` and ``display``.
    """
    # NOTE(review): `genai` is not imported in any visible cell; presumably
    # `google.generativeai` is meant. Confirm and import it before calling this.
    gemini = genai.GenerativeModel("gemini-pro")
    reply = gemini.generate_content(prompt)
    display(to_markdown(reply.text))

In [None]:
# Build the vector index in one pass: load the PDF, split it into chunks,
# then embed the chunks into a vector store.
vector_index = vector_store(text_split(pdf2docs(file_path)))

In [None]:
# Smoke-test the retriever with a sample Turkish query ("What is the Sun?").
retrieved_docs = retriever(vector_index).get_relevant_documents("Güneş nedir?")
print(len(retrieved_docs))

# The store can return no matches (for example when the PDF has no extractable
# text); guard the index so the cell reports that instead of raising IndexError.
if retrieved_docs:
    print(retrieved_docs[0].page_content)
else:
    print("No documents were retrieved for the sample query.")

In [None]:
# Copy variables from a local .env file into os.environ. `load_dotenv` is
# imported at the top of the notebook; without this call the token is only
# found when it was exported in the shell that started the kernel.
load_dotenv()

# Prefer the variable name LangChain documents for this integration and fall
# back to the key this notebook has always read, so an existing .env file
# keeps working unchanged.
_hf_token = (
    os.environ.get("HUGGINGFACEHUB_API_TOKEN")
    or os.environ.get("HUGGIGFACEHUB_API_KEY")
)
if not _hf_token:
    # Same exception type a missing os.environ[...] lookup would raise.
    raise KeyError(
        "Hugging Face token not found: set HUGGINGFACEHUB_API_TOKEN "
        "(or the legacy HUGGIGFACEHUB_API_KEY) in the environment or .env file"
    )

# Hosted Llama-2 chat model served through the Hugging Face Hub.
llm = HuggingFaceHub(
    repo_id="meta-llama/Llama-2-7b-chat-hf",
    model_kwargs={"temperature": 0.2},
    huggingfacehub_api_token=_hf_token,
)

In [None]:
import torch
import torch.nn as nn
from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
device  # last expression of the cell: shows the selected device in the notebook

In [None]:
# Load the Llama-2 chat checkpoint locally; both the tokenizer and the weights
# are cached under ./base_models/.
tokenizer = AutoTokenizer.from_pretrained(
    "meta-llama/Llama-2-7b-chat-hf",
    cache_dir='./base_models/',
)
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-chat-hf",
    cache_dir='./base_models/',
    device_map='auto',
)

In [None]:
# Turkish instruction prompt for the retrieval QA chain. It exposes two
# template variables, {context} and {question}. English gloss of the
# instructions:
#   - Answer the question from the given context as clearly and in as much
#     detail as possible.
#   - Questions may be comparison questions, general questions, or direct
#     requests for information.
#   - For comparison questions, draw a meaningful conclusion from the context.
#   - Answer only from the perspective of Turks.
#   - When the provided documents do not contain the information, reply
#     "Metinde bilgi bulunmamaktadır" (no information in the text) or
#     "Bilmiyorum" (I don't know).
# NOTE(review): a literal "?" follows {context}; it looks unintended, but it is
# part of the runtime prompt, so confirm before removing it.
prompt_template = """
  Soruyu verilen bağlama göre en anlaşılır ve detaylı şekilde cevapla.
  Gelen sorular karşılaştırma sorusu, genel sorular veya direk bilgi istenen sorular olabilir.
  Karşılaştırma sorularına bağlamdan anlamlı bir sonuç çıkararak cevap vereceksin.
  Soruları yanıtlarken sadece Türklerin bakış açısından cevapla.
  Sana sağlanan dokümanlarda bilgisi bulunmayan bir bağlama yanıt olarak "Metinde bilgi bulunmamaktadır" veya "Bilmiyorum" cevabını vereceksin.

  Context:\n {context}?\n
  Question: \n{question}\n

  Answer:
"""

In [None]:
# System instruction that asks the model to rewrite a follow-up question as a
# standalone one, without answering it.
condense_system_prompt = (
    "Given a chat history and the latest user question "
    "which might reference the chat history, formulate a standalone question "
    "which can be understood without the chat history. Do NOT answer the question, "
    "just reformulate it if needed and otherwise return it as is."
)

# Prompt layout: system instruction, then the prior turns, then the new question.
condense_prompt = ChatPromptTemplate.from_messages([
    ("system", condense_system_prompt),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{question}"),
])

# prompt -> LLM -> plain string
condense_chain = condense_prompt | llm | StrOutputParser()

# Sample invocation with a two-turn history; the result is kept in `message`.
message = condense_chain.invoke({
    "chat_history": [
        HumanMessage(content="What does LLM stand for?"),
        AIMessage(content="Large language model in machine learning world"),
    ],
    "question": "What does LLM mean?",
})

In [None]:
# System prompt for the answering step. The instructions form one paragraph
# and the retrieved context follows after a blank line, so that {context} is
# not fused onto the end of the last instruction sentence.
qa_system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer the question. "
    "If you don't know the answer, just say that you don't know. "
    "Use three sentences maximum and keep the answer concise."
    "\n\n"
    "{context}"
)

# Prompt layout: system instruction (with context), prior turns, new question.
qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", qa_system_prompt),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{question}"),
    ]
)


def condense_question(input: dict):
    """Pick how the question for this turn is obtained.

    Returns ``input["question"]`` unchanged when ``input`` has no (or an
    empty) ``"chat_history"``; otherwise returns the module-level
    ``condense_chain``.
    """
    if not input.get("chat_history"):
        return input["question"]
    return condense_chain

In [None]:
# Turn the Turkish instruction template into a LangChain prompt object.
QA_CHAIN_PROMPT = PromptTemplate.from_template(prompt_template)

# Retrieval QA chain: retrieves from the vector index and answers with `llm`
# using the custom prompt. Source documents are not included in the output.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever(vector_index),
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
    return_source_documents=False,
)

In [None]:
def ask(question):
    """Run *question* through ``qa_chain`` and print the ``'result'`` field."""
    response = qa_chain({"query": question})
    print(response['result'])

In [None]:
# Interactive loop: keep answering questions until an empty line is entered.
while True:
    question = input("Please ask a question: ")
    if not question:
        break
    # Replace the previous answer in the cell output before printing the new one.
    clear_output(wait=True)
    ask(question)

In [None]:
# Use a pipeline as a high-level helper
from transformers import pipeline

# Reuse the `model` and `tokenizer` objects loaded earlier in this notebook.
# Passing the repo id again would load the 7B checkpoint a second time and
# bypass the ./base_models/ cache directory used for the first load.
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)