# Open-source LLM

In [None]:
%pip install langchain langchain-community

In [None]:
%pip install -U langchain-huggingface

In [None]:
from langchain_community.llms import HuggingFaceHub
import os
from getpass import getpass

# Prompt for the Hugging Face API token without echoing it to the screen.
# The argument to getpass() is only the prompt text shown to the user, so it
# must never contain the secret itself.
# NOTE(review): any token that was ever pasted into this notebook should be
# treated as compromised and revoked.
api_token = getpass("Enter your Hugging Face API token: ")

# Store the token in the HUGGINGFACEHUB_API_TOKEN environment variable
os.environ['HUGGINGFACEHUB_API_TOKEN'] = api_token

# Report whether a non-empty token was stored (for debugging; optional)
print("Token successfully set!" if os.environ['HUGGINGFACEHUB_API_TOKEN'] else "Failed to set token.")



In [None]:
# Create the LLM wrapper for the Zephyr-7B-beta repository; the generation
# settings are handed over through model_kwargs.
llm = HuggingFaceHub(
    repo_id="HuggingFaceH4/zephyr-7b-beta",
    model_kwargs=dict(
        max_new_tokens=1024,
        temperature=0.1,
        repetition_penalty=1.1,
        return_full_text=False,
    ),
)

In [None]:
# Send a single free-form question to the model and print what it returns.
query = "What is the meaning of life?"
print(llm.invoke(query))

# Prompt template

In [27]:
from langchain_core.prompts import ChatPromptTemplate

This JSON-like message pattern is accepted by GPT, Llama, Claude and most other chat models.

In [7]:
# Two-message chat prompt: a fixed system instruction plus a user message
# whose {query} placeholder is filled in when the template is formatted.
template = ChatPromptTemplate.from_messages(
    [
        ("system", "You are assistant Flim Director, you will only answer movie related questions."),
        ("user", "{query}"),
    ]
)

In [None]:
# Fill the template with a question that is not about movies, send the
# resulting messages to the model and print the reply.
irelevant_prompt = template.format_messages(query="Do you know anyting about peace?")
response = llm.invoke(irelevant_prompt)
print(response)

In [9]:
# Prompt written with explicit <|system|> / <|user|> / <|assistant|> markers.
# from_messages() expects a sequence of messages, so the single template
# string is wrapped in a list (a set literal is unordered and not a sequence).
documented_template = ChatPromptTemplate.from_messages(["""
<|system|>
You are a friendly and knowledgeable filmmaker assistant. Avoid referencing being an AI, and instead respond like a human filmmaker sharing expertise.</s>
<|user|>
{query}
</s>
<|assistant|> """])

In [None]:
# Fill the marker-style template with a question, query the model and
# print its reply.
relevant_prompt = documented_template.format_messages(query="Do you know anyting about Friends?")
chatbot_response = llm.invoke(relevant_prompt)
print(chatbot_response)

## Output Parser

In [11]:
from langchain.output_parsers import ResponseSchema, StructuredOutputParser
import json

In [12]:
# Describe the two fields expected in the model's structured reply.
question = ResponseSchema(name="question", description="The question asked")
answer = ResponseSchema(name="answer", description="The answer to the question")
response_schema = [question, answer]

# Build the parser from those field descriptions.
output_parser = StructuredOutputParser.from_response_schemas(response_schema)

In [13]:
# Formatting instructions produced by the parser; they are inserted into the
# prompt through the {instruct} placeholder.
format_instruct = output_parser.get_format_instructions()

In [14]:
# Same marker-style prompt, extended with an {instruct} slot that carries the
# output-format instructions.
# from_messages() expects a sequence of messages, so the single template
# string is wrapped in a list (a set literal is unordered and not a sequence).
documented_template_v2 = ChatPromptTemplate.from_messages(["""
<|system|>
{instruct}
You are a friendly and knowledgeable filmmaker assistant. Avoid referencing being an AI, and instead respond like a human filmmaker sharing expertise.</s>
<|user|>
{query} provide the answer in JSON
</s>
<|assistant|> """])

In [15]:
# Render the prompt: {instruct} receives the parser's format instructions
# and {query} receives the user question.
final_prompt = documented_template_v2.format_messages(instruct= format_instruct, 
                                                      query=" tell me about friends? ")

In [16]:
# Send the rendered prompt to the model and keep the raw reply.
final_response = llm.invoke(final_prompt)

In [None]:
# Parse the raw reply with the structured output parser and print the result.
# Note: despite its name, `parser` holds the parsed result, not a parser object.
parser = output_parser.parse(final_response)
print(parser)

In [None]:
# Show the unparsed model output.
print("Raw Response:", final_response)

In [None]:
# Parse the reply once more and pretty-print the parsed result as JSON.
parsed_response = output_parser.parse(final_response)
print(json.dumps(parsed_response, indent=4))

## LangChain Expression Language

In [20]:
# Prompt used by the LCEL chain; {query} is filled in when the chain runs.
# from_messages() expects a sequence of messages, so the single template
# string is wrapped in a list (a set literal is unordered and not a sequence).
prompt = ChatPromptTemplate.from_messages(["""
<|system|>
You are a friendly and knowledgeable filmaker assistant. Avoid referencing being an AI, and instead respond like a human filmmaker sharing expertise.</s>
<|user|>
{query} provide the answer in JSON
</s>
<|assistant|> """])

In [34]:
from langchain_core.output_parsers import StrOutputParser

In [22]:
# Compose prompt -> model -> string output parser into one runnable using
# the pipe operator.
chain = prompt | llm | StrOutputParser() # pipe

In [None]:
# Run the whole chain for one question and print the result.
print(chain.invoke({"query":"what is life"}))

## Streaming

In [None]:
# NOTE(review): this prints the object returned by chain.stream(), not the
# streamed text; the chunks have to be iterated to see the output - confirm
# this is the intended demonstration.
print(chain.stream({"query":"what is life"}))

In [None]:
# Iterate over the stream itself so that every chunk is printed as soon as it
# arrives; end="" keeps the pieces on one line and flush=True shows them
# immediately.
for chunk in chain.stream({"query": "what is life"}):
    print(chunk, end="", flush=True)

## Batching -LCEL

In [None]:
# Run the chain on two questions in a single batch call.
batch_response = chain.batch(
    [
        {"query": "does tree talks"},
        {"query": "suggest me a drama that i must watch?"},
    ]
)

# Show the reply to the second question only.
print(batch_response[1])

## Vector Database & embeddings

In [2]:
from langchain_community.embeddings import FastEmbedEmbeddings
from langchain_community.vectorstores import Chroma

In [None]:
# Embedding model; also passed to the Chroma vector store in later cells.
embeddings = FastEmbedEmbeddings(model_name="thenlper/gte-large")

In [None]:
# Embed a sample sentence and print the first element of the result.
query_result = embeddings.embed_query("what is life")
print(query_result[0])

## Memory

In [15]:
from langchain.chains import LLMChain
from langchain.memory import ConversationBufferMemory
from langchain_core.prompts import PromptTemplate

In [56]:
# Plain-text prompt for the memory demo: {chatHistory} is the slot for the
# stored conversation and {query} the slot for the new user message.
# Note: this rebinds `template`, which held a ChatPromptTemplate earlier.
template =  """
You are assistant Flim Director, you will only answer movie related questions

{chatHistory}
user: {query}
Chatbot:
"""

In [58]:
# Prompt with two placeholders. input_variables is given as a list so the
# variable order is deterministic (a set literal has no defined order).
prompt = PromptTemplate(template=template, input_variables=["chatHistory", "query"])

In [59]:
# Conversation memory; memory_key matches the {chatHistory} placeholder used
# in the prompt template.
memory = ConversationBufferMemory(memory_key="chatHistory")

In [60]:
# Wire the prompt, the model and the conversation memory into one chain;
# verbose output is switched on.
llm_chain = LLMChain(
    prompt=prompt,
    llm=llm,
    memory=memory,
    verbose=True,
)

In [None]:
# First turn: tell the chain a name and a favourite anime.
llm_chain.predict(query="my name is Vai Bashar and my favourite anime is One Piece")

In [None]:
# Second turn: ask about the fact given in the previous turn.
response = llm_chain.predict(query="what anime bashar loved?")

In [None]:
# Display the reply to the follow-up question.
response

## LangChain RAG - Chat with my own documents

#### Quran data in csv

In [8]:
import pandas as pd

# Load and preprocess the dataset
def preprocess_dataset(file_path, relevant_columns) -> pd.DataFrame:
    """
    Load a CSV file and keep only the requested columns.

    Args:
    - file_path: Path to the CSV file.
    - relevant_columns: List of columns to retain.

    Returns:
    - DataFrame restricted to ``relevant_columns``. If the file cannot be
      opened or parsed, or a requested column is missing, the error is
      printed and an empty DataFrame is returned instead of raising.
    """
    try:
        df = pd.read_csv(file_path)
        return df[relevant_columns]
    # OSError: missing/unreadable file; ValueError: empty or malformed CSV
    # (pandas parser errors derive from it); KeyError: column not present.
    # Anything else is a programming error and is allowed to propagate.
    except (OSError, ValueError, KeyError) as e:
        print(f"Error in loading or processing dataset: {e}")
        return pd.DataFrame()  # Return an empty DataFrame if error occurs


In [9]:
from langchain.document_loaders import CSVLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [None]:

# Preprocess the dataset to load only relevant columns
relevant_columns = ['surah_name_roman', 'surah_name_en', 'ayah_no_surah', 'ayah_en']
quran_df = preprocess_dataset("TheQuranDataset.csv", relevant_columns)

# Convert the DataFrame to a list of dictionaries (one dictionary per row)
quran_data = quran_df.to_dict(orient='records')
print(quran_data[:5])  # Print the first 5 records to verify


In [None]:
# Load the CSV file using CSVLoader
csv_loader = CSVLoader(file_path="TheQuranDataset.csv")

# Load documents from the CSV file.
# Note: this rebinds quran_data, replacing the list of row dictionaries
# built from quran_df.
quran_data = csv_loader.load()
print(quran_data[:5])  # Print the first 5 documents to verify

##### Need to feed chunks into the LLM

In [13]:
# Split the loaded documents into chunks (chunk_size=1000, no overlap
# between consecutive chunks).
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
chunks = text_splitter.split_documents(quran_data)

In [None]:
# Number of chunks produced by the splitter.
len(chunks)

In [None]:
%pip install chromadb
from langchain_community.vectorstores import Chroma

In [None]:
# Index the chunks with the embedding model in a Chroma store that uses the
# "db" directory, then persist it.
db = Chroma.from_documents(chunks,embeddings, persist_directory="db")
db.persist()

#### Django-rest Documentation

In [None]:
%pip install pypdf

from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [None]:
# Load the PDF, split it into chunks and show how many chunks there are.
# Note: this rebinds text_splitter and chunks, which were previously built
# from the CSV documents.
dj_doc_data = PyPDFLoader("django-api-tutorial-latest.pdf").load()
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
chunks = text_splitter.split_documents(dj_doc_data)
len(chunks) 

##### indexing

In [27]:
# Index the current chunks in a Chroma store that uses the "db" directory
# (the same directory used for the CSV chunks), then persist it.
db = Chroma.from_documents(chunks,embeddings, persist_directory="db")
db.persist()

In [None]:
# Open the store in the "db" directory with the same embedding function so
# it can be queried.
vector_store = Chroma(persist_directory="db",embedding_function=embeddings)

In [30]:
# Sample question about the Django REST material.
query = "what is Django-rest in micro service?"

In [40]:
# Sample question about the Quran data; rebinds `query` when this cell is run.
query = "In Quran, Did mentioned Muhammad?"

In [None]:
# Query the store with a max-marginal-relevance search and display the hits.
mmr_result = vector_store.max_marginal_relevance_search(query)
mmr_result

In [None]:
# Wrap the store as a retriever using the "mmr" search type and print what
# it returns for the current query.
retriever = vector_store.as_retriever(search_type="mmr")
print(retriever.invoke(query))

In [42]:
# RAG prompt for a general assistant: {context} is the slot for retrieved
# text and {query} the slot for the user question.
template_two = """
<|system|>
You are an AI Assistant that follows instructions extremely well.
Please be truthful and give direct answers. Please tell 'I don't know' if user query is not in CONTEXT

CONTEXT: {context}
</s>
<|user|>
{query}
</s>
<|assistant|>
"""

In [42]:
# RAG prompt for a programmer assistant: {context} is the slot for retrieved
# text and {query} the slot for the user question.
# Note: this rebinds `template` once more.
template = """
<|system|>
You are an AI Programmer Assistant that follows instructions extremely well.
Please be truthful and give direct answers. Please tell 'I don't know' if user query is not in CONTEXT

CONTEXT: {context}
</s>
<|user|>
{query}
</s>
<|assistant|>
"""

In [48]:
# Chat prompt built from the programmer-assistant template.
prompt = ChatPromptTemplate.from_template(template)
from langchain_core.runnables import RunnablePassthrough


In [28]:
# Chat prompt built from template_two; rebinds `prompt` when this cell is run.
prompt = ChatPromptTemplate.from_template(template_two)
from langchain_core.runnables import RunnablePassthrough


In [35]:
# RAG pipeline: retrieve context for the question, fill the prompt, call the
# model and return plain text.
# The chain is invoked with a dict such as {"query": "..."}; itemgetter pulls
# the question string out of it so the retriever and the {query} placeholder
# both receive the text itself rather than the whole input dict.
from operator import itemgetter

chain = (
    {
        "context": itemgetter("query") | retriever,
        "query": itemgetter("query"),
    }
    | prompt
    | llm
    | StrOutputParser()
)

In [43]:
# Ask the RAG chain a question about the Quran data and keep the answer.
response = chain.invoke({"query":"Does Muhammad mentioned in Quran?"})

In [61]:
# Ask the RAG chain a question about the Django REST material and keep the answer.
response = chain.invoke({"query":"what is Django-rest in micro service?"})

In [None]:
# Display the most recent answer from the chain.
response