## Installing Dependencies

In [None]:
# Install the packages this notebook uses into the current runtime.
# NOTE(review): no versions are pinned, and transformers / peft / accelerate are
# installed straight from their GitHub repositories, so a fresh run may pick up
# different code than the run that produced the saved results — consider pinning.
!pip -q install langchain openai tiktoken chromadb pypdf sentence_transformers InstructorEmbedding
!pip install -q -U bitsandbytes
!pip install -q -U git+https://github.com/huggingface/transformers.git
!pip install -q -U git+https://github.com/huggingface/peft.git
!pip install -q -U git+https://github.com/huggingface/accelerate.git
!pip -q install sentencepiece Xformers einops
!pip -q install unstructured pandoc

In [None]:
# Print the metadata (including the version) of the installed langchain package.
!pip show langchain

# Importing libraries

In [None]:
import torch
import transformers
from transformers import GenerationConfig, pipeline
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import BitsAndBytesConfig
import bitsandbytes as bnb

# Setting up the LLM: Stable Vicuna 13B

In [None]:
import torch
import transformers
from transformers import LlamaTokenizer, LlamaForCausalLM, GenerationConfig, pipeline

# Hugging Face Hub id of the checkpoint. It is shared by the tokenizer and the
# model so the two can never point at different repositories.
MODEL_ID = "TheBloke/stable-vicuna-13B-HF"

tokenizer = LlamaTokenizer.from_pretrained(MODEL_ID)

# Load the weights in 8-bit via an explicit BitsAndBytesConfig (imported in the
# "Importing libraries" cell). Recent transformers releases deprecate passing
# `load_in_8bit=True` directly to `from_pretrained`, and this notebook installs
# transformers from the main branch, so the explicit config is the safer form.
model = LlamaForCausalLM.from_pretrained(
    MODEL_ID,
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
    device_map='auto',           # let accelerate place the layers on the available devices
    torch_dtype=torch.float16,   # dtype for the parts of the model that are not quantized
    low_cpu_mem_usage=True,
)

# Establishing the Hugging Face pipeline

In [None]:
from transformers import pipeline
from langchain.llms import HuggingFacePipeline
import torch

# Build a transformers text-generation pipeline around the model and tokenizer
# loaded in the previous cell.
# NOTE(review): `temperature` and `top_p` are sampling parameters and
# `do_sample` is not set here — confirm they have the intended effect (or pass
# `do_sample=False` explicitly if deterministic decoding is what is wanted).
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=2048,  # presumably counts prompt + generated tokens — TODO confirm
    temperature=0,
    top_p=0.95,
    repetition_penalty=1.15
)

# Wrap the pipeline so LangChain chains can use it as an LLM.
local_llm = HuggingFacePipeline(pipeline=pipe)

In [None]:
from langchain.llms import HuggingFacePipeline
import torch

# Re-create the LangChain wrapper around `pipe`. This is the same statement that
# ends the previous cell, so running this cell is only needed if `pipe` was rebuilt.
local_llm = HuggingFacePipeline(pipeline=pipe)

In [None]:
# Smoke test: query the bare LLM directly, without any retrieved context.
local_llm('When did bangladesh become independent? ')

## Setting up LangChain


In [None]:
import os



In [None]:
from langchain.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter

from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader
from langchain.document_loaders import PyPDFLoader
from langchain.document_loaders import DirectoryLoader


## Loading and processing multiple documents

In [None]:
# Mount Google Drive at /content/drive so the source PDFs can be read from it
# (Colab-only: `google.colab` is not available in other environments).
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Load the PDF files matched by the glob inside the Drive folder, using
# PyPDFLoader for each matched file.
# Alternative kept for reference: load a single plain-text file instead.
# loader = TextLoader('single_text_file.txt')
loader = DirectoryLoader('/content/drive/MyDrive/EkattorDB/ekattorhisdb/db', glob="./*.pdf", loader_cls=PyPDFLoader)

# Read everything the loader matched into a list of documents.
documents = loader.load()

In [None]:
# Sanity check: how many documents the loader produced.
len(documents)

In [None]:
# Split the loaded documents into overlapping chunks (chunk_size=1000 with an
# overlap of 200 between neighbouring chunks) so each piece is small enough to
# be embedded and placed into the prompt.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
texts = text_splitter.split_documents(documents)

In [None]:
# Sanity check: how many chunks the splitter produced.
len(texts)

In [None]:
# Inspect a single chunk by position.
# NOTE(review): the index 370 is hard-coded; this fails if the corpus yields
# fewer than 371 chunks.
texts[370]

## Hugging Face E5 Embeddings

In [None]:
from langchain.embeddings import HuggingFaceEmbeddings, SentenceTransformerEmbeddings

# Hugging Face Hub id of the embedding model.
model_name = "intfloat/e5-large-v2"

# Embedding function used later to build the vector store.
# NOTE(review): the E5 model card recommends prefixing inputs with "query: " /
# "passage: "; no prefix is added here — confirm whether that matters for
# retrieval quality in this setup.
hf = HuggingFaceEmbeddings(model_name=model_name)

## Creating the VectorDB

In [None]:
# Embed and store the texts
# Supplying a persist_directory will store the embeddings on disk
persist_directory = 'db'

# Embedding function used for the store: the `hf` E5 embeddings created above.
# `instructor_embeddings` looks like an earlier alternative — TODO confirm.
embedding = hf #instructor_embeddings

# Build the Chroma vector store from the chunked documents.
vectordb = Chroma.from_documents(documents=texts,
                                 embedding=embedding,
                                 persist_directory=persist_directory)

## Making a retriever

In [None]:
# Retriever over the vector store, using its default search settings.
retriever = vectordb.as_retriever()

In [None]:
# Try the retriever on a sample question and keep the returned documents for inspection.
docs = retriever.get_relevant_documents("When did bangladesh become independent?")

In [None]:
# Number of documents the default retriever returned for the sample question.
len(docs)

In [None]:
# Look at the first returned document.
docs[0]

In [None]:
# Replace the default retriever with one configured with k=3; this is the
# retriever handed to the QA chain below.
retriever = vectordb.as_retriever(search_kwargs={"k": 3})

In [None]:
# Show which search type the retriever is using.
retriever.search_type

In [None]:
# Show the search arguments the retriever was configured with.
retriever.search_kwargs

# Creating the chain to answer questions

In [None]:


# Alternative LLM kept for reference (not used in this notebook):
# llm = ChatOpenAI(temperature = 0.0)

# Build the retrieval question-answering chain on top of the local LLM and the
# k=3 retriever. `return_source_documents=True` makes the chain output include
# the retrieved documents, which `process_llm_response` prints as sources.
qa_chain = RetrievalQA.from_chain_type(llm=local_llm,
                                  chain_type="stuff",
                                  retriever=retriever,
                                  return_source_documents=True)



In [None]:
# Print the prompt template the chain currently uses (before it is overridden below).
print(qa_chain.combine_documents_chain.llm_chain.prompt.template)

In [None]:
# print(qa_chain.combine_documents_chain.llm_chain.prompt.messages[0].prompt.template)

## Prompting template for the LLM

In [None]:
# Overwrite the chain's prompt template in place with a persona-specific one.
# The new text keeps the {context} and {question} placeholders.
qa_chain.combine_documents_chain.llm_chain.prompt.template = '''
Your name is EkattorQA. You are an expert at Bangladesh History.
Use the following pieces of context to answer the users question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Always answer from the perspective of being Bangladesh History.
----------------
{context}

Question: {question}
Helpful Answer:'''

## Processing the LLM responses

In [None]:
def trim_string(input_string):
    """Return the text that comes before the first "### Human:" marker.

    The argument is converted with ``str()`` first. If the marker does not
    occur, the whole converted string is returned unchanged.
    """
    # partition() yields the full string as the head when the marker is absent,
    # and only the part before the first occurrence when it is present.
    head, _marker, _rest = str(input_string).partition("### Human:")
    return head

In [None]:
## Cite sources

import textwrap

def wrap_text_preserve_newlines(text, width=110):
    """Wrap `text` to `width` columns while keeping its existing line breaks.

    Each newline-separated segment is wrapped on its own with
    ``textwrap.fill`` and the segments are joined back with newlines.
    """
    return '\n'.join(
        textwrap.fill(segment, width=width) for segment in text.split('\n')
    )

def process_llm_response(llm_response):
    """Print the chain's answer followed by the sources it was built from.

    Args:
        llm_response: Mapping returned by the QA chain. It must provide a
            'result' entry (the generated text) and a 'source_documents'
            entry (an iterable of documents whose `metadata` has a 'source'
            key).

    Returns:
        None. The answer and the sources are written to stdout.
    """
    # Trim BEFORE wrapping. Wrapping first can break the line in the space
    # between "###" and "Human:", after which the marker is no longer found
    # and the model's follow-on turns would be printed as part of the answer.
    answer = trim_string(llm_response['result'])
    print(wrap_text_preserve_newlines(answer))
    print('\n\nSources:')
    for source in llm_response["source_documents"]:
        print(source.metadata['source'])

# Evaluation of **60 Questions** based on 3 difficulty levels (Easy, Medium, and Hard)

In [None]:
# English-language evaluation question: run it through the RetrievalQA chain,
# then print the trimmed answer and the `source` metadata of each returned document.
query = "When did bangladesh become independent? "
llm_response = qa_chain(query)
process_llm_response(llm_response)

In [None]:
# English-language evaluation question: run it through the RetrievalQA chain,
# then print the trimmed answer and the `source` metadata of each returned document.
query = "Who was Sir Cyril Radcliffe?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

In [None]:
# English-language evaluation question: run it through the RetrievalQA chain,
# then print the trimmed answer and the `source` metadata of each returned document.
query = "What was the outcome of the demand for greater autonomy in East Pakistan?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

In [None]:
# English-language evaluation question: run it through the RetrievalQA chain,
# then print the trimmed answer and the `source` metadata of each returned document.
query = "What was the impact of the partition on migration?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

In [None]:
# English-language evaluation question: run it through the RetrievalQA chain,
# then print the trimmed answer and the `source` metadata of each returned document.
# (A stray "v" that followed the question mark has been removed from the query.)
query = "Who succeeded Liaquat Ali Khan as prime minister?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

In [None]:
# English-language evaluation question: run it through the RetrievalQA chain,
# then print the trimmed answer and the `source` metadata of each returned document.
query = "What was the cause of the 1965 clash between India and Pakistan?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

In [None]:
# English-language evaluation question: run it through the RetrievalQA chain,
# then print the trimmed answer and the `source` metadata of each returned document.
query = "How many seats did the Awami League win in the 1970 elections in East Pakistan?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

In [None]:
# English-language evaluation question: run it through the RetrievalQA chain,
# then print the trimmed answer and the `source` metadata of each returned document.
query = "What was the outcome of the disagreement between the ruling authorities in Pakistan and the Awami League over autonomy for East Pakistan?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

In [None]:
# English-language evaluation question: run it through the RetrievalQA chain,
# then print the trimmed answer and the `source` metadata of each returned document.
query = "What were the local paramilitary forces in Bangladesh that supported the Pakistani cause called?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

## Hard p2

In [None]:
# English-language evaluation question: run it through the RetrievalQA chain,
# then print the trimmed answer and the `source` metadata of each returned document.
# NOTE(review): this exact question is also asked earlier in the notebook —
# confirm the repeat is intentional.
query = "What was the outcome of the disagreement between the ruling authorities in Pakistan and the Awami League over autonomy for East Pakistan?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

In [None]:
# English-language evaluation question: run it through the RetrievalQA chain,
# then print the trimmed answer and the `source` metadata of each returned document.
query = "How does present-day Bangladesh compare to Pakistan?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

In [None]:
# English-language evaluation question: run it through the RetrievalQA chain,
# then print the trimmed answer and the `source` metadata of each returned document.
query = "What is the significance of the liberation war in the context of Bangladeshi history?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

In [None]:
# English-language evaluation question: run it through the RetrievalQA chain,
# then print the trimmed answer and the `source` metadata of each returned document.
query = "Why did Suhrawardy stay in India instead of working in the Pakistani government?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

In [None]:
# English-language evaluation question: run it through the RetrievalQA chain,
# then print the trimmed answer and the `source` metadata of each returned document.
query = "Did the migration during the partition result in the creation of new cultural and linguistic diasporas?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

In [None]:
# English-language evaluation question: run it through the RetrievalQA chain,
# then print the trimmed answer and the `source` metadata of each returned document.
query = "What was the outcome of the Language Movement?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

In [None]:
# English-language evaluation question: run it through the RetrievalQA chain,
# then print the trimmed answer and the `source` metadata of each returned document.
query = "What was the significance of the amendment motion moved by Dhirendranath Datta?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

In [None]:
# English-language evaluation question: run it through the RetrievalQA chain,
# then print the trimmed answer and the `source` metadata of each returned document.
query = "How has the liberation war shaped the mindset of Bangladeshi people?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

In [None]:
# English-language evaluation question: run it through the RetrievalQA chain,
# then print the trimmed answer and the `source` metadata of each returned document.
query = "Why is it important to understand the history of Pakistani colonialism to analyze present-day Bangladeshi politics and socio-economic sphere?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

In [None]:
# English-language evaluation question: run it through the RetrievalQA chain,
# then print the trimmed answer and the `source` metadata of each returned document.
query = "How did the creation of Pakistan impact the Indian subcontinent?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

## Medium p2

In [None]:
# English-language evaluation question: run it through the RetrievalQA chain,
# then print the trimmed answer and the `source` metadata of each returned document.
query = "What was the context of Mujib's repeated imprisonment by the military?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

In [None]:
# English-language evaluation question: run it through the RetrievalQA chain,
# then print the trimmed answer and the `source` metadata of each returned document.
query = "What happened as Bangladeshi retribution against the pro-Pakistani forces ensued?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

In [None]:
# English-language evaluation question: run it through the RetrievalQA chain,
# then print the trimmed answer and the `source` metadata of each returned document.
query = "What was 'the Mukti Sanggha'?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

In [None]:
# English-language evaluation question: run it through the RetrievalQA chain,
# then print the trimmed answer and the `source` metadata of each returned document.
query = "What responsibility was entrusted to Liaquat Ali Khan after the decision to partition of India was taken?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

In [None]:
# English-language evaluation question: run it through the RetrievalQA chain,
# then print the trimmed answer and the `source` metadata of each returned document.
query = "Was the Lahore Resolution successful in achieving its goals?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

In [None]:
# English-language evaluation question: run it through the RetrievalQA chain,
# then print the trimmed answer and the `source` metadata of each returned document.
query = "What was the role of the constituent assembly in Pakistan?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

In [None]:
# English-language evaluation question: run it through the RetrievalQA chain,
# then print the trimmed answer and the `source` metadata of each returned document.
query = "When did the Language Movement reach its climax?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

In [None]:
# English-language evaluation question: run it through the RetrievalQA chain,
# then print the trimmed answer and the `source` metadata of each returned document.
query = "What was the demand of the students and intellectuals of East Pakistan?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

In [None]:
# English-language evaluation question: run it through the RetrievalQA chain,
# then print the trimmed answer and the `source` metadata of each returned document.
query = "Who were the key players in the Language Movement?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

In [None]:
# English-language evaluation question: run it through the RetrievalQA chain,
# then print the trimmed answer and the `source` metadata of each returned document.
# NOTE(review): this exact question is also asked earlier in the notebook —
# confirm the repeat is intentional.
query = "What responsibility was entrusted to Liaquat Ali Khan after the decision to partition of India was taken?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

## Easy p2

In [None]:
# English-language evaluation question: run it through the RetrievalQA chain,
# then print the trimmed answer and the `source` metadata of each returned document.
query = "Was the six-point demand for East Pakistani autonomy a historic demand?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

In [None]:
# English-language evaluation question: run it through the RetrievalQA chain,
# then print the trimmed answer and the `source` metadata of each returned document.
query = "What was Yahya Khan's role in Pakistan?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

In [None]:
# English-language evaluation question: run it through the RetrievalQA chain,
# then print the trimmed answer and the `source` metadata of each returned document.
# NOTE(review): this exact question is also asked earlier in the notebook —
# confirm the repeat is intentional.
query = "How many seats did the Awami League win in the 1970 elections in East Pakistan?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

In [None]:
# English-language evaluation question: run it through the RetrievalQA chain,
# then print the trimmed answer and the `source` metadata of each returned document.
query = "Who is Liaquat Ali Khan?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

In [None]:
# English-language evaluation question: run it through the RetrievalQA chain,
# then print the trimmed answer and the `source` metadata of each returned document.
# NOTE(review): this exact question is also asked earlier in this section —
# confirm the repeat is intentional.
query = "What was Yahya Khan's role in Pakistan?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

In [None]:
# English-language evaluation question: run it through the RetrievalQA chain,
# then print the trimmed answer and the `source` metadata of each returned document.
# NOTE(review): this exact question is also asked earlier in the notebook —
# confirm the repeat is intentional.
query = "How many seats did the Awami League win in the 1970 elections in East Pakistan?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

In [None]:
# English-language evaluation question: run it through the RetrievalQA chain,
# then print the trimmed answer and the `source` metadata of each returned document.
query = "Who started a different resistance movement?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

In [None]:
# English-language evaluation question: run it through the RetrievalQA chain,
# then print the trimmed answer and the `source` metadata of each returned document.
query = "Who replaced Yahya Khan as the leader of Pakistan after the conflict?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

In [None]:
# English-language evaluation question: run it through the RetrievalQA chain,
# then print the trimmed answer and the `source` metadata of each returned document.
query = "What was the Bengali Razakar force called?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

## Hard p1

In [None]:
# English-language evaluation question: run it through the RetrievalQA chain,
# then print the trimmed answer and the `source` metadata of each returned document.
# NOTE(review): this exact question is also asked earlier in the notebook —
# confirm the repeat is intentional.
query = "What was the impact of the partition on migration?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

In [None]:
# English-language evaluation question: run it through the RetrievalQA chain,
# then print the trimmed answer and the `source` metadata of each returned document.
query = "What was the attitude of the governments of India and Pakistan towards the migration during the partition?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

In [None]:
# English-language evaluation question: run it through the RetrievalQA chain,
# then print the trimmed answer and the `source` metadata of each returned document.
query = "Did the migration during the partition result in the formation of new communities and identities?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

In [None]:
# English-language evaluation question: run it through the RetrievalQA chain,
# then print the trimmed answer and the `source` metadata of each returned document.
query = "Was the migration during the partition a spontaneous or organized process?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

In [None]:
# English-language evaluation question: run it through the RetrievalQA chain,
# then print the trimmed answer and the `source` metadata of each returned document.
query = "What were some of the reasons for Bengali resentment towards the Pakistani government?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

In [None]:
# English-language evaluation question: run it through the RetrievalQA chain,
# then print the trimmed answer and the `source` metadata of each returned document.
query = "What was the attitude of the Pakistani government towards the appropriation of provincial functions and revenue?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

In [None]:
# English-language evaluation question: run it through the RetrievalQA chain,
# then print the trimmed answer and the `source` metadata of each returned document.
query = "What was the overriding common interest of the two parts of Pakistan?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

In [None]:
# English-language evaluation question: run it through the RetrievalQA chain,
# then print the trimmed answer and the `source` metadata of each returned document.
query = "What was the result of Ghulam Mohammad's consolidation of power?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

In [None]:
# English-language evaluation question: run it through the RetrievalQA chain,
# then print the trimmed answer and the `source` metadata of each returned document.
query = "What was the common interest that the two parts of Pakistan shared in the early years of the country's formation?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

In [None]:
# English-language evaluation question: run it through the RetrievalQA chain,
# then print the trimmed answer and the `source` metadata of each returned document.
query = "What was the role of the United States in the conflict between India and Pakistan?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

## Medium p1

In [None]:
# English-language evaluation question: run it through the RetrievalQA chain,
# then print the trimmed answer and the `source` metadata of each returned document.
query = "How were the frontiers of East Bengal ultimately determined?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

In [None]:
# English-language evaluation question: run it through the RetrievalQA chain,
# then print the trimmed answer and the `source` metadata of each returned document.
query = "What was the outcome of the referendum in Sylhet?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

In [None]:
# English-language evaluation question: run it through the RetrievalQA chain,
# then print the trimmed answer and the `source` metadata of each returned document.
query = "Was the delimitation of East Bengal a smooth and uncontroversial process?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

In [None]:
# English-language evaluation question: run it through the RetrievalQA chain,
# then print the trimmed answer and the `source` metadata of each returned document.
query = "Why did people move into what they perceived as a place of refuge?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

In [None]:
# English-language evaluation question: run it through the RetrievalQA chain,
# then print the trimmed answer and the `source` metadata of each returned document.
query = "Was migration during the partition a one-time event or a prolonged process?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

In [None]:
# English-language evaluation question: run it through the RetrievalQA chain,
# then print the trimmed answer and the `source` metadata of each returned document.
query = "What was the main reason for the migration during the partition?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

In [None]:
# English-language evaluation question: run it through the RetrievalQA chain,
# then print the trimmed answer and the `source` metadata of each returned document.
query = "What was the initial system of government in Pakistan?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

In [None]:
# English-language evaluation question: run it through the RetrievalQA chain,
# then print the trimmed answer and the `source` metadata of each returned document.
query = "What was the common interest of the two parts of Pakistan?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

In [None]:
# English-language evaluation question: run it through the RetrievalQA chain,
# then print the trimmed answer and the `source` metadata of each returned document.
query = "What was the outcome of Mujib's demand for East Pakistani autonomy?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

## Easy p1

In [None]:
# English-language evaluation question: run it through the RetrievalQA chain,
# then print the trimmed answer and the `source` metadata of each returned document.
query = "What were the boundaries of East Bengal based on?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

In [None]:
# English-language evaluation question: run it through the RetrievalQA chain,
# then print the trimmed answer and the `source` metadata of each returned document.
query = "What language did Jinnah and his advisers believe could achieve unification in Pakistan?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

In [None]:
# English-language evaluation question: run it through the RetrievalQA chain,
# then print the trimmed answer and the `source` metadata of each returned document.
query = "Who was appointed as the prime minister after the adoption of the new constitution, in east Pakistan?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

In [None]:
# English-language evaluation question: run it through the RetrievalQA chain,
# then print the trimmed answer and the `source` metadata of each returned document.
query = "What country was Pakistan part of in 1947?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

In [None]:
# English-language evaluation question: run it through the RetrievalQA chain,
# then print the trimmed answer and the `source` metadata of each returned document.
query = "Who became the leading figure of the Awami League after Suhrawardy's death in 1963?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

In [None]:
# English-language evaluation question: run it through the RetrievalQA chain,
# then print the trimmed answer and the `source` metadata of each returned document.
query = "What was Mujibur Rahman's popular name?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

In [None]:
# English-language evaluation question: run it through the RetrievalQA chain,
# then print the trimmed answer and the `source` metadata of each returned document.
query = "Who ordered the elections in Pakistan in December 1970?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

In [None]:
# English-language evaluation question: run it through the RetrievalQA chain,
# then print the trimmed answer and the `source` metadata of each returned document.
query = "Was equal recruitment from the two wings national policy?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

In [None]:
# English-language evaluation question: run it through the RetrievalQA chain,
# then print the trimmed answer and the `source` metadata of each returned document.
query = "Who came to power in Pakistan in 1958?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

In [None]:
# English-language evaluation question: run it through the RetrievalQA chain,
# then print the trimmed answer and the `source` metadata of each returned document.
query = "Who was the leader of the Pakistan People's Party in the 1970 elections?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

## QA from Excel

In [None]:
# import pandas as pd

# # Read the Excel file
# df = pd.read_excel('your_file_name.xlsx')

# # Iterate over each row in the dataframe
# for index, row in df.iterrows():
#     # Get the query from the 'Question' column
#     query = row['Question']

#     # Get the LLM response
#     llm_response = qa_chain(query)

#     # Process the LLM response
#     answer = process_llm_response(llm_response)

#     # Write the answer to the 'EkattorAns' column
#     df.at[index, 'EkattorAns'] = answer

# # Save the modified dataframe to a new Excel file
# df.to_excel('modified_file_name.xlsx', index=False)
