In [1]:
# Install the necessary dependencies
!pip install xformer --quiet
!pip install chromadb --quiet
!pip install selenium --quiet
!pip install langchain --quiet
!pip install accelerate --quiet
!pip install transformers --quiet
!pip install bitsandbytes --quiet
!pip install unstructured --quiet
!pip install sentence-transformers --quiet

In [2]:
# Imports
import torch
from textwrap import fill
from langchain.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
    )
from langchain import PromptTemplate
from langchain import HuggingFacePipeline
from langchain.vectorstores import Chroma
from langchain.schema import AIMessage, HumanMessage
from langchain.memory import ConversationBufferMemory
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import UnstructuredMarkdownLoader, UnstructuredURLLoader
from langchain_community.document_loaders import UnstructuredURLLoader, SeleniumURLLoader
from langchain.chains import LLMChain, SimpleSequentialChain, RetrievalQA, ConversationalRetrievalChain
from transformers import BitsAndBytesConfig, AutoModelForCausalLM, AutoTokenizer, GenerationConfig, pipeline
import warnings
warnings.filterwarnings('ignore')

In [3]:
# Specify the model to be used
MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.1"

# Set the quantization configuration
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
)

# Initialize a tokenizer for the Mistral-7B model
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)
tokenizer.pad_token = tokenizer.eos_token

# Set up the model parameters
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME, torch_dtype=torch.float16,
    trust_remote_code=True,
    device_map="auto",
    quantization_config=quantization_config
)

# Specification of various generation parameters like max number of tokens
generation_config = GenerationConfig.from_pretrained(MODEL_NAME)
generation_config.max_new_tokens = 1024
generation_config.temperature = 0.0001
generation_config.top_p = 0.95
generation_config.do_sample = True
generation_config.repetition_penalty = 1.15

# Define the text generation pipeline
pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    return_full_text=True,
    generation_config=generation_config,
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [4]:
# The HuggingFacePipeline class is used to interact with the models in HuggingFace
llm = HuggingFacePipeline(
    pipeline=pipeline,
    )

In [5]:
# Trying a sample query that will be answered based on the general knowledge of the model from pretraining
query = "What are some of the most famous momuments in Greece?"
result = llm(
    query
)

print(query)
print(result)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


What are some of the most famous momuments in Greece?


## Answer (1)

The Parthenon is probably the most famous monument in Greece. It's located on the Acropolis in Athens and was built between 447-432 BC as a temple to the goddess Athena.

Another famous monument is the Colosseum, which is located in Rome but was built by the Romans who conquered Greece. The Colosseum was completed in 80 AD and could hold up to 50,000 spectators.

Other notable monuments include the Statue of Zeus at Olympia, the Temple of Apollo at Delphi, and the Mausoleum at Halicarnassus.


In [6]:
# Setting up an embedding model
# The model that will be used is the GTE model which is also hosted on HuggingFace
embeddings = HuggingFaceEmbeddings(
    model_name="thenlper/gte-large",
    model_kwargs={"device": "cuda"},
    encode_kwargs={"normalize_embeddings": True},
)

In [7]:
# Creating a prompt template to help guide the answers of the LLM
template = """
Act as a history teacher who is teaching high school students.

{text}
"""

prompt = PromptTemplate(
    input_variables=["text"],
    template=template,
)

In [8]:
# A sample query using the newly created prompt template
query = "Explain the basic concepts of democracy in ancient Greece in around 4-5 sentences."
result = llm(prompt.format(text=query))

print(query)
print(result)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Explain the basic concepts of democracy in ancient Greece in around 4-5 sentences.

Democracy in ancient Greece was based on the idea that every citizen had an equal say in government, regardless of their social status or wealth. This meant that all citizens were able to participate in decision making processes and vote on laws and policies. The concept of democracy was first introduced by Athenians in the 6th century BCE, and it quickly spread throughout other city-states in Greece. However, democracy was not without its flaws, and there were often conflicts between different groups with differing opinions. Despite these challenges, democracy remained a fundamental part of Greek society for centuries to come.


In [9]:
# Using some pages that were scraped from the internet as context. Selenium was used for the scraping
urls = [
    "https://en.wikipedia.org/wiki/Bechdel_test",
    "https://en.wikipedia.org/wiki/Alison_Bechdel",
]

loader = SeleniumURLLoader(urls=urls)
documents = loader.load()

len(documents)

2

In [10]:
# Splitting the scraped documents into smaller chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=64)
texts_chunks = text_splitter.split_documents(documents)

len(texts_chunks)

77

In [11]:
# Creating the ChromaDB vector store
db = Chroma.from_documents(texts_chunks, embeddings, persist_directory="db")

In [12]:
# Customizing the template to use the context from the vector store
template = """
Use the provided information to answer the question posed at the end. If the answer cannot be inferred from the context, then you should answer 'I don't know'.

{context}

{question}
"""

prompt = PromptTemplate(template=template, input_variables=["context", "question"])

# Setting up the QA chain
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    # k=2 means that the 2 most relevant chunks will be used for the generated answer
    retriever=db.as_retriever(search_kwargs={"k": 2}),
    return_source_documents=True,
    chain_type_kwargs={"prompt": prompt},
)

In [13]:
# Trying a new query with the provided context
query = "What is the Josephs test?"
result_ = qa_chain(
    query
)
result = result_["result"].strip()


print(query)
print(result)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


What is the Josephs test?
The Josephs test is a set of criteria that can be used to evaluate whether a work of fiction accurately represents the lives of Orthodox Jews. It was proposed by the nonprofit organization Jewish In The City following a controversy over misrepresentation of Orthodox Judaism in television. The test consists of four questions:

1. Are there any Orthodox characters who are emotionally and psychologically stable?
2. Are there characters who are Orthodox whose religious life is a characteristic but not a plot point or a problem?
3. Can the Orthodox character find their Happily Ever After as a religious Jew?
4. And if the main plot points are in conflict due to religious observance — are any characters not Hasidic or Haredi and have the writers actually researched authentic religious observance from practicing members of the community they are attempting to portray?


In [14]:
# Implementing memory for the conversation
custom_template = """You are an AI assistant. Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question. At the end of standalone question add this: 'Answer the question in English language.' If you do not know the answer reply with 'I am sorry, I dont have enough information'.
Chat History: {chat_history}
Follow Up Input: {question}
Standalone question:
"""

CUSTOM_QUESTION_PROMPT = PromptTemplate.from_template(custom_template)

memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

qa_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=db.as_retriever(search_kwargs={"k": 2}),
    memory=memory,
    condense_question_prompt=CUSTOM_QUESTION_PROMPT,
)

In [15]:
# Trying out a query
query = "What is the Josephs test?"
result_ = qa_chain({"question": query})
result = result_["answer"].strip()

print(query)
print(result)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


What is the Josephs test?
The Josephs Test is a set of criteria used to evaluate whether a fictional representation of Orthodox Jews accurately represents the community. It was proposed by the nonprofit organization Jewish In The City after a controversy over misrepresentation of Orthodox Judaism in television. The test includes four questions: Are there any Orthodox characters who are emotionally and psychologically stable? Are there characters who are Orthodox whose religious life is a characteristic but not a plot point or a problem? Can the Orthodox character find their Happily Ever After as a religious Jew? And if the main plot points are in conflict due to religious observance — are any characters not Hasidic or Haredi and have the writers actually researched authentic religious observance from practicing members of the community they are attempting to portray?


In [16]:
# Making sure that memory works as intended
query = "What other similar tests do you know of?"

result_ = qa_chain({"question": query})
result = result_["answer"].strip()

print(query)
print(result)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


What other similar tests do you know of?
There are no other similar tests specifically designed to evaluate the accuracy of fictional representations of religious communities. However, there are general guidelines and best practices for creating accurate and respectful representations of all communities, including religious ones. These include conducting research, consulting with members of the community being represented, avoiding stereotypes and caricatures, and striving for nuanced and complex portrayals. Additionally, some organizations may offer workshops or training sessions on how to create more accurate and respectful representations of specific communities.
