In [1]:
from dotenv import load_dotenv
load_dotenv()

True

In [2]:
# Lookup from a source PDF's file name to the hate-crime category label.
HATE_CRIMES_TYPE = dict([
    ('anti_religious_def.pdf', 'anti-religious hate crime'),
    ('racist_def.pdf', 'racist and xenophobic hate crime'),
    ('gender_lgbt_def.pdf', 'gender and lgbt hate crime'),
])

In [3]:
from langchain_community.document_loaders import PyPDFLoader

def load_pdf_data(pdf_path):
    """
    Read a PDF from disk and return the documents produced by PyPDFLoader.

    pdf_path: location of the PDF file to load.
    """
    return PyPDFLoader(file_path=pdf_path).load()

In [4]:

import os
from pathlib import Path

# Folder holding the source PDFs. Set the USAFE_DATA_DIR environment variable to
# point somewhere else; the fallback is the path this notebook was written against.
DATA_DIR = Path(os.environ.get('USAFE_DATA_DIR', '/Users/sayo/personal_projects/Usafe_bot/data'))

# Paths are passed as plain strings so the 'source' metadata recorded by the
# loader keeps the same form as before.
anti_religious = load_pdf_data(pdf_path=str(DATA_DIR / 'anti_religious_def.pdf'))
gender_lgbt = load_pdf_data(pdf_path=str(DATA_DIR / 'gender_lgbt_def.pdf'))
general = load_pdf_data(pdf_path=str(DATA_DIR / 'general.pdf'))
racist = load_pdf_data(pdf_path=str(DATA_DIR / 'racist_def.pdf'))



In [5]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

def split_documents(documents, chunk_size=800, chunk_overlap=80):
    """
    Break the given documents into overlapping chunks.

    chunk_size and chunk_overlap are forwarded to RecursiveCharacterTextSplitter;
    the return value is whatever its split_documents call produces.
    """
    splitter_options = {"chunk_size": chunk_size, "chunk_overlap": chunk_overlap}
    return RecursiveCharacterTextSplitter(**splitter_options).split_documents(documents=documents)

In [6]:
# Chunk every loaded PDF using the splitter defaults
anti_religious_chunks = split_documents(anti_religious)
gender_lgbt_chunks = split_documents(gender_lgbt)
general_chunks = split_documents(general)
racist_chunks = split_documents(racist)

# Report how many chunks each PDF produced
print(f"Number of chunks in anti-religious document: {len(anti_religious_chunks)}")
print(f"Number of chunks in gender/LGBT document: {len(gender_lgbt_chunks)}")
print(f"Number of chunks in general document: {len(general_chunks)}")
print(f"Number of chunks in racist document: {len(racist_chunks)}")

# Grand total over the four chunk lists
total_chunks = sum(
    len(chunk_list)
    for chunk_list in (general_chunks, racist_chunks, gender_lgbt_chunks, anti_religious_chunks)
)
print(f"Total number of chunks: {total_chunks}")

Number of chunks in anti-religious document: 226
Number of chunks in gender/LGBT document: 58
Number of chunks in general document: 23
Number of chunks in racist document: 48
Total number of chunks: 355


In [7]:
# One flat list holding the chunks of all four PDFs, in this order.
all_chunks = [
    *anti_religious_chunks,
    *gender_lgbt_chunks,
    *general_chunks,
    *racist_chunks,
]

In [8]:
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

def create_embedding_vector_db(chunks, db_name):
    """
    Embed the chunks with a HuggingFaceEmbeddings model, index them in a FAISS
    vector store, and write that store to disk.

    chunks: documents to embed and index.
    db_name: suffix of the output folder, ./vector_databases/vector_db_<db_name>.

    Nothing is returned; the saved folder is the result.
    """
    encoder = HuggingFaceEmbeddings(
        model_name='sentence-transformers/all-mpnet-base-v2'
    )
    # build the index and persist it in one go
    FAISS.from_documents(documents=chunks, embedding=encoder).save_local(
        f"./vector_databases/vector_db_{db_name}"
    )

In [9]:
# Embed all chunks and save them under ./vector_databases/vector_db_usafe_combined
create_embedding_vector_db(chunks=all_chunks, db_name='usafe_combined')

  from tqdm.autonotebook import tqdm, trange


In [10]:
def retrieve_from_vector_db(vector_db_path):
    """
    Open a FAISS vector database saved on disk and return it as a retriever.

    vector_db_path: folder the database was saved to.
    """
    # same embedding model name that create_embedding_vector_db uses
    encoder = HuggingFaceEmbeddings(
        model_name='sentence-transformers/all-mpnet-base-v2'
    )
    # SECURITY: allow_dangerous_deserialization=True opts in to pickle-based
    # loading, so only point this at database folders you created yourself.
    store = FAISS.load_local(
        folder_path=vector_db_path,
        embeddings=encoder,
        allow_dangerous_deserialization=True
    )
    return store.as_retriever()

In [11]:
# Load the saved combined FAISS database and expose it as a retriever
combined_retriever = retrieve_from_vector_db(vector_db_path='./vector_databases/vector_db_usafe_combined')

In [12]:
# Inspect which class the retriever object is
type(combined_retriever)

langchain_core.vectorstores.base.VectorStoreRetriever

In [None]:
#with open('/Users/sayo/personal_projects/Usafe_bot/data/usafe_prompt.txt', 'r') as file:
    #user_prompt = file.read()

#print(user_prompt)

## Generation 

[`create_stuff_documents_chain`](https://api.python.langchain.com/en/latest/chains/langchain.chains.combine_documents.stuff.create_stuff_documents_chain.html#langchain.chains.combine_documents.stuff.create_stuff_documents_chain)

- takes a list of documents and formats them all into a prompt, then passes that prompt to an LLM
- passes ALL documents, so make sure they fit within the context window of the LLM being used

[`create_retrieval_chain`](https://api.python.langchain.com/en/latest/chains/langchain.chains.retrieval.create_retrieval_chain.html#langchain.chains.retrieval.create_retrieval_chain)

- takes in a user inquiry, which is then passed to the retriever to fetch relevant documents
- those documents (and original inputs) are then passed to an LLM to generate a response

In [13]:
from langchain import hub
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains.retrieval import create_retrieval_chain

In [14]:
def connect_chains(retriever, chat_model=None):
    """
    Build a retrieval chain: a stuff-documents chain wrapped by a retrieval chain.

    retriever: retriever that supplies documents for each inquiry.
    chat_model: chat model used for generation. When omitted, the notebook-level
        `llm` is used, so existing single-argument calls behave as before.

    Returns the chain produced by create_retrieval_chain.
    """
    if chat_model is None:
        # Resolved at call time, so `llm` only has to exist before the first call.
        chat_model = llm
    stuff_documents_chain = create_stuff_documents_chain(
        llm=chat_model,
        # prompt is pulled from the LangChain hub on every call
        prompt=hub.pull("langchain-ai/retrieval-qa-chat")
    )
    retrieval_chain = create_retrieval_chain(
        retriever=retriever,
        combine_docs_chain=stuff_documents_chain
    )
    return retrieval_chain

In [15]:
import warnings
warnings.filterwarnings("ignore")
from langchain_groq import ChatGroq

# Chat model that connect_chains uses for answer generation.
# NOTE(review): confirm the model id below is still served by Groq.
llm = ChatGroq(
    model="llama3-8b-8192",
    temperature=0.02,
    max_tokens=None,
    timeout=None,
    max_retries=2
)

In [16]:
# Build the retrieval chain on top of the combined retriever
react_retrieval_chain = connect_chains(combined_retriever)

In [56]:
def print_output(
    inquiry,
    retrieval_chain=react_retrieval_chain
):
    """
    Send the inquiry through the retrieval chain and print the 'answer' text,
    with newline characters trimmed from both ends.
    """
    answer_text = retrieval_chain.invoke({"input": inquiry})['answer']
    print(answer_text.strip("\n"))

In [57]:
# Try the chain on a definitional question
print_output("What is a hate crime?")

According to the provided context, a hate crime (also known as a bias crime) is a crime where a perpetrator targets a victim due to their physical appearance or perceived membership in a specific social group. These groups may include race, ethnicity, disability, language, nationality, political views, age, religion, sex, gender identity, or sexual orientation.


In [58]:
# Try the chain on a first-person request for advice
print_output("i was harassed because i'm a muslim, what shall i do?")

I'm so sorry to hear that you're experiencing harassment because of your religion. It's unacceptable and illegal. Here are some steps you can take:

1. Report the incident to the authorities: File a police report and provide as much detail as possible about the incident, including the date, time, location, and any witnesses.
2. Seek support from a trusted friend or family member: Talking to someone you trust can help you process your emotions and feel supported.
3. Reach out to a Muslim organization or community center: Many Muslim organizations and community centers have resources and support services for victims of harassment and discrimination.
4. Consider seeking counseling: Harassment and discrimination can be emotionally draining, and counseling can help you cope with the emotional impact.
5. Document the incident: Keep a record of the incident, including any evidence you have, such as photos, videos, or witness statements.
6. Consider reporting the incident to a hate crime hotli

In [17]:
def detect_hate_type(
    inquiry,
    retrieval_chain=react_retrieval_chain
):
    """
    Label an inquiry by the source PDF of the top document the chain retrieved.

    inquiry: the user's text.
    retrieval_chain: chain whose result holds the retrieved documents under 'context'.

    Returns the HATE_CRIMES_TYPE label for the top document's file name;
    "Unknown Hate Crime" when that file has no entry in HATE_CRIMES_TYPE
    (general.pdf is indexed but has none); "No relevant information found."
    when nothing was retrieved or the top document has no source.
    """
    result = retrieval_chain.invoke({"input": inquiry})
    context = result.get('context') or []
    if not context:
        return "No relevant information found."
    # read the metadata attribute directly rather than via the .dict() dump
    source_file = context[0].metadata.get('source', '')
    if not source_file:
        return "No relevant information found."
    return HATE_CRIMES_TYPE.get(source_file.split('/')[-1], "Unknown Hate Crime")


In [44]:
# Try the detector on a religion-related inquiry
print(detect_hate_type("I was attacked because of my religion"))

anti-religious hate crime


In [46]:
# NOTE(review): the inquiry passed here is an empty string — confirm this is intended
hate_type=detect_hate_type("")

In [47]:
# Switches selecting which request sentences get added to the query text
relevant_laws = False
resources_available = False
steps_how_to_report_crime = False 
general_info = False


In [48]:
    def generate_dynamic_query(
        hate_type,
        relevant_laws=False,
        resources_available=False,
        steps_how_to_report_crime=False,
        general_info=False,
    ):
        """
        Build the query text for a given hate crime type.

        hate_type: label inserted into the opening sentence.
        relevant_laws, resources_available, steps_how_to_report_crime, general_info:
            switches; each one that is truthy adds its request sentence.

        Returns a string that starts and ends with a newline. Every request
        keeps its own line, so a switched-off request leaves an empty line.
        """
        requests = (
            ('Please give me some legal advice.', relevant_laws),
            ('Please tell me what are the local resources available.', resources_available),
            ('Please explain the steps on how to report a hate crime.', steps_how_to_report_crime),
            ('Please provide me with some general information.', general_info),
        )
        # Lines are joined explicitly so the text does not depend on how the
        # cell's source happens to be indented.
        lines = ['', f'I have been facing a hate crime of type {hate_type}.']
        lines.extend(sentence if wanted else '' for sentence, wanted in requests)
        lines.append('')
        return '\n'.join(lines)

    query = generate_dynamic_query(
        hate_type,
        relevant_laws=relevant_laws,
        resources_available=resources_available,
        steps_how_to_report_crime=steps_how_to_report_crime,
        general_info=general_info,
    )

In [49]:
# Display the assembled query string
query

'\nI have been facing a hate crime of type anti-religious hate crime.\n\n\n\n\n'

In [50]:
# Scratch check: a string multiplied by False is the empty string
'sayo'*False

''

In [51]:
# NOTE(review): `response` is assigned by a cell that appears further down in
# this file, so this cell only works after that cell has been run.
print(response)

Hate
Crime
Definition
●
A
hate
crime
(also
known
as
a
bias
crime)
is
a
crime
where
a
perpetrator
targets
a
victim
due
to
their
physical
appearance
or
perceived
membership
in
a
specific
social
group.
Such
groups
may
include
race,
ethnicity ,
disability ,
language,
nationality ,
political
views,
age,
religion,
sex,
gender
identity ,
or
sexual
orientation.
Non-criminal
actions
motivated
by
these
biases
are
often
termed
“bias
incidents.”
•
Examples
of
hate
crimes
include:
•
Physical
assault,
homicide,
damage
to
property
•
Bullying,
harassment,
verbal
abuse,
offensive
graffiti,
or
hate
mail
History
of
Hate
Crimes
•
Term
Origin:
The
term
“hate
crime”
gained
common
usage
in
the
U.S.
during
the
1980s,
although
similar
crimes
have
historical
roots.
•
Historical
Examples:
•
Roman
persecution
of


In [52]:
test_query = "What should I do if I am being harassed for being Muslim?"
# invoke() is the supported retriever entry point; get_relevant_documents() is deprecated
docs = combined_retriever.invoke(test_query)
print("Documents Retrieved:", docs)

Documents Retrieved: [Document(metadata={'source': '/Users/sayo/personal_projects/Usafe_bot/data/anti_religious_def.pdf', 'page': 1}, page_content='-\nA\nBlack\nMuslim\nwoman\nwas\nsubjected\nto\nracist\nand\nanti-Muslim\nthreats\nand\ninsults\non\na\ntrain.\n-\nA\nmale\nMuslim\nactivist\nreceived\ndeath\nthreats\nvia\nemail.\nThis\nwas\none\nin\na\nseries\nof\nsimilar\nincidents.\n-\nA\nmale\nMuslim\nactivist\nreceived\na\nletter\ncontaining\nanti-Muslim\nand\nxenophobic\ninsults\nand\nthreats,\nas\nwell\nas\nNazi\nsymbols.\nThis\nwas\none\nin\na\nseries\nof\nsimilar\nincidents.\n-\nA\nmale\nMuslim\nactivist\nwas\nrepeatedly\nthreatened\non\nTwitter.\nThis\nwas\none\nin\na\nseries\nof\nsimilar\nincidents.\n-\nA\nMuslim\nwoman\nwas\nsubjected\nto\nanti-Muslim\ninsults,\nthreatened\nwith\na\nknife\nand\nphysically\nassaulted\nby\na\nperpetrator\nwho\nattempted\nto\nremove\nher\nheadscarf.\n-\nA\nMuslim-owned\nshop\nwas\nvandalized\nand\na\nwindow\nwas\nshattered\nwhen\nreligious\nmusic\

In [53]:
def detect_hate_type(inquiry, retriever=combined_retriever):
    """
    Detect the hate crime type based on the user's inquiry.

    inquiry: the user's text.
    retriever: retriever queried for documents matching the inquiry.

    Returns the HATE_CRIMES_TYPE label for the top document's source file name,
    "Unknown Hate Crime" when that file name has no entry in HATE_CRIMES_TYPE,
    or "No relevant information found." when nothing is retrieved or the top
    document carries no 'source' metadata.
    """
    # invoke() replaces the deprecated get_relevant_documents()
    documents = retriever.invoke(inquiry)
    if documents:
        # Extract the hate crime type from the metadata of the first document
        source_file = documents[0].metadata.get('source', '')
        if source_file:
            return HATE_CRIMES_TYPE.get(source_file.split('/')[-1], "Unknown Hate Crime")
    return "No relevant information found."

In [54]:
def get_response_from_retrieval_chain(query, retriever=combined_retriever):
    """
    Retrieve documents for the query and return the text of the top one.

    query: text to search the vector store with.
    retriever: retriever queried for matching documents.

    Returns the page_content of the first retrieved document, or
    "No relevant information found." when nothing is retrieved. Only the
    retriever is used here; no LLM call is made.
    """
    # invoke() replaces the deprecated get_relevant_documents()
    documents = retriever.invoke(query)

    if documents:
        # Print the content of the first document for debugging
        print("First Document Content:", documents[0].page_content)
        return documents[0].page_content

    return "No relevant information found."

In [55]:
# Detect the hate crime type based on the inquiry
hate_type = detect_hate_type("i have being harassed for being muslim")
print("Detected Hate Type:", hate_type)

# Generate a dynamic query based on the detected hate crime type
# (pass the detected label itself; a literal False here would drop it)
query = generate_dynamic_query(hate_type=hate_type, relevant_laws=True)

# Fetch the top matching passage for the query (retriever only, no LLM generation)
response = get_response_from_retrieval_chain(query)
print("Response:", response)

Detected Hate Type: anti-religious hate crime
First Document Content: Hate
Crime
Definition
●
A
hate
crime
(also
known
as
a
bias
crime)
is
a
crime
where
a
perpetrator
targets
a
victim
due
to
their
physical
appearance
or
perceived
membership
in
a
specific
social
group.
Such
groups
may
include
race,
ethnicity ,
disability ,
language,
nationality ,
political
views,
age,
religion,
sex,
gender
identity ,
or
sexual
orientation.
Non-criminal
actions
motivated
by
these
biases
are
often
termed
“bias
incidents.”
•
Examples
of
hate
crimes
include:
•
Physical
assault,
homicide,
damage
to
property
•
Bullying,
harassment,
verbal
abuse,
offensive
graffiti,
or
hate
mail
History
of
Hate
Crimes
•
Term
Origin:
The
term
“hate
crime”
gained
common
usage
in
the
U.S.
during
the
1980s,
although
similar
crimes
have
historical
roots.
•
Historical
Examples:
•
Roman
persecution
of
Response: Hate
Crime
Definition
●
A
hate
crime
(also
known
as
a
bias
crime)
is
a
crime
where
a
perpetrator
targets
a
victim
due
to
the