In [46]:
# Import necessary libraries
import warnings
from dotenv import load_dotenv
import pdfplumber
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.docstore.document import Document
from langchain import hub
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains.retrieval import create_retrieval_chain
from langchain_groq import ChatGroq

In [47]:
# Load environment variables
load_dotenv()
warnings.filterwarnings("ignore")

In [48]:
# Define the hate crimes types

HATE_CRIMES_TYPE = {
    'anti_religious_def.pdf': 'Anti-religious Hate Crime',
    'racist_def.pdf': 'Racist and Xenophobic Hate Crime',
    'gender_lgbt_def.pdf': 'Gender and LGBTQ+ Hate Crime'
}

In [49]:
# Step 1: Load PDFs using pdfplumber
def extract_text_from_pdf(pdf_path):
    """
    Extracts text from a PDF using pdfplumber and returns a list of Document objects.
    """
    extracted_text = ""
    with pdfplumber.open(pdf_path) as pdf:
        for page in pdf.pages:
            page_text = page.extract_text()
            if page_text:
                extracted_text += page_text + "\n"
    return [Document(page_content=extracted_text, metadata={"source": pdf_path})]

In [50]:
# Load PDFs for each category
anti_religious_docs = extract_text_from_pdf('/Users/sayo/personal_projects/Usafe_bot/data/anti_religious_def.pdf')
gender_lgbt_docs = extract_text_from_pdf('/Users/sayo/personal_projects/Usafe_bot/data/gender_lgbt_def.pdf')
racist_docs = extract_text_from_pdf('/Users/sayo/personal_projects/Usafe_bot/data/racist_def.pdf')

In [51]:
# Step 2: Chunk Documents
def chunk_documents(documents, chunk_size=1000, chunk_overlap=100):
    """
    Splits documents into smaller chunks for embedding.
    """
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    return text_splitter.split_documents(documents)


In [53]:
# Chunk each document category
anti_religious_chunks = chunk_documents(anti_religious_docs)
gender_lgbt_chunks = chunk_documents(gender_lgbt_docs)
racist_chunks = chunk_documents(racist_docs)

# Print the number of chunks for each document
print(f"Number of chunks in anti-religious document: {len(anti_religious_chunks)}")
print(f"Number of chunks in gender/LGBT document: {len(gender_lgbt_chunks)}")
print(f"Number of chunks in racist document: {len(racist_chunks)}")

# Calculate total number of chunks across all documents
total_chunks = len(racist_chunks) + len(gender_lgbt_chunks) + len(anti_religious_chunks)
print(f"Total number of chunks: {total_chunks}")

Number of chunks in anti-religious document: 167
Number of chunks in gender/LGBT document: 42
Number of chunks in racist document: 37
Total number of chunks: 246


In [54]:
all_chunks = anti_religious_chunks + gender_lgbt_chunks + racist_chunks

In [55]:
# Step 3: Create Embedding Vector Store
def create_vector_store(chunks, data_base_name='usafe_combined'):
    """
    Creates a vector store using HuggingFace embeddings and saves it locally.
    """
    embedding_model = HuggingFaceEmbeddings(model_name='sentence-transformers/all-mpnet-base-v2')
    vector_store = FAISS.from_documents(chunks, embedding=embedding_model)
    vector_store.save_local(f"./vector_databases/{data_base_name}")
    print(f"Vector store '{data_base_name}' created and saved.")

In [56]:
# Create vector store for combined data
create_vector_store(all_chunks, data_base_name='usafe_combined')

Vector store 'usafe_combined' created and saved.


In [57]:
# Step 4: Load Vector Store and Create Retriever
def load_vector_store(data_base_path):
    """
    Loads a vector store from a local directory and returns a retriever.
    """
    embedding_model = HuggingFaceEmbeddings(model_name='sentence-transformers/all-mpnet-base-v2')
    vector_store = FAISS.load_local(folder_path=data_base_path, embeddings=embedding_model, allow_dangerous_deserialization=True)
    return vector_store.as_retriever()

In [58]:
# Initialize retriever for combined data
retriever_combined = load_vector_store('./vector_databases/usafe_combined')
retriever_general = load_vector_store('./vector_databases/usafe_general')

In [59]:
print(type(retriever_combined))
print(type(retriever_general))

<class 'langchain_core.vectorstores.base.VectorStoreRetriever'>
<class 'langchain_core.vectorstores.base.VectorStoreRetriever'>


In [14]:
#with open('/Users/sayo/personal_projects/Usafe_bot/data/usafe_prompt.txt', 'r') as file:
    #user_prompt = file.read()

#print(user_prompt)

In [60]:
def connect_chains(retriever):
    """
    this function connects stuff_documents_chain with retrieval_chain
    """
    stuff_documents_chain = create_stuff_documents_chain(
        llm=llm,
        prompt=hub.pull("langchain-ai/retrieval-qa-chat")
    )
    retrieval_chain = create_retrieval_chain(
        retriever=retriever,
        combine_docs_chain=stuff_documents_chain
    )
    return retrieval_chain

In [61]:
# Step 5: Initialize LLM
def initialize_llm(model_name="llama3-8b-8192"):
    """
    Initializes the LLM model with specified configurations.
    """
    return ChatGroq(model=model_name, temperature=0.02, max_tokens=None, timeout=None, max_retries=2)

llm = initialize_llm()


In [63]:
# Step 6: Setup Retrieval Chain
def setup_retrieval_chain(retriever):
    """
    Sets up the document chain and retrieval chain.
    """
    stuff_chain = create_stuff_documents_chain(llm=llm, prompt=hub.pull("langchain-ai/retrieval-qa-chat"))
    return create_retrieval_chain(retriever=retriever, combine_docs_chain=stuff_chain)

In [64]:
usafe_combined_retrieval_chain = setup_retrieval_chain(retriever_combined)
usafe_general_retrieval_chain = setup_retrieval_chain(retriever_general)

In [72]:
# Step 7: Detect Hate Crime Type
def detect_hate_crime_type(inquiry, retrieval_chain=usafe_combined_retrieval_chain):
    """
    Detects the type of hate crime based on user input
    """
    result = retrieval_chain.invoke({"input": inquiry})
    detected_type = HATE_CRIMES_TYPE.get(result['context'][0].dict()['metadata']['source'].split('/')[-1], "Unknown")
    return detected_type

In [80]:
# Step 8: Interactive Query Function
def handle_user_query(inquiry):
    """
    Handles user query by detecting hate crime type and offering options.
    """
    detected_type = detect_hate_crime_type(inquiry)
    print(f"\nDetected Hate Crime Type: {detected_type}")

    # Present user options
    print("\nWhat information would you like to access?")
    print("1. Relevant Laws Germany")
    print("2. Local Resources and Support")
    print("3. Steps to Report a Crime in Germany")
    print("4. Generic Information")
    
    option = input("Enter your choice (1-4): ")

    # Define the query based on user selection
    pdf_query = ""
    vector_store_to_query = None
    
    if option == '1':
        pdf_query = "Relevant laws related to hate crimes in Germany"
        vector_store_to_query = usafe_general_retrieval_chain  
    elif option == '2':
        pdf_query = "Local resources: NGOs, Legal Aid, Counseling, etc to support hate crime victims"
        vector_store_to_query = usafe_general_retrieval_chain
    elif option == '3':
        pdf_query = "Steps on how to report a hate crime in Germany"
        vector_store_to_query = usafe_general_retrieval_chain
    elif option == '4':
        pdf_query = "General information on hate crimes, psychological effects, and resources"
        vector_store_to_query = usafe_general_retrieval_chain  
    else:
        print("Invalid option. Please try again.")
        return

    # Retrieve and print response from the selected vector store
    if vector_store_to_query:
        response = vector_store_to_query.invoke({"input": pdf_query})
        print("\nResponse:\n", response['answer'].strip("\n"))



In [81]:
user_input = "I was walking on the streets and got attacked for holding hands with my partner."
handle_user_query(user_input)


Detected Hate Crime Type: Gender and LGBTQ+ Hate Crime

What information would you like to access?
1. Relevant Laws Germany
2. Local Resources and Support
3. Steps to Report a Crime in Germany
4. Generic Information

Response:
 According to the provided context, the relevant laws related to hate crimes in Germany are:

1. German Criminal Code (StGB):
	* Section 46 StGB: Allows courts to consider discriminatory motives (e.g., hatred based on race, religion, or sexual orientation) when sentencing.
	* Section 130 StGB: Criminalizes incitement to hatred, hate speech, and violent actions against people based on race, ethnicity, or nationality.
	* Section 166 StGB: Criminalizes the public defamation of religious groups or beliefs if it poses a risk to public peace.
2. Basic Law (Grundgesetz, GG):
	* Article 3: Prohibits discrimination based on race, ethnicity, or national origin.
	* Article 4: Ensures freedom of religion.
3. International Convention on the Elimination of All Forms of Racial

In [73]:
# Query based on context
user_input = "i got, How do I report a hate crime?"
if "report" in user_input:
    response = usafe_general_retrieval_chain.invoke({"input": user_input})
else:
    response = usafe_combined_retrieval_chain.invoke({"input": user_input})

print(response['answer'])

According to the provided context, you can report a hate crime in Germany by following these steps:

1. Document the Incident:
	* Gather as much information as possible about the event, including:
		+ Photos or Videos: Visual evidence of the incident or its aftermath.
		+ Witness Statements: Contact information and statements from any witnesses.
		+ Descriptions: Detailed accounts of what happened, including time, location, and any identifying features of the perpetrator.
2. Preserve Digital Evidence:
	* Save any online messages, emails, or social media posts related to the incident. Screenshots can be valuable evidence if hate speech or threats were made online.
3. Visit Your Local Police Station:
	* Bring all collected documentation with you. Explain the details of the incident, and let the officer know you believe it to be a hate crime.
4. Report Online (Optional):
	* If you’re unable to visit the police station, you may be able to file a report online through local authorities’ web

In [66]:
print(detect_hate_crime_type("Somebody punched me in the face because i'm jewish"))

Racist and Xenophobic Hate Crime


In [41]:
def handle_user_query(inquiry, relevant_laws=False, resources_available=False, 
                      steps_how_to_report_crime=False, general_info=False):
    """
    Handles user query by detecting hate crime type and responding based on predefined flags.
    """
    detected_type = detect_hate_crime_type(inquiry)
    print(f"\nDetected Hate Crime Type: {detected_type}")

    # Respond based on the Boolean flags
    if relevant_laws:
        print("\nYou selected: Relevant Laws Germany.")
    if resources_available:
        print("\nYou selected: Local Resources and Support.")
    if steps_how_to_report_crime:
        print("\nYou selected: Steps to Report a Crime in Germany.")
    if general_info:
        print("\nYou selected: Generic Information.")

    # If none of the flags are True
    if not any([relevant_laws, resources_available, steps_how_to_report_crime, general_info]):
        print("No valid information selected. Please review your flags.")

# Example usage:
handle_user_query(
    inquiry="I have experienced discrimination at work.",
    relevant_laws=False,
    resources_available=False,
    steps_how_to_report_crime=False,
    general_info=True
)


Detected Hate Crime Type: Gender and LGBTQ+ Hate Crime

You selected: Generic Information.


In [76]:
# Step 8: Interactive Query Function
def handle_user_query(inquiry):
    """
    Handles user query by detecting hate crime type and offering options.
    """
    detected_type = detect_hate_crime_type(inquiry)
    print(f"\nDetected Hate Crime Type: {detected_type}")

    # Present user options
    print("\nWhat information would you like to access?")
    print("1. Relevant Laws Germany")
    print("2. Local Resources and Support")
    print("3. Steps to Report a Crime in Germany")
    print("4. Generic Information")
    
    option = input("Enter your choice (1-4): ")

    # using the option selected by the user to query the pdf
    pdf_query = ""
    if option == '1':
        pdf_query = "Relevant laws related to hate crimes in Germany"
    elif option == '2':
        pdf_query = "Local resources: NGOs, Legal Aid, Counseling, etc to support hate crime victims"
    elif option == '3':
        pdf_query = "Steps on how to report a hate crime in Germany"
    elif option == '4':
        pdf_query = "General information on hate crimes, psychological effects, and resources"
    else:
        print("Invalid option. Please try again.")
        return
    
     # Add metadata to specify the PDF file you want to query
    metadata_filter = {"pdf_name": "general.pdf"}

    # Retrieve and print response from the vector store
    response = usafe_retrieval_chain.invoke({"input": pdf_query})
    print("\nResponse:\n", response['answer'].strip("\n"))

In [77]:
handle_user_query("I was called a monkey and beaten up by a group of people")


Detected Hate Crime Type: Racist and Xenophobic Hate Crime

What information would you like to access?
1. Relevant Laws Germany
2. Local Resources and Support
3. Steps to Report a Crime in Germany
4. Generic Information

Response:
 Based on the context, here are some local resources that may be available to support hate crime victims:

1. NGOs:
	* Organizations that provide support to asylum seekers and refugees, such as the Asylum Seekers' Association or the Refugee Council.
	* Anti-racism organizations, such as the Anti-Racism Network or the European Network Against Racism.
2. Legal Aid:
	* The local bar association or legal aid organization may provide free or low-cost legal assistance to hate crime victims.
	* The police may also have a dedicated unit or officer who can provide legal support and guidance to victims.
3. Counseling:
	* The local mental health services or counseling centers may offer trauma counseling and support to hate crime victims.
	* Organizations that provide s

In [53]:
# Step 8: Interactive Query Function (No Response Retrieval)
def handle_user_query(inquiry):
    """
    Handles user query by detecting hate crime type and offering options.
    """
    detected_type = detect_hate_crime_type(inquiry)
    print(f"\nDetected Hate Crime Type: {detected_type}")

    # Present user options
    print("\nWhat information would you like to access?")
    print("1. Relevant Laws Germany")
    print("2. Local Resources and Support")
    print("3. Steps to Report a Crime in Germany")
    print("4. Generic Information")
    
    option = input("Enter your choice (1-4): ")

    # Confirm the selected option without invoking a response
    if option == '1':
        print("\nYou selected: Relevant Laws Germany.")
    elif option == '2':
        print("\nYou selected: Local Resources and Support.")
    elif option == '3':
        print("\nYou selected: Steps to Report a Crime in Germany.")
    elif option == '4':
        print("\nYou selected: Generic Information.")
    else:
        print("Invalid option. Please try again.")

In [54]:
handle_user_query("I was attacked because of my sexual orientation. What can I do?")


Detected Hate Crime Type: Gender and LGBTQ+ Hate Crime

What information would you like to access?
1. Relevant Laws Germany
2. Local Resources and Support
3. Steps to Report a Crime in Germany
4. Generic Information

You selected: Relevant Laws Germany.


In [57]:
def detect_hate_type(
    inquiry,
    retrieval_chain=react_retrieval_chain
):
    result = retrieval_chain.invoke({"input": inquiry})
    hate_type = HATE_CRIMES_TYPE[result['context'][0].dict()['metadata']['source'].split('/')[-1]]
    return hate_type


NameError: name 'react_retrieval_chain' is not defined

In [58]:
hate_type = 'anti-religious'

In [None]:
print(detect_hate_type("I was attacked because of my religion"))

anti-religious hate crime


In [None]:
hate_type=detect_hate_type("")

In [62]:
relevant_laws = False
resources_available = False
steps_how_to_report_crime = False 
general_info = True


In [63]:
query = f"""
    I have been facing a hate crime of type {hate_type}.
    {'Please give me some legal advice.' * relevant_laws}
    {'Please tell me what are the local resources available.' * resources_available}
    {'Please explain the steps on how to report a hate crime.' * steps_how_to_report_crime}
    {'Please provide me with some general information.' * general_info}
    """

In [64]:
query

'\n    I have been facing a hate crime of type anti-religious.\n    \n    \n    \n    Please provide me with some general information.\n    '

In [66]:
import re

In [69]:
query = re.sub(r'\n', '', query)