In [1]:
# Import necessary libraries
import warnings
from dotenv import load_dotenv
import pdfplumber
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.docstore.document import Document
from langchain import hub
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains.retrieval import create_retrieval_chain
from langchain_groq import ChatGroq

In [2]:
# Load variables from a .env file into the process environment (python-dotenv).
# NOTE(review): presumably this is where the Groq API key used by ChatGroq comes from — confirm.
load_dotenv()

True

In [3]:
# Suppress all Python warnings for the rest of the session.
# NOTE(review): a blanket "ignore" filter also hides deprecation notices from the
# libraries imported above; consider narrowing it to specific categories or modules.
warnings.filterwarnings("ignore")

In [69]:
# Step 1: PDF Extraction
def extract_text_from_pdf(pdf_path):
    """
    Reads every page of a PDF with pdfplumber and merges the text into one document.

    Pages for which pdfplumber returns no text (None or an empty string) are
    skipped; each remaining page's text is followed by a newline.

    Args:
        pdf_path: Location of the PDF; also stored as the document's "source" metadata.

    Returns:
        A one-element list holding a Document with the combined text.
    """
    with pdfplumber.open(pdf_path) as pdf:
        # Extract while the file is still open; the page objects belong to `pdf`.
        page_texts = [page.extract_text() for page in pdf.pages]

    combined_text = "".join(f"{text}\n" for text in page_texts if text)
    source_info = {"source": pdf_path}
    return [Document(page_content=combined_text, metadata=source_info)]

# Load the general PDF and wrap its extracted text in a single-Document list.
# NOTE(review): absolute, machine-specific path — the notebook only runs where this file
# exists at this exact location; consider a path relative to a configurable data directory.
general_docs = extract_text_from_pdf('/Users/sayo/personal_projects/Usafe_bot/data/general_one.pdf')

In [70]:
# Step 2: Document Chunking
def chunk_documents(documents, chunk_size=1000, chunk_overlap=100):
    """
    Breaks documents into overlapping pieces sized for embedding.

    Args:
        documents: Documents to split.
        chunk_size: Chunk size handed to RecursiveCharacterTextSplitter.
        chunk_overlap: Overlap between consecutive chunks handed to the splitter.

    Returns:
        The result of the splitter's split_documents() for ``documents``.
    """
    splitter_settings = {"chunk_size": chunk_size, "chunk_overlap": chunk_overlap}
    splitter = RecursiveCharacterTextSplitter(**splitter_settings)
    chunks = splitter.split_documents(documents=documents)
    return chunks

In [72]:
# Chunk the general document with chunk_documents' defaults (chunk_size=1000,
# chunk_overlap=100) and report how many chunks were produced.
general_chunks = chunk_documents(general_docs)
print(f"Chunks created: {len(general_chunks)}")

Chunks created: 15


In [74]:
# Step 3: Embedding and Vector Store Creation
def create_vector_store(doc_chunks, db_name='usafe_general'):
    """
    Embeds document chunks with HuggingFace embeddings, builds a FAISS index
    from them and saves it locally.

    Args:
        doc_chunks: Chunked documents to index.
        db_name: Folder name under ./vector_databases/ that the index is saved to.

    Raises:
        ValueError: If ``doc_chunks`` is empty (for example when no text could
            be extracted from the source PDF).
    """
    doc_chunks = list(doc_chunks)
    if not doc_chunks:
        # Fail early with a clear message, before the embedding model is loaded;
        # an empty list otherwise fails inside the FAISS/embedding call with a
        # far less helpful error.
        raise ValueError(
            "doc_chunks is empty: nothing to embed. "
            "Check that text was extracted from the source document."
        )

    embeddings_model = HuggingFaceEmbeddings(
        model_name='sentence-transformers/all-mpnet-base-v2'
    )
    vector_store = FAISS.from_documents(doc_chunks, embedding=embeddings_model)
    vector_store.save_local(f"./vector_databases/{db_name}")


In [75]:
# Embed the general-information chunks and save the resulting FAISS index to
# ./vector_databases/usafe_general (create_vector_store returns nothing).
create_vector_store(general_chunks, db_name='usafe_general')

In [76]:
# Step 4: Vector Store Retrieval
def load_vector_store(db_path='./vector_databases/vector_db_usafe_general'):
    """
    Opens a FAISS index saved on disk and exposes it as a retriever.

    Args:
        db_path: Folder holding the saved index. NOTE(review): this default
            differs from the ./vector_databases/<db_name> folder that
            create_vector_store writes to — confirm which one is intended.

    Returns:
        The retriever produced by the loaded store's as_retriever().
    """
    query_embedder = HuggingFaceEmbeddings(
        model_name='sentence-transformers/all-mpnet-base-v2'
    )
    # NOTE(review): allow_dangerous_deserialization opts in to loading pickled
    # data (presumably the stored documents); only point db_path at index
    # folders this project created itself.
    load_options = {
        "folder_path": db_path,
        "embeddings": query_embedder,
        "allow_dangerous_deserialization": True,
    }
    faiss_index = FAISS.load_local(**load_options)
    return faiss_index.as_retriever()

In [78]:
# Initialize retriever for general information.
# The path is passed explicitly: create_vector_store(..., db_name='usafe_general')
# saves the index to ./vector_databases/usafe_general, whereas load_vector_store's
# default points at ./vector_databases/vector_db_usafe_general, so relying on the
# default would load a different (stale or missing) index.
general_info_retriever = load_vector_store(db_path='./vector_databases/usafe_general')

In [44]:
# Sanity check: confirm that load_vector_store returned a retriever object.
type(general_info_retriever)

langchain_core.vectorstores.base.VectorStoreRetriever

In [60]:
def connect_chains(retriever):
    """
    Connects a stuff-documents chain with a retrieval chain for ``retriever``.

    This is an alias of setup_retrieval_chain, which builds exactly the same
    chain (same module-level ``llm``, same "langchain-ai/retrieval-qa-chat"
    prompt). Delegating keeps the chain construction in one place while
    leaving this name available to existing callers.

    Args:
        retriever: Retriever that supplies documents to the chain.

    Returns:
        The retrieval chain built by setup_retrieval_chain.
    """
    # Resolved at call time, so it does not matter that setup_retrieval_chain
    # is defined in a later cell (the same cell that defines ``llm``).
    return setup_retrieval_chain(retriever)

In [79]:
# Step 5: LLM and Chain Connection
def initialize_llm(model_name="llama3-8b-8192"):
    """
    Builds the ChatGroq chat model used by the bot.

    Args:
        model_name: Model identifier passed to ChatGroq as ``model``.

    Returns:
        A ChatGroq instance configured with temperature 0, no explicit token
        or timeout limit (both passed as None) and up to 2 retries.
    """
    groq_settings = {
        "model": model_name,
        "temperature": 0,
        "max_tokens": None,
        "timeout": None,
        "max_retries": 2,
    }
    return ChatGroq(**groq_settings)

In [80]:
# Load the LLM with initialize_llm's defaults. The chain-building functions in
# this notebook read this module-level `llm` when they are called.
llm = initialize_llm()

def setup_retrieval_chain(retriever):
    """
    Builds the question-answering chain for a retriever.

    A stuff-documents chain is created from the module-level ``llm`` and the
    "langchain-ai/retrieval-qa-chat" prompt pulled from the LangChain hub, and
    is then combined with ``retriever`` into a retrieval chain.

    Args:
        retriever: Retriever that supplies documents to the chain.

    Returns:
        The chain returned by create_retrieval_chain.
    """
    combine_docs_chain = create_stuff_documents_chain(
        llm=llm,
        prompt=hub.pull("langchain-ai/retrieval-qa-chat"),
    )
    retrieval_chain = create_retrieval_chain(
        retriever=retriever,
        combine_docs_chain=combine_docs_chain,
    )
    return retrieval_chain

In [81]:
# Build the retrieval chain over the general-information retriever.
usafe_retrieval_chain = setup_retrieval_chain(general_info_retriever)

In [82]:
def query_usafe_bot(user_input, retrieval_chain=None):
    """
    Queries the Usafe ChatBot with a user's input and prints the response.

    Args:
        user_input: The question to send to the chain (passed as ``"input"``).
        retrieval_chain: Chain to invoke. When omitted, the module-level
            ``usafe_retrieval_chain`` is looked up at call time, so a chain
            that is rebuilt after this function was defined is picked up
            without re-running this cell.

    Returns:
        None. The chain's ``"answer"`` is printed with leading and trailing
        newlines removed.
    """
    if retrieval_chain is None:
        # Resolve the default lazily: a default bound in the signature would be
        # evaluated once at definition time and could go stale.
        retrieval_chain = usafe_retrieval_chain
    response = retrieval_chain.invoke({"input": user_input})
    print(response['answer'].strip("\n"))

In [83]:
# Example query: where to find support resources after a hate crime.
query_usafe_bot("What are the steps i can follow to get resources to help me feel better after a hate crime?")

Based on the provided context, here are the steps you can follow to get resources to help you feel better after a hate crime:

1. Prepare for Language Barriers:
	* If you don't speak German, consider bringing someone who speaks German to assist with translation at the police station.
	* If you don't know someone who can assist, you may contact organizations that provide translation support for hate crime victims, such as ReachOut Berlin.
2. Visit Your Local Police Station:
	* Bring all collected documentation with you.
	* Explain the details of the incident and inform the officer that you believe it's a hate crime.
3. Seek Legal Assistance:
	* If you want to take your case to court, click on the link for legal protections and free community assistance.
4. Mental Health Support:
	* Click on the link for mental health professionals or other communities who can support you through this difficult time.
5. Local Resources:
	* NGOs, Legal Aid, Counseling, and other resources are available. C

In [84]:
# Example query: first-person request for what to do after experiencing a hate crime.
query_usafe_bot("I faced a hate crime and I need help with what to do next.")

I'm so sorry to hear that you've been a victim of a hate crime. It's important to take immediate action to ensure your safety and well-being. Here are some steps you can take:

1. Prepare for Language Barriers: If you don't speak German, consider bringing someone who speaks German to assist with translation at the police station. Alternatively, you can contact organizations that provide translation support for hate crime victims, such as ReachOut Berlin.
2. Visit Your Local Police Station: Bring all collected documentation with you and explain the details of the incident to the officer. Let them know that you believe it to be a hate crime.
3. Report Online (Optional): If you're unable to visit the police station, you may be able to file a report online through local authorities' websites or specific online reporting platforms. For Berlin residents, you can use the Berlin Police Online Reporting Portal to report incidents anonymously or directly.
4. Follow Up on Your Report: After filin

In [85]:
# Example query: informational question about the psychological impact on victims.
query_usafe_bot("what are the psychological effects of a hate crime on the victim?")

According to the context, the psychological effects of a hate crime on the victim can include:

* Trauma
* Depression
* Low self-esteem
* Symptoms of Post-Traumatic Stress Disorder (PTSD)
* Avoidance behaviors

Additionally, the context mentions that hate crimes can also have a broader impact on the targeted group, including:

* Increased fear and vulnerability
* Negative impacts on mental and physical health
* Changes in behavior and social interactions
* Decreased sense of safety and security

It's also noted that hate crimes can have a damaging effect on the broader community, leading to:

* Division and weakened multicultural society
* Potential for intergroup conflict and hostility
* Negative impacts on social cohesion and community relations
