In [4]:
import json
from langchain_community.embeddings import BedrockEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_community.llms import Bedrock
from langchain_core.runnables import RunnablePassthrough, RunnableParallel
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from utils import CreateInferenceModifier
from datetime import datetime
from concurrent.futures import ThreadPoolExecutor, as_completed


def format_docs(docs):
    """Merge retrieved documents into one context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` of every document, in iteration order, separated
        by a blank line. An empty iterable yields an empty string.
    """
    passages = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(passages)


def extract_unique_urls(response):
    """Collect the distinct source URLs of the retrieved documents.

    Args:
        response: Mapping with a ``'context'`` entry holding an iterable of
            documents. Each document must expose a ``metadata`` mapping; its
            ``'source'`` value, when present and non-empty, is taken as the URL.

    Returns:
        The distinct URLs joined with ``'; '``, in the order they were first
        encountered. Documents without a source are skipped; an empty context
        yields an empty string.

    Raises:
        KeyError: If ``response`` has no ``'context'`` entry.
    """
    sources = (
        document.metadata.get('source') for document in response['context']
    )
    # dict.fromkeys de-duplicates while keeping first-seen order. A plain set
    # would make the joined string vary between runs (string hashing is
    # randomised per process), producing spurious diffs in the saved output.
    ordered_unique = dict.fromkeys(url for url in sources if url)
    return '; '.join(ordered_unique)


# Persisted vector-store directory for each supported language code.
# Every directory follows the pattern "<UPPERCASE CODE>_NSYR" under the shared
# vectordata folder, so the mapping is derived from the list of codes.
vector_db_paths = {
    code: f'./data/processed/faq_data/vectordata/{code.upper()}_NSYR'
    for code in ('en', 'tr', 'ar', 'fa', 'ur', 'ps', 'ku', 'som')
}

# Single embedding client shared by every Chroma store created in this
# notebook; its model id and kwargs are also written into the run metadata.
# NOTE(review): presumably this must be the same model that was used to build
# the persisted vector stores - confirm before changing it.
embeddings = BedrockEmbeddings(
    region_name="us-east-1",
    model_id="amazon.titan-embed-text-v1",
)

# Function to initialize Chroma with the appropriate path


def initialize_chroma(path):
    """Build a Chroma vector store bound to the directory ``path``.

    The store uses the module-level ``embeddings`` client as its embedding
    function.
    """
    store = Chroma(embedding_function=embeddings, persist_directory=path)
    return store

# Function to retrieve the appropriate vector database based on language


def get_retriever_for_language(language_code):
    """Return a retriever over the vector store for ``language_code``.

    Codes that are not keys of ``vector_db_paths`` fall back to the English
    store instead of raising.
    """
    english_path = vector_db_paths['en']
    store_path = vector_db_paths.get(language_code, english_path)
    vector_store = initialize_chroma(store_path)
    return vector_store.as_retriever()


def query_asylum_system(question_data, rag_chain_from_docs):
    """Answer one question with the retriever that matches its language.

    Args:
        question_data: Mapping with the required keys ``'id'``, ``'question'``
            and ``'language'``, plus an optional ``'direction'`` that defaults
            to ``'ltr'``.
        rag_chain_from_docs: Runnable that turns a ``{"context", "question"}``
            mapping into the answer text.

    Returns:
        A dict with the keys ``question_id``, ``language_code``, ``direction``,
        ``question``, ``answer`` and ``sources`` (the retrieved documents'
        source URLs as a single string).

    Raises:
        KeyError: If a required key is missing from ``question_data``.
    """
    qid = question_data['id']
    text = question_data['question']
    lang = question_data['language']
    text_direction = question_data.get('direction', 'ltr')

    # Retrieval and question pass-through run side by side; the answer is then
    # attached to the same mapping so the retrieved documents stay available
    # for source extraction.
    retrieval_step = RunnableParallel(
        {
            "context": get_retriever_for_language(lang),
            "question": RunnablePassthrough(),
        }
    )
    chain = retrieval_step.assign(answer=rag_chain_from_docs)

    result = chain.invoke(text)

    return {
        "question_id": qid,
        "language_code": lang,
        "direction": text_direction,
        "question": text,
        "answer": result["answer"],
        "sources": extract_unique_urls(result),
    }


def run_queries_and_save_responses(questions, output_file,
                                   prompt_template=None,
                                   llm_model_id=None,
                                   llm_hyperparams=None,
                                   max_workers=None):
    """Answer every question through the RAG chain and save the run as JSON.

    Questions are processed concurrently in a thread pool. A question whose
    processing raises is reported on stdout and left out of the output; the
    remaining responses are written in the same order as ``questions``.

    Args:
        questions: Iterable of question dicts as expected by
            ``query_asylum_system``.
        output_file: Path of the JSON file to write (overwritten if present).
        prompt_template: Prompt with ``{context}`` and ``{question}``
            placeholders. Defaults to the built-in chatbot prompt below.
        llm_model_id: Bedrock model id. Defaults to ``"anthropic.claude-v2:1"``.
        llm_hyperparams: Inference parameters handed to
            ``CreateInferenceModifier``. Defaults to the dict defined below.
        max_workers: Thread-pool size. ``None`` keeps the
            ``ThreadPoolExecutor`` default.

    Returns:
        None. The file contains ``{"run_metadata": ..., "responses": [...]}``.
    """
    # Local import: the notebook's import cell only brings in `datetime`.
    from datetime import timezone

    # Default prompt template
    if prompt_template is None:
        prompt_template = """You are a helpful chatbot that answers user inquiries. Drawing from your knowledge, answer the question below.
If you don't know the answer from your knowledge, say that your training materials don't include this information and suggest reaching out to Refugee Rights Turkey for more information.
Keep the answer as concise and relevant to the question as possible. Be sure to answer in the language used in the question.

Knowledge: {context}

Question: {question}

Helpful Answer:"""

    # Default LLM model ID
    if llm_model_id is None:
        llm_model_id = "anthropic.claude-v2:1"

    # Default LLM hyperparameters (built per call, so callers never share one
    # mutable dict)
    if llm_hyperparams is None:
        llm_hyperparams = {
            "max_tokens": 2000,
            "temperature": 0.2,
            "top_k": 250,
            "top_p": 1,
            "stop_sequences": ["\n\nHuman"]
        }

    # Initialize the LLM with the provided or default model and hyperparameters
    llm = Bedrock(model_id=llm_model_id, model_kwargs=CreateInferenceModifier(
        "claude", params=llm_hyperparams))

    custom_rag_prompt = PromptTemplate.from_template(prompt_template)

    # Answer-generation half of the pipeline: flatten the retrieved documents
    # into text, fill the prompt, call the LLM and keep the plain string.
    rag_chain_from_docs = (
        RunnablePassthrough.assign(
            context=(lambda x: format_docs(x["context"])))
        | custom_rag_prompt
        | llm
        | StrOutputParser()
    )

    # Materialise once so generators work and positions are stable.
    question_list = list(questions)
    # One slot per question: results land at the question's own position, so
    # the saved order does not depend on which thread finishes first.
    ordered_responses = [None] * len(question_list)

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        future_to_index = {
            executor.submit(
                query_asylum_system, question_data, rag_chain_from_docs): index
            for index, question_data in enumerate(question_list)
        }

        for future in as_completed(future_to_index):
            index = future_to_index[future]
            try:
                ordered_responses[index] = future.result()
            except Exception as e:
                # Best effort: one failing question must not abort the run.
                print(
                    f"Error processing question {question_list[index]}: {e}")

    # Drop the slots of failed questions.
    responses = [
        response for response in ordered_responses if response is not None]

    # datetime.utcnow() is deprecated since Python 3.12. Take an aware UTC
    # timestamp and strip the tzinfo so the "...Z" format stays the same.
    run_time = datetime.now(timezone.utc).replace(tzinfo=None)

    # Run metadata
    run_metadata = {
        "system_prompt": prompt_template,
        "llm_hyperparams": llm_hyperparams,
        "llm_model": llm.model_id,
        "embedding_model": embeddings.model_id,
        "embedding_hyperparams": embeddings.model_kwargs,
        "run_time": run_time.isoformat() + "Z"  # UTC time in ISO format
    }

    # Save responses and metadata to a JSON file
    output_data = {
        "run_metadata": run_metadata,
        "responses": responses
    }

    # ensure_ascii=False writes non-ASCII answers verbatim, so the encoding
    # must be explicit; the platform default may not be able to encode them.
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(output_data, f, ensure_ascii=False, indent=4)

    print(f"Responses and run metadata saved to {output_file}")

In [1]:
# Evaluation question set passed to run_queries_and_save_responses.
# Each entry is a dict with:
#   "id"        - question number; entries sharing an id appear to be
#                 translations of the same question (TODO confirm).
#   "question"  - the question text sent to the RAG chain.
#   "language"  - language code; every value used here is a key of
#                 vector_db_paths.
#   "direction" - optional text direction; only set (to "rtl") on some
#                 entries, query_asylum_system falls back to 'ltr' otherwise.
questions = [
    {"id": 1, "question": "What are the asylum procedures in Turkey?", "language": "en"},
    {"id": 1, "question": "Türkiye'de iltica prosedürleri nelerdir?", "language": "tr"},
    {"id": 1, "question": "ما هي إجراءات اللجوء في تركيا؟",
        "language": "ar", "direction": "rtl"},
    {"id": 1, "question": "روند پناهندگی در ترکیه چگونه است؟",
        "language": "fa", "direction": "rtl"},
    {"id": 1, "question": "ترکی میں پناہ کی کارروائی کیا ہے؟",
        "language": "ur", "direction": "rtl"},
    {"id": 1, "question": "چۆنەتەوەی پەنابەرایەتی لە تورکیا",
        "language": "ku", "direction": "rtl"},
    {"id": 1, "question": "Waa maxay habraacyada magangalyada ee Turkiga?", "language": "som"},
    {"id": 1, "question": "په ترکیه کې د پناه غوښتنې کړنلاره څه ده؟",
        "language": "ps", "direction": "rtl"},

    {"id": 2, "question": "What rights do asylum-seekers have in Turkey?", "language": "en"},
    {"id": 2, "question": "Türkiye'deki iltica arayanların hakları nelerdir?",
        "language": "tr"},
    {"id": 2, "question": "ما هي حقوق طالبي اللجوء في تركيا؟",
        "language": "ar", "direction": "rtl"},
    {"id": 2, "question": "پناهجویان در ترکیه چه حقوقی دارند؟",
        "language": "fa", "direction": "rtl"},
    {"id": 2, "question": "ترکی میں پناہ کے متلاشی افراد کے حقوق کیا ہیں؟",
        "language": "ur", "direction": "rtl"},
    {"id": 2, "question": "مەگەسەران لە تورکیا چی مافێن؟",
        "language": "ku", "direction": "rtl"},
    {"id": 2, "question": "Maxay yihiin xuquuqda dadka magangalyada doonaya ee Turkiga?", "language": "som"},
    {"id": 2, "question": "په ترکیه کې د پناه غوښتونکو حقونه څه دي؟",
        "language": "ps", "direction": "rtl"},

    {"id": 3, "question": "How can I apply for asylum in Turkey?", "language": "en"},
    {"id": 3, "question": "Türkiye'de iltica başvurusunu nasıl yapabilirim?",
        "language": "tr"},
    {"id": 3, "question": "كيف يمكنني التقدم بطلب لجوء في تركيا؟",
        "language": "ar", "direction": "rtl"},
    {"id": 3, "question": "چگونه می توانم در ترکیه درخواست پناهندگی بدهم؟",
        "language": "fa", "direction": "rtl"},
    {"id": 3, "question": "میں ترکی میں پناہ کی درخواست کیسے دے سکتا ہوں؟",
        "language": "ur", "direction": "rtl"},
    {"id": 3, "question": "چۆن دەتوانم بۆ پەنابەرایەتی تاقیکردنەوە بکەم لە تورکیا؟",
        "language": "ku", "direction": "rtl"},
    {"id": 3, "question": "Sideen ugu dalban karaa magangalyo Turkiga?", "language": "som"},
    {"id": 3, "question": "زه څنګه په ترکیه کې د پناه غوښتنه کولی شم؟",
        "language": "ps", "direction": "rtl"},

    {"id": 4, "question": "What documents do I need for an asylum application?",
        "language": "en"},
    {"id": 4, "question": "İltica başvurusu için hangi belgelere ihtiyacım var?",
        "language": "tr"},
    {"id": 4, "question": "ما هي الوثائق المطلوبة لطلب اللجوء؟",
        "language": "ar", "direction": "rtl"},
    {"id": 4, "question": "برای درخواست پناهندگی به چه مدارکی نیاز دارم؟",
        "language": "fa", "direction": "rtl"},
    {"id": 4, "question": "پناہ کی درخواست کے لیے مجھے کون سے دستاویزات درکار ہیں؟",
        "language": "ur", "direction": "rtl"},
    {"id": 4, "question": "بۆ تاقیکردنەوەی پەنابەرایەتی پێویستە بە چ زەویەکانی؟",
        "language": "ku", "direction": "rtl"},
    {"id": 4, "question": "Waa maxay dukumentiyada aan u baahanahay codsiga magangalyada?", "language": "som"},
    {"id": 4, "question": "د پناه غوښتنې لپاره کومو اسنادو ته اړتیا لرم؟",
        "language": "ps", "direction": "rtl"},

    {"id": 5, "question": "Can I work while my asylum application is being processed?", "language": "en"},
    {"id": 5, "question": "İltica başvurum işleme alınırken çalışabilir miyim?",
        "language": "tr"},
    {"id": 5, "question": "هل يمكنني العمل أثناء معالجة طلب اللجوء الخاص بي؟",
        "language": "ar", "direction": "rtl"},
    {"id": 5, "question": "آیا می توانم در حالی که درخواست پناهندگی من در حال پردازش است کار کنم؟",
        "language": "fa", "direction": "rtl"},
    {"id": 5, "question": "کیا میں اپنی پناہ کی درخواست پر کارروائی کے دوران کام کر سکتا ہوں؟",
        "language": "ur", "direction": "rtl"},
    {"id": 5, "question": "دەتوانم کار بکەم کە تاقیکردنەوەی پەنابەرایەتی پێش چووە؟",
        "language": "ku", "direction": "rtl"},
    {"id": 5, "question": "Miyaan shaqeyn karaa inta codsiga magangalyada la shaqeynayo?", "language": "som"},
    {"id": 5, "question": "آیا زه کولای شم پداسې حال کې چې زما د پناه غوښتنې پروسس کیږي کار وکړم؟",
        "language": "ps", "direction": "rtl"},

    {"id": 6, "question": "What should I do if my asylum application is rejected?", "language": "en"},
    {"id": 6, "question": "İltica başvurum reddedilirse ne yapmalıyım?", "language": "tr"},
    {"id": 6, "question": "ماذا علي أن أفعل إذا رُفض طلبي للجوء؟",
        "language": "ar", "direction": "rtl"},
    {"id": 6, "question": "اگر درخواست پناهندگی من رد شد، چه کار باید بکنم؟",
        "language": "fa", "direction": "rtl"},
    {"id": 6, "question": "اگر میری پناہ کی درخواست مسترد ہو جائے تو مجھے کیا کرنا چاہیے؟",
        "language": "ur", "direction": "rtl"},
    {"id": 6, "question": "ئەگەر تاقیکردنەوەی پەنابەرایەتی من رد کرا، چ باید بکەم؟",
        "language": "ku", "direction": "rtl"},
    {"id": 6, "question": "Maxaan sameeyaa haddii codsigayga magangalyada la diido?",
        "language": "som"},
    {"id": 6, "question": "که زما د پناه غوښتنه رد شي، زه باید څه وکړم؟",
        "language": "ps", "direction": "rtl"},

    {"id": 7, "question": "How long does the asylum process take in Turkey?",
        "language": "en"},
    {"id": 7, "question": "Türkiye'deki iltica süreci ne kadar sürer?", "language": "tr"},
    {"id": 7, "question": "كم من الوقت يستغرق عملية اللجوء في تركيا؟",
        "language": "ar", "direction": "rtl"},
    {"id": 7, "question": "روند پناهندگی در ترکیه چقدر طول می کشد؟",
        "language": "fa", "direction": "rtl"},
    {"id": 7, "question": "ترکی میں پناہ کا عمل کتنی دیر لیتا ہے؟",
        "language": "ur", "direction": "rtl"},
    {"id": 7, "question": "پەروەردنی پەنابەرایەتی لە تورکیا چەند دەما پێوە؟",
        "language": "ku", "direction": "rtl"},
    {"id": 7, "question": "Intee in le'eg ayay qaadanaysaa nidaamka magangalyada ee Turkiga?", "language": "som"},
    {"id": 7, "question": "په ترکیه کې د پناه غوښتنې پروسه څومره وخت نیسي؟",
        "language": "ps", "direction": "rtl"},

    {"id": 8, "question": "Can I appeal a rejected asylum application in Turkey?", "language": "en"},
    {"id": 8, "question": "Türkiye'de reddedilen iltica başvurusuna itiraz edebilir miyim?", "language": "tr"},
    {"id": 8, "question": "هل يمكنني الطعن في طلب لجوء مرفوض في تركيا؟",
        "language": "ar", "direction": "rtl"},
    {"id": 8, "question": "آیا می توانم به درخواست پناهندگی رد شده در ترکیه اعتراض کنم؟",
        "language": "fa", "direction": "rtl"},
    {"id": 8, "question": "کیا میں ترکی میں مسترد شدہ پناہ کی درخواست کے خلاف اپیل کر سکتا ہوں؟",
        "language": "ur", "direction": "rtl"},
    {"id": 8, "question": "دەتوانم لە تاقیکردنەوەی پەنابەرایەتی ردکراوە بەرپرسە بنووسم لە تورکیا؟",
        "language": "ku", "direction": "rtl"},
    {"id": 8, "question": "Ma ku caban karaa codsi magangalyo oo la diiday Turkiga?",
        "language": "som"},
    {"id": 8, "question": "آیا زه په ترکیه کې د رد شوي پناه غوښتنې غوښتنلیک باندې اپیل کولی شم؟",
        "language": "ps", "direction": "rtl"},

    {"id": 9, "question": "Where can I find legal assistance for asylum seekers in Turkey?", "language": "en"},
    {"id": 9, "question": "Türkiye'de iltica arayanlar için yasal yardım nerede bulabilirim?", "language": "tr"},
    {"id": 9, "question": "أين يمكنني العثور على المساعدة القانونية لطالبي اللجوء في تركيا؟",
        "language": "ar", "direction": "rtl"},
    {"id": 9, "question": "کجا می توانم کمک حقوقی برای پناهجویان در ترکیه پیدا کنم؟",
        "language": "fa", "direction": "rtl"},
    {"id": 9, "question": "میں ترکی میں پناہ گزینوں کے لیے قانونی مدد کہاں سے حاصل کر سکتا ہوں؟",
        "language": "ur", "direction": "rtl"},
    {"id": 9, "question": "لەلایەن کوژی دەستبەری ئەنجامە بەرەدا کە بەرەسێت پەنابەرایەتی لە تورکیا؟",
        "language": "ku", "direction": "rtl"},
    {"id": 9, "question": "Xageen ka heli karaa caawimaad sharci oo loogu talagalay magangalyada Turkiga?", "language": "som"},
    {"id": 9, "question": "په ترکیه کې د پناه غوښتونکو لپاره قانوني مرستې چیرته موندلی شم؟",
        "language": "ps", "direction": "rtl"},

    {"id": 10, "question": "What are the housing options for asylum seekers in Turkey?", "language": "en"},
    {"id": 10, "question": "Türkiye'de iltica arayanlar için konaklama seçenekleri nelerdir?", "language": "tr"},
    {"id": 10, "question": "ما هي خيارات السكن لطالبي اللجوء في تركيا؟",
        "language": "ar", "direction": "rtl"},
    {"id": 10, "question": "گزینه های مسکن برای پناهجویان در ترکیه چیست؟",
        "language": "fa", "direction": "rtl"},
    {"id": 10, "question": "ترکی میں پناہ گزینوں کے لیے رہائش کے کیا اختیارات ہیں؟",
        "language": "ur", "direction": "rtl"},
    {"id": 10, "question": "وەکو گەژاوە لە تورکیا دەتوانم پەنابەرایەتی کەم؟",
        "language": "ku", "direction": "rtl"},
    {"id": 10, "question": "Waa maxay xulashooyinka hoyga ee loogu talagalay magangalyo-doonka Turkiga?", "language": "som"},
    {"id": 10, "question": "په ترکیه کې د پناه غوښتونکو لپاره د استوګنې اختیارونه څه دي؟",
        "language": "ps", "direction": "rtl"}
]

In [5]:
# Run the whole question set with the default prompt and model. The
# hyperparameters equal the function's defaults except for max_tokens, which
# is raised from 2000 to 8000.
run_queries_and_save_responses(
    questions,
    'v2_responses.json',
    llm_hyperparams={
        "max_tokens": 8000,
        "temperature": 0.2,
        "top_k": 250,
        "top_p": 1,
        "stop_sequences": ["\n\nHuman"],
    },
)

Responses and run metadata saved to v2_responses.json
