# Data

In [None]:
import json
# Load the law corpus, the Task 2 questions and the prompt templates.
# The encoding is passed explicitly: the notebook works with Vietnamese text and
# open() otherwise falls back to a platform-dependent default encoding.
with open('/kaggle/input/public-test-alqac1/law.json', encoding='utf-8') as f:
    law = json.load(f)
with open('/kaggle/input/public-test-alqac1/private_test_TASK_2.json', encoding='utf-8') as f:
    data = json.load(f)
with open('/kaggle/input/prompt-legal/prompts.json', encoding='utf-8') as f:
    prompts = json.load(f)

In [None]:
# Append an "answer only, no explanation" note to every prompt template.
# The endswith() guard keeps this cell idempotent: re-running it no longer
# appends the same note a second time.
_answer_only_notes = {
    'essay': "\n\nLưu ý: Chỉ cần trả lời đáp án, không cần giải thích",
    'options': "\n\nLưu ý: Chỉ có 1 đáp án đúng và chỉ cần trả lời A, B, C hoặc D, không cần giải thích",
    'truefalse': "\n\nLưu ý: Chỉ cần trả lời Đúng hoặc Sai, không cần giải thích",
}
for _kind, _note in _answer_only_notes.items():
    prompts[_kind] = [p if p.endswith(_note) else p + _note for p in prompts[_kind]]

In [None]:
def get_relevant_articles(relevant_articles, law_data):
    """Return the texts of the referenced articles joined by single spaces.

    Args:
        relevant_articles: Sequence of dicts with 'law_id' and 'article_id' keys.
        law_data: Iterable of law dicts, each with an 'id' and a list of
            'articles' (dicts with 'id' and 'text').

    Returns:
        One string with the matched article texts, in the order of
        `relevant_articles`. References that match nothing are skipped
        silently, so the result may be an empty string.
    """
    texts = []
    for reference in relevant_articles:
        for entry in law_data:
            if entry['id'] != reference['law_id']:
                continue
            # Only the first article with the requested id is taken from a law.
            hit = next(
                (art for art in entry['articles'] if art['id'] == reference['article_id']),
                None,
            )
            if hit is not None:
                texts.append(hit['text'])
    return " ".join(texts)
# Smoke check: show the article text resolved for the first question.
print(get_relevant_articles(relevant_articles=data[0]['relevant_articles'], law_data=law))

In [None]:
def prompt_format_data(data, prompt):
    """Fill a prompt template with one question's fields.

    Args:
        data: Question dict with 'question_type', 'relevant_articles' and
            'text'; multiple-choice questions ('Trắc nghiệm') also need 'choices'.
        prompt: Template string with `{articles}` and `{question}` placeholders
            (plus `{choices}` for multiple-choice questions).

    Returns:
        The formatted prompt. Article texts are looked up in the
        notebook-level `law` corpus.
    """
    is_multiple_choice = data['question_type'] == 'Trắc nghiệm'
    fields = {
        'articles': get_relevant_articles(data['relevant_articles'], law),
        'question': data['text'],
    }
    if is_multiple_choice:
        # Only multiple-choice templates receive the answer options.
        fields['choices'] = data['choices']
    return prompt.format(**fields)
# Smoke check: render the last essay template for the question at index 92.
print(prompt_format_data(data=data[92], prompt=prompts["essay"][-1]))

# LLAMA

In [None]:
!pip install -q groq

In [None]:
import os
import time
from groq import Groq

# Read the key from the environment instead of committing it to the notebook;
# a missing GROQ_API_KEY fails here with a KeyError rather than at the first request.
client = Groq(
    api_key=os.environ["GROQ_API_KEY"]
)

In [None]:
def generate_answer_groq(prompt, model="llama3-70b-8192", temperature=0, max_tokens=1024):
    """Send `prompt` as a single user message through the notebook-level `client`.

    Args:
        prompt: Full prompt text, sent as the only message (role "user").
        model: Model identifier passed to the API; defaults to the value that
            was previously hard-coded here.
        temperature: Sampling temperature passed to the API (default 0).
        max_tokens: Value passed to the API as `max_tokens` (default 1024).

    Returns:
        The `message.content` of the first returned choice.
    """
    chat_completion = client.chat.completions.create(
        messages=[
            {
                "role": "user",
                "content": prompt,
            }
        ],
        model=model,
        temperature=temperature,
        max_tokens=max_tokens,
    )

    return chat_completion.choices[0].message.content

In [None]:
# Ask the Groq-hosted model every question, using the last template of the
# prompt family that matches the question type (anything else is treated as essay).
prompt_kind_by_type = {"Đúng/Sai": "truefalse", "Trắc nghiệm": "options"}

answer_llama = []
for question in data:
    prompt_kind = prompt_kind_by_type.get(question["question_type"], "essay")
    prompt = prompt_format_data(question, prompts[prompt_kind][-1])
    response = generate_answer_groq(prompt)
    print(question['question_id'], ':', response)
    answer_llama.append({"question_id": question['question_id'], "answer": response})


# Gemini

In [None]:
pip install -q -U google-generativeai

In [None]:
import google.generativeai as genai
import os
import time
# Read the key from the environment instead of committing it to the notebook;
# a missing GOOGLE_API_KEY fails here with a KeyError.
genai.configure(api_key=os.environ["GOOGLE_API_KEY"])

model = genai.GenerativeModel('gemini-1.5-flash')

In [None]:
# Generation settings passed to every Gemini request in this notebook.
config = genai.GenerationConfig(
    temperature=0,
    top_p=1,
    top_k=32,
    max_output_tokens=1024,
)

In [None]:
# Ask Gemini every question, using the last template of the prompt family that
# matches the question type (anything else is treated as essay).
prompt_kind_by_type = {"Đúng/Sai": "truefalse", "Trắc nghiệm": "options"}

answer_gemini = []
for question in data:
    prompt_kind = prompt_kind_by_type.get(question["question_type"], "essay")
    prompt = prompt_format_data(question, prompts[prompt_kind][-1])
    response = model.generate_content(prompt, generation_config=config).text
    print(question['question_id'], ':', response)
    answer_gemini.append({"question_id": question['question_id'], "answer": response})
    # Pause between requests (presumably to stay under the API rate limit).
    time.sleep(5)


# GPT

In [None]:
pip install openai

In [None]:
import os
from openai import OpenAI

# Read the key from the environment instead of committing it to the notebook;
# a missing OPENAI_API_KEY fails here with a KeyError.
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]

# NOTE: this rebinds the notebook-level name `client`, which `generate_answer_groq`
# also reads. Once this cell has run, the Groq client must be re-created before
# the LLAMA cells are executed again.
client = OpenAI(
  api_key=OPENAI_API_KEY,
)

In [None]:
def completion_gpt(prompt, model="gpt-4o", temperature=0):
    """Send `prompt` as a single user message through the notebook-level `client`.

    Args:
        prompt: Full prompt text, sent as the only message (role "user").
        model: Model identifier passed to the API; defaults to the value that
            was previously hard-coded here.
        temperature: Sampling temperature passed to the API (default 0).

    Returns:
        The `message.content` of the first returned choice, converted with `str()`.
    """
    completion = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "user", "content": prompt}
        ],
        temperature=temperature,
    )
    return str(completion.choices[0].message.content)

In [None]:
# Ask GPT every question, using the last template of the prompt family that
# matches the question type (anything else is treated as essay).
prompt_kind_by_type = {"Đúng/Sai": "truefalse", "Trắc nghiệm": "options"}

answer_gpt = []
for question in data:
    prompt_kind = prompt_kind_by_type.get(question["question_type"], "essay")
    prompt = prompt_format_data(question, prompts[prompt_kind][-1])
    response = completion_gpt(prompt)
    print(question['question_id'], ':', response)
    answer_gpt.append({"question_id": question['question_id'], "answer": response})


# Post processing

In [None]:
# format answer
def preprocessing(model_answer, questions=None):
    """Clean raw model replies in place, according to each question's type.

    Args:
        model_answer: List of dicts with an "answer" string, aligned by index
            with `questions`.
        questions: List of question dicts with a 'question_type' key. Defaults
            to the notebook-level `data`, which keeps existing one-argument
            calls working.

    Returns:
        The same `model_answer` list, with its "answer" values updated:
        essay ('Tự luận') replies have newlines replaced by spaces and are
        stripped; multiple-choice ('Trắc nghiệm') replies have answer
        boilerplate and bold markers removed; 'Đúng/Sai' replies are untouched.

    Raises:
        AssertionError: If the two lists differ in length.
    """
    if questions is None:
        questions = data
    assert len(model_answer) == len(questions)

    # Removed from multiple-choice replies in exactly this order: the shorter
    # fragments are prefixes/parts of the longer ones, so the longer ones go first.
    choice_noise = ('Đáp án: ', 'Đáp án đúng là: ', '**', 'Đáp án ', 'đúng là ')

    for entry, question in zip(model_answer, questions):
        type_question = question['question_type']
        if type_question == 'Tự luận':
            entry["answer"] = entry["answer"].replace('\n', ' ').strip()
        elif type_question == 'Trắc nghiệm':
            cleaned = entry["answer"]
            for fragment in choice_noise:
                cleaned = cleaned.replace(fragment, '').strip()
            entry["answer"] = cleaned

    return model_answer

# Clean the raw replies of all three models (each list is also updated in place).
answer_llama, answer_gemini, answer_gpt = map(
    preprocessing, (answer_llama, answer_gemini, answer_gpt)
)

In [None]:
# get answer from answer format

import re


def _parse_true_false(text):
    """Return 'Đúng' or 'Sai' for the first verdict word in `text`, or None if neither occurs."""
    # The earliest verdict word wins, so "Sai, ... không đúng" is read as 'Sai'
    # instead of being claimed by a later mention of "đúng".
    match = re.search(r'\b(đúng|sai)\b', text.lower())
    if match is None:
        return None
    return 'Đúng' if match.group(1) == 'đúng' else 'Sai'


def _parse_choice(text):
    """Return the option letter 'A'-'D' named in `text`, or None if none is found."""
    stripped = text.strip()
    # Preferred form: the reply starts with the letter itself ("B", "b.", "C)").
    # The word boundary stops ordinary words such as "Căn cứ" from counting as "C".
    leading = re.match(r'([A-Da-d])\b', stripped)
    if leading:
        return leading.group(1).upper()
    # Otherwise accept the first stand-alone capital letter anywhere in the reply.
    standalone = re.search(r'\b([A-D])\b', stripped)
    return standalone.group(1) if standalone else None


def get_answer(model_answer_preprocess, questions=None):
    """Normalise cleaned replies in place to their final answer values.

    Args:
        model_answer_preprocess: List of dicts with an "answer" string, aligned
            by index with `questions`.
        questions: List of question dicts with a 'question_type' key. Defaults
            to the notebook-level `data`, which keeps existing one-argument
            calls working.

    Returns:
        The same list. 'Đúng/Sai' answers become 'Đúng' or 'Sai' (a reply with
        neither word is printed and left unchanged); 'Trắc nghiệm' answers
        become 'A', 'B', 'C' or 'D'; essay answers are left unchanged.

    Raises:
        AssertionError: If the two lists differ in length.
        ValueError: If no option letter can be found in a multiple-choice
            reply (the reply is printed first). This includes empty replies.
    """
    if questions is None:
        questions = data
    assert len(model_answer_preprocess) == len(questions)

    for entry, question in zip(model_answer_preprocess, questions):
        type_question = question['question_type']
        if type_question == 'Đúng/Sai':
            verdict = _parse_true_false(entry["answer"])
            if verdict is None:
                # Unparseable reply: surface it for manual inspection.
                print(entry["answer"])
            else:
                entry["answer"] = verdict
        elif type_question == 'Trắc nghiệm':
            letter = _parse_choice(entry["answer"])
            if letter is None:
                print(entry["answer"])
                raise ValueError('Answer not found')
            entry["answer"] = letter

    return model_answer_preprocess

# Reduce the cleaned replies of all three models to their final answer values
# (each list is also updated in place).
answer_llama, answer_gemini, answer_gpt = map(
    get_answer, (answer_llama, answer_gemini, answer_gpt)
)

In [None]:
# Display the processed GPT answers for a visual check.
answer_gpt

# Save answers to JSON files

In [None]:
# Persist the LLaMA answers as pretty-printed UTF-8 JSON (non-ASCII kept as-is).
print(len(answer_llama))
llama_payload = json.dumps(answer_llama, ensure_ascii=False, indent=4)
with open('llama_v1.json', 'w', encoding='utf-8') as out_file:
    out_file.write(llama_payload)

In [None]:
# Persist the Gemini answers as pretty-printed UTF-8 JSON (non-ASCII kept as-is).
print(len(answer_gemini))
gemini_payload = json.dumps(answer_gemini, ensure_ascii=False, indent=4)
with open('gemini_v1.json', 'w', encoding='utf-8') as out_file:
    out_file.write(gemini_payload)

In [None]:
# Persist the GPT answers as pretty-printed UTF-8 JSON (non-ASCII kept as-is).
print(len(answer_gpt))
gpt_payload = json.dumps(answer_gpt, ensure_ascii=False, indent=4)
with open('gpt_v1.json', 'w', encoding='utf-8') as out_file:
    out_file.write(gpt_payload)

# Ensemble

In [None]:
# Reload the per-model answer files. They are written with encoding='utf-8' and
# ensure_ascii=False, so they must be read back as UTF-8 and not with the
# platform-dependent default encoding.
with open('llama_v1.json', encoding='utf-8') as f:
    model_1 = json.load(f)
with open('gemini_v1.json', encoding='utf-8') as f:
    model_2 = json.load(f)
with open('gpt_v1.json', encoding='utf-8') as f:
    model_3 = json.load(f)
    
# To ensemble more result files, load them here and add them to `list_answers` below.

In [None]:
# Order matters: the ensemble cell copies essay ("TL") answers from index 2,
# i.e. model_3, which is loaded from gpt_v1.json.
list_answers = [model_1, model_2, model_3]

In [None]:
def _majority_vote(votes, labels):
    """Return the entry of `labels` that occurs most often in `votes`.

    Ties (including the case where no vote matches any label) go to the label
    listed first. Votes that are not in `labels` are ignored.
    """
    counts = [votes.count(label) for label in labels]
    return labels[counts.index(max(counts))]


# Combine the per-model answers question by question:
#   * essay questions ("TL" in the id) take the answer of list_answers[2];
#   * true/false questions ("DS" in the id) take a majority vote; 'Sai' wins ties;
#   * every other question is treated as multiple choice; the alphabetically
#     first letter wins ties.
new_answers = []
for i in range(len(list_answers[0])):
    question_id = list_answers[0][i]["question_id"]
    if "TL" in question_id:
        answer = list_answers[2][i]["answer"]
    else:
        votes = [model_answers[i]["answer"] for model_answers in list_answers]
        # This check previously read from an undefined name (`answers`), which
        # raised NameError on the first non-essay question.
        if "DS" in question_id:
            answer = _majority_vote(votes, ('Sai', 'Đúng'))
        else:
            answer = _majority_vote(votes, ('A', 'B', 'C', 'D'))
    new_answers.append({"question_id": question_id, "answer": answer})
len(list_answers[0]), len(new_answers)

In [None]:
# Persist the ensembled answers as pretty-printed UTF-8 JSON (non-ASCII kept as-is).
ensemble_payload = json.dumps(new_answers, ensure_ascii=False, indent=4)
with open('ensemble.json', 'w', encoding='utf-8') as out_file:
    out_file.write(ensemble_payload)