## Importing libraries

In [2]:
import pandas as pd
from tqdm import tqdm
import ollama
import re
from hurry.filesize import size
import time
from IPython.display import clear_output
import json
import os

## Loading datasets

In [None]:
def _load_tsv(file_name):
    """Read one benchmark file from the local ``data`` directory.

    The path is assembled with ``os.path.join`` so the cell also runs on
    non-Windows hosts, where a backslash is not a path separator.

    Parameters
    ----------
    file_name : str
        Name of the ``.tsv`` file inside ``data``.

    Returns
    -------
    pandas.DataFrame
        The parsed file.
    """
    # sep is the two-character regex \t; it is kept together with the python
    # engine so the files are parsed exactly as before.
    return pd.read_csv(os.path.join("data", file_name), sep="\\t", engine='python')


cti_mcq_ds = _load_tsv("cti-mcq.tsv")
cti_rcm_ds_2024 = _load_tsv("cti-rcm.tsv")
cti_rcm_ds_2021 = _load_tsv("cti-rcm-2021.tsv")
cti_taa_ds = _load_tsv("cti-taa.tsv")
cti_vsp_ds = _load_tsv("cti-vsp.tsv")

We define a dictionary that maps each task name to its dataset.

In [6]:
# Task name -> dataset. Insertion order is the order in which the
# evaluation loops walk through the tasks.
tasks_and_datasets_dict = dict([
    ("cti-mcq", cti_mcq_ds),
    ("cti-rcm-2024", cti_rcm_ds_2024),
    ("cti-rcm-2021", cti_rcm_ds_2021),
    ("cti-taa", cti_taa_ds),
    ("cti-vsp", cti_vsp_ds),
])

We define a dictionary that maps each model's display name to the model tag passed to Ollama.

In [7]:
# Display name -> Ollama model tag for every model to evaluate.
# This has to be a real dict of "name": "tag" pairs: the evaluation loops
# call .keys()/.values() on it and look the tag up by name, which fails on a
# set containing the single string "name:tag ".
models_to_evaluate_dict = {
    "Gemma2-9b-Q8": "gemma29bq8",
}

In [None]:
# Run every prompt of every task through each model in
# models_to_evaluate_dict (generation capped at 1000 tokens) and store the raw
# responses of each task as JSON under responses/<model>/.
task_names = list(tasks_and_datasets_dict)

for i, model in enumerate(models_to_evaluate_dict):
    print(
        f"Evaluating {model} model, ({i+1}/{len(models_to_evaluate_dict)})")
    model_tag = models_to_evaluate_dict[model]

    for task in task_names:
        # Iterate the prompts positionally so the loop does not rely on the
        # dataset having a default 0..n-1 index.
        prompts = tasks_and_datasets_dict[task]["Prompt"]
        last_index = len(prompts) - 1
        is_last_task = task == task_names[-1]
        task_responses = []

        for j, prompt in enumerate(tqdm(prompts, desc=f"Generating {task} responses...")):
            # Keep the model loaded for an hour between prompts and unload it
            # (keep_alive=0) right after this model's final prompt. The check
            # has to depend on the prompt position: comparing the model index
            # with len(cti_mcq_ds) is always true and never unloads.
            is_final_prompt = is_last_task and j == last_index
            response = ollama.generate(
                model=model_tag,
                prompt=prompt,
                options={"temperature": 0, "top_p": 1, "num_predict": 1000},
                keep_alive=0 if is_final_prompt else 3600,
            )

            # Some ollama client versions return a pydantic object instead of
            # a dict; convert it so json.dump can serialise it. Plain dicts
            # are left untouched.
            if hasattr(response, "model_dump"):
                response = response.model_dump()

            task_responses.append({"index": j, "response": response})

        output_directory = f"responses/{model}/Full responses and stats/"
        output_path = f"{output_directory}{task} generations.json"

        try:
            os.makedirs(output_directory, exist_ok=True)
            print(f"Directory '{output_directory}' created successfully.")
            with open(output_path, "w") as f:
                json.dump({f"{task} responses": task_responses}, f, indent=4)
        except OSError as e:
            # Best effort: report the failure and carry on with the next task.
            # The message names the file because either the directory
            # creation or the write may have failed.
            print(f"Error saving responses to '{output_path}': {e}")

        # Release the (potentially large) response list before the next task.
        del task_responses

In [8]:
# Fine-tuned models to evaluate: display name -> Ollama model tag.
fine_tuning_eval = dict([
    ("Llama3.1_ft_v1", "Llama3.1_ft_1:latest"),
    ("Llama3.1_ft_v1.1", "Llama3.1_ft_2:latest"),
])

In [None]:
# Run every prompt of every task through each fine-tuned model in
# fine_tuning_eval and store the raw responses of each task as JSON under
# responses/<model>/.
task_names = list(tasks_and_datasets_dict)

for i, model in enumerate(fine_tuning_eval):
    print(
        f"Evaluating {model} model, ({i+1}/{len(fine_tuning_eval)})")
    model_tag = fine_tuning_eval[model]

    for task in task_names:
        # Iterate the prompts positionally so the loop does not rely on the
        # dataset having a default 0..n-1 index.
        prompts = tasks_and_datasets_dict[task]["Prompt"]
        last_index = len(prompts) - 1
        is_last_task = task == task_names[-1]
        task_responses = []

        for j, prompt in enumerate(tqdm(prompts, desc=f"Generating {task} responses...")):
            # Keep the model loaded for an hour between prompts and unload it
            # (keep_alive=0) right after this model's final prompt. The check
            # has to depend on the prompt position: comparing the model index
            # with len(cti_mcq_ds) is always true and never unloads.
            is_final_prompt = is_last_task and j == last_index
            response = ollama.generate(
                model=model_tag,
                prompt=prompt,
                options={"temperature": 0, "top_p": 1},
                keep_alive=0 if is_final_prompt else 3600,
            )

            # Some ollama client versions return a pydantic object instead of
            # a dict; convert it so json.dump can serialise it. Plain dicts
            # are left untouched.
            if hasattr(response, "model_dump"):
                response = response.model_dump()

            task_responses.append({"index": j, "response": response})

        output_directory = f"responses/{model}/Full responses and stats/"
        output_path = f"{output_directory}{task} generations.json"

        try:
            os.makedirs(output_directory, exist_ok=True)
            print(f"Directory '{output_directory}' created successfully.")
            with open(output_path, "w") as f:
                json.dump({f"{task} responses": task_responses}, f, indent=4)
        except OSError as e:
            # Best effort: report the failure and carry on with the next task.
            # The message names the file because either the directory
            # creation or the write may have failed.
            print(f"Error saving responses to '{output_path}': {e}")

        # Release the (potentially large) response list before the next task.
        del task_responses

In [None]:
# Run every prompt of every task through each model in
# models_to_evaluate_dict (no cap on generated tokens) and store the raw
# responses of each task as JSON under <responses root>/<model>/.
#
# The responses root defaults to the original absolute path; set the
# CTI_RESPONSES_DIR environment variable to write somewhere else without
# editing this cell.
responses_root = os.environ.get("CTI_RESPONSES_DIR", "C:/internship/responses")
task_names = list(tasks_and_datasets_dict)

for i, model in enumerate(models_to_evaluate_dict):
    print(
        f"Evaluating {model} model, ({i+1}/{len(models_to_evaluate_dict)})")
    model_tag = models_to_evaluate_dict[model]

    for task in task_names:
        # Iterate the prompts positionally so the loop does not rely on the
        # dataset having a default 0..n-1 index.
        prompts = tasks_and_datasets_dict[task]["Prompt"]
        last_index = len(prompts) - 1
        is_last_task = task == task_names[-1]
        task_responses = []

        for j, prompt in enumerate(tqdm(prompts, desc=f"Generating {task} responses...")):
            # Keep the model loaded for an hour between prompts and unload it
            # (keep_alive=0) right after this model's final prompt. The check
            # has to depend on the prompt position: comparing the model index
            # with len(cti_mcq_ds) is always true and never unloads.
            is_final_prompt = is_last_task and j == last_index
            response = ollama.generate(
                model=model_tag,
                prompt=prompt,
                options={"temperature": 0, "top_p": 1},
                keep_alive=0 if is_final_prompt else 3600,
            )

            # Some ollama client versions return a pydantic object instead of
            # a dict; convert it so json.dump can serialise it. Plain dicts
            # are left untouched.
            if hasattr(response, "model_dump"):
                response = response.model_dump()

            task_responses.append({"index": j, "response": response})

        output_directory = f"{responses_root}/{model}/Full responses and stats/"
        output_path = f"{output_directory}{task} generations.json"

        try:
            os.makedirs(output_directory, exist_ok=True)
            print(f"Directory '{output_directory}' created successfully.")
            with open(output_path, "w") as f:
                json.dump({f"{task} responses": task_responses}, f, indent=4)
        except OSError as e:
            # Best effort: report the failure and carry on with the next task.
            # The message names the file because either the directory
            # creation or the write may have failed.
            print(f"Error saving responses to '{output_path}': {e}")

        # Release the (potentially large) response list before the next task.
        del task_responses