In [4]:
from dotenv import load_dotenv
import os
import pandas as pd
import time
from tools import *
from make_data import load_dict_from_json
import sqlite3

# Load settings via python-dotenv, then read the DeepSeek API key from the
# process environment (the result is None when the variable is not set).
load_dotenv()
deepseek_api = os.environ.get('DEEPSEEK_API_KEY')

In [2]:
# Location of the labelled training data, relative to the notebook.
TRAIN_DATA_PATH = 'data/M0A_train_data.json'

# `database` is later indexed by text key (database[key]) to obtain the
# reference themes for that text.
database = load_dict_from_json(TRAIN_DATA_PATH)

# Keys of `database` used as the evaluation sample.
# NOTE(review): presumably a random subset — confirm sampling size and
# seeding inside tools.select_random_keys.
test_sample = select_random_keys(database)

In [3]:
# Mapping from theme code ("T1" ... "T33") to its human-readable name.
# Codes are assigned by position, so the order of the names below is significant.
themes = {
    f"T{number}": label
    for number, label in enumerate(
        (
            "Conscience",
            "Desire",
            "Freedom",
            "Goodness",
            "Identity",
            "Justice",
            "Language",
            "Meaning",
            "Science",
            "Technology",
            "Truth",
            "Time",
            "Existence",
            "Music",
            "Imagination",
            "The Unconscious",
            "Education",
            "Body & Mind",
            "Beauty",
            "Art",
            "Love",
            "Reality",
            "Politics",
            "Work",
            "Living Together",
            "Philosophy",
            "Matter",
            "Death",
            "Religion",
            "History",
            "Thought",
            "Madness",
            "Joy & Happiness",
        ),
        start=1,
    )
}

## Test Logic

In [7]:
def _parse_theme_allocation(raw_allocation, themes):
    """Convert a raw model answer such as ``"[T1, T4]"`` into theme names.

    Args:
        raw_allocation: String returned by the model; brackets and spaces are
            stripped and the remainder is split on commas.
        themes: Mapping from theme code (e.g. ``"T1"``) to theme name.

    Returns:
        List of theme names, in the order the codes appear in the answer.
        An empty answer (``"[]"`` or ``""``) yields an empty list.

    Raises:
        KeyError: If the answer contains a code that is not a key of ``themes``.
    """
    cleaned = raw_allocation.replace(']', '').replace('[', '').replace(' ', '')
    # strip() also removes newlines/tabs the model may emit around a code.
    codes = [code.strip() for code in cleaned.split(',')]
    # "".split(',') gives [''], which is not a theme code: drop empty tokens
    # instead of failing on themes[''].
    return [themes[code] for code in codes if code]


def execute_test(prompts, test_sample, database, themes, model):
    """Run every prompt over the test sample, store the answers and the metrics.

    For each prompt, every text in ``test_sample`` is sent to
    ``get_theme_allocation`` with the prompt as system instructions. The parsed
    answers are handed to ``save_answers_to_sqlite``, scored against the
    reference themes from ``database``, and the scores are handed to
    ``prompt_metrics_save``.

    Args:
        prompts: Mapping from prompt name to the system instructions text.
        test_sample: Re-iterable collection of text keys; it is iterated
            several times per prompt.
        database: Mapping indexed by text key; ``database[key]`` is passed to
            ``create_binary_matrix`` as the reference themes for that text.
        themes: Mapping from theme code to theme name.
        model: Model identifier forwarded to ``get_theme_allocation``.

    Returns:
        None. Results are persisted by the helper functions.

    Raises:
        KeyError: If the model answers with a code that is not in ``themes``.
    """
    delay_between_calls = 20  # Time (in seconds) to wait after each model call

    for prompt_name, prompt in prompts.items():
        results_dict = {}
        for text in test_sample:
            raw_allocation = get_theme_allocation(text, prompt, model=model)
            results_dict[text] = _parse_theme_allocation(raw_allocation, themes)
            time.sleep(delay_between_calls)

        save_answers_to_sqlite('data/models_answers.db', prompt_name, results_dict, database)

        # Build both matrices in test_sample order so that row i of y_pred and
        # row i of y_true always describe the same text, even when a key is
        # repeated in test_sample.
        y_pred = create_binary_matrix(themes, [results_dict[key] for key in test_sample])
        y_true = create_binary_matrix(themes, [database[key] for key in test_sample])

        jaccard_index, hamming_loss = evaluate_metrics(y_true, y_pred)
        personalized_loss = personalized_metric(y_true, y_pred)[0]

        prompt_metrics_save(prompt_name, prompt, jaccard_index, hamming_loss, personalized_loss)

In [None]:
def update_columns(prompts):
    """Recompute the personalized metric for stored answers and write it back.

    For every prompt name in ``prompts``, the list returned by
    ``import_table_from_sqlite('data/prompt_metrics.db', 'prompts')`` is
    extended with that prompt name. Each entry of the list is then used as a
    table name in ``data/models_answers.db``: its ``Text``/``output`` columns
    are read, scored with ``personalized_metric``, and the results are written
    with ``add_or_update_column``.

    Relies on the notebook-level globals ``themes``, ``database`` and
    ``test_sample``.

    Args:
        prompts: Mapping whose keys are prompt names (the values are not used).

    Returns:
        None. All results are written through ``add_or_update_column``.
    """
    for prompt_name in prompts:
        tests = import_table_from_sqlite('data/prompt_metrics.db', 'prompts')
        tests.append(prompt_name)

        sdt = []
        ci = []
        metric = []

        for table_name in tests:
            # The table name is interpolated into the SQL text (identifiers
            # cannot be bound as parameters), so it must be a trusted, valid
            # SQLite identifier.
            conn = sqlite3.connect('data/models_answers.db')
            try:
                df = pd.read_sql(f'SELECT * FROM {table_name}', conn)
            finally:
                # Always release the connection, even when the query fails.
                conn.close()

            # Later rows with the same Text overwrite earlier ones.
            results_dict = {row['Text']: row['output'] for _, row in df.iterrows()}

            y_pred = create_binary_matrix(themes, list(results_dict.values()))
            y_true = create_binary_matrix(themes, [database[key] for key in test_sample])

            # Evaluate once and reuse: every value stored below then comes
            # from the same evaluation.
            scores = personalized_metric(y_true, y_pred)
            sdt.append(scores[1])
            ci.append(str(scores[2]))
            metric.append(round(scores[0]))

            per_text_scores = [int(value) for value in scores[-1]]

            add_or_update_column('data/models_answers.db', table_name, 'personalized_metric', 'INTEGER', per_text_scores)

        add_or_update_column('data/prompt_metrics.db', 'prompts', 'SDT', 'INTEGER', sdt)
        add_or_update_column('data/prompt_metrics.db', 'prompts', 'CI', 'TEXT', ci)
        add_or_update_column('data/prompt_metrics.db', 'prompts', 'personalized_loss', 'REAL', metric)

## Test!

In [6]:
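# Define the prompts to evaluate as {prompt_name: system_instructions} before running the next cell.
# prompts = {}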

In [None]:
# Query the model with every prompt and persist answers and metrics ...
execute_test(
    prompts=prompts,
    test_sample=test_sample,
    database=database,
    themes=themes,
    model='model_name',
)
# ... then recompute the personalized metric for the stored answers.
update_columns(prompts=prompts)