In [None]:
import os
import json
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import pandas as pd
import copy
from IPython.display import display
# Lift pandas' display limits so displayed frames are never truncated
# (the result tables are wide: one column per task).
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)


In [None]:
# Colab only: mount Google Drive at /content/drive, the root that BASE_PATH points into.
from google.colab import drive

drive.mount('/content/drive')

In [None]:
# Root folder on the mounted Drive that holds all benchmark results.
BASE_PATH = "/content/drive/MyDrive/BachelorThesisResults"
# Fixed sub-path inserted between a run folder and its per-task JSON files.
DEFAULT_OFFSET_PATH = "no_model_name_available/no_revision_available"
# Output folder for the generated .tex tables; derived from BASE_PATH so the two cannot drift apart.
GLOBAL_SAVE_DIR = f"{BASE_PATH}/tables"

In [None]:
# Benchmark tasks grouped by category. The order inside each list matters:
# it determines the position number used in the shortened LaTeX column headers.
tasks_dict = {
    "classification": [
        "Banking77Classification",
        "EmotionClassification",
        "TweetSentimentExtractionClassification",
        "AmazonCounterfactualClassification",
        "MassiveIntentClassification",
        "MassiveScenarioClassification",
        "MTOPDomainClassification",
        "MTOPIntentClassification"
    ],
    "clustering": [
        "ArXivHierarchicalClusteringP2P",
        "ArXivHierarchicalClusteringS2S",
        "BiorxivClusteringP2P.v2",
        "BiorxivClusteringS2S.v2",
        "MedrxivClusteringP2P.v2",
        "MedrxivClusteringS2S.v2",
        "RedditClustering.v2",
        "StackExchangeClustering.v2",
        "StackExchangeClusteringP2P.v2",
        "TwentyNewsgroupsClustering.v2",
    ],
    "sts": [
        "BIOSSES",
        "SICK-R",
        "STS12",
        "STS13",
        "STS14",
        "STS15",
        "STS16",
        "STSBenchmark",
        "STS17",
        "STS22",
    ],
    "pairclass": [
        "SprintDuplicateQuestions",
        "TwitterSemEval2015",
        "TwitterURLCorpus",
    ],
    "retrieval": [
        "ArguAna",
        "CQADupstackWebmastersRetrieval",
        "NFCorpus",
    ],
    "rerank": [
        "AskUbuntuDupQuestions",
        "MindSmallReranking",
        "StackOverflowDupQuestions"
    ],
    "summ": [
        "SummEval"
    ],
}

# "all" is the concatenation of every category in declaration order. It is
# derived instead of typed out a second time so that adding or removing a task
# above can never leave the two lists out of sync. It must stay a plain list:
# the loaders compare `tasks == tasks_dict['all']`.
tasks_dict["all"] = [
    task
    for category_tasks in tasks_dict.values()
    for task in category_tasks
]

In [None]:
# Three-letter category codes -> task names, used by replace_table_headers to
# shorten a task column to "<code>\\<1-based position in the list>".
# The lists mirror the categories of tasks_dict; keep both in sync, and keep
# the order stable because the position becomes part of the printed header.
tasks_dict_mapping = {
    "CLA": [
        "Banking77Classification",
        "EmotionClassification",
        "TweetSentimentExtractionClassification",
        "AmazonCounterfactualClassification",
        "MassiveIntentClassification",
        "MassiveScenarioClassification",
        "MTOPDomainClassification",
        "MTOPIntentClassification"
    ],
    "CLU": [
        "ArXivHierarchicalClusteringP2P",
        "ArXivHierarchicalClusteringS2S",
        "BiorxivClusteringP2P.v2",
        "BiorxivClusteringS2S.v2",
        "MedrxivClusteringP2P.v2",
        "MedrxivClusteringS2S.v2",
        "RedditClustering.v2",
        "StackExchangeClustering.v2",
        "StackExchangeClusteringP2P.v2",
        "TwentyNewsgroupsClustering.v2",
    ],
    "STS": [
        "BIOSSES",
        "SICK-R",
        "STS12",
        "STS13",
        "STS14",
        "STS15",
        "STS16",
        "STSBenchmark",
        "STS17",
        "STS22",
    ],
    "PCL": [
        "SprintDuplicateQuestions",
        "TwitterSemEval2015",
        "TwitterURLCorpus",
    ],
    "RET": [
        "ArguAna",
        "CQADupstackWebmastersRetrieval",
        "NFCorpus",
    ],
    "RER": [
        "AskUbuntuDupQuestions",
        "MindSmallReranking",
        "StackOverflowDupQuestions"
    ],
    "SUM": [
        "SummEval"
    ]}

In [None]:
def replace_table_headers(headers, tasks_dict_mapping):
    """Shorten task-name headers to ``<code>`` + two backslashes + ``<position>``.

    Args:
        headers: Iterable of column names.
        tasks_dict_mapping: Mapping of a short category code to its ordered
            list of task names.

    Returns:
        A list with one entry per input header. A header found in one of the
        task lists is replaced by the code of the first category containing it
        followed by its 1-based position in that list; any other header is
        passed through unchanged.
    """
    def _shorten(header):
        for key, tasks in tasks_dict_mapping.items():
            if header in tasks:
                index = tasks.index(header) + 1
                return f"{key}\\\\{index}"
        # Not a known task (e.g. a bookkeeping column): keep as is.
        return header

    return [_shorten(header) for header in headers]

def dataframe_to_latex(df, test_type, model_name, precision=4, make_table_tiny=True, caption="Placeholder", custom_name=None, replace_relative=False):
    """Render ``df`` as a LaTeX table, save it, print it and return the source.

    The caller's frame is not modified. Reads the module-level names
    ``tasks_dict_mapping`` (header shortening) and ``GLOBAL_SAVE_DIR``
    (output root).

    Args:
        df: Frame to render.
        test_type: Table family; used as output sub-folder and in the label.
        model_name: Model identifier; used as output sub-folder, file name
            and in the label.
        precision: Number of decimals used when formatting float cells.
        make_table_tiny: If True, wrap the table in the compact ``\\tiny``
            layout; otherwise in a plain centered ``table`` environment.
        caption: LaTeX source placed inside the caption command. It is used
            by both layouts.
        custom_name: Optional suffix for the output file name.
        replace_relative: If True, strip the ``Relative_`` prefix from column
            names before they are shortened.

    Returns:
        The complete LaTeX source that was written to
        ``GLOBAL_SAVE_DIR/<test_type>/<model_name>/<file>.tex``.
    """
    # Work on a copy so the caller keeps the original headers and values.
    df = df.copy()

    if replace_relative:
        df.columns = [col.replace('Relative_', '') for col in df.columns]

    # Task columns become "<category code>\\<position>".
    df.columns = replace_table_headers(df.columns, tasks_dict_mapping)

    # Abbreviate the bookkeeping columns to keep the table narrow.
    short_names = {
        'Dimensionality': 'd',
        'Quantization Technique': 'q',
        'Inference Layer': 'l',
        'Memory Used (bits)': 'req. bits',
    }
    df.columns = [short_names.get(col, col) for col in df.columns]

    # Shorten the quantization labels as well.
    if 'q' in df.columns:
        df['q'] = df['q'].replace({
            'float32': 'f32',
            'binary': 'bin',
            'int8': 'int'
        })

    def _format_cell(value):
        # Only floats are formatted; ints, strings and None pass through.
        return f"{value:.{precision}f}" if isinstance(value, float) else value

    # DataFrame.applymap was renamed to DataFrame.map in pandas 2.1 and now
    # emits a deprecation warning; use whichever this pandas provides. The
    # check is made on the class so a column named "map" cannot confuse it.
    if hasattr(pd.DataFrame, "map"):
        df = df.map(_format_cell)
    else:
        df = df.applymap(_format_cell)

    # Wrap each header in \makecell and turn underscores and spaces into LaTeX
    # line breaks so long headers stack vertically.
    df.columns = [r'\makecell{' + col.replace('_', r'\\').replace(' ', r'\\') + '}' for col in df.columns]

    # pandas emits a complete tabular environment (including the rules).
    latex_str = df.to_latex(index=False, escape=False)

    table_label = f"table_app:{model_name}_{test_type}"

    if make_table_tiny:
        latex_str = f"""
    \\begin{{table}}[H]
    \\hspace*{{-1.5cm}}
    \\setlength{{\\tabcolsep}}{{1pt}} % Reduce horizontal cell padding
    \\renewcommand{{\\arraystretch}}{{0.75}} % Reduce vertical cell padding
    \\tiny % Smaller font size
    \\setcellgapes{{1pt}} % Reduce the cell gaps set earlier
    \\makegapedcells
    \\centering
    {latex_str.splitlines()[0]} \\\\
    {' '.join(latex_str.splitlines()[1:])}
    \\caption{{{caption}}}
    \\label{{{table_label}}}
    \\end{{table}}
    """
    else:
        # Embed pandas' tabular as it is. The previous template opened a second
        # tabular with a hard-coded column specification around it (invalid
        # LaTeX for any frame) and ignored the caption argument.
        latex_str = f"""
    \\begin{{table}}[ht]
    \\centering
    {latex_str}
    \\caption{{{caption}}}
    \\label{{{table_label}}}
    \\end{{table}}
    """

    directory = os.path.join(GLOBAL_SAVE_DIR, test_type, model_name)
    os.makedirs(directory, exist_ok=True)

    if custom_name:
        file_path = os.path.join(directory, f"{model_name}_{custom_name}.tex")
    else:
        file_path = os.path.join(directory, f"{model_name}.tex")

    with open(file_path, 'w', encoding='utf-8') as f:
        f.write(latex_str)
    print(latex_str)

    return latex_str


In [None]:
# Per-model settings for the models evaluated along one axis (embedding size):
#   path                    - folder that contains the run folders of this model
#   dims                    - evaluated embedding sizes; the largest one is the baseline
#                             of the relative tables
#   quantization_techniques - evaluated precisions
#   offset_path             - sub-path between a run folder and its JSON result files
models_dict = {
    "nomic-embed-text-v1.5": {
        "path": f"{BASE_PATH}/v1/nomic-ai",
        "dims": [768, 512, 256, 128, 64, 32, 16, 8],
        "quantization_techniques": ["float32", "int8", "binary"],
        "offset_path": DEFAULT_OFFSET_PATH
    },
    "mxbai-embed-large-v1": {
        "path": f"{BASE_PATH}/v1/mixedbread-ai",
        "dims": [1024, 512, 256, 128, 64, 32, 16, 8],
        "quantization_techniques": ["float32", "int8", "binary"],
        "offset_path": DEFAULT_OFFSET_PATH
    },
    "stella_en_400M_v5": {
        "path": f"{BASE_PATH}/v1/dunzhang",
        "dims": [8192, 4096, 2048, 1024, 512, 256, 128, 64],
        "quantization_techniques": ["float32", "int8", "binary"],
        "offset_path": DEFAULT_OFFSET_PATH
    }
}


def handle_underscores_for_latex(model_name):
    """Return ``model_name`` with every underscore preceded by a backslash (LaTeX escape)."""
    pieces = model_name.split("_")
    return "\\_".join(pieces)

# Time

In [None]:
def get_caption_time_one_dim(model_name):
    """Return the caption macro call for the per-task evaluation-time table."""
    escaped_name = handle_underscores_for_latex(model_name)
    return f"\\tableCaptionTimeOneDim{{{escaped_name}}}"
def get_caption_time_avg_one_dim(model_name):
    """Return the caption macro call for the averaged evaluation-time table."""
    escaped_name = handle_underscores_for_latex(model_name)
    return f"\\tableCaptionTimeAvgOneDim{{{escaped_name}}}"
def get_caption_time_relative_one_dim(model_name):
    """Return the caption macro call for the relative evaluation-time table."""
    escaped_name = handle_underscores_for_latex(model_name)
    return f"\\tableCaptionTimeRelativeOneDim{{{escaped_name}}}"
def get_caption_time_avg_relative_one_dim(model_name):
    """Return the caption macro call for the averaged relative evaluation-time table."""
    escaped_name = handle_underscores_for_latex(model_name)
    return f"\\tableCaptionTimeAvgRelativeOneDim{{{escaped_name}}}"

def get_all_eval_times(base_path, model_name, dims, quantization_techniques, tasks, offset_path):
    """Collect the ``evaluation_time`` of every task for each (technique, dim) run.

    Result files are read from
    ``<base_path>/<model_name>_<dim>_<technique>/<offset_path>/<task>.json``.

    Returns:
        A list of dicts, one per (technique, dim) pair, holding
        'Quantization Technique', 'Dimensionality' and one entry per task
        (``None`` when the file or the key is missing; the problem is printed).
        When ``tasks`` equals the module-level ``tasks_dict['all']`` each dict
        also gets 'Total_Time', the sum of the times that could be read.

    Note:
        A later cell of this notebook defines a function with the same name
        and an extra ``inference_layers`` parameter; once that cell has run,
        it replaces this one.
    """
    rows = []

    for technique in quantization_techniques:
        for dim in dims:
            record = {'Quantization Technique': technique, 'Dimensionality': dim}
            elapsed_sum = 0
            # The run folder only depends on (dim, technique), not on the task.
            subfolder = f"{model_name}_{dim}_{technique}"

            for task in tasks:
                file_path = os.path.join(base_path, subfolder, offset_path, f"{task}.json")
                try:
                    with open(file_path, 'r') as handle:
                        payload = json.load(handle)
                        seconds = payload["evaluation_time"]
                        record[task] = seconds
                        elapsed_sum += seconds
                except (FileNotFoundError, KeyError) as e:
                    print(f"Error reading file {file_path}: {e}")
                    record[task] = None

            # Only a run over the complete task list gets a total.
            if 'all' in tasks_dict and tasks == tasks_dict['all']:
                record['Total_Time'] = elapsed_sum

            rows.append(record)

    return rows

def calculate_averages(df, tasks_dict):
    """Add row-wise mean columns to ``df`` in place and return it.

    'Average_All_Tasks' is the mean over the columns listed in
    ``tasks_dict['all']``; every other key of ``tasks_dict`` yields an
    'Average_<key>' column with the mean over that key's columns.
    """
    df['Average_All_Tasks'] = df[tasks_dict['all']].mean(axis=1)
    per_category = [(name, members) for name, members in tasks_dict.items() if name != 'all']
    for name, members in per_category:
        df[f'Average_{name}'] = df[members].mean(axis=1)
    return df

def generate_pandas_dataframe_eval_times(base_path, model_name, dims, quantization_techniques, tasks, offset_path, tasks_dict):
    """Build the per-task evaluation-time table, write it as LaTeX and return it.

    Args:
        base_path, model_name, dims, quantization_techniques, tasks,
        offset_path: Forwarded to ``get_all_eval_times``.
        tasks_dict: Not used here; kept so all table builders share one
            signature.

    Returns:
        DataFrame with one row per (technique, dim) and one column per task,
        rounded to 4 decimals, without the 'Total_Time' helper column.

    Note:
        A later cell of this notebook defines a function with the same name
        and an extra ``inference_layers`` parameter; once that cell has run,
        it replaces this one.
    """
    scores = get_all_eval_times(base_path, model_name, dims, quantization_techniques, tasks, offset_path)
    df = pd.DataFrame(scores)
    # Report the grand total BEFORE dropping the helper column. The column used
    # to be dropped first, which made this report unreachable.
    if 'Total_Time' in df.columns:
        total_evaluation_time = df['Total_Time'].sum()
        print(f"Total evaluation time needed for the entire model: {total_evaluation_time:.2f} seconds")
        df = df.drop(columns=['Total_Time'])
    df = df.round(4)
    dataframe_to_latex(df=df, test_type="Time", model_name=model_name, precision=2, caption=get_caption_time_one_dim(model_name))
    return df


def generate_pandas_dataframe_eval_times_average(base_path, model_name, dims, quantization_techniques, tasks, offset_path, tasks_dict):
    """Build the table of averaged evaluation times, write it as LaTeX and return it.

    The returned frame holds 'Quantization Technique', 'Dimensionality', every
    'Average_*' column produced by ``calculate_averages`` and, when present,
    'Total_Time', all rounded to 4 decimals.

    Note:
        A later cell of this notebook defines a function with the same name
        and an extra ``inference_layers`` parameter; once that cell has run,
        it replaces this one.
    """
    raw_rows = get_all_eval_times(base_path, model_name, dims, quantization_techniques, tasks, offset_path)
    df = calculate_averages(pd.DataFrame(raw_rows), tasks_dict)

    selected = ['Quantization Technique', 'Dimensionality']
    selected += [col for col in df.columns if col == "Total_Time" or col.startswith('Average_')]
    df_avg_only = df[selected].round(4)

    if 'Total_Time' in df.columns:
        total_evaluation_time = df['Total_Time'].sum()
        print(f"Total evaluation time needed for the entire model: {total_evaluation_time:.2f} seconds")

    dataframe_to_latex(
        df=df_avg_only,
        test_type="TimeAvg",
        model_name=model_name,
        precision=2,
        caption=get_caption_time_avg_one_dim(model_name),
    )
    return df_avg_only

def generate_pandas_dataframe_relative_eval_times(base_path, model_name, dims, quantization_techniques, tasks, offset_path, tasks_dict):
    """Build the table of evaluation times relative to the baseline run.

    The baseline is the row with the largest value of ``dims`` and the
    'float32' technique; each task time is divided by the baseline time of the
    same task. The result (rounded to 4 decimals) is written as LaTeX and
    returned with 'Relative_<task>' columns. ``tasks_dict`` is not used here.
    """
    df = pd.DataFrame(get_all_eval_times(base_path, model_name, dims, quantization_techniques, tasks, offset_path))

    is_baseline = (df['Dimensionality'] == max(dims)) & (df['Quantization Technique'] == 'float32')
    baseline_df = df[is_baseline]

    # Resolve every baseline first, then add the ratio columns.
    baseline_times = {
        task: (baseline_df[task].values[0] if task in baseline_df.columns else None)
        for task in tasks
    }
    for task in tasks:
        if task not in df.columns:
            continue
        df[f'Relative_{task}'] = df[task] / baseline_times[task]

    kept = ['Quantization Technique', 'Dimensionality']
    kept += [col for col in df.columns if col.startswith('Relative_')]
    df_relative_times = df[kept].round(4)

    dataframe_to_latex(
        df=df_relative_times,
        test_type="RelativeTime",
        model_name=model_name,
        precision=3,
        caption=get_caption_time_relative_one_dim(model_name),
        replace_relative=True,
    )
    return df_relative_times

def generate_pandas_dataframe_eval_times_average_relative(base_path, model_name, dims, quantization_techniques, tasks, offset_path, tasks_dict):
    """Average the relative evaluation times per entry of ``tasks_dict``.

    Calls ``generate_pandas_dataframe_relative_eval_times`` first (which also
    writes the relative table), then adds one 'Average_Relative_<category>'
    column for every category that has at least one 'Relative_<task>' column.
    The averaged table (rounded to 4 decimals) is written as LaTeX and returned.
    """
    df_relative_times = generate_pandas_dataframe_relative_eval_times(
        base_path, model_name, dims, quantization_techniques, tasks, offset_path, tasks_dict
    )

    averaged_names = []
    for category, task_list in tasks_dict.items():
        candidates = (f'Relative_{task}' for task in task_list)
        present = [name for name in candidates if name in df_relative_times.columns]
        if not present:
            continue
        column_name = f'Average_Relative_{category}'
        df_relative_times[column_name] = df_relative_times[present].mean(axis=1)
        averaged_names.append(column_name)

    df_avg_relative_only = df_relative_times[['Quantization Technique', 'Dimensionality'] + averaged_names].round(4)
    dataframe_to_latex(
        df=df_avg_relative_only,
        test_type="AverageRelativeTime",
        model_name=model_name,
        precision=3,
        caption=get_caption_time_avg_relative_one_dim(model_name),
    )
    return df_avg_relative_only



## Stella en_400M_v5

In [None]:
# Select the model for the following time tables; all settings are looked up in models_dict.
MODEL_NAME = "stella_en_400M_v5"
MODEL_OFFSET_PATH = models_dict[MODEL_NAME]["offset_path"]
MODEL_BASE_PATH = models_dict[MODEL_NAME]["path"]
dims = models_dict[MODEL_NAME]["dims"]
quantization_techniques = models_dict[MODEL_NAME]["quantization_techniques"]

In [None]:
# Per-task evaluation times of the selected model (the builder also writes the LaTeX table).
df_time_stella = generate_pandas_dataframe_eval_times(MODEL_BASE_PATH, MODEL_NAME, dims, quantization_techniques, tasks_dict["all"], MODEL_OFFSET_PATH, tasks_dict)
display(df_time_stella)

### Average

In [None]:
# Evaluation times averaged per task category (the builder also writes the LaTeX table).
df_time_stella_avg = generate_pandas_dataframe_eval_times_average(MODEL_BASE_PATH, MODEL_NAME, dims, quantization_techniques, tasks_dict["all"], MODEL_OFFSET_PATH, tasks_dict)
display(df_time_stella_avg)

### Relative

In [None]:
# Evaluation times divided by the float32 / largest-dimension baseline.
df_relative_time_stella = generate_pandas_dataframe_relative_eval_times(MODEL_BASE_PATH, MODEL_NAME, dims, quantization_techniques, tasks_dict["all"], MODEL_OFFSET_PATH, tasks_dict)
display(df_relative_time_stella)

### Average Relative

In [None]:
# Relative evaluation times averaged per category; this call rebuilds (and rewrites) the relative table first.
df_avg_relative_time_stella = generate_pandas_dataframe_eval_times_average_relative(MODEL_BASE_PATH, MODEL_NAME, dims, quantization_techniques, tasks_dict["all"], MODEL_OFFSET_PATH, tasks_dict)
display(df_avg_relative_time_stella)

## MixedBread-AI V1

In [None]:
# Select the model for the following time tables; all settings are looked up in models_dict.
MODEL_NAME = "mxbai-embed-large-v1"
MODEL_OFFSET_PATH = models_dict[MODEL_NAME]["offset_path"]
MODEL_BASE_PATH = models_dict[MODEL_NAME]["path"]
dims = models_dict[MODEL_NAME]["dims"]
quantization_techniques = models_dict[MODEL_NAME]["quantization_techniques"]

In [None]:
# Per-task evaluation times of the selected model (the builder also writes the LaTeX table).
df_time_mbai = generate_pandas_dataframe_eval_times(MODEL_BASE_PATH, MODEL_NAME, dims, quantization_techniques, tasks_dict["all"], MODEL_OFFSET_PATH, tasks_dict)
display(df_time_mbai)

### Average

In [None]:
# Evaluation times averaged per task category (the builder also writes the LaTeX table).
df_time_mbai_avg = generate_pandas_dataframe_eval_times_average(MODEL_BASE_PATH, MODEL_NAME, dims, quantization_techniques, tasks_dict["all"], MODEL_OFFSET_PATH, tasks_dict)
display(df_time_mbai_avg)

### Relative

In [None]:
# Evaluation times divided by the float32 / largest-dimension baseline.
df_relative_time_mbai = generate_pandas_dataframe_relative_eval_times(MODEL_BASE_PATH, MODEL_NAME, dims, quantization_techniques, tasks_dict["all"], MODEL_OFFSET_PATH, tasks_dict)
display(df_relative_time_mbai)

### Average Relative

In [None]:
# Relative evaluation times averaged per category; this call rebuilds (and rewrites) the relative table first.
df_avg_relative_time_mbai = generate_pandas_dataframe_eval_times_average_relative(MODEL_BASE_PATH, MODEL_NAME, dims, quantization_techniques, tasks_dict["all"], MODEL_OFFSET_PATH, tasks_dict)
display(df_avg_relative_time_mbai)

## Nomic-embed-text-v1.5

In [None]:
# Select the model for the following time tables; all settings are looked up in models_dict.
MODEL_NAME = "nomic-embed-text-v1.5"
MODEL_BASE_PATH = models_dict[MODEL_NAME]["path"]
MODEL_OFFSET_PATH = models_dict[MODEL_NAME]["offset_path"]
dims = models_dict[MODEL_NAME]["dims"]
quantization_techniques = models_dict[MODEL_NAME]["quantization_techniques"]

In [None]:
# Per-task evaluation times of the selected model (the builder also writes the LaTeX table).
df_time_nomic = generate_pandas_dataframe_eval_times(MODEL_BASE_PATH, MODEL_NAME, dims, quantization_techniques, tasks_dict["all"], MODEL_OFFSET_PATH, tasks_dict)
display(df_time_nomic)

### Average

In [None]:
# Evaluation times averaged per task category (the builder also writes the LaTeX table).
df_time_nomic_avg = generate_pandas_dataframe_eval_times_average(MODEL_BASE_PATH, MODEL_NAME, dims, quantization_techniques, tasks_dict["all"], MODEL_OFFSET_PATH, tasks_dict)
display(df_time_nomic_avg)

### Relative

In [None]:
# Evaluation times divided by the float32 / largest-dimension baseline.
df_relative_time_nomic = generate_pandas_dataframe_relative_eval_times(MODEL_BASE_PATH, MODEL_NAME, dims, quantization_techniques, tasks_dict["all"], MODEL_OFFSET_PATH, tasks_dict)
display(df_relative_time_nomic)

### Average Relative

In [None]:
# Relative evaluation times averaged per category; this call rebuilds (and rewrites) the relative table first.
df_avg_relative_time_nomic = generate_pandas_dataframe_eval_times_average_relative(MODEL_BASE_PATH, MODEL_NAME, dims, quantization_techniques, tasks_dict["all"], MODEL_OFFSET_PATH, tasks_dict)
display(df_avg_relative_time_nomic)

# Accuracy Compute TradeOff

In [None]:
def get_caption_memory_one_dim(model_name):
    """Return the caption macro call for the per-task score/memory table."""
    escaped_name = handle_underscores_for_latex(model_name)
    return f"\\tableCaptionMemoryOneDim{{{escaped_name}}}"
def get_caption_memory_avg_one_dim(model_name):
    """Return the caption macro call for the averaged score/memory table."""
    escaped_name = handle_underscores_for_latex(model_name)
    return f"\\tableCaptionMemoryAvgOneDim{{{escaped_name}}}"
def get_caption_memory_relative_one_dim(model_name):
    """Return the caption macro call for the relative score/memory table."""
    escaped_name = handle_underscores_for_latex(model_name)
    return f"\\tableCaptionMemoryRelativeOneDim{{{escaped_name}}}"
def get_caption_memory_avg_relative_one_dim(model_name):
    """Return the caption macro call for the averaged relative score/memory table."""
    escaped_name = handle_underscores_for_latex(model_name)
    return f"\\tableCaptionMemoryAvgRelativeOneDim{{{escaped_name}}}"

def get_all_scores_accuracy_compute(base_path, model_name, dims, quantization_techniques, tasks, offset_path):
    """Collect the main test score of every task for each (technique, dim) run.

    Result files are read from
    ``<base_path>/<model_name>_<dim>_<technique>/<offset_path>/<task>.json``;
    the score is taken from ``scores -> test -> first entry -> main_score``.

    Returns:
        A list of dicts, one per (technique, dim) pair, holding
        'Quantization Technique', 'Dimensionality', 'Memory Used (bits)' (from
        ``calculate_memory_used``) and one entry per task (``None`` when the
        file or a key is missing; the problem is printed).
    """
    rows = []

    for technique in quantization_techniques:
        for dim in dims:
            record = {
                'Quantization Technique': technique,
                'Dimensionality': dim,
                'Memory Used (bits)': calculate_memory_used(dim, technique)
            }
            # The run folder only depends on (dim, technique), not on the task.
            subfolder = f"{model_name}_{dim}_{technique}"
            for task in tasks:
                file_path = os.path.join(base_path, subfolder, offset_path, f"{task}.json")
                try:
                    with open(file_path, 'r') as handle:
                        payload = json.load(handle)
                        record[task] = payload["scores"]["test"][0]["main_score"]
                except (FileNotFoundError, KeyError) as e:
                    print(f"Error reading file {file_path}: {e}")
                    record[task] = None

            rows.append(record)

    return rows

def calculate_memory_used(embedding_size, quantization_method):
    """Return the number of bits one embedding occupies.

    Each of the ``embedding_size`` components takes 32 bits for 'float32',
    8 bits for 'int8' and 1 bit for 'binary'.

    Raises:
        ValueError: If ``quantization_method`` is none of the three names.
    """
    bits_per_component = (('float32', 32), ('int8', 8), ('binary', 1))
    for method_name, bits in bits_per_component:
        if quantization_method == method_name:
            return embedding_size * bits
    raise ValueError(f"Unknown quantization method: {quantization_method}")

def calculate_averages(df, tasks_dict):
    """Add 'Average_All_Tasks' (row mean over ``tasks_dict['all']``) to ``df`` in place and return it.

    Note:
        An earlier cell of this notebook defines a function with the same name
        that additionally adds per-category averages; running this cell
        replaces it.
    """
    row_means = df.loc[:, tasks_dict['all']].mean(axis=1)
    df['Average_All_Tasks'] = row_means
    return df

def calculate_tradeoff_metrics(df):
    """Add the two memory/accuracy trade-off columns to ``df`` in place and return it.

    'MAP' is memory * (1 - average score); 'NMAT_Average' is memory divided by
    the largest memory in the frame, plus (1 - average score). Memory is read
    from 'Memory Used (bits)', the score from 'Average_All_Tasks'.
    """
    memory_bits = df['Memory Used (bits)']
    score_gap = 1 - df['Average_All_Tasks']
    df['MAP'] = memory_bits * score_gap
    df['NMAT_Average'] = memory_bits / memory_bits.max() + score_gap
    return df

def generate_pandas_dataframe_accuracy_compute(base_path, model_name, dims, quantization_techniques, tasks, offset_path, tasks_dict):
    """Build the per-task score table (with memory column), write it as LaTeX and return it.

    ``tasks_dict`` is not used here. The returned frame is rounded to 4 decimals.
    """
    raw_rows = get_all_scores_accuracy_compute(base_path, model_name, dims, quantization_techniques, tasks, offset_path)
    df = pd.DataFrame(raw_rows).round(4)
    dataframe_to_latex(
        df=df,
        test_type="Memory",
        model_name=model_name,
        precision=3,
        caption=get_caption_memory_one_dim(model_name),
    )
    return df


def get_all_scores_accuracy_compute_averages(base_path, model_name, dims, quantization_techniques, tasks, offset_path, tasks_dict):
    """Average the main test scores per entry of ``tasks_dict`` for each (technique, dim) run.

    Result files are read from
    ``<base_path>/<model_name>_<dim>_<technique>/<offset_path>/<task>.json``.
    A task whose file or score key is missing is reported and left out of the
    averages.

    Returns:
        A list of dicts, one per (technique, dim) pair, holding
        'Quantization Technique', 'Dimensionality', 'Memory Used (bits)' and
        one 'Average_<category>' per key of ``tasks_dict`` (``None`` when no
        score of that category could be read).
    """
    scores = []
    for technique in quantization_techniques:
        for dim in dims:
            row = {
                'Quantization Technique': technique,
                'Dimensionality': dim,
                'Memory Used (bits)': calculate_memory_used(dim, technique)
            }
            # The run folder only depends on (dim, technique); build it once.
            subfolder = f"{model_name}_{dim}_{technique}"
            task_category_averages = {category: [] for category in tasks_dict}
            for task in tasks:
                file_path = os.path.join(base_path, subfolder, offset_path, f"{task}.json")
                try:
                    with open(file_path, 'r') as file:
                        data = json.load(file)
                    main_score = data["scores"]["test"][0]["main_score"]
                except (FileNotFoundError, KeyError) as e:
                    print(f"Error reading file {file_path}: {e}")
                    continue
                # A task counts towards every category that lists it.
                for category, task_list in tasks_dict.items():
                    if task in task_list:
                        task_category_averages[category].append(main_score)
            for category, scores_list in task_category_averages.items():
                row[f'Average_{category}'] = sum(scores_list) / len(scores_list) if scores_list else None
            scores.append(row)
    return scores

def generate_pandas_dataframe_accuracy_compute_averages(base_path, model_name, dims, quantization_techniques, tasks, offset_path, tasks_dict):
    """Build the table of per-category average scores, write it as LaTeX and return it (rounded to 4 decimals)."""
    raw_rows = get_all_scores_accuracy_compute_averages(
        base_path, model_name, dims, quantization_techniques, tasks, offset_path, tasks_dict
    )
    df = pd.DataFrame(raw_rows).round(4)
    dataframe_to_latex(
        df=df,
        test_type="MemoryAvg",
        model_name=model_name,
        precision=3,
        caption=get_caption_memory_avg_one_dim(model_name),
    )
    return df


def generate_pandas_dataframe_relative_scores(base_path, model_name, dims, quantization_techniques, tasks, offset_path, tasks_dict):
    """Build the table of scores relative to the baseline run.

    The baseline is the row with the largest value of ``dims`` and the
    'float32' technique; each task score is divided by the baseline score of
    the same task. The result (rounded to 4 decimals) is written as LaTeX and
    returned with 'Relative_<task>' columns. ``tasks_dict`` is not used here.
    """
    df = pd.DataFrame(get_all_scores_accuracy_compute(base_path, model_name, dims, quantization_techniques, tasks, offset_path))

    is_baseline = (df['Dimensionality'] == max(dims)) & (df['Quantization Technique'] == 'float32')
    baseline_df = df[is_baseline]

    # Resolve every baseline first, then add the ratio columns.
    baseline_scores = {
        task: (baseline_df[task].values[0] if task in baseline_df.columns else None)
        for task in tasks
    }
    for task in tasks:
        if task not in df.columns:
            continue
        df[f'Relative_{task}'] = df[task] / baseline_scores[task]

    kept = ['Quantization Technique', 'Dimensionality', 'Memory Used (bits)']
    kept += [col for col in df.columns if col.startswith('Relative_')]
    df_relative_scores = df[kept].round(4)

    dataframe_to_latex(
        df=df_relative_scores,
        test_type="RelativeScores",
        model_name=model_name,
        precision=3,
        caption=get_caption_memory_relative_one_dim(model_name),
        replace_relative=True,
    )
    return df_relative_scores


def generate_pandas_dataframe_scores_average_relative(base_path, model_name, dims, quantization_techniques, tasks, offset_path, tasks_dict):
    """Average the relative scores per entry of ``tasks_dict``.

    Calls ``generate_pandas_dataframe_relative_scores`` first (which also
    writes the relative table), then adds one 'Average_Relative_<category>'
    column for every category that has at least one 'Relative_<task>' column.
    The averaged table (rounded to 4 decimals) is written as LaTeX and returned.
    """
    df_relative_scores = generate_pandas_dataframe_relative_scores(
        base_path, model_name, dims, quantization_techniques, tasks, offset_path, tasks_dict
    )

    for category, task_list in tasks_dict.items():
        candidates = (f'Relative_{task}' for task in task_list)
        present = [name for name in candidates if name in df_relative_scores.columns]
        if not present:
            continue
        df_relative_scores[f'Average_Relative_{category}'] = df_relative_scores[present].mean(axis=1)

    kept = ['Quantization Technique', 'Dimensionality', 'Memory Used (bits)']
    kept += [col for col in df_relative_scores.columns if col.startswith('Average_Relative_')]
    df_avg_relative_only = df_relative_scores[kept].round(4)

    dataframe_to_latex(
        df=df_avg_relative_only,
        test_type="AverageRelativeScores",
        model_name=model_name,
        precision=3,
        caption=get_caption_memory_avg_relative_one_dim(model_name),
    )
    return df_avg_relative_only

## Stella en_400M_v5

In [None]:
# Select the model for the following score/memory tables; all settings are looked up in models_dict.
MODEL_NAME = "stella_en_400M_v5"
MODEL_OFFSET_PATH = models_dict[MODEL_NAME]["offset_path"]
MODEL_BASE_PATH = models_dict[MODEL_NAME]["path"]
dims = models_dict[MODEL_NAME]["dims"]
quantization_techniques = models_dict[MODEL_NAME]["quantization_techniques"]

In [None]:
# Per-task scores with the memory needed per embedding (the builder also writes the LaTeX table).
df_mem_stella = generate_pandas_dataframe_accuracy_compute(MODEL_BASE_PATH, MODEL_NAME, dims, quantization_techniques, tasks_dict["all"], MODEL_OFFSET_PATH, tasks_dict)
display(df_mem_stella)

### Average

In [None]:
# Scores averaged per task category (the builder also writes the LaTeX table).
df_mem_stella_avg = generate_pandas_dataframe_accuracy_compute_averages(MODEL_BASE_PATH, MODEL_NAME, dims, quantization_techniques, tasks_dict["all"], MODEL_OFFSET_PATH, tasks_dict)
display(df_mem_stella_avg)

### Relative

In [None]:
# Scores divided by the float32 / largest-dimension baseline.
df_relative_scores_stella = generate_pandas_dataframe_relative_scores(MODEL_BASE_PATH, MODEL_NAME, dims, quantization_techniques, tasks_dict["all"], MODEL_OFFSET_PATH, tasks_dict)
display(df_relative_scores_stella)

### Average Relative

In [None]:
# Relative scores averaged per category; this call rebuilds (and rewrites) the relative table first.
df_avg_relative_scores_stella = generate_pandas_dataframe_scores_average_relative(MODEL_BASE_PATH, MODEL_NAME, dims, quantization_techniques, tasks_dict["all"], MODEL_OFFSET_PATH, tasks_dict)
display(df_avg_relative_scores_stella)

## MixedBread-AI V1

In [None]:
# Select the model for the following score/memory tables; all settings are looked up in models_dict.
MODEL_NAME = "mxbai-embed-large-v1"
MODEL_OFFSET_PATH = models_dict[MODEL_NAME]["offset_path"]
MODEL_BASE_PATH = models_dict[MODEL_NAME]["path"]
dims = models_dict[MODEL_NAME]["dims"]
quantization_techniques = models_dict[MODEL_NAME]["quantization_techniques"]

In [None]:
# Per-task scores with the memory needed per embedding (the builder also writes the LaTeX table).
df_mem_mbai = generate_pandas_dataframe_accuracy_compute(MODEL_BASE_PATH, MODEL_NAME, dims, quantization_techniques, tasks_dict["all"], MODEL_OFFSET_PATH, tasks_dict)
display(df_mem_mbai)

### Average

In [None]:
# Scores averaged per task category (the builder also writes the LaTeX table).
df_mem_mbai_avg = generate_pandas_dataframe_accuracy_compute_averages(MODEL_BASE_PATH, MODEL_NAME, dims, quantization_techniques, tasks_dict["all"], MODEL_OFFSET_PATH, tasks_dict)
display(df_mem_mbai_avg)

### Relative

In [None]:
# Scores divided by the float32 / largest-dimension baseline.
df_relative_scores_mbai = generate_pandas_dataframe_relative_scores(MODEL_BASE_PATH, MODEL_NAME, dims, quantization_techniques, tasks_dict["all"], MODEL_OFFSET_PATH, tasks_dict)
display(df_relative_scores_mbai)

### Average Relative

In [None]:
# Relative scores averaged per category; this call rebuilds (and rewrites) the relative table first.
df_avg_relative_scores_mbai = generate_pandas_dataframe_scores_average_relative(MODEL_BASE_PATH, MODEL_NAME, dims, quantization_techniques, tasks_dict["all"], MODEL_OFFSET_PATH, tasks_dict)
display(df_avg_relative_scores_mbai)

## Nomic-embed-text-v1.5

In [None]:
# Select the model for the following score/memory tables; all settings are looked up in models_dict.
MODEL_NAME = "nomic-embed-text-v1.5"
MODEL_BASE_PATH = models_dict[MODEL_NAME]["path"]
MODEL_OFFSET_PATH = models_dict[MODEL_NAME]["offset_path"]
dims = models_dict[MODEL_NAME]["dims"]
quantization_techniques = models_dict[MODEL_NAME]["quantization_techniques"]

In [None]:
# Per-task scores with the memory needed per embedding (the builder also writes the LaTeX table).
df_mem_nomic = generate_pandas_dataframe_accuracy_compute(MODEL_BASE_PATH, MODEL_NAME, dims, quantization_techniques, tasks_dict["all"], MODEL_OFFSET_PATH, tasks_dict)
display(df_mem_nomic)

### Average

In [None]:
# Scores averaged per task category (the builder also writes the LaTeX table).
df_mem_nomic_avg = generate_pandas_dataframe_accuracy_compute_averages(MODEL_BASE_PATH, MODEL_NAME, dims, quantization_techniques, tasks_dict["all"], MODEL_OFFSET_PATH, tasks_dict)
display(df_mem_nomic_avg)

### Relative

In [None]:
# Scores divided by the float32 / largest-dimension baseline.
df_relative_scores_nomic = generate_pandas_dataframe_relative_scores(MODEL_BASE_PATH, MODEL_NAME, dims, quantization_techniques, tasks_dict["all"], MODEL_OFFSET_PATH, tasks_dict)
display(df_relative_scores_nomic)

### Average Relative

In [None]:
# Relative scores averaged per category; this call rebuilds (and rewrites) the relative table first.
df_avg_relative_scores_nomic = generate_pandas_dataframe_scores_average_relative(MODEL_BASE_PATH, MODEL_NAME, dims, quantization_techniques, tasks_dict["all"], MODEL_OFFSET_PATH, tasks_dict)
display(df_avg_relative_scores_nomic)

# 2D MixedBread Model

In [None]:
# Settings for the 2D model. It has no entry in models_dict, so everything is spelled out here.
MODEL_NAME = "mxbai-embed-2d-large-v1"
MODEL_OFFSET_PATH = DEFAULT_OFFSET_PATH
MODEL_BASE_PATH = f"{BASE_PATH}/2D/mixedbread-ai"
dims = [1024, 512, 256, 128, 64, 32, 16, 8]
quantization_techniques = ["float32", "int8", "binary"]
# Second evaluation axis; the value is the last component of each run-folder name.
# NOTE(review): the relative-time builder takes the FIRST entry as baseline layer, so keep
# the full-depth layer first — presumably 24 is the model's last layer, TODO confirm.
INFERENCE_LAYERS = [24, 20, 16, 12]

## Time

In [None]:
def get_caption_time_two_dim(model_name, quantization_technique):
    """Return the caption macro call (two arguments: model, technique) for a 2D per-task time table."""
    return "\\tableCaptionTimeTwoDim{{{}}}{{{}}}".format(model_name, quantization_technique)
def get_caption_time_avg_two_dim(model_name):
    """Return the caption macro call for the 2D averaged time table."""
    return "\\tableCaptionTimeAvgTwoDim{{{}}}".format(model_name)
def get_caption_time_relative_two_dim(model_name, quantization_techniques):
    """Return the caption macro call (two arguments: model, technique) for a 2D relative time table."""
    return "\\tableCaptionTimeRelativeTwoDim{{{}}}{{{}}}".format(model_name, quantization_techniques)
def get_caption_time_avg_relative_two_dim(model_name):
    """Return the caption macro call for the 2D averaged relative time table."""
    return "\\tableCaptionTimeAvgRelativeTwoDim{{{}}}".format(model_name)



def get_all_eval_times(base_path, model_name, dims, quantization_techniques, tasks, inference_layers, offset_path):
    """Collect the ``evaluation_time`` of every task for each (technique, dim, layer) run.

    Result files are read from
    ``<base_path>/<model_name>_<dim>_<technique>_<layer>/<offset_path>/<task>.json``.

    Returns:
        A list of dicts, one per (technique, dim, layer) combination, holding
        'Quantization Technique', 'Dimensionality', 'Inference Layer' and one
        entry per task (``None`` when the file or the key is missing; the
        problem is printed). When ``tasks`` equals the module-level
        ``tasks_dict['all']`` each dict also gets 'Total_Time'.

    Note:
        Defining this function replaces the earlier six-argument
        ``get_all_eval_times`` of this notebook.
    """
    rows = []
    for technique in quantization_techniques:
        for dim in dims:
            for layer in inference_layers:
                record = {'Quantization Technique': technique, 'Dimensionality': dim, 'Inference Layer': layer}
                elapsed_sum = 0
                # The run folder does not depend on the task.
                subfolder = f"{model_name}_{dim}_{technique}_{layer}"
                for task in tasks:
                    file_path = os.path.join(base_path, subfolder, offset_path, f"{task}.json")
                    try:
                        with open(file_path, 'r') as handle:
                            payload = json.load(handle)
                            seconds = payload["evaluation_time"]
                            record[task] = seconds
                            elapsed_sum += seconds
                    except (FileNotFoundError, KeyError) as e:
                        print(f"Error reading file {file_path}: {e}")
                        record[task] = None
                # Only a run over the complete task list gets a total.
                if 'all' in tasks_dict and tasks == tasks_dict['all']:
                    record['Total_Time'] = elapsed_sum
                rows.append(record)
    return rows

def generate_pandas_dataframe_eval_times(base_path, model_name, dims, quantization_techniques, tasks, inference_layers, offset_path, tasks_dict):
    """Build the 2D per-task evaluation-time table and write one LaTeX file per technique.

    The frame is rounded to 2 decimals; if it has a 'Total_Time' column, the
    sum is printed and the column removed. The rows of 'float32', 'int8' and
    'binary' are then written as separate tables with the file suffixes
    'float', 'int' and 'binary'. ``tasks_dict`` is not used here.

    Returns:
        The complete (unsplit) frame.

    Note:
        Defining this function replaces the earlier seven-argument function of
        the same name in this notebook.
    """
    rows = get_all_eval_times(base_path, model_name, dims, quantization_techniques, tasks, inference_layers, offset_path)
    df = pd.DataFrame(rows).round(2)

    if 'Total_Time' in df.columns:
        total_evaluation_time = df['Total_Time'].sum()
        print(f"Total evaluation time needed for the entire model: {total_evaluation_time:.2f} seconds")
        df = df.drop(columns=['Total_Time'])

    # (value in the frame, short name used for caption and file suffix)
    for technique, short_name in (('float32', "float"), ('int8', "int"), ('binary', "binary")):
        dataframe_to_latex(
            df=df[df['Quantization Technique'] == technique],
            test_type="Time",
            model_name=model_name,
            precision=2,
            caption=get_caption_time_two_dim(model_name, short_name),
            custom_name=short_name,
        )
    return df


def calculate_averages(df, tasks_dict):
    """Append row-wise mean columns to ``df`` (modified in place) and return it.

    ``Average_All_Tasks`` is the mean over the columns listed in
    ``tasks_dict['all']``; every other key of ``tasks_dict`` yields an
    ``Average_<key>`` column computed over that key's task columns.
    """
    overall_columns = tasks_dict['all']
    df['Average_All_Tasks'] = df[overall_columns].mean(axis=1)

    for group_name, group_columns in tasks_dict.items():
        if group_name == 'all':
            # Already covered by Average_All_Tasks.
            continue
        df[f'Average_{group_name}'] = df[group_columns].mean(axis=1)
    return df

def generate_pandas_dataframe_eval_times_average(base_path, model_name, dims, quantization_techniques, tasks, inference_layers, offset_path, tasks_dict):
    """Build the table of averaged evaluation times and hand it to ``dataframe_to_latex``.

    The rows from ``get_all_eval_times`` are rounded to two decimals and passed
    through ``calculate_averages``; the result keeps the three configuration
    columns plus every column whose name starts with ``Average_``.

    Returns:
        The frame restricted to configuration and ``Average_*`` columns.
    """
    raw_rows = get_all_eval_times(base_path, model_name, dims, quantization_techniques, tasks, inference_layers, offset_path)
    timings = calculate_averages(pd.DataFrame(raw_rows).round(2), tasks_dict)

    config_columns = ['Quantization Technique', 'Dimensionality', 'Inference Layer']
    average_columns = [name for name in timings.columns if name.startswith('Average_')]
    df_avg_only = timings[config_columns + average_columns]

    if 'Total_Time' in timings.columns:
        total_evaluation_time = timings['Total_Time'].sum()
        print(f"Total evaluation time needed for the entire model: {total_evaluation_time:.2f} seconds")

    dataframe_to_latex(
        df=df_avg_only,
        test_type="TimeAvg",
        model_name=model_name,
        precision=2,
        caption=get_caption_time_avg_two_dim(model_name),
    )
    return df_avg_only

def generate_pandas_dataframe_relative_eval_times(base_path, model_name, dims, quantization_techniques, tasks, inference_layers, offset_path, tasks_dict):
    """Express every task's evaluation time relative to a float32 baseline row.

    The baseline is the row with the largest entry of ``dims``, quantization
    technique ``'float32'`` and the first entry of ``inference_layers``. For each
    task column a ``Relative_<task>`` column holds ``time / baseline time``.
    One slice per quantization technique is passed to ``dataframe_to_latex``.

    Returns:
        Frame with the three configuration columns and all ``Relative_*``
        columns, rounded to four decimals.

    Raises:
        IndexError: if no row matches the baseline configuration.
    """
    df = pd.DataFrame(
        get_all_eval_times(base_path, model_name, dims, quantization_techniques, tasks, inference_layers, offset_path)
    )

    max_dim = max(dims)
    # NOTE(review): the first listed layer serves as baseline; confirm INFERENCE_LAYERS is ordered accordingly.
    baseline_layer = inference_layers[0]
    is_baseline = (
        (df['Dimensionality'] == max_dim)
        & (df['Quantization Technique'] == 'float32')
        & (df['Inference Layer'] == baseline_layer)
    )
    baseline_df = df[is_baseline]

    # First collect every baseline value, then derive the relative columns.
    baseline_times = {
        task: (baseline_df[task].values[0] if task in baseline_df.columns else None)
        for task in tasks
    }
    for task in tasks:
        if task not in df.columns:
            continue
        df[f'Relative_{task}'] = df[task] / baseline_times[task]

    config_columns = ['Quantization Technique', 'Dimensionality', 'Inference Layer']
    relative_columns = [name for name in df.columns if name.startswith('Relative_')]
    df_relative_times = df[config_columns + relative_columns].round(4)

    # (value in the 'Quantization Technique' column, label used for caption and table name)
    for technique, label in (('float32', 'float'), ('int8', 'int'), ('binary', 'binary')):
        dataframe_to_latex(
            df=df_relative_times[df_relative_times['Quantization Technique'] == technique],
            test_type="RelativeTime",
            model_name=model_name,
            precision=3,
            caption=get_caption_time_relative_two_dim(model_name, label),
            custom_name=label,
            replace_relative=True,
        )
    return df_relative_times

def generate_pandas_dataframe_eval_times_average_relative(base_path, model_name, dims, quantization_techniques, tasks, inference_layers, offset_path, tasks_dict):
    """Average the relative evaluation times per task category.

    Calls ``generate_pandas_dataframe_relative_eval_times`` (which also runs its
    own LaTeX exports), then adds one ``Average_Relative_<category>`` column for
    every key of ``tasks_dict`` that has at least one ``Relative_<task>`` column.

    Returns:
        Frame with the three configuration columns and the
        ``Average_Relative_*`` columns, rounded to four decimals.
    """
    relative = generate_pandas_dataframe_relative_eval_times(base_path, model_name, dims, quantization_techniques, tasks, inference_layers, offset_path, tasks_dict)

    for category, task_list in tasks_dict.items():
        candidates = (f'Relative_{task}' for task in task_list)
        present = [column for column in candidates if column in relative.columns]
        if not present:
            # No task of this category was evaluated; no average column is created.
            continue
        relative[f'Average_Relative_{category}'] = relative[present].mean(axis=1)

    config_columns = ['Quantization Technique', 'Dimensionality', 'Inference Layer']
    average_columns = [name for name in relative.columns if name.startswith('Average_Relative_')]
    df_avg_relative_only = relative[config_columns + average_columns].round(4)

    dataframe_to_latex(
        df=df_avg_relative_only,
        test_type="AverageRelativeTime",
        model_name=model_name,
        precision=3,
        caption=get_caption_time_avg_relative_two_dim(model_name),
    )
    return df_avg_relative_only

In [None]:
# Per-task evaluation times for the configured model, over every task in tasks_dict["all"].
df_time_mbaii = generate_pandas_dataframe_eval_times(
    MODEL_BASE_PATH,
    MODEL_NAME,
    dims,
    quantization_techniques,
    tasks_dict["all"],
    INFERENCE_LAYERS,
    MODEL_OFFSET_PATH,
    tasks_dict,
)
display(df_time_mbaii)

### Average

In [None]:
# Averaged evaluation times for the configured model.
df_time_mbaii_avg = generate_pandas_dataframe_eval_times_average(
    MODEL_BASE_PATH,
    MODEL_NAME,
    dims,
    quantization_techniques,
    tasks_dict["all"],
    INFERENCE_LAYERS,
    MODEL_OFFSET_PATH,
    tasks_dict,
)
display(df_time_mbaii_avg)

### Relative

In [None]:
# Relative evaluation times for the configured model.
# Stored under the *_mbaii suffix used by the other cells of this section
# (df_time_mbaii, df_time_mbaii_avg); the previous *_stella name overwrote the
# result of a different model while being computed from the same MODEL_* config.
df_relative_eval_times_mbaii = generate_pandas_dataframe_relative_eval_times(
    MODEL_BASE_PATH,
    MODEL_NAME,
    dims,
    quantization_techniques,
    tasks_dict["all"],
    INFERENCE_LAYERS,
    MODEL_OFFSET_PATH,
    tasks_dict,
)
display(df_relative_eval_times_mbaii)

### Average Relative

In [None]:
# Category-averaged relative evaluation times for the configured model.
# Stored under the *_mbaii suffix used by the other cells of this section
# (df_time_mbaii, df_time_mbaii_avg); the previous *_stella name overwrote the
# result of a different model while being computed from the same MODEL_* config.
df_avg_relative_eval_times_mbaii = generate_pandas_dataframe_eval_times_average_relative(
    MODEL_BASE_PATH,
    MODEL_NAME,
    dims,
    quantization_techniques,
    tasks_dict["all"],
    INFERENCE_LAYERS,
    MODEL_OFFSET_PATH,
    tasks_dict,
)
display(df_avg_relative_eval_times_mbaii)

## Accuracy-Compute Trade-Off

In [None]:
def get_caption_memory_two_dim(model_name, quantization_technique):
    r"""Return the LaTeX macro call ``\tableCaptionMemoryTwoDim{<model_name>}{<quantization_technique>}``."""
    return "\\tableCaptionMemoryTwoDim{{{}}}{{{}}}".format(model_name, quantization_technique)
def get_caption_memory_avg_two_dim(model_name):
    r"""Return the LaTeX macro call ``\tableCaptionMemoryAvgTwoDim{<model_name>}``."""
    return "\\tableCaptionMemoryAvgTwoDim{{{}}}".format(model_name)
def get_caption_memory_relative_two_dim(model_name, quantization_technique):
    r"""Return the LaTeX macro call ``\tableCaptionMemoryRelativeTwoDim{<model_name>}{<quantization_technique>}``."""
    return "\\tableCaptionMemoryRelativeTwoDim{{{}}}{{{}}}".format(model_name, quantization_technique)
def get_caption_memory_avg_relative_two_dim(model_name):
    r"""Return the LaTeX macro call ``\tableCaptionMemoryAvgRelativeTwoDim{<model_name>}``."""
    return "\\tableCaptionMemoryAvgRelativeTwoDim{{{}}}".format(model_name)

def get_all_scores_accuracy_compute(base_path, model_name, dims, quantization_techniques, tasks, inference_layers, offset_path):
    """Collect the main test score of every task for every model configuration.

    One row is produced per (technique, dim, layer) combination. Results are read
    from ``<base_path>/<model_name>_<dim>_<technique>_<layer>/<offset_path>/<task>.json``
    using the value at ``data["scores"]["test"][0]["main_score"]``.

    Args:
        base_path: root directory of the result folders.
        model_name: prefix of each result folder name.
        dims: embedding dimensionalities to iterate over.
        quantization_techniques: technique names understood by ``calculate_memory_used``.
        tasks: task names; each becomes a key of the row.
        inference_layers: layer identifiers used in the folder name.
        offset_path: sub-path between the result folder and the task files.

    Returns:
        List of dicts with the configuration, ``'Memory Used (bits)'`` and one
        entry per task (``None`` when the score could not be read).
    """
    scores = []
    for technique in quantization_techniques:
        for dim in dims:
            for layer in inference_layers:
                row = {
                    'Quantization Technique': technique,
                    'Dimensionality': dim,
                    'Inference Layer': layer,
                    'Memory Used (bits)': calculate_memory_used(dim, technique)
                }
                # The result folder depends only on the configuration, not on the task.
                result_dir = os.path.join(base_path, f"{model_name}_{dim}_{technique}_{layer}", offset_path)
                for task in tasks:
                    file_path = os.path.join(result_dir, f"{task}.json")
                    try:
                        with open(file_path, 'r') as file:
                            data = json.load(file)
                        row[task] = data["scores"]["test"][0]["main_score"]
                    except (FileNotFoundError, json.JSONDecodeError, KeyError, IndexError, TypeError) as e:
                        # Best effort: a missing, truncated or unexpectedly shaped
                        # result file yields an empty cell instead of aborting the table.
                        print(f"Error reading file {file_path}: {e}")
                        row[task] = None
                scores.append(row)
    return scores

def calculate_memory_used(embedding_size, quantization_method):
    """Return the size in bits of one embedding with ``embedding_size`` dimensions.

    Each dimension costs 32 bits for ``'float32'``, 8 bits for ``'int8'`` and
    1 bit for ``'binary'``.

    Raises:
        ValueError: for any other ``quantization_method``.
    """
    bits_per_dimension = (('float32', 32), ('int8', 8), ('binary', 1))
    for method, bits in bits_per_dimension:
        if quantization_method == method:
            return embedding_size * bits
    raise ValueError(f"Unknown quantization method: {quantization_method}")

def calculate_averages(df, tasks_dict):
    """Append row-wise mean columns to ``df`` (modified in place) and return it.

    ``Average_All_Tasks`` is the mean over the columns in ``tasks_dict['all']``.
    For every other key of ``tasks_dict`` whose task columns are all present in
    ``df``, an ``Average_<key>`` column is added as well.

    This definition replaces the earlier ``calculate_averages`` of this notebook
    once its cell has run. It therefore also produces the per-category columns,
    so that the time-average tables do not depend on cell execution order.
    """
    df['Average_All_Tasks'] = df[tasks_dict['all']].mean(axis=1)
    for category, category_tasks in tasks_dict.items():
        if category == 'all':
            continue
        # Skip categories that are not fully present instead of raising, so frames
        # that only carry a subset of the tasks keep working.
        if all(task in df.columns for task in category_tasks):
            df[f'Average_{category}'] = df[category_tasks].mean(axis=1)
    return df

def calculate_tradeoff_metrics(df):
    """Placeholder hook: currently returns ``df`` unchanged without computing any metric."""
    return df

def generate_pandas_dataframe_accuracy_compute(base_path, model_name, dims, quantization_techniques, tasks, inference_layers, offset_path, tasks_dict):
    """Build the score-versus-memory table and pass one slice per quantization technique to ``dataframe_to_latex``.

    Rows come from ``get_all_scores_accuracy_compute``; the frame is rounded to
    four decimals and routed through ``calculate_tradeoff_metrics``.
    ``tasks_dict`` is accepted but not used here.

    Returns:
        The full frame covering all quantization techniques.
    """
    rows = get_all_scores_accuracy_compute(base_path, model_name, dims, quantization_techniques, tasks, inference_layers, offset_path)
    df = calculate_tradeoff_metrics(pd.DataFrame(rows).round(4))

    # (value in the 'Quantization Technique' column, label used for caption and table name)
    for technique, label in (('float32', 'float'), ('int8', 'int'), ('binary', 'binary')):
        dataframe_to_latex(
            df=df[df['Quantization Technique'] == technique],
            test_type="Memory",
            model_name=model_name,
            precision=3,
            caption=get_caption_memory_two_dim(model_name, label),
            custom_name=label,
        )
    return df

def get_all_scores_accuracy_compute_averages(base_path, model_name, dims, quantization_techniques, tasks, inference_layers, offset_path, tasks_dict):
    """Collect per-category average test scores for every model configuration.

    One row is produced per (technique, dim, layer) combination. For each task the
    value at ``data["scores"]["test"][0]["main_score"]`` is read from
    ``<base_path>/<model_name>_<dim>_<technique>_<layer>/<offset_path>/<task>.json``
    and added to every category of ``tasks_dict`` that lists the task.

    Args:
        base_path, model_name, dims, quantization_techniques, tasks,
        inference_layers, offset_path: locate the result files (see above).
        tasks_dict: mapping of category name to the task names it contains.

    Returns:
        List of dicts with the configuration, ``'Memory Used (bits)'`` and one
        ``Average_<category>`` entry per key of ``tasks_dict`` (``None`` when no
        score of that category could be read).
    """
    scores = []
    for technique in quantization_techniques:
        for dim in dims:
            for layer in inference_layers:
                row = {
                    'Quantization Technique': technique,
                    'Dimensionality': dim,
                    'Inference Layer': layer,
                    'Memory Used (bits)': calculate_memory_used(dim, technique)
                }
                # The result folder depends only on the configuration, not on the task.
                result_dir = os.path.join(base_path, f"{model_name}_{dim}_{technique}_{layer}", offset_path)
                category_scores = {category: [] for category in tasks_dict}
                for task in tasks:
                    file_path = os.path.join(result_dir, f"{task}.json")
                    try:
                        with open(file_path, 'r') as file:
                            data = json.load(file)
                        main_score = data["scores"]["test"][0]["main_score"]
                    except (FileNotFoundError, json.JSONDecodeError, KeyError, IndexError, TypeError) as e:
                        # Best effort: an unreadable or unexpectedly shaped result is
                        # left out of the averages instead of aborting the table.
                        print(f"Error reading file {file_path}: {e}")
                        continue
                    for category, task_list in tasks_dict.items():
                        if task in task_list:
                            category_scores[category].append(main_score)
                for category, values in category_scores.items():
                    row[f'Average_{category}'] = sum(values) / len(values) if values else None
                scores.append(row)
    return scores

def generate_pandas_dataframe_accuracy_compute_averages(base_path, model_name, dims, quantization_techniques, tasks, inference_layers, offset_path, tasks_dict):
    """Build the table of per-category average scores and hand it to ``dataframe_to_latex``.

    Returns:
        Frame built from ``get_all_scores_accuracy_compute_averages``, rounded to
        four decimals.
    """
    averaged_rows = get_all_scores_accuracy_compute_averages(
        base_path, model_name, dims, quantization_techniques, tasks, inference_layers, offset_path, tasks_dict
    )
    df = pd.DataFrame(averaged_rows).round(4)
    dataframe_to_latex(
        df=df,
        test_type="MemoryAvg",
        model_name=model_name,
        precision=3,
        caption=get_caption_memory_avg_two_dim(model_name),
    )
    return df

def generate_pandas_dataframe_relative_scores(base_path, model_name, dims, quantization_techniques, tasks, inference_layers, offset_path, tasks_dict):
    """Express every task score relative to a float32 baseline row.

    The baseline is the row with the largest entry of ``dims``, quantization
    technique ``'float32'`` and the first entry of ``inference_layers``. For each
    task column a ``Relative_<task>`` column holds ``score / baseline score``.
    One slice per quantization technique is passed to ``dataframe_to_latex``.

    Returns:
        Frame with the configuration columns, ``'Memory Used (bits)'`` and all
        ``Relative_*`` columns, rounded to four decimals.

    Raises:
        IndexError: if no row matches the baseline configuration.
    """
    df = pd.DataFrame(
        get_all_scores_accuracy_compute(base_path, model_name, dims, quantization_techniques, tasks, inference_layers, offset_path)
    )

    max_dim = max(dims)
    # NOTE(review): the first listed layer serves as baseline; confirm INFERENCE_LAYERS is ordered accordingly.
    baseline_layer = inference_layers[0]
    is_baseline = (
        (df['Dimensionality'] == max_dim)
        & (df['Quantization Technique'] == 'float32')
        & (df['Inference Layer'] == baseline_layer)
    )
    baseline_df = df[is_baseline]

    # First collect every baseline value, then derive the relative columns.
    baseline_scores = {
        task: (baseline_df[task].values[0] if task in baseline_df.columns else None)
        for task in tasks
    }
    for task in tasks:
        if task not in df.columns:
            continue
        df[f'Relative_{task}'] = df[task] / baseline_scores[task]

    leading_columns = ['Quantization Technique', 'Dimensionality', 'Inference Layer', 'Memory Used (bits)']
    relative_columns = [name for name in df.columns if name.startswith('Relative_')]
    df_relative_scores = df[leading_columns + relative_columns].round(4)

    # (value in the 'Quantization Technique' column, label used for caption and table name)
    for technique, label in (('float32', 'float'), ('int8', 'int'), ('binary', 'binary')):
        dataframe_to_latex(
            df=df_relative_scores[df_relative_scores['Quantization Technique'] == technique],
            test_type="RelativeScores",
            model_name=model_name,
            precision=3,
            caption=get_caption_memory_relative_two_dim(model_name, label),
            custom_name=label,
            replace_relative=True,
        )
    return df_relative_scores


def generate_pandas_dataframe_scores_average_relative(base_path, model_name, dims, quantization_techniques, tasks, inference_layers, offset_path, tasks_dict):
    """Average the relative scores per task category.

    Calls ``generate_pandas_dataframe_relative_scores`` (which also runs its own
    LaTeX exports), then adds one ``Average_Relative_<category>`` column for every
    key of ``tasks_dict`` that has at least one ``Relative_<task>`` column.

    Returns:
        Frame with the configuration columns, ``'Memory Used (bits)'`` and the
        ``Average_Relative_*`` columns, rounded to four decimals.
    """
    relative = generate_pandas_dataframe_relative_scores(base_path, model_name, dims, quantization_techniques, tasks, inference_layers, offset_path, tasks_dict)

    for category, task_list in tasks_dict.items():
        candidates = (f'Relative_{task}' for task in task_list)
        present = [column for column in candidates if column in relative.columns]
        if not present:
            # No task of this category was evaluated; no average column is created.
            continue
        relative[f'Average_Relative_{category}'] = relative[present].mean(axis=1)

    leading_columns = ['Quantization Technique', 'Dimensionality', 'Inference Layer', 'Memory Used (bits)']
    average_columns = [name for name in relative.columns if name.startswith('Average_Relative_')]
    df_avg_relative_only = relative[leading_columns + average_columns].round(4)

    dataframe_to_latex(
        df=df_avg_relative_only,
        test_type="AverageRelativeScores",
        model_name=model_name,
        precision=3,
        caption=get_caption_memory_avg_relative_two_dim(model_name),
    )
    return df_avg_relative_only


In [None]:
# Per-task scores together with the memory footprint of each configuration.
df_mem_mbaii = generate_pandas_dataframe_accuracy_compute(
    MODEL_BASE_PATH,
    MODEL_NAME,
    dims,
    quantization_techniques,
    tasks_dict["all"],
    INFERENCE_LAYERS,
    MODEL_OFFSET_PATH,
    tasks_dict,
)
display(df_mem_mbaii)

### Average

In [None]:
# Per-category average scores together with the memory footprint of each configuration.
df_mem_mbaii_avg = generate_pandas_dataframe_accuracy_compute_averages(
    MODEL_BASE_PATH,
    MODEL_NAME,
    dims,
    quantization_techniques,
    tasks_dict["all"],
    INFERENCE_LAYERS,
    MODEL_OFFSET_PATH,
    tasks_dict,
)
display(df_mem_mbaii_avg)

### Relative

In [None]:
# Scores relative to the float32 baseline for the configured model.
# Stored under the *_mbaii suffix used by the other cells of this section
# (df_mem_mbaii, df_mem_mbaii_avg); the previous *_stella name overwrote the
# result of a different model while being computed from the same MODEL_* config.
df_relative_scores_mbaii = generate_pandas_dataframe_relative_scores(
    MODEL_BASE_PATH,
    MODEL_NAME,
    dims,
    quantization_techniques,
    tasks_dict["all"],
    INFERENCE_LAYERS,
    MODEL_OFFSET_PATH,
    tasks_dict,
)
display(df_relative_scores_mbaii)

### Average Relative

In [None]:
# Category-averaged relative scores for the configured model.
# Stored under the *_mbaii suffix used by the other cells of this section
# (df_mem_mbaii, df_mem_mbaii_avg); the previous *_stella name overwrote the
# result of a different model while being computed from the same MODEL_* config.
df_avg_relative_scores_mbaii = generate_pandas_dataframe_scores_average_relative(
    MODEL_BASE_PATH,
    MODEL_NAME,
    dims,
    quantization_techniques,
    tasks_dict["all"],
    INFERENCE_LAYERS,
    MODEL_OFFSET_PATH,
    tasks_dict,
)
display(df_avg_relative_scores_mbaii)

# CO2 Emissions

In [None]:
def sum_total_times(dataframes):
    """Sum the ``Total_Time`` column of each frame and print a short report.

    Frames without a ``Total_Time`` column contribute ``0`` and are reported as
    such. Positions in the printed report are 1-based.

    Returns:
        Tuple ``(per_frame_totals, grand_total)`` where ``per_frame_totals`` has
        one entry per input frame, in input order.
    """
    per_frame_totals = []
    grand_total = 0
    for position, frame in enumerate(dataframes, start=1):
        if 'Total_Time' not in frame.columns:
            per_frame_totals.append(0)
            print(f"DataFrame {position} does not have a Total_Time column.")
            continue

        frame_total = frame['Total_Time'].sum()
        per_frame_totals.append(frame_total)
        grand_total += frame_total
        print(f"Total Time for DataFrame {position}: {frame_total:.2f} seconds ({frame_total/3600.0} hours)")

    print(f"\nOverall Total Time for all DataFrames: {grand_total:.2f} seconds ({grand_total/3600.0} hours)")
    return per_frame_totals, grand_total

In [None]:
# Evaluation-time frames handed to sum_total_times; only frames that still
# contain a Total_Time column contribute to the total.
dfs_co2 = [
    df_time_stella,
    df_time_nomic,
    df_time_mbai,
    df_time_mbaii,
]

In [None]:
# Prints the per-frame and overall totals; the cell output is the
# (per-frame sums, overall sum) tuple returned by sum_total_times.
sum_total_times(dfs_co2)