In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Input CSV files, resolved relative to the notebook's working directory.
zo_opt_file = "ZO_opt.csv"
hybrid_file = "Hybrid.csv"
peft_file = "PEFT.csv"
mem_file = "Mem_usage.csv"

# Read each table in the same order as listed above and discard rows in which
# every cell is empty (how='all'); rows with at least one value are kept.
zo_opt_df, hybrid_df, peft_df, mem_df = (
    pd.read_csv(csv_path).dropna(how='all')
    for csv_path in (zo_opt_file, hybrid_file, peft_file, mem_file)
)

In [3]:
# Model names grouped by size bucket; membership is tested by exact string match.
# NOTE(review): "RoBERTa-large " (trailing space) is presumably here to match a
# label that carries trailing whitespace in one of the CSVs - confirm.
SMALL_MODELS = ("RoBERTa-Base", "RoBERTa-large", "RoBERTa-large ")
MEDIUM_MODELS = ("OPT-1.3B", "OPT-2.7B")
LARGE_MODELS = ("LLaMA-7b", "LLaMA2-7b", "LlaMA3-8b", "OPT-13B")

# Task type -> tuple of dataset names belonging to it.
# Every value must be a real tuple: a single-element tuple needs a trailing
# comma. Without it, ("WIC") is just the string "WIC", and `name in "WIC"`
# performs a substring test (so "W", "IC" or "" would all match).
TASK_TYPE_TO_DATASET = {"Natural Language Inference": ("SNLI", "MNLI", "RTE", "CB"),
                        "Sentiment Analysis": ("SST-2", "SST-5"),
                        "Reading Comprehension/Question Answering": ("SQuAD", "MultiRC", "ReCoRD", "DROP", "BoolQ"),
                        "Commonsense & Causal Reasoning": ("COPA", "WSC"),
                        "Word Sense/Contextual Meaning": ("WIC",),
                        "Question Classification": ("TREC",)}

def get_dataset_type(dataset_name):
    """
    Look up the task type a dataset belongs to.

    Walks TASK_TYPE_TO_DATASET in insertion order and returns the key of the
    first entry whose value satisfies `dataset_name in value`.

    Parameters:
    - dataset_name: Name to look up (e.g. a dataset column label).

    Returns:
    - str: The matching task type, or "Unknown" when no entry matches.
    """
    matching_types = (
        task_type
        for task_type, members in TASK_TYPE_TO_DATASET.items()
        if dataset_name in members
    )
    return next(matching_types, "Unknown")

def get_model_size(model_name):
    """
    Classify a model name into a size bucket.

    Parameters:
    - model_name: Model label, compared by exact membership against
      SMALL_MODELS, MEDIUM_MODELS and LARGE_MODELS (in that order).

    Returns:
    - str: "small", "medium" or "large"; "unknown" if the name is in none of
      the three groups.
    """
    size_buckets = (
        ("small", SMALL_MODELS),
        ("medium", MEDIUM_MODELS),
        ("large", LARGE_MODELS),
    )
    for size_label, members in size_buckets:
        if model_name in members:
            return size_label
    return "unknown"

In [None]:
# Tag each source table with the family of methods it came from
# (the column is added to the loaded frames in place).
for _frame, _label in ((zo_opt_df, 'zo_opt'), (hybrid_df, 'hybrid'), (peft_df, 'peft')):
    _frame['method_type'] = _label

# Stack the three tables into one frame with a fresh 0..n-1 index.
combined_df = pd.concat([zo_opt_df, hybrid_df, peft_df], ignore_index=True)

# Derive the size bucket of every row from its 'Model' label.
combined_df = combined_df.assign(model_type=combined_df['Model'].apply(get_model_size))

# Element-wise rescale: any int/float cell strictly below 1 is multiplied by
# 100; every other cell (strings, NaN, values >= 1) is left untouched.
# NOTE(review): presumably converts fractional scores to percentages; a score
# of exactly 1.0 is not rescaled - confirm this is intended.
combined_df = combined_df.map(
    lambda cell: cell * 100 if isinstance(cell, (int, float)) and cell < 1 else cell
)

In [None]:
def interpolate(combined_df, fix_elements, threshold=5):
    """
    Interpolate missing performance values by averaging available data while fixing certain elements.
        method A (with category A')
        fine-tuning model B (with model size B')
        on dataset C (with task type C')
    Fix some subset of {A, A', B, B', C, C'} and take the average over the
    experiments that agree with the missing one on the fixed elements.
    A specific element (A, B or C) is only applied when the sample it leaves
    still holds at least `threshold` numeric values; otherwise the sample
    selected so far (e.g. by A') is kept.
    eg. Interpolating (x): HiZOO fine-tuning llama2-7b on the SST-2 dataset
        fix_elements = ("A", "B'", "C'")
        x = mean(HiZOO fine-tuning large model on sentiment analysis dataset)

    The first two columns of `combined_df` are taken to be the method and the
    model identifiers. Every other column except 'method_type' and
    'model_type' is treated as a dataset column. String cells are never
    overwritten and are ignored (coerced to NaN) when averaging. The
    interpolated value is the mean of the per-dataset column means of the
    selected sample; it stays NaN when the sample has no numeric value.

    Parameters:
    - combined_df (pd.DataFrame): Merged DataFrame containing all method types.
      Needs a 'method_type' column when "A'" is fixed and a 'model_type'
      column when "B'" is fixed.
    - fix_elements (list or tuple): Elements to fix during interpolation.
    - threshold (int, optional): Minimum number of available data points required to refine filtering. Default is 5.

    Returns:
    - pd.DataFrame: A copy of the DataFrame with interpolated missing values;
      `combined_df` itself is not modified.
    """
    allowed_elements = ("A", "A'", "B", "B'", "C", "C'")
    # Validate the arguments actually passed in (not notebook globals).
    assert isinstance(combined_df, pd.DataFrame)
    assert isinstance(fix_elements, (list, tuple)) and len(fix_elements) <= 6
    assert all(element in allowed_elements for element in fix_elements)

    fixed = set(fix_elements)
    new_df = combined_df.copy()

    # (position, label) of every dataset column: everything after the two
    # identifier columns, minus the bookkeeping columns added for filtering.
    dataset_columns = [
        (position, label)
        for position, label in enumerate(combined_df.columns)
        if position >= 2 and label not in ("method_type", "model_type")
    ]
    if not dataset_columns:
        return new_df

    method_col, model_col = combined_df.columns[0], combined_df.columns[1]
    dataset_names = [label for _, label in dataset_columns]
    dataset_types = {label: get_dataset_type(label) for label in dataset_names}

    # Loop-invariant: numeric view of the dataset columns (strings -> NaN).
    numeric_values = combined_df[dataset_names].apply(pd.to_numeric, errors='coerce')

    def count_available(row_mask, columns):
        """Number of numeric (non-NaN) cells in the selected sample."""
        return int(numeric_values.loc[row_mask, columns].notna().to_numpy().sum())

    # enumerate() gives the positional row index needed by .iloc, independent
    # of the frame's index labels.
    for row_pos, (_, row) in enumerate(combined_df.iterrows()):
        method, model = row.iloc[0], row.iloc[1]

        for col_pos, dataset_name in dataset_columns:
            entry = row.iloc[col_pos]
            # Only genuinely missing cells are filled; pd.isna also copes with
            # None, which np.isnan would reject.
            if isinstance(entry, str) or not pd.isna(entry):
                continue

            row_mask = np.ones(len(combined_df), dtype=bool)
            columns = dataset_names

            # filter by general category
            if "A'" in fixed:
                row_mask &= (combined_df['method_type'] == row['method_type']).to_numpy(dtype=bool)
            if "B'" in fixed:
                row_mask &= (combined_df['model_type'] == get_model_size(model)).to_numpy(dtype=bool)
            if "C'" in fixed:
                columns = [label for label in dataset_names
                           if dataset_types[label] == dataset_types[dataset_name]]

            # filter by specific if possible: keep a refinement only when the
            # refined sample itself still has enough data points
            if "A" in fixed:
                refined_mask = row_mask & (combined_df[method_col] == method).to_numpy(dtype=bool)
                if count_available(refined_mask, columns) >= threshold:
                    row_mask = refined_mask
            if "B" in fixed:
                refined_mask = row_mask & (combined_df[model_col] == model).to_numpy(dtype=bool)
                if count_available(refined_mask, columns) >= threshold:
                    row_mask = refined_mask
            if "C" in fixed:
                if count_available(row_mask, [dataset_name]) >= threshold:
                    columns = [dataset_name]

            # calculate: mean of the per-dataset means of the selected sample
            sample = numeric_values.loc[row_mask, columns]
            new_df.iloc[row_pos, col_pos] = sample.mean(skipna=True).mean(skipna=True)
    return new_df

In [None]:
# Interpolate while requesting the general categories only:
# method category (A'), model size (B') and dataset task type (C').
q1_df = interpolate(combined_df, fix_elements=["A'", "B'", "C'"])

In [None]:
# Total number of missing cells before (combined_df) and after (q1_df)
# interpolation, printed one per line.
missing_before = combined_df.isna().sum().sum()
missing_after = q1_df.isna().sum().sum()
print(missing_before, missing_after, sep="\n")