In [19]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [28]:
# Input CSV file names (bare names, so they resolve against the current working directory).
# The first three are loaded as performance tables; the last one as the memory-usage table.
zo_opt_file = "ZO_opt.csv"
hybrid_file = "Hybrid.csv"
peft_file = "PEFT.csv"
mem_file = "Mem_usage.csv"

In [29]:
# Load every results table the same way: read the CSV, then discard rows in
# which all cells are empty (how='all' keeps rows with at least one value).
zo_opt_df, hybrid_df, peft_df, mem_df = (
    pd.read_csv(csv_path).dropna(how='all')
    for csv_path in (zo_opt_file, hybrid_file, peft_file, mem_file)
)

In [41]:
# Model-name groups that get_model_size uses to bucket a model as Small / Medium / Large.
# Matching is exact and case-sensitive after get_model_size strips surrounding whitespace,
# so the trailing-space entry "RoBERTa-large " can never be hit through that function.
# NOTE(review): it presumably mirrors a raw label in one of the CSVs -- confirm before removing.
SMALL_MODELS = ("RoBERTa-Base", "RoBERTa-large", "RoBERTa-large ")
MEDIUM_MODELS = ("OPT-1.3B", "OPT-2.7B")
LARGE_MODELS = ("LLaMA-7b", "LLaMA2-7b", "LlaMA3-8b", "OPT-13B")

In [39]:
# Maps each task family to the tuple of dataset names that belong to it.
# Every value must be a tuple. A one-element tuple needs a trailing comma:
# ("WIC") is just the string "WIC", so `name in value` would do substring
# matching and iterating over it would yield single characters.
TASK_TYPE_TO_DATASET = {
    "Natural Language Inference": ("SNLI", "MNLI", "RTE", "CB"),
    "Sentiment Analysis": ("SST-2", "SST-5"),
    "Reading Comprehension/Question Answering": ("SQuAD", "MultiRC", "ReCoRD", "DROP", "BoolQ"),
    "Commonsense & Causal Reasoning": ("COPA", "WSC"),
    "Word Sense/Contextual Meaning": ("WIC",),
    "Question Classification": ("TREC",),
}

In [44]:
def clean_dataframe(df):
    """
    Fill missing values in a DataFrame and return the result as a new frame.

    Numeric columns are interpolated first, so a gap between two known values
    receives an in-between value. Whatever is still missing afterwards
    (leading gaps in numeric columns, and any gap in non-numeric columns) is
    forward-filled and then backward-filled.

    Args:
      df (DataFrame): Input frame; it is not modified.

    Returns:
      DataFrame: A copy of ``df`` with missing values filled. A column that is
      entirely missing stays missing because there is nothing to fill from.
    """
    cleaned = df.copy()

    # Interpolate BEFORE ffill/bfill: filling first leaves no NaNs behind, which
    # would turn the interpolation step into a no-op. Restricting it to numeric
    # columns avoids calling interpolate() on text columns such as model names.
    numeric_cols = cleaned.select_dtypes(include='number').columns
    if len(numeric_cols) > 0:
        cleaned[numeric_cols] = cleaned[numeric_cols].interpolate()

    # .ffill()/.bfill() replace the deprecated fillna(method=...) spelling.
    return cleaned.ffill().bfill()

def get_model_size(model_name):
    """
    Map a model name onto its size category.

    Surrounding whitespace in ``model_name`` is ignored. The result is
    'Small', 'Medium' or 'Large' when the name appears in SMALL_MODELS,
    MEDIUM_MODELS or LARGE_MODELS respectively, and 'Unknown' otherwise.
    """
    normalized = model_name.strip()

    # Checked in this order; the first group containing the name wins.
    size_groups = (
        ('Small', SMALL_MODELS),
        ('Medium', MEDIUM_MODELS),
        ('Large', LARGE_MODELS),
    )
    for label, members in size_groups:
        if normalized in members:
            return label
    return 'Unknown'

def aggregate_results(perf_dfs, mem_df):
    """
    Aggregates performance and memory usage data per model.

    For every model, 'Avg_Performance' is the mean of all non-missing numeric
    values found for that model across all performance frames. It is computed
    from per-row sums and counts rather than by chaining merges: repeated
    merges with suffixes=('', '_dup') produce clashing '<col>_dup' labels as
    soon as a third frame shares a column name (which either raises or skews
    the mean, depending on the pandas version), and they multiply rows when a
    model appears more than once in a frame.

    Model names are stripped of surrounding whitespace before grouping and
    merging so that e.g. "RoBERTa-large " and "RoBERTa-large" are one model.

    Args:
      perf_dfs (list): Non-empty list of DataFrames, each with a 'Model'
        column and numeric performance columns.
      mem_df (DataFrame): DataFrame with a 'Model' column and the memory
        usage data. Its columns are carried through unchanged; downstream
        plotting reads a column named 'Memory'.

    Returns:
      aggregated (DataFrame): Columns 'Model', 'Avg_Performance', the columns
      of ``mem_df`` and 'Model_Size'. Models present on only one side are
      kept (outer merge) with NaN for the missing part.

    Raises:
      IndexError: If ``perf_dfs`` is empty.
      KeyError: If a frame has no 'Model' column.
    """
    if not perf_dfs:
        # Same exception type that indexing perf_dfs[0] raised previously.
        raise IndexError("perf_dfs must contain at least one DataFrame")

    def _with_clean_model_names(df):
        # Strip whitespace from string labels only; leave other values untouched.
        return df.assign(
            Model=df['Model'].map(
                lambda name: name.strip() if isinstance(name, str) else name
            )
        )

    perf_dfs = [_with_clean_model_names(clean_dataframe(df)) for df in perf_dfs]
    mem_df = _with_clean_model_names(clean_dataframe(mem_df))

    # Reduce each frame to (Model, sum of scores, number of scores) per row.
    partials = []
    for df in perf_dfs:
        scores = df.drop(columns='Model').select_dtypes(include=[np.number])
        partials.append(pd.DataFrame({
            'Model': df['Model'].to_numpy(),
            'score_sum': scores.sum(axis=1).to_numpy(),      # NaNs are skipped
            'score_count': scores.count(axis=1).to_numpy(),  # non-NaN cells only
        }))

    totals = (
        pd.concat(partials, ignore_index=True)
        .groupby('Model', sort=False)[['score_sum', 'score_count']]
        .sum()
    )
    # A model without any numeric value yields 0 / 0 -> NaN, i.e. "no data".
    avg_perf = (
        (totals['score_sum'] / totals['score_count'])
        .rename('Avg_Performance')
        .reset_index()
    )

    # Attach the memory usage columns; keep models that appear on either side.
    aggregated = pd.merge(avg_perf, mem_df, on='Model', how='outer')

    # Add model size classification
    aggregated['Model_Size'] = aggregated['Model'].apply(get_model_size)

    return aggregated

def analyze_model_sizes(aggregated):
    """
    Summarise and plot performance and memory usage per model-size group.

    Prints the mean 'Avg_Performance' and mean 'Memory' for each 'Model_Size'
    value, then shows a figure with two side-by-side bar charts (performance
    on the left, memory on the right) with standard-deviation error bars.

    Args:
      aggregated (DataFrame): DataFrame with columns ['Model', 'Avg_Performance', 'Memory', 'Model_Size'].

    Returns:
      None: Displays plots.
    """
    metric_columns = ('Avg_Performance', 'Memory')

    # Per-size means, printed as a plain-text table.
    size_summary = (
        aggregated
        .groupby('Model_Size')
        .agg(dict.fromkeys(metric_columns, 'mean'))
        .reset_index()
    )
    print("Aggregated averages by model size:")
    print(size_summary)

    # One panel per metric: (axis, column to plot, title, y-axis label).
    fig, (perf_ax, mem_ax) = plt.subplots(1, 2, figsize=(14, 6))
    panels = (
        (perf_ax, 'Avg_Performance', "Average Performance by Model Size", "Average Performance"),
        (mem_ax, 'Memory', "Average Memory Usage by Model Size", "Memory Usage"),
    )
    for ax, column, title, y_label in panels:
        # NOTE(review): seaborn >= 0.12 deprecates `ci` in favour of
        # errorbar='sd'; kept as-is so older seaborn versions still work.
        sns.barplot(x='Model_Size', y=column, data=aggregated, ci='sd', ax=ax)
        ax.set_title(title)
        ax.set_xlabel("Model Size")
        ax.set_ylabel(y_label)

    fig.tight_layout()
    plt.show()

# Run analysis

# Performance tables to combine: the ZO_opt, Hybrid and PEFT frames loaded above.
performance_dfs = [zo_opt_df, hybrid_df, peft_df]

# Aggregate per-model results. aggregate_results joins on a 'Model' column, so every
# frame passed here (including mem_df) must have one, and it averages the numeric columns.
aggregated_results = aggregate_results(performance_dfs, mem_df)

# Print the per-size averages and draw the two bar charts.
# analyze_model_sizes reads a 'Memory' column -- presumably supplied by mem_df; verify against the CSV.
analyze_model_sizes(aggregated_results)


{'Hybrid': squad      0
multirc    0
record     0
drop       0
dtype: int64, 'PEFT': squad      0
multirc    0
record     0
drop       0
dtype: int64, 'ZO_opt': squad      0
multirc    0
record     0
drop       0
dtype: int64}
