In [96]:
import pandas as pd 

In [None]:
def categorize_examples(df, model_name):
    """Split ``df`` into eight success/failure pattern subsets.

    Every prediction column is compared against ``true_label``. The subsets
    are not mutually exclusive: a row appears in every pattern whose
    condition it satisfies.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``true_label``, ``base_instruct``,
        ``lora_grouping``, ``mixlora``, ``cat`` and ``ties``.
    model_name : str
        Value written to the ``model`` column of every returned subset.

    Returns
    -------
    dict[str, pandas.DataFrame]
        Independent copies of the matching rows (original index preserved),
        each with two extra columns, ``pattern`` and ``model``. ``df`` itself
        is not modified.
    """
    truth = df['true_label']

    # Per-column correctness masks, computed once and reused below.
    base_wrong = df['base_instruct'] != truth
    lora_wrong = df['lora_grouping'] != truth
    moe_right, moe_wrong = df['mixlora'] == truth, df['mixlora'] != truth
    cat_right, cat_wrong = df['cat'] == truth, df['cat'] != truth
    ties_right, ties_wrong = df['ties'] == truth, df['ties'] != truth

    # Both baselines (base_instruct and lora_grouping) are wrong.
    baselines_wrong = base_wrong & lora_wrong

    def _tagged(mask, label):
        # Copy first so the tagging columns never touch the caller's frame.
        subset = df[mask].copy()
        subset['pattern'] = label
        subset['model'] = model_name
        return subset

    # (result key, pattern label, row mask) in the order the keys are returned.
    selections = (
        # Pattern 1: both baselines wrong, one given method right.
        ('baselines_fail_dialectmoe', 'Baselines Fail, DialectMoE Success',
         baselines_wrong & moe_right),
        ('baselines_fail_cat', 'Baselines Fail, CAT Success',
         baselines_wrong & cat_right),
        ('baselines_fail_ties', 'Baselines Fail, TIES Success',
         baselines_wrong & ties_right),
        # Pattern 2: only CAT is right (baselines, mixlora and TIES all wrong).
        ('cat_wins', 'CAT Success, TIES Fails',
         baselines_wrong & moe_wrong & cat_right & ties_wrong),
        # Pattern 3: only TIES is right (baselines, mixlora and CAT all wrong).
        ('ties_wins', 'TIES Success, CAT Fails',
         baselines_wrong & moe_wrong & ties_right & cat_wrong),
        # Pattern 4: mixlora right while at least one of CAT / TIES is wrong.
        ('dialectmoe_wins', 'DialectMoE Success, CAT/TIES Fail',
         moe_right & (cat_wrong | ties_wrong)),
        # Pattern 5: at least one of CAT / TIES right while mixlora is wrong.
        ('fusion_wins', 'CAT/TIES Success, DialectMoE Fails',
         (cat_right | ties_right) & moe_wrong),
        # Pattern 6: mixlora, CAT and TIES are all wrong.
        ('all_fail', 'All Proposed Methods Fail',
         moe_wrong & cat_wrong & ties_wrong),
    )

    return {key: _tagged(mask, label) for key, label, mask in selections}

In [None]:
# Load the zero-shot analysis CSV for each model into its own DataFrame.
# Paths are relative to the notebook's working directory.
df_llama = pd.read_csv("./data/analysis/llama/analysis_zeroshot_w_mixlora.csv")
df_qwen = pd.read_csv("./data/analysis/qwen/analysis_zeroshot_w_mixlora.csv")
df_phi = pd.read_csv("./data/analysis/phi/analysis_zeroshot_w_mixlora.csv")
df_gemma = pd.read_csv("./data/analysis/gemma2/analysis_zeroshot_w_mixlora.csv")

In [113]:
# Peek at the last rows of the Llama frame to check the loaded columns.
df_llama.tail()

Unnamed: 0.1,Unnamed: 0,prompt,task,domain,dialect,true_label,base_instruct,lora_grouping,individual_dialect,cat,ties,mixlora
1819,1819,Generate the sentiment of the given text. 1 fo...,Sentiment,Google,en-UK,0,1,0,0,0,0,0
1820,1820,Generate the sentiment of the given text. 1 fo...,Sentiment,Google,en-UK,1,0,0,0,0,0,1
1821,1821,Generate the sentiment of the given text. 1 fo...,Sentiment,Google,en-UK,1,0,0,0,0,0,1
1822,1822,Generate the sentiment of the given text. 1 fo...,Sentiment,Google,en-UK,0,0,0,0,0,0,0
1823,1823,Generate the sentiment of the given text. 1 fo...,Sentiment,Google,en-UK,1,1,0,0,0,0,1


In [114]:
# Only the Gemma-2 frame is categorized here; the calls for the other
# models are left commented out.
# phi_patterns = categorize_examples(df_phi, 'Phi-3')
# qwen_patterns = categorize_examples(df_qwen, 'Qwen-2.5')
# llama_patterns = categorize_examples(df_llama, 'Llama-3.1')
gemma_patterns = categorize_examples(df_gemma, 'Gemma-2')

In [115]:
# Write the Gemma-2 'cat_wins' and 'ties_wins' subsets to CSV files in the
# current working directory.
gemma_patterns["cat_wins"].to_csv("gemma2_cat_wins.csv")
gemma_patterns["ties_wins"].to_csv("gemma2_ties_wins.csv")

In [None]:
def create_comprehensive_analysis_table(dfs_dict, best_models, examples_per_group=2):
    """Build a table of sample examples on which both baselines are wrong.

    For every model frame, rows where ``base_instruct`` and ``lora_grouping``
    both differ from ``true_label`` are grouped by (dialect, domain, task).
    The first ``examples_per_group`` rows of each group are reported together
    with the ``mixlora`` (reported as DialectMoE), ``cat`` and ``ties``
    predictions and a short observation saying which of them were correct.

    Parameters
    ----------
    dfs_dict : dict[str, pandas.DataFrame]
        Maps a model name to its prediction frame. Each frame must contain
        the columns ``prompt``, ``task``, ``domain``, ``dialect``,
        ``true_label``, ``base_instruct``, ``lora_grouping``, ``mixlora``,
        ``cat`` and ``ties``.
    best_models : dict[str, collection of str]
        Maps ``'dialectmoe'``, ``'cat'`` and ``'ties'`` to the model names
        credited for that method. A prediction is tagged with the model name
        when the model is listed for the method, otherwise with ``'N/A'``.
        A method key that is absent is treated as having no listed models.
    examples_per_group : int or None, default 2
        Number of leading rows taken from each (dialect, domain, task) group.
        ``None`` keeps every row of the group.

    Returns
    -------
    pandas.DataFrame
        One row per sampled example with the columns ``original_idx``,
        ``dialect``, ``domain``, ``task``, ``example``, ``true_label``,
        ``base_instruct``, ``lora_grouping``, ``dialectmoe``, ``cat``,
        ``ties`` and ``observation``. The columns are present even when no
        example qualifies.
    """
    output_columns = [
        'original_idx', 'dialect', 'domain', 'task', 'example', 'true_label',
        'base_instruct', 'lora_grouping', 'dialectmoe', 'cat', 'ties',
        'observation',
    ]
    # (key in best_models / output column, prediction column, display name)
    methods = (
        ('dialectmoe', 'mixlora', 'DialectMoE'),
        ('cat', 'cat', 'CAT'),
        ('ties', 'ties', 'TIES'),
    )

    results = []

    for model_name, df in dfs_dict.items():
        # The tag depends only on the model, so resolve it once per frame.
        model_tags = {
            key: model_name if model_name in best_models.get(key, ()) else 'N/A'
            for key, _, _ in methods
        }

        # Where baselines fail
        baseline_fail_mask = (df['base_instruct'] != df['true_label']) & \
                             (df['lora_grouping'] != df['true_label'])
        failing_examples = df[baseline_fail_mask]

        for (dialect, domain, task), group in failing_examples.groupby(['dialect', 'domain', 'task']):
            sample = group if examples_per_group is None else group.head(examples_per_group)

            # Iterate the sampled rows directly instead of re-fetching them
            # with df.loc[idx]: a label lookup returns a DataFrame (not a
            # row) when the index contains duplicate labels.
            for idx, row in sample.iterrows():
                predictions = {key: row[column] for key, column, _ in methods}
                successes = [
                    display
                    for key, _, display in methods
                    if predictions[key] == row['true_label']
                ]

                if len(successes) == len(methods):
                    observation = "All proposed methods succeed"
                elif not successes:
                    observation = "All proposed methods fail (see error analysis)"
                else:
                    observation = f"{', '.join(successes)} succeed"

                results.append({
                    'original_idx': idx,
                    'dialect': dialect,
                    'domain': domain,
                    'task': task,
                    'example': row['prompt'],
                    'true_label': row['true_label'],
                    'base_instruct': row['base_instruct'],
                    'lora_grouping': row['lora_grouping'],
                    'dialectmoe': f"{predictions['dialectmoe']} ({model_tags['dialectmoe']})",
                    'cat': f"{predictions['cat']} ({model_tags['cat']})",
                    'ties': f"{predictions['ties']} ({model_tags['ties']})",
                    'observation': observation,
                })

    # Passing the column list keeps the schema stable for an empty result.
    return pd.DataFrame(results, columns=output_columns)

In [None]:
# Model keys credited per method. They must match the dfs_dict keys passed
# below: a prediction is tagged with the model name only when that model is
# listed for the method, otherwise it is tagged 'N/A'.
best_models = {
    'dialectmoe': ['qwen', 'llama'],
    'cat': ['phi'],
    'ties': ['llama']
}

# NOTE(review): df_gemma is loaded earlier but not passed here; confirm that
# leaving Gemma out of the table is intentional.
analysis_table = create_comprehensive_analysis_table(
    {'qwen': df_qwen, 'llama': df_llama, 'phi': df_phi},
    best_models
)

In [83]:
# Inspect the first row of the analysis table as a sanity check.
analysis_table.iloc[0]

original_idx                                                  1237
dialect                                                      en-AU
domain                                                      Google
task                                                     Sentiment
example          Generate the sentiment of the given text. 1 fo...
true_label                                                       1
base_instruct                                                    0
lora_grouping                                                    0
dialectmoe                                                0 (qwen)
cat                                                        0 (N/A)
ties                                                       0 (N/A)
observation         All proposed methods fail (see error analysis)
Name: 0, dtype: object

In [None]:
# Save the combined analysis table to CSV.
analysis_table.to_csv('./data/analysis/final_analysis.csv')

In [95]:
# Show the full prompt text of the row at position 651 in the Phi frame.
df_phi.iloc[651].prompt

"Generate the sentiment of the given text. 1 for positive sentiment, and 0 for negative sentiment. Do not give an explanation.\nUrgent care clinics might be your next best option: \nIt emergency but for non life threatening issues, anything life threatening I'd be calling an ambulance."

In [93]:
# Row at position 651 of the Llama frame: labels and per-method predictions.
df_llama.iloc[651]

Unnamed: 0                                                          651
prompt                Generate the sentiment of the given text. 1 fo...
task                                                          Sentiment
domain                                                           Reddit
dialect                                                           en-AU
true_label                                                            0
base_instruct                                                         0
lora_grouping                                                         0
individual_dialect                                                    0
cat                                                                   0
ties                                                                  0
mixlora                                                               1
Name: 651, dtype: object

In [94]:
# Row at position 651 of the Qwen frame, for comparison with the Llama row.
df_qwen.iloc[651]

Unnamed: 0                                                          651
prompt                Generate the sentiment of the given text. 1 fo...
task                                                          Sentiment
domain                                                           Reddit
dialect                                                           en-AU
true_label                                                            0
base_instruct                                                         1
lora_grouping                                                         1
individual_dialect                                                    0
cat                                                                   0
ties                                                                  1
mixlora                                                               1
Name: 651, dtype: object