Cohen's kappa:

20% and 5% from each company

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import ast
from cohinkappa import create_sample, compute_random_cohens_kappa, save_kappa_results_to_csv



def parse_run_list(x):
    """Convert a stringified Python literal (e.g. ``"[1, 2, 3]"``) to its value.

    Args:
        x: A cell value. Strings are stripped of surrounding whitespace and
            parsed with ``ast.literal_eval``; any other type is passed
            through untouched.

    Returns:
        The parsed literal for a parsable string, an empty list for a string
        that is not a valid Python literal, or ``x`` itself when it is not a
        string.
    """
    if not isinstance(x, str):
        return x
    try:
        return ast.literal_eval(x.strip())
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
        # Best-effort parse: malformed cells fall back to an empty list.
        # Only the errors literal_eval raises for bad input are caught, so
        # KeyboardInterrupt / SystemExit still propagate.
        return []


# Tickers passed to the sampling step as `specified_companies`.
companies = [
    "FB",
    "AMZN",
    "AAPL",
    "MSFT",
]

# DataFrame columns that are parsed and then scored for agreement.
dimensions = [
    "Strength_runs",
    "Specificity_runs",
    "Persuasiveness_runs",
    "Objectivity_runs",
]

# One entry per annotation file: (setting label, input CSV, output CSV).
# Every entry uses the same model name; an empty setting label marks the
# default-temperature file.
model_configs = [
    {
        "model_name": "llama",
        "setting": setting_label,
        "data_path": input_csv,
        "output_csv": result_csv,
    }
    for setting_label, input_csv, result_csv in (
        (
            "",
            "/data/LlamaAnn_Temperature_default.csv",
            "results/cohen_kappa20_llama_default.csv",
        ),
        (
            "Temperature=0.3",
            "/data/LlamaAnn_Temperature_03.csv",
            "results/cohen_kappa20_llama_temp3.csv",
        ),
        (
            "Temperature=0.7",
            "/data/LlamaAnn_Temperature_07.csv",
            "results/cohen_kappa20_llama_temp7.csv",
        ),
    )
]

# Run the same load -> sample -> score -> save pipeline once per configuration.
for cfg in model_configs:
    model_name = cfg["model_name"]
    setting = cfg["setting"]
    print(f"\n===== Processing Model: {model_name} | Setting: {setting} =====")

    # Load this configuration's annotation file.
    combined_results_df = pd.read_csv(cfg["data_path"])

    # Run every dimension column through parse_run_list, which turns
    # stringified literals into Python objects.
    for dim in dimensions:
        combined_results_df[dim] = combined_results_df[dim].apply(parse_run_list)

    # Draw the sample with fraction=0.20 for the listed companies; the fixed
    # random_state is presumably there for reproducibility - confirm in
    # cohinkappa.create_sample.
    sample_df = create_sample(
        combined_results_df, specified_companies=companies, fraction=0.20, random_state=42
    )

    # Report how many sampled rows each company contributed.
    sample_counts = sample_df['company_name'].value_counts()
    print("Sample counts per company:\n", sample_counts)

    # Score agreement per dimension on the sample.
    kappa_results = compute_random_cohens_kappa(sample_df, dimensions, random_seed=42)

    print("Cohen's Kappa Results (two runs compared at random):")
    for dim, res in kappa_results.items():
        print(f"Dimension: {dim} | Runs: {res['Chosen Runs']} | Kappa: {res['Kappa']:.3f}")

    # Persist the results to the CSV path named in this configuration.
    output_csv = cfg["output_csv"]
    save_kappa_results_to_csv(
        kappa_results,
        filepath=output_csv,
        model_name=model_name,
        setting=setting,
        decimal_places=2,
    )

    print(f"Saved results to {output_csv} for {model_name} (Setting: {setting})")
