In [27]:
import pandas as pd
from scipy import stats

In [28]:
# Read every worksheet in one call; sheet_name=None returns a {sheet name: DataFrame} mapping.
all_sheets = pd.read_excel("./data/gemma-3-12b-single-sun.xlsx", sheet_name=None)
# Namespace each sheet's columns with "<sheet name>_" so columns coming from
# different sheets stay distinguishable, then put the sheets side by side.
gemma = pd.concat(
    [frame.add_prefix(f"{name}_") for name, frame in all_sheets.items()],
    axis=1,
)

In [None]:
# Index the rows by the "letters_new_names" column, label that index "names",
# and keep only the score columns.
# Index.rename() returns a new Index instead of renaming in place, so the index
# label is applied with rename_axis(), whose result stays in the chain.
gemma = (
    gemma
    .set_index("letters_new_names")
    .rename_axis("names")
    .filter(regex="score")  # every column whose name contains "score"
)

gemma.info()

<class 'pandas.core.frame.DataFrame'>
Index: 562 entries, Oliviu Barbu to Loredana Diaconu
Data columns (total 90 columns):
 #   Column                                                  Non-Null Count  Dtype  
---  ------                                                  --------------  -----  
 0   letters_formal_and_structural_score_1                   562 non-null    int64  
 1   letters_formal_and_structural_score_2                   562 non-null    int64  
 2   letters_formal_and_structural_score_3                   562 non-null    int64  
 3   letters_formal_and_structural_score_4                   562 non-null    int64  
 4   letters_formal_and_structural_score_5                   562 non-null    int64  
 5   letters_formal_and_structural_final_score               562 non-null    object 
 6   letters_relevance_and_thematic_fit_score_1              562 non-null    int64  
 7   letters_relevance_and_thematic_fit_score_2              562 non-null    int64  
 8   letters_relevance_and

In [None]:
# Recalculate the mean score of every criterion.
# Each "<label>_final_score" column identifies one criterion; the columns averaged
# for it are those matching "<label>_score" (the numbered "<label>_score_N" columns;
# the "_final_score" column itself does not match that pattern).
labels = [
    final_col.replace("_final_score", "")
    for final_col in gemma.filter(regex="final_score").columns
]
gemma_scores = pd.DataFrame(
    {label: gemma.filter(regex=f"{label}_score").mean(axis=1) for label in labels}
)

# Rows with no usable score for a criterion get 0 instead of NaN.
gemma_scores = gemma_scores.fillna(0)
gemma_scores.info()

<class 'pandas.core.frame.DataFrame'>
Index: 562 entries, Oliviu Barbu to Loredana Diaconu
Data columns (total 15 columns):
 #   Column                                      Non-Null Count  Dtype  
---  ------                                      --------------  -----  
 0   letters_formal_and_structural               562 non-null    float64
 1   letters_relevance_and_thematic_fit          562 non-null    float64
 2   letters_professionalism_and_research        562 non-null    float64
 3   letters_uniqueness_and_individuality        562 non-null    float64
 4   letters_potential_and_developmental         562 non-null    float64
 5   presentations_formal_and_structural         562 non-null    float64
 6   presentations_relevance_and_thematic_fit    562 non-null    float64
 7   presentations_professionalism_and_research  562 non-null    float64
 8   presentations_uniqueness_and_individuality  562 non-null    float64
 9   presentations_potential_and_developmental   562 non-null    float64


In [None]:
# Load the human-assigned scores and reduce them to the columns used below.
# Built as a single chain: adding a column to the result of `df[[...]]` raises
# SettingWithCopyWarning on pandas without copy-on-write, whereas .assign()
# always returns a new frame.
human_scores = (
    pd.read_parquet("./data/dataset.parquet")
    # Keep only the relevant score columns
    .loc[:, ["prof_score", "all_phd_1", "all_phd_2", "offline_test_total"]]
    # phd_score is the row-wise mean of the two all_phd_* columns
    # (mean() skips NaN, so a single available value is used as-is)
    .assign(phd_score=lambda scores: scores[["all_phd_1", "all_phd_2"]].mean(axis=1))
    .drop(columns=["all_phd_1", "all_phd_2"])
)

human_scores.info()

<class 'pandas.core.frame.DataFrame'>
Index: 577 entries, Adam Mazilescu to Ștefania Albu
Data columns (total 3 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   prof_score          98 non-null     float64
 1   offline_test_total  48 non-null     float64
 2   phd_score           567 non-null    float64
dtypes: float64(3)
memory usage: 18.0+ KB


In [32]:
# Columns of `human_scores` that the Gemma scores are correlated against.
# Narrow the list to ["phd_score", "prof_score"] or ["offline_test_total"]
# to restrict the analysis to a subset.
SELECTED_COLS = [
    "phd_score",
    "prof_score",
    "offline_test_total",
]

In [33]:
# Pearson correlation of every Gemma criterion (rows) with every selected human
# score (columns); each cell holds the text "coefficient (p-value)".
pearson = pd.DataFrame(index=gemma_scores.columns, columns=SELECTED_COLS)
# Inner join on the index keeps only the rows present in both tables.
temp = human_scores.merge(gemma_scores, left_index=True, right_index=True, how="inner")
for human_col in pearson.columns:
    for gemma_col in pearson.index:
        # Pairwise deletion: drop rows where either of the two scores is missing.
        paired = temp[[gemma_col, human_col]].dropna()
        r_value, p_value = stats.pearsonr(paired[gemma_col], paired[human_col])
        pearson.loc[gemma_col, human_col] = f"{r_value:.4f} ({p_value:.4f})"

In [None]:
# Spearman rank correlation of every Gemma criterion (rows) with every selected
# human score (columns); each cell holds the text "coefficient (p-value)".
spearman = pd.DataFrame(index=gemma_scores.columns, columns=SELECTED_COLS)
# Inner join on the index keeps only the rows present in both tables.
temp = gemma_scores.merge(human_scores, left_index=True, right_index=True, how="inner")
for human_col in spearman.columns:
    for gemma_col in spearman.index:
        # Pairwise deletion: drop rows where either of the two scores is missing.
        paired = temp[[gemma_col, human_col]].dropna()
        rho, p_value = stats.spearmanr(paired[gemma_col], paired[human_col])
        spearman.loc[gemma_col, human_col] = f"{rho:.4f} ({p_value:.4f})"

In [35]:
# Show both correlation tables side by side; the mapping keys become the top
# level of the column MultiIndex.
pd.concat({"spearman": spearman, "pearson": pearson}, axis=1)

Unnamed: 0_level_0,spearman,spearman,spearman,pearson,pearson,pearson
Unnamed: 0_level_1,phd_score,prof_score,offline_test_total,phd_score,prof_score,offline_test_total
letters_formal_and_structural,0.1382 (0.0010),-0.1262 (0.2156),-0.0797 (0.5900),0.2312 (0.0000),-0.1063 (0.2975),0.0182 (0.9022)
letters_relevance_and_thematic_fit,0.2928 (0.0000),0.1350 (0.1850),-0.1242 (0.4003),0.3798 (0.0000),0.0996 (0.3290),0.0029 (0.9844)
letters_professionalism_and_research,0.2533 (0.0000),0.0033 (0.9740),-0.1359 (0.3569),0.3171 (0.0000),0.0425 (0.6774),-0.0559 (0.7059)
letters_uniqueness_and_individuality,0.3026 (0.0000),0.0279 (0.7848),-0.1591 (0.2799),0.3707 (0.0000),0.0461 (0.6523),-0.0774 (0.6013)
letters_potential_and_developmental,0.3064 (0.0000),-0.0041 (0.9680),-0.2829 (0.0514),0.3765 (0.0000),0.0175 (0.8645),-0.1852 (0.2077)
presentations_formal_and_structural,0.0420 (0.3188),0.0213 (0.8349),-0.1088 (0.4617),0.0349 (0.4082),0.0697 (0.4954),-0.1257 (0.3946)
presentations_relevance_and_thematic_fit,0.0375 (0.3734),0.0929 (0.3627),-0.2617 (0.0724),0.0305 (0.4699),0.0803 (0.4316),-0.1955 (0.1829)
presentations_professionalism_and_research,0.0338 (0.4223),0.0739 (0.4693),-0.0812 (0.5831),0.0363 (0.3893),0.0573 (0.5751),-0.0539 (0.7162)
presentations_uniqueness_and_individuality,0.0710 (0.0918),0.1564 (0.1240),-0.1693 (0.2500),0.0517 (0.2198),0.0979 (0.3374),-0.1805 (0.2194)
presentations_potential_and_developmental,0.0497 (0.2382),0.0869 (0.3948),-0.1932 (0.1883),0.0420 (0.3192),0.0616 (0.5465),-0.1843 (0.2098)
