# Preparation

In [None]:
# Import necessary packages
import pandas as pd
from sklearn.metrics import classification_report

# Mount Google Drive at /content/drive so the spreadsheets under
# /content/drive/MyDrive can be read by the cells below.
# The google.colab package is only available inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


Read GPT annotations for one prompting strategy.

In [None]:
my_prompt = 'complex_three_classes'

# Locations of the GPT output for this prompt and of the curated annotations
gpt_results_path = f'/content/drive/MyDrive/CHR2024/GPT_results/GPT_test_GPT4_prompt_{my_prompt}.xlsx'
curation_path = '/content/drive/MyDrive/CHR2024/Curation/GPT_test.xlsx'

GPT_df = pd.read_excel(gpt_results_path, index_col=0)

# Rename "curation" to "label" so both frames expose their annotations
# under the same column name
curated_df = pd.read_excel(curation_path).rename(columns={"curation": "label"})

# full_df is another name for GPT_df (not a copy): the curated labels are
# attached by position as an extra 'curation' column on that same frame
full_df = GPT_df
full_df['curation'] = curated_df['label'].tolist()

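Define the label-mapping functions used to collapse the fine-grained labels post hoc, either into three classes (`label_maker`) or into a binary val / no_val scheme (`label_maker_binary`).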

In [None]:
def label_maker(x):
  """Collapse a label into the three-class scheme.

  Any label containing the substring 'ind' becomes 'eval_individual';
  'generic_val', 'social' and 'aesthetic' become 'eval_generic';
  every other label is returned unchanged.
  """
  # Guard clause: the individual-evaluation family is checked first
  if 'ind' in x:
    return 'eval_individual'

  generic_labels = ('generic_val', 'social', 'aesthetic')
  return 'eval_generic' if x in generic_labels else x


def label_maker_binary(x):
  """Collapse a label into the binary scheme.

  'no_val' is returned as is; every other label becomes 'val'.
  """
  return 'val' if x != 'no_val' else x

Print classification reports (with post-hoc label mappings).

In [None]:
# Always start from the raw label columns so re-running this cell gives the
# same three reports.
gpt_annotations = GPT_df['label']
curation_annotations = curated_df['label']

# zero_division=0 is passed to every report below. By default scikit-learn
# already scores an undefined precision/recall as 0 but also raises an
# UndefinedMetricWarning for it; setting the value explicitly keeps the numbers
# identical and silences the repeated warnings.
#
# NOTE(review): when the selected prompt already asked GPT for coarse labels
# (e.g. a '*_three_classes' prompt), the GPT labels and the curated
# fine-grained labels are different label sets, so the "All labels" report is
# not meaningful and only the mapped reports should be read — confirm against
# the prompt loaded above.
print("All labels:")
print(classification_report(curation_annotations,
                            gpt_annotations, digits=3, zero_division=0))

# Three-class view: both sides are collapsed with label_maker
print("Three classes:")
gpt_annotations = gpt_annotations.map(label_maker)
curation_annotations = curation_annotations.map(label_maker)
print(classification_report(curation_annotations,
                            gpt_annotations, digits=3, zero_division=0))

# Binary view: applied on top of the three-class labels from the step above
print("Binary:")
gpt_annotations = gpt_annotations.map(label_maker_binary)
curation_annotations = curation_annotations.map(label_maker_binary)
print(classification_report(curation_annotations,
                            gpt_annotations, digits=3, zero_division=0))


All labels:
                 precision    recall  f1-score   support

      aesthetic      0.000     0.000     0.000       131
   eval_generic      0.000     0.000     0.000         0
eval_individual      0.000     0.000     0.000         0
    generic_val      0.000     0.000     0.000       108
  ind_cognitive      0.000     0.000     0.000        15
  ind_emotional      0.000     0.000     0.000        75
  ind_pragmatic      0.000     0.000     0.000        15
         no_val      0.976     0.761     0.855       854
         social      0.000     0.000     0.000         5

       accuracy                          0.540      1203
      macro avg      0.108     0.085     0.095      1203
   weighted avg      0.693     0.540     0.607      1203

Three classes:
                 precision    recall  f1-score   support

   eval_generic      0.535     0.816     0.646       244
eval_individual      0.412     0.648     0.504       105
         no_val      0.976     0.761     0.855       854


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Print overall comparison of F1-macro scores (for all prompting strategies).

In [None]:
from sklearn.metrics import f1_score

my_strategies = ['complex', 'simple', 'procedural']
my_simplifications = ['full', 'three_classes', 'binary']

# One list of macro-F1 scores per column of the results table; each list
# receives exactly one score per prompting strategy.
full = list()
full_3class = list()
full_2class = list()
three_classes_3class = list()
three_classes_2class = list()
binary_2class = list()

# For every prompt simplification: the label mappings to apply, in order, and
# the list that receives the macro-F1 computed right after each mapping.
# The mappings are cumulative (the binary mapping runs on the output of the
# three-class mapping). None means "score the labels as they are".
score_plan = {
  'full': [(None, full),
           (label_maker, full_3class),
           (label_maker_binary, full_2class)],
  'three_classes': [(label_maker, three_classes_3class),
                    (label_maker_binary, three_classes_2class)],
  'binary': [(label_maker_binary, binary_2class)],
}

# The curated annotations do not depend on the prompt, so the spreadsheet is
# read once here instead of once per (strategy, simplification) pair.
curated_df = pd.read_excel('/content/drive/MyDrive/CHR2024/Curation/GPT_test.xlsx')
curated_df = curated_df.rename(columns={"curation": "label"})

for my_strategy in my_strategies:
  for my_simplification in my_simplifications:
    my_prompt =  my_strategy + '_' + my_simplification

    GPT_df = pd.read_excel('/content/drive/MyDrive/CHR2024/GPT_results/GPT_test_GPT4_prompt_'+my_prompt+'.xlsx', index_col=0)

    # .map() returns new Series, so the frames themselves are never modified
    gpt_annotations = GPT_df['label']
    curation_annotations = curated_df['label']

    for mapper, scores in score_plan[my_simplification]:
      if mapper is not None:
        gpt_annotations = gpt_annotations.map(mapper)
        curation_annotations = curation_annotations.map(mapper)

      scores.append(f1_score(curation_annotations, gpt_annotations, average='macro'))


# Rows: prompting strategies; columns: <prompt simplification>_<number of
# classes the score was computed on> ('full' = no post-hoc mapping).
results_df = pd.DataFrame({'full':full, 'full_3':full_3class, 'full_2':full_2class, 'three_classes_3':three_classes_3class, 'three_classes_2':three_classes_2class, 'binary_2':binary_2class})
results_df.index = my_strategies

results_df

Unnamed: 0,full,full_3,full_2,three_classes_3,three_classes_2,binary_2
complex,0.424565,0.633575,0.758039,0.668357,0.803478,0.824839
simple,0.335887,0.531353,0.717034,0.629608,0.800735,0.779271
procedural,0.386123,0.674531,0.797112,0.641045,0.807307,0.803318


## Few-shot strategy

Read GPT annotations for one few-shot prompting strategy.

In [None]:
my_shots = 8
my_prompt = 'complex_binary'

# Locations of the few-shot GPT output (file name carries the number of
# shots and the prompt) and of the curated annotations
fewshot_results_path = f'/content/drive/MyDrive/CHR2024/GPT_results/fewshot_{my_shots}_GPT_test_GPT4_prompt_{my_prompt}.xlsx'
curation_path = '/content/drive/MyDrive/CHR2024/Curation/GPT_test.xlsx'

GPT_df = pd.read_excel(fewshot_results_path, index_col=0)

# Rename "curation" to "label" so both frames expose their annotations
# under the same column name
curated_df = pd.read_excel(curation_path).rename(columns={"curation": "label"})

# full_df is another name for GPT_df (not a copy): the curated labels are
# attached by position as an extra 'curation' column on that same frame
full_df = GPT_df
full_df['curation'] = curated_df['label'].tolist()

Print classification report.

In [None]:
# Collapse both label columns to the binary val / no_val scheme in one step,
# then report per-class precision, recall and F1 against the curated labels
gpt_annotations = GPT_df['label'].map(label_maker_binary)
curation_annotations = curated_df['label'].map(label_maker_binary)

binary_report = classification_report(curation_annotations,
                                      gpt_annotations, digits=3)
print(binary_report)

              precision    recall  f1-score   support

      no_val      0.956     0.657     0.779       854
         val      0.524     0.926     0.669       349

    accuracy                          0.735      1203
   macro avg      0.740     0.791     0.724      1203
weighted avg      0.831     0.735     0.747      1203

