# imports

In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
from helpers.threshold_tester import ContextualThres, StaticThres, SyntaxThres
import pandas as pd
from sklearn.metrics import classification_report
from collections import defaultdict
import numpy as np

# static

In [7]:
# Load the three static word-pair score tables in one pass. Each file is
# tab-separated and its first column holds the saved row index.
static_unsupervised, static_unsup_ident, static_semi_sup = (
    pd.read_csv(tsv_path, sep='\t', index_col=0)
    for tsv_path in (
        '../data/2_new_run/15072023_180241-mapped_unsupervised-static.tsv',
        '../data/2_new_run/15072023_181327-mapped_unsup_ident-static.tsv',
        '../data/2_new_run/15072023_182411-mapped_semi_supervised-static.tsv',
    )
)

In [8]:
# Gold-standard word pairs for the static evaluation (tab-separated, first
# column is the saved row index). Each row identifies a pair by film, sent_idx,
# src and tgt and carries its human label in label_static.
static_gold = pd.read_csv('../data/2_new_run/static_gold.tsv', sep='\t', index_col=0)
static_gold

Unnamed: 0,film,sent_idx,src,tgt,label_static
0,AC01,0,delivered,overgedragen,Reproduction
1,AC01,1,his,zijn,Reproduction
2,AC01,5,swear,zweert,Reproduction
3,AC01,8,I,Dat,Creative Shift
4,AC01,37,Sit,Ga,Creative Shift
...,...,...,...,...,...
120,DR02,331,Poor,Armen,Reproduction
121,DR02,335,gotta,moeten,Reproduction
122,DR02,389,worthless,niks,Creative Shift
123,DR01,979,share,deel,Reproduction


In [48]:
def static_pred_creator(gold:pd.DataFrame, df:pd.DataFrame):
    """Collect the rows of ``df`` that correspond to the gold word pairs.

    A row of ``df`` matches a gold row when ``film``, ``sent_idx``, ``src``
    and ``tgt`` are all equal.

    Args:
        gold: Gold data; must contain the columns ``film``, ``sent_idx``,
            ``src`` and ``tgt``.
        df: Scored word pairs with (at least) the same four columns.

    Returns:
        A DataFrame with the matching rows of ``df``, in gold-row order and
        keeping the original index of ``df``. A pair that occurs several
        times in ``df`` (or in ``gold``) is returned several times, so callers
        may need ``drop_duplicates``. An empty DataFrame is returned when
        ``gold`` has no rows.
    """
    key_cols = ['film', 'sent_idx', 'src', 'tgt']
    # Gather every per-gold-row selection first and concatenate once:
    # calling pd.concat inside the loop re-copies the accumulated frame on
    # every iteration.
    matches = [
        df[
            (df.film == film)
            & (df.sent_idx == sent_idx)
            & (df.src == src)
            & (df.tgt == tgt)
        ]
        for film, sent_idx, src, tgt in gold[key_cols].itertuples(index=False, name=None)
    ]
    if not matches:
        # pd.concat raises on an empty list; mirror the original's empty result.
        return pd.DataFrame()
    return pd.concat(matches)

## Unsupervised FastText

In [67]:
unsup_pred = static_pred_creator(static_gold, static_unsupervised)

In [68]:
# A sentence can contain the same word twice (e.g. 'be tough , be tough ,'),
# so the same word pair is matched more than once. The gold data does not
# count these repeats: show them for inspection, then keep only the first
# occurrence of each fully identical row.
# NOTE: drop_duplicates(inplace=True) mutates unsup_pred, so re-running this
# cell displays an empty frame.
display(unsup_pred[unsup_pred.duplicated()])
unsup_pred.drop_duplicates(inplace=True)

Unnamed: 0,film,genre,sent_idx,src_sent,tgt_sent,src,tgt,static_cosine,type
89889,CH04,children_family,615,"be tough , be tough ,",wees dan sterk,tough,sterk,0.466335,human
230838,CO10,comedy,602,"Yeah , you 're good . It 's okay . That 's okay .",Niks aan de hand . Helemaal niks aan de hand .,okay,hand,0.677257,human
324226,DO10,documentary,306,Sure . Sure .,Natuurlijk .,Sure,Natuurlijk,0.290294,human


In [69]:
unsup_pred

Unnamed: 0,film,genre,sent_idx,src_sent,tgt_sent,src,tgt,static_cosine,type
3,AC01,action,0,The Inquisition has finally delivered Spain to...,"Andalusië , Spanje , 1492 De Inquisitie heeft ...",delivered,overgedragen,0.475222,human
12,AC01,action,1,Sultan Muhammad and his people still hold out ...,Sultan Mohammed en zijn mensen zitten nog in G...,his,zijn,0.440071,human
37,AC01,action,5,swear to honor our Order in the fight for free...,zweert gij trouw aan de Orde ?,swear,zweert,0.535506,human
55,AC01,action,8,I swear .,Dat zweer ik .,I,Dat,0.614092,human
203,AC01,action,37,Sit down . You 're making me nervous .,Ga zitten . U maakt me nerveus .,Sit,Ga,0.636277,human
...,...,...,...,...,...,...,...,...,...
344090,DR02,drama,331,Poor men fighting a rich man 's war .,Armen die vochten voor de rijken .,Poor,Armen,0.742416,human
344127,DR02,drama,335,"Point is , us old soldiers gotta live , right ?",Wij oud-soldaten moeten ook rondkomen .,gotta,moeten,0.483638,human
344410,DR02,drama,389,"No , no . No , this is worthless .",Hier heb je niks aan .,worthless,niks,0.506961,human
340115,DR01,drama,979,I gave Bubba 's mama Bubba 's share .,gaf ik Bubba's deel aan zijn mama .,share,deel,0.557937,human


In [70]:
# Threshold tester for the unsupervised mapping, built on the static gold data.
statf_u = StaticThres(gold_data=static_gold)
# Calling the tester hands it the matched predictions.
# NOTE(review): presumably this labels the pairs at each threshold - confirm in helpers.threshold_tester.
statf_u(unsup_pred)
# Last expression: displays the per-threshold precision / recall / f1-score table.
statf_u.score()

      precision    recall  f1-score  support t_metric
0.40   0.627234  0.614912  0.431418    125.0    basic
0.45   0.651259  0.672807  0.518490    125.0    basic
0.50   0.666667  0.728070  0.634975    125.0    basic
0.55   0.580882  0.596491  0.583333    125.0    basic
0.60   0.613324  0.586842  0.593556    125.0    basic


In [71]:
statf_u.get_best()

Unnamed: 0,precision,recall,f1-score,support,t_metric
0.5,0.666667,0.72807,0.634975,125.0,basic


## Unsupervised Identical Word FastText

In [61]:
unsup_ident_pred = static_pred_creator(static_gold, static_unsup_ident)

In [62]:
# A sentence can contain the same word twice (e.g. 'be tough , be tough ,'),
# so the same word pair is matched more than once. The gold data does not
# count these repeats: show them for inspection, then keep only the first
# occurrence of each fully identical row.
# NOTE: drop_duplicates(inplace=True) mutates unsup_ident_pred, so re-running
# this cell displays an empty frame.
display(unsup_ident_pred[unsup_ident_pred.duplicated()])
unsup_ident_pred.drop_duplicates(inplace=True)

Unnamed: 0,film,genre,sent_idx,src_sent,tgt_sent,src,tgt,static_cosine,type
89889,CH04,children_family,615,"be tough , be tough ,",wees dan sterk,tough,sterk,0.465088,human
230838,CO10,comedy,602,"Yeah , you 're good . It 's okay . That 's okay .",Niks aan de hand . Helemaal niks aan de hand .,okay,hand,0.679262,human
324226,DO10,documentary,306,Sure . Sure .,Natuurlijk .,Sure,Natuurlijk,0.292931,human


In [63]:
unsup_ident_pred

Unnamed: 0,film,genre,sent_idx,src_sent,tgt_sent,src,tgt,static_cosine,type
3,AC01,action,0,The Inquisition has finally delivered Spain to...,"Andalusië , Spanje , 1492 De Inquisitie heeft ...",delivered,overgedragen,0.474148,human
12,AC01,action,1,Sultan Muhammad and his people still hold out ...,Sultan Mohammed en zijn mensen zitten nog in G...,his,zijn,0.440958,human
37,AC01,action,5,swear to honor our Order in the fight for free...,zweert gij trouw aan de Orde ?,swear,zweert,0.532376,human
55,AC01,action,8,I swear .,Dat zweer ik .,I,Dat,0.611416,human
203,AC01,action,37,Sit down . You 're making me nervous .,Ga zitten . U maakt me nerveus .,Sit,Ga,0.639054,human
...,...,...,...,...,...,...,...,...,...
344090,DR02,drama,331,Poor men fighting a rich man 's war .,Armen die vochten voor de rijken .,Poor,Armen,0.749895,human
344127,DR02,drama,335,"Point is , us old soldiers gotta live , right ?",Wij oud-soldaten moeten ook rondkomen .,gotta,moeten,0.480705,human
344410,DR02,drama,389,"No , no . No , this is worthless .",Hier heb je niks aan .,worthless,niks,0.506652,human
340115,DR01,drama,979,I gave Bubba 's mama Bubba 's share .,gaf ik Bubba's deel aan zijn mama .,share,deel,0.559434,human


In [64]:
# Threshold tester for the unsupervised identical-word mapping, built on the static gold data.
statf_i = StaticThres(gold_data=static_gold)
# Calling the tester hands it the matched predictions.
# NOTE(review): presumably this labels the pairs at each threshold - confirm in helpers.threshold_tester.
statf_i(unsup_ident_pred)
# Last expression: displays the per-threshold precision / recall / f1-score table.
statf_i.score()

      precision    recall  f1-score  support t_metric
0.40   0.629441  0.620175  0.439677    125.0    basic
0.45   0.655784  0.683333  0.533582    125.0    basic
0.50   0.666667  0.728070  0.634975    125.0    basic
0.55   0.580882  0.596491  0.583333    125.0    basic
0.60   0.602604  0.581579  0.587218    125.0    basic


In [65]:
statf_i.get_best()

Unnamed: 0,precision,recall,f1-score,support,t_metric
0.5,0.666667,0.72807,0.634975,125.0,basic


## Semi-supervised FastText

In [76]:
semi_sup_pred = static_pred_creator(static_gold, static_semi_sup)

In [77]:
# A sentence can contain the same word twice (e.g. 'be tough , be tough ,'),
# so the same word pair is matched more than once. The gold data does not
# count these repeats: show them for inspection, then keep only the first
# occurrence of each fully identical row.
# NOTE: drop_duplicates(inplace=True) mutates semi_sup_pred, so re-running
# this cell displays an empty frame.
display(semi_sup_pred[semi_sup_pred.duplicated()])
semi_sup_pred.drop_duplicates(inplace=True)

Unnamed: 0,film,genre,sent_idx,src_sent,tgt_sent,src,tgt,static_cosine,type
89889,CH04,children_family,615,"be tough , be tough ,",wees dan sterk,tough,sterk,0.464757,human
230838,CO10,comedy,602,"Yeah , you 're good . It 's okay . That 's okay .",Niks aan de hand . Helemaal niks aan de hand .,okay,hand,0.679424,human
324226,DO10,documentary,306,Sure . Sure .,Natuurlijk .,Sure,Natuurlijk,0.291758,human


In [78]:
semi_sup_pred

Unnamed: 0,film,genre,sent_idx,src_sent,tgt_sent,src,tgt,static_cosine,type
3,AC01,action,0,The Inquisition has finally delivered Spain to...,"Andalusië , Spanje , 1492 De Inquisitie heeft ...",delivered,overgedragen,0.474036,human
12,AC01,action,1,Sultan Muhammad and his people still hold out ...,Sultan Mohammed en zijn mensen zitten nog in G...,his,zijn,0.442340,human
37,AC01,action,5,swear to honor our Order in the fight for free...,zweert gij trouw aan de Orde ?,swear,zweert,0.534009,human
55,AC01,action,8,I swear .,Dat zweer ik .,I,Dat,0.615675,human
203,AC01,action,37,Sit down . You 're making me nervous .,Ga zitten . U maakt me nerveus .,Sit,Ga,0.640848,human
...,...,...,...,...,...,...,...,...,...
344090,DR02,drama,331,Poor men fighting a rich man 's war .,Armen die vochten voor de rijken .,Poor,Armen,0.745218,human
344127,DR02,drama,335,"Point is , us old soldiers gotta live , right ?",Wij oud-soldaten moeten ook rondkomen .,gotta,moeten,0.479617,human
344410,DR02,drama,389,"No , no . No , this is worthless .",Hier heb je niks aan .,worthless,niks,0.507071,human
340115,DR01,drama,979,I gave Bubba 's mama Bubba 's share .,gaf ik Bubba's deel aan zijn mama .,share,deel,0.560364,human


In [79]:
# Threshold tester for the semi-supervised mapping, built on the static gold data.
statf_s = StaticThres(gold_data=static_gold)
# Calling the tester hands it the matched predictions.
# NOTE(review): presumably this labels the pairs at each threshold - confirm in helpers.threshold_tester.
statf_s(semi_sup_pred)
# Last expression: displays the per-threshold precision / recall / f1-score table.
statf_s.score()

      precision    recall  f1-score  support t_metric
0.40   0.629441  0.620175  0.439677    125.0    basic
0.45   0.655784  0.683333  0.533582    125.0    basic
0.50   0.666667  0.728070  0.634975    125.0    basic
0.55   0.580882  0.596491  0.583333    125.0    basic
0.60   0.602604  0.581579  0.587218    125.0    basic


In [80]:
statf_s.get_best()

Unnamed: 0,precision,recall,f1-score,support,t_metric
0.5,0.666667,0.72807,0.634975,125.0,basic


## F-scores for original gold data scores from van der Heden

In [119]:
# Score the original (unedited) gold samples at the same thresholds as the
# static testers, so the macro averages can be compared side by side.
orig_data = pd.read_csv('../data/heden_samples_en_v2.tsv', sep='\t')
orig_scores = defaultdict(list)
for i in [.4, .45, .5, .55, .6]:
    orig_scores['thres'].append(i)
    # Vectorised labelling: a score strictly above the threshold is a
    # 'Creative Shift', anything else (including NaN) a 'Reproduction'.
    # The predictions are stored in a column named after the threshold.
    orig_data[i] = np.where(orig_data['score'] > i, 'Creative Shift', 'Reproduction')

    print(f'Threshold {i} classification report:')
    print(classification_report(orig_data['class'], orig_data[i], zero_division=0.0, output_dict=False))
    print('')

    # Only the macro-average row is kept for the comparison table.
    macro_avg = classification_report(orig_data['class'], orig_data[i], output_dict=True, zero_division=0.0)['macro avg']
    for key, value in macro_avg.items():
        orig_scores[key].append(value)

# NOTE: orig_scores_df is a pandas Styler (best f1-score highlighted), not a plain DataFrame.
orig_scores_df = pd.DataFrame.from_dict(orig_scores).set_index('thres').style.highlight_max(subset=['f1-score'], color = 'lightgreen', axis = 0)
display(orig_scores_df)

Threshold 0.4 classification report:
                precision    recall  f1-score   support

Creative Shift       0.31      1.00      0.48        39
  Reproduction       0.00      0.00      0.00        86

      accuracy                           0.31       125
     macro avg       0.16      0.50      0.24       125
  weighted avg       0.10      0.31      0.15       125


Threshold 0.45 classification report:
                precision    recall  f1-score   support

Creative Shift       0.37      0.92      0.53        39
  Reproduction       0.89      0.28      0.42        86

      accuracy                           0.48       125
     macro avg       0.63      0.60      0.48       125
  weighted avg       0.73      0.48      0.46       125


Threshold 0.5 classification report:
                precision    recall  f1-score   support

Creative Shift       0.42      0.67      0.51        39
  Reproduction       0.79      0.58      0.67        86

      accuracy                        

Unnamed: 0_level_0,precision,recall,f1-score,support
thres,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0.4,0.156,0.5,0.237805,125.0
0.45,0.628118,0.601073,0.475163,125.0
0.5,0.606503,0.624031,0.592996,125.0
0.55,0.553995,0.55158,0.552381,125.0
0.6,0.344,0.5,0.407583,125.0


## Overall scores

In [121]:
# Side-by-side overview: original gold scores first, then one table per
# static tester with its best f1-score highlighted.
print('Macro avg scores for the original gold data from van der Heden')
display(orig_scores_df)
print('\nScores using the edited gold data:')
for heading, tester in (
    ('\nStatic unsupervised scores', statf_u),
    ('\nStatic unsupervised identical scores', statf_i),
    ('\nStatic semi-supervised scores', statf_s),
):
    print(heading)
    display(tester.comp_df.style.highlight_max(subset=['f1-score'], color='lightgreen', axis=0))

Macro avg scores for the original gold data from van der Heden


Unnamed: 0_level_0,precision,recall,f1-score,support
thres,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0.4,0.156,0.5,0.237805,125.0
0.45,0.628118,0.601073,0.475163,125.0
0.5,0.606503,0.624031,0.592996,125.0
0.55,0.553995,0.55158,0.552381,125.0
0.6,0.344,0.5,0.407583,125.0



Scores using the edited gold data:

Static unsupervised scores


Unnamed: 0,precision,recall,f1-score,support,t_metric
0.4,0.627234,0.614912,0.431418,125.0,basic
0.45,0.651259,0.672807,0.51849,125.0,basic
0.5,0.666667,0.72807,0.634975,125.0,basic
0.55,0.580882,0.596491,0.583333,125.0,basic
0.6,0.613324,0.586842,0.593556,125.0,basic



Static unsupervised identical scores


Unnamed: 0,precision,recall,f1-score,support,t_metric
0.4,0.629441,0.620175,0.439677,125.0,basic
0.45,0.655784,0.683333,0.533582,125.0,basic
0.5,0.666667,0.72807,0.634975,125.0,basic
0.55,0.580882,0.596491,0.583333,125.0,basic
0.6,0.602604,0.581579,0.587218,125.0,basic



Static semi-supervised scores


Unnamed: 0,precision,recall,f1-score,support,t_metric
0.4,0.629441,0.620175,0.439677,125.0,basic
0.45,0.655784,0.683333,0.533582,125.0,basic
0.5,0.666667,0.72807,0.634975,125.0,basic
0.55,0.580882,0.596491,0.583333,125.0,basic
0.6,0.602604,0.581579,0.587218,125.0,basic


Looking at the overall scores, the basic threshold with the best macro-average F1-score is 0.5 for both the original data and the edited gold data in all instances. The new scores are slightly higher, moving from 0.59 to 0.63. The scores for the three VecMap embedding variants differ slightly at the other thresholds but are almost the same overall, with exactly the same F1-scores at the best threshold of 0.5.

The choice for the best FastText processing is not as clear, as the f1-scores for the best threshold are the same.

This leads me to conclude that none of the static embedding workflows is clearly better than the others, which gives a baseline macro-average F1-score of 0.63 for testing.

In [126]:
statf_s.thres_dfs[0.5].to_csv('../results/static_semi_supervised_best_thres_gold.tsv', sep='\t')

In [131]:
# Label every human word pair with the best basic threshold (0.5): a cosine
# strictly above it is a 'Creative Shift', anything else a 'Reproduction'.
# np.where does this in one vectorised pass instead of an iterrows() loop
# over the whole table.
static_semi_sup['static_labels'] = np.where(static_semi_sup['static_cosine'] > 0.5, 'Creative Shift', 'Reproduction')

In [192]:
# Classification report of the semi-supervised tester at threshold 0.5,
# one row per class / average.
best_thres_report = pd.DataFrame.from_dict(statf_s.thres_reports[0.5]).transpose()
# Keep the accuracy value only under f1-score: blank its precision and recall
# cells and give the row the macro-average support.
for blank_col in ('precision', 'recall'):
    best_thres_report.loc['accuracy', blank_col] = None
best_thres_report.loc['accuracy', 'support'] = best_thres_report.loc['macro avg', 'support']

In [194]:
best_thres_report.to_csv('../results/static_semi_supervised_best_thres_report.tsv', sep='\t')

In [140]:
# Persist the semi-supervised results: the per-threshold comparison table and
# the full human word-pair table (including the static_labels column if the
# labelling cell has been run).
statf_s.comp_df.to_csv('../results/static_semi_supervised_thresholds.tsv', sep='\t')
static_semi_sup.to_csv('../results/static_semi_supervised_data.tsv', sep='\t')

## Machine conversion

In [13]:
# Machine-translation word pairs scored with the semi-supervised mapping.
machine_static_semi_sup = pd.read_csv(
    '../data/2_new_run/16072023_174925-mapped_semi_supervised-static_mt.tsv',
    sep='\t',
    index_col=0,
)
machine_static_semi_sup['type'] = 'machine'

# Reuse the tester's own labelling rule, pinned to the single threshold 0.5.
mt_static_conv = StaticThres(static_gold, thresholds=[0.5,])
mt_static_conv.cur_thres = 0.5
machine_static_semi_sup['static_labels'] = [
    mt_static_conv.static_thres(cosine)
    for cosine in machine_static_semi_sup['static_cosine']
]
machine_static_semi_sup

Unnamed: 0,film,genre,sent_idx,src_sent,tgt_sent,src,tgt,static_cosine,type,static_labels
0,AC01,action,0,The Inquisition has finally delivered Spain to...,De inquisitie heeft Spanje eindelijk uitgeleve...,The,De,0.435872,machine,Reproduction
1,AC01,action,0,The Inquisition has finally delivered Spain to...,De inquisitie heeft Spanje eindelijk uitgeleve...,Inquisition,inquisitie,0.301858,machine,Reproduction
2,AC01,action,0,The Inquisition has finally delivered Spain to...,De inquisitie heeft Spanje eindelijk uitgeleve...,has,heeft,0.126774,machine,Reproduction
3,AC01,action,0,The Inquisition has finally delivered Spain to...,De inquisitie heeft Spanje eindelijk uitgeleve...,finally,eindelijk,0.128372,machine,Reproduction
4,AC01,action,0,The Inquisition has finally delivered Spain to...,De inquisitie heeft Spanje eindelijk uitgeleve...,delivered,uitgeleverd,0.455937,machine,Reproduction
...,...,...,...,...,...,...,...,...,...,...
531523,DR10,drama,805,I guess it 's just us now .,Ik denk dat we nu alleen zijn .,.,.,0.166176,machine,Reproduction
531524,DR10,drama,806,Just us ?,Alleen wij ?,Just,Alleen,0.360403,machine,Reproduction
531525,DR10,drama,806,Just us ?,Alleen wij ?,us,Alleen,0.756379,machine,Creative Shift
531526,DR10,drama,806,Just us ?,Alleen wij ?,us,wij,0.441017,machine,Reproduction


In [14]:
machine_static_semi_sup.to_csv('../results/machine_static_semi_supervised_data.tsv', sep='\t')

# Context

## Human

In [11]:
# Context scores for the human translations (tab-separated, first column is the saved row index).
context_hu = pd.read_csv('../data/2_new_run/12072023_171117-context.tsv', sep='\t', index_col=0)
# The tester receives the path of the gold file and the name of the column holding the human labels.
contf = ContextualThres(gold_data='../data/context_gold_edits.tsv', human_label='label_context')
# Alternative gold file, kept for reference:
# contf = ContextualThres(gold_data='../data/context_gold_fixes.tsv', human_label='label_context')
# Calling the tester hands it the scored pairs.
# NOTE(review): presumably this labels them at every threshold - confirm in helpers.threshold_tester.
contf(context_hu)

In [12]:
contf.score()

In [13]:
# Presentation copy of the comparison table: the threshold index becomes a
# string column and support is shown as an integer.
context_thres = (
    contf.comp_df
    .reset_index(names='threshold')
    .assign(
        threshold=lambda frame: frame['threshold'].map(lambda x: f'{x}'),
        support=lambda frame: frame['support'].map(int),
    )
)
# Highlight the best f1-score and hide the row index.
display(
    context_thres.style
    .highlight_max(subset=['f1-score'], color='mediumseagreen', axis=0)
    .hide()
)

threshold,precision,recall,f1-score,support,t_metric
0.4,0.583719,0.620029,0.539689,125,word-major-minor
0.4,0.557051,0.581922,0.526515,125,sent-major-minor
0.4,0.583719,0.620029,0.539689,125,basic
0.45,0.587179,0.625184,0.546264,125,word-major-minor
0.45,0.555901,0.579529,0.53385,125,sent-major-minor
0.45,0.560734,0.587077,0.532826,125,basic
0.5,0.590676,0.630339,0.552834,125,word-major-minor
0.5,0.571834,0.597754,0.565185,125,sent-major-minor
0.5,0.592342,0.628314,0.583611,125,basic
0.55,0.581797,0.617636,0.548611,125,word-major-minor


In [16]:
contf.get_best()

Unnamed: 0,precision,recall,f1-score,support,t_metric
0.6,0.615793,0.636598,0.621976,125.0,basic


In [17]:
# Look up the report stored for the best (threshold, metric) combination.
context_best_threshold = contf.get_best().index[0]
context_best_metric = contf.get_best().iloc[0].t_metric
best_thres_report = pd.DataFrame.from_dict(
    contf.thres_reports[context_best_threshold][context_best_metric]
).transpose()
# Keep the accuracy value only under f1-score: blank its precision and recall
# cells and give the row the macro-average support.
for blank_col in ('precision', 'recall'):
    best_thres_report.loc['accuracy', blank_col] = None
best_thres_report.loc['accuracy', 'support'] = best_thres_report.loc['macro avg', 'support']

In [18]:
best_thres_report

Unnamed: 0,precision,recall,f1-score,support
creative shift,0.388889,0.5,0.4375,28.0
reproduction,0.842697,0.773196,0.806452,97.0
accuracy,,,0.712,125.0
macro avg,0.615793,0.636598,0.621976,125.0
weighted avg,0.741044,0.712,0.723806,125.0


In [19]:
# Persist the human context results: the frame from best_df(), the
# best-threshold classification report, the threshold comparison table, and
# the gold rows from retrieve_best_gold() (their index is saved as the
# 'orig_df_idx' column).
# NOTE: best_thres_report is a name reused by other sections of this notebook;
# run the cell that builds the context report directly before this one.
contf.best_df().to_csv('../results/context_data.tsv', sep='\t')
best_thres_report.to_csv('../results/context_best_thres_report.tsv', sep='\t')
context_thres.to_csv('../results/context_thresholds.tsv', sep='\t')
contf.retrieve_best_gold().reset_index(names='orig_df_idx').to_csv('../results/context_best_thres_gold.tsv', sep='\t')

## Machine

In [92]:
# Context scores for the machine translations (tab-separated, first column is the saved row index).
context_mt = pd.read_csv('../data/2_new_run/16072023_180924-context_mt.tsv', sep='\t', index_col=0)
# The tester receives the path of the machine gold file and the name of the column holding the human labels.
contf_mt = ContextualThres(gold_data='../data/context_mt_gold_edited.tsv', human_label='label_context')
# Calling the tester hands it the scored pairs.
# NOTE(review): presumably this labels them at every threshold - confirm in helpers.threshold_tester.
contf_mt(context_mt)

In [93]:
contf_mt.score()

In [94]:
# Presentation copy of the comparison table: the threshold index becomes a
# string column and support is shown as an integer.
context_thres_mt = (
    contf_mt.comp_df
    .reset_index(names='threshold')
    .assign(
        threshold=lambda frame: frame['threshold'].map(lambda x: f'{x}'),
        support=lambda frame: frame['support'].map(int),
    )
)
# Highlight the best f1-score and hide the row index.
display(
    context_thres_mt.style
    .highlight_max(subset=['f1-score'], color='mediumseagreen', axis=0)
    .hide()
)

threshold,precision,recall,f1-score,support,t_metric
0.4,0.59767,0.648213,0.561588,125,word-major-minor
0.4,0.566821,0.600622,0.540076,125,sent-major-minor
0.4,0.599303,0.632867,0.602398,125,basic
0.45,0.60105,0.653263,0.568084,125,word-major-minor
0.45,0.574026,0.610723,0.55256,125,sent-major-minor
0.45,0.613492,0.638889,0.620522,125,basic
0.5,0.607987,0.663364,0.581105,125,word-major-minor
0.5,0.577726,0.615773,0.558824,125,sent-major-minor
0.5,0.627173,0.63073,0.628867,125,basic
0.55,0.595471,0.644134,0.569707,125,word-major-minor


In [95]:
contf_mt.get_best()

Unnamed: 0,precision,recall,f1-score,support,t_metric
0.5,0.627173,0.63073,0.628867,125.0,basic


In [96]:
# Look up the report stored for the best (threshold, metric) combination.
context_best_threshold_mt = contf_mt.get_best().index[0]
context_best_metric_mt = contf_mt.get_best().iloc[0].t_metric
best_thres_report_mt = pd.DataFrame.from_dict(
    contf_mt.thres_reports[context_best_threshold_mt][context_best_metric_mt]
).transpose()
# Keep the accuracy value only under f1-score: blank its precision and recall
# cells and give the row the macro-average support.
for blank_col in ('precision', 'recall'):
    best_thres_report_mt.loc['accuracy', blank_col] = None
best_thres_report_mt.loc['accuracy', 'support'] = best_thres_report_mt.loc['macro avg', 'support']

In [97]:
best_thres_report_mt

Unnamed: 0,precision,recall,f1-score,support
creative shift,0.407407,0.423077,0.415094,26.0
reproduction,0.846939,0.838384,0.84264,99.0
accuracy,,,0.752,125.0
macro avg,0.627173,0.63073,0.628867,125.0
weighted avg,0.755516,0.752,0.75371,125.0


In [98]:
# Persist the machine context results: the frame from best_df(), the
# best-threshold classification report, the threshold comparison table, and
# the gold rows from retrieve_best_gold() (their index is saved as the
# 'orig_df_idx' column).
# NOTE: best_thres_report_mt is a name reused by the syntax section of this
# notebook; run the cell that builds the context report directly before this one.
contf_mt.best_df().to_csv('../results/machine_context_data.tsv', sep='\t')
best_thres_report_mt.to_csv('../results/machine_context_best_thres_report.tsv', sep='\t')
context_thres_mt.to_csv('../results/machine_context_thresholds.tsv', sep='\t')
contf_mt.retrieve_best_gold().reset_index(names='orig_df_idx').to_csv('../results/machine_context_best_thres_gold.tsv', sep='\t')

# Syntax

## Human

In [66]:
# Syntax input for the human translations (tab-separated, first column is the saved row index).
synt_hu = pd.read_csv('../results/syntax_data_input.tsv', sep='\t', index_col=0)
# Unlike the static and context testers, SyntaxThres is created without a gold-data argument.
syntf = SyntaxThres()

In [67]:
syntf(synt_hu)

In [68]:
syntf.score()

In [69]:
# Presentation copy of the comparison table: the threshold index becomes a
# string column and support is shown as an integer.
synt_thres = (
    syntf.comp_df
    .reset_index(names='threshold')
    .assign(
        threshold=lambda frame: frame['threshold'].map(lambda x: f'{x}'),
        support=lambda frame: frame['support'].map(int),
    )
)
# Highlight the best f1-score and hide the row index.
display(
    synt_thres.style
    .highlight_max(subset=['f1-score'], color='mediumseagreen', axis=0)
    .hide()
)

threshold,precision,recall,f1-score,support,t_metric
0.4,0.512733,0.514286,0.509804,100,sacr_cross_label
0.4,0.604039,0.621429,0.600906,100,label_label
0.4,0.552083,0.561905,0.537621,100,astred_label
0.45,0.512733,0.514286,0.509804,100,sacr_cross_label
0.45,0.583333,0.595238,0.582418,100,label_label
0.45,0.536526,0.542857,0.5281,100,astred_label
0.5,0.512733,0.514286,0.509804,100,sacr_cross_label
0.5,0.643986,0.621429,0.627776,100,label_label
0.5,0.591146,0.6,0.592944,100,astred_label
0.55,0.519305,0.521429,0.51745,100,sacr_cross_label


In [70]:
syntf.get_best()

Unnamed: 0,precision,recall,f1-score,support,t_metric
0.55,0.657343,0.628571,0.636175,100.0,label_label


In [71]:
# Look up the report stored for the best (threshold, metric) combination.
synt_best_threshold = syntf.get_best().index[0]
synt_best_metric = syntf.get_best().iloc[0].t_metric
best_thres_report = pd.DataFrame.from_dict(
    syntf.thres_reports[synt_best_threshold][synt_best_metric]
).transpose()
# Keep the accuracy value only under f1-score: blank its precision and recall
# cells and give the row the macro-average support.
for blank_col in ('precision', 'recall'):
    best_thres_report.loc['accuracy', blank_col] = None
best_thres_report.loc['accuracy', 'support'] = best_thres_report.loc['macro avg', 'support']

In [72]:
best_thres_report

Unnamed: 0,precision,recall,f1-score,support
creative shift,0.545455,0.4,0.461538,30.0
reproduction,0.769231,0.857143,0.810811,70.0
accuracy,,,0.72,100.0
macro avg,0.657343,0.628571,0.636175,100.0
weighted avg,0.702098,0.72,0.706029,100.0


In [73]:
# Persist the human syntax results: the frame from best_df(), the
# best-threshold classification report and the threshold comparison table.
# NOTE: best_thres_report is a name reused by other sections of this notebook;
# run the cell that builds the syntax report directly before this one.
syntf.best_df().to_csv('../results/syntax_data.tsv', sep='\t')
best_thres_report.to_csv('../results/syntax_best_thres_report.tsv', sep='\t')
synt_thres.to_csv('../results/syntax_thresholds.tsv', sep='\t')

## Machine

In [74]:
# Syntax input for the machine translations (tab-separated, first column is the saved row index).
synt_mt = pd.read_csv('../results/machine_syntax_data_input.tsv', sep='\t', index_col=0)
# Unlike the static and context testers, SyntaxThres is created without a gold-data argument.
syntf_mt = SyntaxThres()

In [75]:
syntf_mt(synt_mt)

In [76]:
syntf_mt.score()

In [77]:
# Presentation copy of the comparison table: the threshold index becomes a
# string column and support is shown as an integer.
synt_thres_mt = (
    syntf_mt.comp_df
    .reset_index(names='threshold')
    .assign(
        threshold=lambda frame: frame['threshold'].map(lambda x: f'{x}'),
        support=lambda frame: frame['support'].map(int),
    )
)
# Highlight the best f1-score and hide the row index.
display(
    synt_thres_mt.style
    .highlight_max(subset=['f1-score'], color='mediumseagreen', axis=0)
    .hide()
)

threshold,precision,recall,f1-score,support,t_metric
0.4,0.512627,0.524621,0.493787,100,sacr_cross_label
0.4,0.520499,0.543561,0.491813,100,label_label
0.4,0.565062,0.638258,0.548278,100,astred_label
0.45,0.512627,0.524621,0.493787,100,sacr_cross_label
0.45,0.544647,0.583333,0.538143,100,label_label
0.45,0.57261,0.649621,0.562937,100,astred_label
0.5,0.512627,0.524621,0.493787,100,sacr_cross_label
0.5,0.607869,0.61553,0.611429,100,label_label
0.5,0.585478,0.666667,0.585826,100,astred_label
0.55,0.512627,0.524621,0.493787,100,sacr_cross_label


In [78]:
syntf_mt.get_best()

Unnamed: 0,precision,recall,f1-score,support,t_metric
0.6,0.752632,0.613636,0.646416,100.0,label_label


In [79]:
# Look up the report stored for the best (threshold, metric) combination.
synt_best_threshold_mt = syntf_mt.get_best().index[0]
synt_best_metric_mt = syntf_mt.get_best().iloc[0].t_metric
best_thres_report_mt = pd.DataFrame.from_dict(
    syntf_mt.thres_reports[synt_best_threshold_mt][synt_best_metric_mt]
).transpose()
# Keep the accuracy value only under f1-score: blank its precision and recall
# cells and give the row the macro-average support.
for blank_col in ('precision', 'recall'):
    best_thres_report_mt.loc['accuracy', blank_col] = None
best_thres_report_mt.loc['accuracy', 'support'] = best_thres_report_mt.loc['macro avg', 'support']

In [80]:
best_thres_report_mt

Unnamed: 0,precision,recall,f1-score,support
creative shift,0.6,0.25,0.352941,12.0
reproduction,0.905263,0.977273,0.939891,88.0
accuracy,,,0.89,100.0
macro avg,0.752632,0.613636,0.646416,100.0
weighted avg,0.868632,0.89,0.869457,100.0


In [81]:
# Persist the machine syntax results: the frame from best_df(), the
# best-threshold classification report and the threshold comparison table.
# NOTE: best_thres_report_mt is a name reused by the context section of this
# notebook; run the cell that builds the syntax report directly before this one.
syntf_mt.best_df().to_csv('../results/machine_syntax_data.tsv', sep='\t')
best_thres_report_mt.to_csv('../results/machine_syntax_best_thres_report.tsv', sep='\t')
synt_thres_mt.to_csv('../results/machine_syntax_thresholds.tsv', sep='\t')