# Assess error annotation on a segment level

- read annotations from three different annotators
- de-compose Yawat annotations
- count and classify errors per segment
- correlate error type with production duration

In [1]:
import os
import sys
import os.path
import glob
import re
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

sys.path.append('/data/critt/tprdb/bin/')
import TPRDB
import importlib
importlib.reload(TPRDB)

<module 'TPRDB' from '/data/critt/tprdb/bin/TPRDB.py'>

In [2]:
# read data from three annotators: A, B and C
SG = TPRDB.readTPRDBtables(["EN-ZH_IMBst18_ASSESSMENT_A/Tables/", "EN-ZH_IMBst18_ASSESSMENT_B/Tables/", "EN-ZH_IMBst18_ASSESSMENT_C/Tables/"], "*sg", path="/data/critt/tprdb/LONGHUI/", verbose = 1)


pd.set_option('display.max_columns', 500)
SG.head(3)

EN-ZH_IMBst18_ASSESSMENT_A/Tables/	#sessions:37	*sg:202
EN-ZH_IMBst18_ASSESSMENT_B/Tables/	#sessions:37	*sg:202
EN-ZH_IMBst18_ASSESSMENT_C/Tables/	#sessions:37	*sg:202


Unnamed: 0,Id,STseg,TTseg,Study,Session,SL,TL,Task,Text,Part,Nedit,Dur,FDur,PreGap,TG300,TD300,TB300,TG500,TD500,TB500,TG1000,TD1000,TB1000,TG2000,TD2000,TB2000,TG5000,TD5000,TB5000,Scatter,FixS,TrtS,FixT,TrtT,ParalFixS,ParalS,ParalFixT,ParalT,Ins,Del,MIns,MDel,AIns,ADel,SIns,SDel,TokS,LenS,TokT,LenT,LenMT,Yawat,String,TAGnbr,SAGnbr,HSgrp,Cross,HCross,HTra,HSTC,HTot,HTotN
0,1,1,1,EN-ZH_IMBst18_ASSESSMENT_A,P00_Ist3,en,zh,Ist,3,P00,1,7930,7930,5967,1348,615,2,1348,615,2,0,1963,0,0,1963,0,0,1963,0,0,61,19539,1,414,4,1248,0,0,33,0,33,0,0,0,0,0,23,119,17,31,0,S:cohesc+S:cohesc+S:cohesc+S:cohesc+S:unint+S:...,我们_一起_工作_，_在过去在_很有_挑战_的_环境_一起_工作_在_伊朗_伊拉克_和_和_。,0.48,0.91,0.93,2.3,0.93,0.93,0.93,2.09,0.32
1,2,2,2,EN-ZH_IMBst18_ASSESSMENT_A,P00_Ist3,en,zh,Ist,3,P00,1,1182,1182,541,498,143,1,0,641,0,0,641,0,0,641,0,0,641,0,0,11,2834,0,0,3,672,0,0,26,0,26,0,0,0,0,0,11,67,13,26,0,S:unint+S:mistr+T:addom+T:mistr,我们_感觉到_非常好_，_我们_能够_继续_地_去_增强_这个_合作关系_。,0.09,0.09,0.42,1.18,0.42,0.42,0.42,0.93,0.17
2,3,3,3,EN-ZH_IMBst18_ASSESSMENT_A,P00_Ist3,en,zh,Ist,3,P00,1,3273,3273,51,2717,505,2,2380,842,1,2380,842,1,2380,842,1,0,3222,0,0,29,8102,0,0,3,236,0,0,25,0,25,0,0,0,0,0,19,89,14,25,0,S:cohes+T:cohes+T:unint+T:unint,我_非常_高兴_我们_我们_在_这个_访问_期间_达成了_更多_的_进步_。,0.05,0.05,0.44,0.26,0.44,0.44,0.44,0.91,0.15


In [3]:
# The Yawat error annotation sting
C = SG['Yawat'].value_counts(ascending=False)
C.head(15)


---                                                                                34
S:addom+S:addom                                                                     7
T:unint                                                                             5
S:mistrc+T:mistrc+T:mistrc                                                          5
S:unint                                                                             4
S:mistrc+T:mistrc                                                                   4
T:addom+T:addom+T:addom+T:addom                                                     4
S:mistr+T:mistr+T:mistr                                                             4
T:addom                                                                             3
S:addom+T:addom+T:addom+T:addom+T:addom+T:addom+T:addom+T:addom+T:addom             3
S:addom+S:addom+S:addom+S:addom+S:addom+S:addom+S:addom+S:addom+S:addom+S:addom     3
S:cohes+T:cohes                                       

In [4]:
# splitting error string into its parts 
df = SG.Yawat.str.split('+').apply(pd.value_counts)
df = df.fillna(0)
df.head()

Unnamed: 0,S:cohesc,S:mistrc,T:mistrc,T:cohesc,S:unint,S:mistr,T:addom,T:mistr,T:unint,T:cohes,S:cohes,S:addom,T:wform,S:wform,T:wformc,S:wformc,---
0,4.0,3.0,2.0,2.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,2.0,2.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1.0,2.0,2.0,1.0,0.0,3.0,0.0,6.0,1.0,0.0,0.0,2.0,2.0,2.0,0.0,0.0,0.0


In [5]:
#count the errors by subcategory
df['Any'] = df.apply(lambda x: x.sum(), axis=1)
df['Accuracy'] = df['S:mistrc'] + df['T:mistrc'] + df['S:mistr'] + df['T:mistr'] + df['S:unint'] + df['T:unint']+ df['S:addom'] + df['T:addom']
df['Fluency'] = df['S:cohesc'] + df['T:cohesc'] + df['S:cohes'] + df['T:cohes'] 
df['Style'] = df['S:wform'] + df['T:wform']+ df['S:wformc'] + df['T:wformc']
df['Critical'] = df['S:mistrc'] + df['T:mistrc'] + df['S:cohesc'] + df['T:cohesc'] + df['S:wformc'] + df['T:wformc']+ df['S:addom'] + df['T:addom']
df['Minor'] = df['S:mistr'] + df['T:mistr'] + df['S:unint'] + df['T:unint'] +  df['S:cohes'] + df['T:cohes'] + df['S:wform'] + df['T:wform']
df.head()

Unnamed: 0,S:cohesc,S:mistrc,T:mistrc,T:cohesc,S:unint,S:mistr,T:addom,T:mistr,T:unint,T:cohes,S:cohes,S:addom,T:wform,S:wform,T:wformc,S:wformc,---,Any,Accuracy,Fluency,Style,Critical,Minor
0,4.0,3.0,2.0,2.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12.0,6.0,6.0,0.0,11.0,1.0
1,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,4.0,0.0,0.0,1.0,3.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,2.0,2.0,0.0,0.0,4.0
3,0.0,2.0,2.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.0,7.0,0.0,0.0,4.0,3.0
4,1.0,2.0,2.0,1.0,0.0,3.0,0.0,6.0,1.0,0.0,0.0,2.0,2.0,2.0,0.0,0.0,0.0,22.0,16.0,2.0,4.0,8.0,14.0


In [None]:
df

# Segment level
segment-wise error average

In [21]:
# Join generalized Error codes with other features
df1 = df[['Any', 'Accuracy', 'Fluency', 'Style', 'Critical', 'Minor']]
SG1 = pd.concat([SG, df1], axis=1)


# unique Label 
SG1['SessionSeg'] = SG1['Session'] + '_' + SG1['STseg'].astype(str)

# normalized error score
SG1['TokST'] = SG1['TokS'] + SG1['TokT']
SG1['SEG-Any'] = SG1['Any'] / SG1['TokST']
SG1['SEG-Acc'] = SG1['Accuracy'] / SG1['TokST']
SG1['SEG-Flu'] = SG1['Fluency'] / SG1['TokST']
SG1['SEG-Sty'] = SG1['Style'] / SG1['TokST']
SG1['SEG-Cri'] = SG1['Critical'] / SG1['TokST']
SG1['SEG-Min'] = SG1['Minor'] / SG1['TokST']

df2 = SG1[SG1['Part'] != 'P00'].groupby("SessionSeg").agg('mean')

df3 = df2.sort_values(by="Any", ascending=False) 

print(df3.shape)
df3.head()

(195, 67)


Unnamed: 0_level_0,Id,STseg,TTseg,Text,Nedit,Dur,FDur,PreGap,TG300,TD300,TB300,TG500,TD500,TB500,TG1000,TD1000,TB1000,TG2000,TD2000,TB2000,TG5000,TD5000,TB5000,Scatter,FixS,TrtS,FixT,TrtT,ParalFixS,ParalS,ParalFixT,ParalT,Ins,Del,MIns,MDel,AIns,ADel,SIns,SDel,TokS,LenS,TokT,LenT,LenMT,TAGnbr,SAGnbr,HSgrp,Cross,HCross,HTra,HSTC,HTot,HTotN,Any,Accuracy,Fluency,Style,Critical,Minor,TokST,SEG-Any,SEG-Acc,SEG-Flu,SEG-Sty,SEG-Cri,SEG-Min
SessionSeg,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1
P04_Ist6_1,1.0,1.0,1.0,6.0,1.0,15584.0,15584.0,5450.0,9911.0,223.0,12.0,9522.0,612.0,11.0,3960.0,6174.0,3.0,0.0,10134.0,0.0,0.0,10134.0,0.0,0.0,32.0,11752.0,103.0,39064.0,20.0,6559.666667,16.333333,4663.0,50.0,0.0,50.0,0.0,0.0,0.0,0.0,0.0,42.0,254.0,28.0,50.0,0.0,10.23,15.396667,1.783333,12.986667,1.756667,1.94,1.94,1.84,0.223333,55.333333,54.0,1.333333,0.0,54.0,1.333333,70.0,0.790476,0.771429,0.019048,0.0,0.771429,0.019048
P02_Ist4_3,3.0,3.0,3.0,4.0,1.0,7491.0,7491.0,2640.0,4250.0,601.0,4.0,3830.0,1021.0,3.0,3080.0,1771.0,2.0,2010.0,2841.0,1.0,0.0,4851.0,0.0,0.0,10.0,3094.0,0.0,0.0,11.333333,6737.666667,0.333333,66.666667,80.0,0.0,80.0,0.0,0.0,0.0,0.0,0.0,46.0,230.0,45.0,80.0,0.0,3.106667,3.603333,1.216667,8.506667,1.253333,1.266667,1.266667,2.2,0.283333,45.0,45.0,0.0,0.0,42.333333,2.666667,91.0,0.494505,0.494505,0.0,0.0,0.465201,0.029304
P04_Ist2_3,3.0,3.0,3.0,2.0,1.0,6780.0,6780.0,1550.0,4800.0,430.0,2.0,4800.0,430.0,2.0,4800.0,430.0,2.0,4800.0,430.0,2.0,0.0,5230.0,0.0,0.0,53.0,17287.0,0.0,0.0,12.333333,3502.333333,0.0,0.0,41.0,0.0,41.0,0.0,0.0,0.0,0.0,0.0,41.0,243.0,20.0,41.0,0.0,3.926667,8.333333,0.896667,6.406667,0.896667,0.896667,0.896667,0.873333,0.106667,41.333333,40.0,1.333333,0.0,41.333333,0.0,61.0,0.677596,0.655738,0.021858,0.0,0.677596,0.0
P08_Ist6_1,1.0,1.0,1.0,6.0,1.0,10920.0,10920.0,4920.0,4900.0,1100.0,5.0,4400.0,1600.0,4.0,2330.0,3670.0,1.0,2330.0,3670.0,1.0,0.0,6000.0,0.0,0.0,91.0,29262.0,0.0,0.0,23.666667,7190.333333,0.0,0.0,48.0,0.0,48.0,0.0,0.0,0.0,0.0,0.0,42.0,254.0,28.0,48.0,0.0,4.356667,6.293333,1.14,7.476667,1.233333,1.29,1.29,1.84,0.223333,41.0,37.666667,3.333333,0.0,40.333333,0.666667,70.0,0.585714,0.538095,0.047619,0.0,0.57619,0.009524
P08_Ist4_3,3.0,3.0,3.0,4.0,1.0,2462.0,2462.0,200.0,720.0,1542.0,2.0,0.0,2262.0,0.0,0.0,2262.0,0.0,0.0,2262.0,0.0,0.0,2262.0,0.0,0.0,54.0,15643.0,0.0,0.0,21.333333,6326.666667,0.0,0.0,68.0,0.0,68.0,0.0,0.0,0.0,0.0,0.0,46.0,230.0,39.0,68.0,0.0,5.01,5.21,1.146667,17.796667,1.153333,1.153333,1.153333,2.2,0.283333,40.333333,35.333333,5.0,0.0,33.333333,7.0,85.0,0.47451,0.415686,0.058824,0.0,0.392157,0.082353


In [24]:
print("Correlation non-normalized errors per segment:\n", df3[['HTot','Any','Accuracy','Fluency','Style','Critical','Minor']].corr())
print("\nCorrelation TT-len normalized errors per segment:\n", df3[['HTot','SEG-Any','SEG-Acc','SEG-Flu','SEG-Sty','SEG-Cri','SEG-Min']].corr())


Correlation non-normalized errors per segment:
               HTot       Any  Accuracy   Fluency     Style  Critical     Minor
HTot      1.000000  0.493719  0.446912  0.122708  0.052498  0.462034  0.107389
Any       0.493719  1.000000  0.903377  0.248239  0.116314  0.913335  0.268737
Accuracy  0.446912  0.903377  1.000000 -0.176476 -0.002236  0.950449 -0.053863
Fluency   0.122708  0.248239 -0.176476  1.000000  0.019057 -0.033318  0.682649
Style     0.052498  0.116314 -0.002236  0.019057  1.000000 -0.015924  0.321656
Critical  0.462034  0.913335  0.950449 -0.033318 -0.015924  1.000000 -0.146571
Minor     0.107389  0.268737 -0.053863  0.682649  0.321656 -0.146571  1.000000

Correlation TT-len normalized errors per segment:
              HTot   SEG-Any   SEG-Acc   SEG-Flu   SEG-Sty   SEG-Cri   SEG-Min
HTot     1.000000  0.303762  0.286916 -0.015900  0.012495  0.295288 -0.061125
SEG-Any  0.303762  1.000000  0.900395  0.049941 -0.072638  0.945746 -0.150517
SEG-Acc  0.286916  0.900395  1.000