In [1]:
import os

import pandas as pd

from charm.eval.eval import mapping, categorize_pairs, precision, recall
from charm.data import utils

In [25]:
# load predictions
annotations_dir = '/home/iron-man/Documents/data/charm/transformed/annotations'
genglin = os.path.join(annotations_dir, 'Circumplex Theory Annotations - Genglin - Sheet1.csv')
jialiang = os.path.join(annotations_dir, 'Circumplex Theory Annotations - Jialiang - Sheet1.csv')
yukun = os.path.join(annotations_dir, 'Circumplex Theory Annotations - Yukun - Sheet1.csv')

genglin_df = pd.read_csv(genglin, skiprows=1, usecols=range(1, 9))
jialiang_df = pd.read_csv(jialiang, skiprows=1, usecols=range(1, 9))
yukun_df = pd.read_csv(yukun, skiprows=1, usecols=range(1, 9))

In [26]:
# load ground truth
anno_dir = '/home/iron-man/Documents/data/charm/raw/LDC2022E18_CCU_TA1_Mandarin_Chinese_Development_Annotation_V3.0'
anno_dfs, segment_df, version_df = utils.load_ldc_annotation(anno_dir)

In [27]:
modality_df = anno_dfs['changepoint.tab']

In [28]:
# set thresholds
delta = 10
llr_threshold = 0

In [29]:
def convert_to_seconds(timestamp):
    mins, secs = timestamp.split(':')
    return int(mins)*60 + int(secs)

In [30]:
def clean_df(df):
    # drop rows where timestamp is empty
    df = df[df['Timestamp'].notnull()].reset_index(drop=True)
    df['llr'] = 1.0
    df = df.rename(columns={'File ID': 'file_id', 'Timestamp': 'timestamp', 'Annotator Notes': 'annotator_notes', 'Tag': 'tag'}, errors='ignore')
    cols = ['file_id', 'timestamp', 'llr', 'tag', 'annotator_notes', 'URL']
    df = df[cols]
    df['timestamp'] = df['timestamp'].apply(convert_to_seconds)
    return df

In [31]:
genglin_df = clean_df(genglin_df)
jialiang_df = clean_df(jialiang_df)
yukun_df = clean_df(yukun_df)

### Approach 1: assume all annotated points are change points

In [32]:
# genglin_df['file_id'].unique()
# yukun_df['file_id'].unique()
# modality_df[modality_df['file_id'].isin(yukun_df['file_id'].unique())]['file_id'].unique()

In [33]:
def evaluate(df, modality_df, delta=10, llr_threshold=0):
    mappings = {}
    file_counts = {}
    threshold_counts = {
        'correct': 0,
        'false_positive': 0,
        'false_negative': 0,
    }
    for file_id in df['file_id'].unique():
        system_dict = df[df['file_id'] == file_id].to_dict('records')
        reference_dict = modality_df[modality_df['file_id'] ==
                                     file_id].to_dict('records')

        correct_pairs, system_misses, reference_misses = mapping(
            system_dict, reference_dict, delta)
        mappings[file_id] = {
            'correct_pairs': correct_pairs,
            'system_misses': system_misses,
            'reference_misses': reference_misses,
        }
        print(correct_pairs)

        file_counts_ = categorize_pairs(**mappings[file_id], threshold=llr_threshold)
        file_counts[file_id] = file_counts_
        # add file counts to threshold counts
        for key in threshold_counts:
            threshold_counts[key] += file_counts_[key]

    return precision(threshold_counts), recall(threshold_counts), mappings

In [34]:
genglin_precision, genglin_recall, genglin_mappings = evaluate(genglin_df, modality_df, delta=delta, llr_threshold=llr_threshold)

[]
[({'file_id': 'M01003M18', 'timestamp': 305, 'llr': 1.0, 'tag': 'Arrogant-Calculating', 'annotator_notes': 'Now he is also accusing the female for being lazy and started using stronger words ', 'URL': 'http://vd2.bdstatic.com/mda-nh4bibyv4v3ns4s0/cae_h264/1659758632534327528/mda-nh4bibyv4v3ns4s0.mp4'}, {'user_id': 212, 'file_id': 'M01003M18', 'timestamp': 308, 'impact_scalar': 1, 'comment': 'Pre-change: The husband kept trying calmly to persuade the wife while wife was busy with her cleaning, not reacting much to the husband, until the husband complained that the wife was too lazy. Shift: The wife became angry and stopped what she was doing and argued with the husband. Evidence: The wife looked at the husband angrily and raised her voice while stopped her cleaning and started talking back.'}), ({'file_id': 'M01003M18', 'timestamp': 315, 'llr': 1.0, 'tag': 'Assured-Dominant', 'annotator_notes': 'She is very assertive and insists that she is not going to take the job', 'URL': 'http://

In [45]:
fps = 0
for key in genglin_mappings.keys():
    fps += len(genglin_mappings[key]['system_misses'])
fps

19

In [43]:
# possible change points
possible = [
  {'file_id': 'M01003S1K',
  'timestamp': 17,
  'llr': 1.0,
  'tag': 'nan',
  'annotator_notes': "I'd say none of the tags really capture their tone (angry for girl, shock/awkward for boy),playful flirt, etc",
  'URL': 'https://www.bilibili.com/video/BV1kt4y1Q796'},
  {'file_id': 'M01003M18',
  'timestamp': 11,
  'llr': 1.0,
  'tag': 'Arrogant-Calculating',
  'annotator_notes': 'As soon as she appears in the camera she starts speaking aggressively',
  'URL': 'http://vd2.bdstatic.com/mda-nh4bibyv4v3ns4s0/cae_h264/1659758632534327528/mda-nh4bibyv4v3ns4s0.mp4'},
  {'file_id': 'M01003JLO',
  'timestamp': 60,
  'llr': 1.0,
  'tag': 'Arrogant-Calculating',
  'annotator_notes': 'She is being very dismissive and aggressive to the male, snaps at him',
  'URL': 'http://vd2.bdstatic.com/mda-ngt14pajd86dszc9/cae_h264/1658970258963454944/mda-ngt14pajd86dszc9.mp4'},
 {'file_id': 'M01003JLO',
  'timestamp': 200,
  'llr': 1.0,
  'tag': 'Arrogant-Calculating',
  'annotator_notes': 'Now the woman is asking the guy to give her 13000 rmb to help her brother, "if he wants another shot to be with her"',
  'URL': 'http://vd2.bdstatic.com/mda-ngt14pajd86dszc9/cae_h264/1658970258963454944/mda-ngt14pajd86dszc9.mp4'},
 {'file_id': 'M01003JLO',
  'timestamp': 2,
  'llr': 1.0,
  'tag': 'Cold',
  'annotator_notes': 'She is very impatient from the second she appears and asks the guy not to "come in" to the apartment',
  'URL': 'http://vd2.bdstatic.com/mda-ngt14pajd86dszc9/cae_h264/1658970258963454944/mda-ngt14pajd86dszc9.mp4'}
]

In [None]:
evaluate(jialiang_df, modality_df, delta=delta, llr_threshold=llr_threshold)

[]
[({'file_id': 'M01003M18', 'timestamp': 136, 'llr': 1.0, 'annotator_notes': "The woman rejected the man's proposal in a cold way, and continued to complain about the job being tiring."}, {'user_id': 212, 'file_id': 'M01003M18', 'timestamp': 144, 'impact_scalar': 1, 'comment': 'Pre-change: The wife was not happy, but she was still quite controlled. Shift: The wife became much irritated and looked very angry. She started ranting. It looked like "I had enough and can\'t hold the anger anymore".  Evidence: The wife had a long pause, then put her hand on her waist and sighed. She also sneered and raised her voice.'}), ({'file_id': 'M01003M18', 'timestamp': 314, 'llr': 1.0, 'annotator_notes': 'She took issue with the man\'s proposals in a more aggressive way. She even took all the charges of being "lazy" and "not willing to work" from the man just to show her rejection of the job.'}, {'user_id': 212, 'file_id': 'M01003M18', 'timestamp': 308, 'impact_scalar': 1, 'comment': 'Pre-change: The

(0.125, 0.2)

In [13]:
evaluate(yukun_df, modality_df, delta=delta, llr_threshold=llr_threshold)

[]
[({'file_id': 'M01003YN6', 'timestamp': 40, 'llr': 1.0, 'annotator_notes': 'Woman and man are arguing'}, {'user_id': 212, 'file_id': 'M01003YN6', 'timestamp': 38, 'impact_scalar': 1, 'comment': 'Pre-change: The female served the male fruit to mend their relationship. The male was a bit upset, but still controlled. The mood was a bit down, but still calm. Shift: The male started complaining, and the mood became more intense. Evidence: Male pointed at the female and raised his voice when talking back to the female.'}), ({'file_id': 'M01003YN6', 'timestamp': 120, 'llr': 1.0, 'annotator_notes': 'Not sure'}, {'user_id': 212, 'file_id': 'M01003YN6', 'timestamp': 119, 'impact_scalar': 4, 'comment': "Pre-change: The male and female complained about each other's bad behavior, and the mood was quite intense. Shift: The female took a seat by the male and they shared the fruit together. The mood was much relaxed as their relationship improved. Evidence: The female sat beside the male and the ma

(0.38095238095238093, 0.47058823529411764)

In [14]:
for delta in range(10, 30):
    print(delta, evaluate(genglin_df, modality_df, delta=delta, llr_threshold=llr_threshold))

[]
[({'file_id': 'M01003M18', 'timestamp': 305, 'llr': 1.0, 'annotator_notes': 'Now he is also accusing the female for being lazy and started using stronger words '}, {'user_id': 212, 'file_id': 'M01003M18', 'timestamp': 308, 'impact_scalar': 1, 'comment': 'Pre-change: The husband kept trying calmly to persuade the wife while wife was busy with her cleaning, not reacting much to the husband, until the husband complained that the wife was too lazy. Shift: The wife became angry and stopped what she was doing and argued with the husband. Evidence: The wife looked at the husband angrily and raised her voice while stopped her cleaning and started talking back.'}), ({'file_id': 'M01003M18', 'timestamp': 315, 'llr': 1.0, 'annotator_notes': 'She is very assertive and insists that she is not going to take the job'}, {'user_id': 212, 'file_id': 'M01003M18', 'timestamp': 325, 'impact_scalar': 1, 'comment': 'Pre-change: The husband pointed out the age of the wife while the wife kept doing cleaning

In [87]:
for delta in range(10, 30):
    print(delta, evaluate(jialiang_df, modality_df, delta=delta, llr_threshold=llr_threshold))

10 (0.13333333333333333, 0.2)
11 (0.13333333333333333, 0.2)
12 (0.13333333333333333, 0.2)
13 (0.13333333333333333, 0.2)
14 (0.13333333333333333, 0.2)
15 (0.13333333333333333, 0.2)
16 (0.13333333333333333, 0.2)
17 (0.13333333333333333, 0.2)
18 (0.13333333333333333, 0.2)
19 (0.13333333333333333, 0.2)
20 (0.13333333333333333, 0.2)
21 (0.13333333333333333, 0.2)
22 (0.13333333333333333, 0.2)
23 (0.13333333333333333, 0.2)
24 (0.13333333333333333, 0.2)
25 (0.13333333333333333, 0.2)
26 (0.13333333333333333, 0.2)
27 (0.13333333333333333, 0.2)
28 (0.13333333333333333, 0.2)
29 (0.13333333333333333, 0.2)
