# Identify videos for manual labeling and evaluate results

In [1]:
import os
import json

import pandas as pd
import numpy as np

from charm.data import utils

%load_ext autoreload
%autoreload 2

## Load data

In [2]:
# Resolve the raw and transformed data folders under the user's home directory.
home_dir = os.path.expanduser('~')
raw_data_dir = os.path.join(home_dir, 'Documents/data/charm/raw')
transformed_dir = os.path.join(home_dir, 'Documents/data/charm/transformed')

# LDC annotations; later cells index this by key and read
# ['anno_dfs']['changepoint.tab'] and ['versions_df'] from each entry.
anno_dfs = utils.load_ldc_annotations(raw_data_dir)

# Per-file metadata table.
metadata_path = os.path.join(transformed_dir, 'metadata.csv')
meta_df = pd.read_csv(metadata_path)

In [3]:
# Collect the 'changepoint.tab' table of every annotation set and stack them
# into a single frame with a fresh 0..n-1 index.
changepoint_tables = {key: anno_dfs[key]['anno_dfs']['changepoint.tab'] for key in anno_dfs}
changepoint_df = pd.concat(changepoint_tables, ignore_index=True)

In [4]:
changepoint_df['impact_scalar'].value_counts()

3    378
4    280
2    193
5     59
1     25
Name: impact_scalar, dtype: int64

In [5]:
changepoint_df.head()

Unnamed: 0,user_id,file_id,timestamp,impact_scalar,comment
0,212,M01000FT6,287.0,5,Pre-change: Host talked about patterns of dirt...
1,212,M01000FT6,353.0,2,Pre-change: Host questioned why the male guest...
2,212,M01003MTK,64.0,5,Pre-change: female introduced the male to her ...
3,212,M01003MTK,239.0,1,Pre-change: The female and male speakers were ...
4,212,M01003MTK,267.0,5,Pre-change: The group chit-chat with the male ...


In [6]:
# Collect the 'versions_df' table of every annotation set and stack them
# into a single frame with a fresh 0..n-1 index.
versions_tables = {key: anno_dfs[key]['versions_df'] for key in anno_dfs}
versions_df = pd.concat(versions_tables, ignore_index=True)

In [7]:
# Files whose versions row reports a positive changepoint_count.
has_changepoint_count = versions_df['changepoint_count'] > 0
change_anno_files = set(versions_df.loc[has_changepoint_count, 'file_id'].unique())

# Files that have at least one row in the changepoint table.
change_pos_anno_files = set(changepoint_df['file_id'].unique())

# Files in the first set with no rows in the changepoint table.
change_neg_anno_files = change_anno_files.difference(change_pos_anno_files)

In [8]:
len(change_pos_anno_files)

628

In [9]:
len(change_neg_anno_files)

406

## Identify 2 median length videos for annotation

In [10]:
# Identify 2 median-length videos that have a high number of changepoint annotations

In [11]:
changepoint_files = changepoint_df['file_id'].value_counts().to_frame()

In [12]:
# After reset_index() the frame has exactly two columns: the file id and its
# number of changepoint annotations. Name them by position instead of renaming
# 'index'/'file_id', because the labels produced by value_counts() differ
# between pandas versions (pandas >= 2.0 yields 'file_id'/'count'), which would
# leave no 'file_uid' column for the merge below.
changepoint_counts_df = changepoint_files.reset_index()
changepoint_counts_df.columns = ['file_uid', 'changepoint_anno_count']

In [13]:
# Keep only metadata rows for files that have changepoint annotation counts
# (inner join on file_uid).
merged_df = meta_df.merge(changepoint_counts_df, on='file_uid', how='inner')

In [14]:
# identify median length of conversations
merged_df['utterance_count'].describe()

count     541.000000
mean      200.639556
std       161.036634
min        31.000000
25%       103.000000
50%       156.000000
75%       229.000000
max      1378.000000
Name: utterance_count, dtype: float64

In [19]:
meta_df['release'].value_counts()

R3           6684
R2           4914
Mini-Eval    2481
R1           2119
Name: release, dtype: int64

In [17]:
# Conversations with 100-200 utterances (inclusive), i.e. around the median of
# 156 reported by describe() above.
utterance_counts = merged_df['utterance_count']
length_filter = (utterance_counts >= 100) & (utterance_counts <= 200)

# Drop rows whose url is the literal 'na', then list the files with the most
# changepoint annotations first.
has_url = merged_df['url'] != 'na'
merged_df[length_filter & has_url].sort_values(by='changepoint_anno_count', ascending=False)

Unnamed: 0,release,catalog_id,file_uid,url,modality,start,end,transcribed,utterance_count,emotion_count,...,download_date,content_date,status_in_corpus,legacy_catalog_id,original_file_id,type,file_path,length,version,changepoint_anno_count
25,R2,LDC2022E19_R2,M01003M18,http://vd2.bdstatic.com/mda-nh4bibyv4v3ns4s0/c...,video,51.5,351.5,True,107.0,3.0,...,2022-09-14,na,present,,,,,,V1.0,4
21,R2,LDC2022E19_R2,M01003JLO,http://vd2.bdstatic.com/mda-ngt14pajd86dszc9/c...,video,9.0,309.0,True,101.0,3.0,...,2022-09-13,na,present,,,,,,V1.0,2
34,R2,LDC2022E19_R2,M01003S1K,https://www.bilibili.com/video/BV1kt4y1Q796,video,432.0,732.0,True,147.0,3.0,...,2022-09-17,na,present,,,,,,V2.0,2
39,R2,LDC2022E19_R2,M01003VPM,https://www.bilibili.com/video/BV1yS4y1d7S8,video,73.5,373.5,True,119.0,3.0,...,2022-09-21,na,present,,,,,,V1.0,2
41,R2,LDC2022E19_R2,M01003VVI,https://www.bilibili.com/video/BV1KP4y1A7AY,video,237.0,537.0,True,170.0,3.0,...,2022-09-21,na,present,,,,,,V2.0,2
29,R2,LDC2022E19_R2,M01003M2H,http://vd4.bdstatic.com/mda-nh5h99ac43uhqi6j/c...,video,55.5,355.5,True,117.0,3.0,...,2022-09-14,na,present,,,,,,V1.0,1
35,R2,LDC2022E19_R2,M01003T1X,https://www.bilibili.com/video/BV1Gi4y137hZ,video,200.0,500.0,True,106.0,3.0,...,2022-09-20,na,present,,,,,,V2.0,1
36,R2,LDC2022E19_R2,M01003VMI,http://vd2.bdstatic.com/mda-nhj1f2mb3byn9axg/c...,video,293.0,593.0,True,107.0,2.0,...,2022-09-21,na,present,,,,,,V1.0,1
37,R2,LDC2022E19_R2,M01003VNR,https://www.bilibili.com/video/BV1x44y1G7ZM,video,288.0,588.0,True,154.0,3.0,...,2022-09-21,na,present,,,,,,V2.0,1


## Evaluation

In [33]:
changepoint_df[changepoint_df['file_id'] == 'M01003MTK']

Unnamed: 0,user_id,file_id,timestamp,impact_scalar,comment,binary_impact_scalar,changepoint_occurred
2,212,M01003MTK,64,5,Pre-change: female introduced the male to her ...,1,True
3,212,M01003MTK,239,1,Pre-change: The female and male speakers were ...,0,True
4,212,M01003MTK,267,5,Pre-change: The group chit-chat with the male ...,1,True


In [28]:
# 0: good to bad, 1: bad to good
binary_map = {1: 0, 2: 0, 3: 0, 4: 1, 5: 1}

In [29]:
# Translate each impact_scalar through binary_map; a value that is not a key of
# binary_map raises KeyError.
impact_scores = changepoint_df['impact_scalar']
changepoint_df['binary_impact_scalar'] = impact_scores.apply(binary_map.__getitem__)

In [30]:
# binary changepoint flag
changepoint_df['changepoint_occurred'] = True

In [31]:
# Both annotators are scored against the same changepoint rows for this file.
# Each gets an independent copy because find_matches adds columns to the frame
# it receives.
file_changepoints = changepoint_df[changepoint_df['file_id'] == 'M01003MTK']
yanda_anno_file_df = file_changepoints.copy()
yukun_anno_file_df = file_changepoints.copy()

In [32]:
# Paths to the two annotators' manually labeled CSV exports (one sheet each).
# NOTE(review): these live under 'Documents/datasets/charm/...' whereas the data
# loaded at the top of the notebook comes from 'Documents/data/charm/...' —
# confirm the differing root folder is intentional.
yanda_filepath = os.path.join(home_dir, 'Documents/datasets/charm/transformed/annotations/Circumplex Theory Annotations - Yanda - Sheet1.csv')
yukun_filepath = os.path.join(home_dir, 'Documents/datasets/charm/transformed/annotations/Circumplex Theory Annotations - Yukun - Sheet1.csv')

In [34]:
def convert_to_seconds(timestamp):
    """Convert a colon-separated clock string into a whole number of seconds.

    Accepts ``'MM:SS'`` and, additionally, ``'H:MM:SS'`` so that timestamps
    past the one-hour mark can be parsed as well.

    Args:
        timestamp: string such as ``'4:27'`` or ``'1:02:03'``.

    Returns:
        int: total number of seconds.

    Raises:
        ValueError: if the string does not have two or three colon-separated
            fields, or a field is not an integer.
    """
    fields = timestamp.split(':')
    if len(fields) not in (2, 3):
        raise ValueError(
            "expected 'MM:SS' or 'H:MM:SS', got {!r}".format(timestamp))

    # Fold left to right: each further field is worth 1/60 of the previous one.
    total_seconds = 0
    for field in fields:
        total_seconds = total_seconds * 60 + int(field)
    return total_seconds

In [35]:
def load_manual_annotation(filepath):
    """Load one annotator's manual-label CSV and derive the evaluation columns.

    The first line of the file is skipped (the header is read from the second
    line) and the first column is dropped. Three columns are then added:

    * ``timestamp_seconds``: ``Timestamp`` converted with ``convert_to_seconds``
    * ``binary_impact_scalar``: ``Tag`` mapped to 0.0 / 1.0 via ``tag_mapping``
    * ``matched``: initialised to ``False``

    Args:
        filepath: path of the CSV file to read.

    Returns:
        pandas.DataFrame with the original columns (minus the first) plus the
        three derived columns.

    Raises:
        KeyError: if the ``Tag`` column contains a value that is not a key of
            ``tag_mapping``; the message lists the offending values.
    """
    # binary impact scalar (are these correct? - especially aloof-introverted, unassured-submissive, assured-dominant)
    tag_mapping = {'Gregarious-Extraverted': 1, 'Warm-Agreeable': 1, 'Arrogant-Calculating': 0,
                   'Unassured-Submissive': 1, 'Cold': 0, 'Unassuming-Ingenuous': 1,
                   'Aloof-Introverted': 0, 'Assured-Dominant': 0}

    raw_df = pd.read_csv(filepath, skiprows=1)
    # drop first col; take an explicit copy so the column assignments below
    # operate on an independent frame
    df = raw_df.iloc[:, 1:].copy()

    df['timestamp_seconds'] = df['Timestamp'].apply(convert_to_seconds)

    # Fail with a descriptive message instead of a bare KeyError on the first
    # tag that is missing from the mapping.
    unknown_tags = sorted(set(df['Tag']) - set(tag_mapping), key=str)
    if unknown_tags:
        raise KeyError(
            "unrecognized Tag value(s) in {}: {}".format(filepath, unknown_tags))

    df['binary_impact_scalar'] = df['Tag'].map(tag_mapping).astype(float)
    df['matched'] = False
    return df

In [36]:
yanda_df = load_manual_annotation(yanda_filepath)
yukun_df = load_manual_annotation(yukun_filepath)

In [37]:
# TODO: generalize this
# Restrict Yukun's labels to the single file being evaluated.
is_eval_file = yukun_df['File ID'] == 'M01003MTK'
yukun_df = yukun_df.loc[is_eval_file]

In [38]:
def find_matches(anno_df, label_df, window_size=10):
    """Pair each ground-truth changepoint with a manual label close in time.

    For every row of ``anno_df``, the labels whose ``timestamp_seconds`` lies
    within ``window_size`` seconds of the row's ``timestamp`` (bounds
    inclusive) are looked up, and the first one in ``label_df`` row order is
    taken as the match.

    Both frames are modified in place and also returned.

    Args:
        anno_df: ground-truth rows; must have a ``timestamp`` column. Gains
            ``matched`` (bool) and ``binary_pred`` (the matched label's
            ``binary_impact_scalar``, or -1 when no label is in the window).
        label_df: manual labels; must have ``timestamp_seconds`` and
            ``binary_impact_scalar`` columns. ``matched`` is set to True for
            every label that was used as a match.
        window_size: half-width of the search window, in seconds.

    Returns:
        tuple: ``(anno_df, label_df)``.
    """
    label_times = label_df['timestamp_seconds']  # loop-invariant

    match_indicator = []  # true/false per ground-truth row
    binary_pred = []
    for timestamp in anno_df['timestamp']:
        start = timestamp - window_size
        end = timestamp + window_size
        filt_df = label_df[(label_times >= start) & (label_times <= end)]

        if len(filt_df) == 0:
            match_indicator.append(False)
            binary_pred.append(-1)
            continue

        match_indicator.append(True)
        binary_pred.append(filt_df['binary_impact_scalar'].iloc[0])
        # Mark only the label actually used. Writing False to the remaining
        # labels in the window would erase a match recorded for an earlier
        # ground-truth row whose window overlaps this one, and that label would
        # then be counted as an unmatched extra prediction.
        label_df.loc[filt_df.index[0], 'matched'] = True

    anno_df['matched'] = match_indicator
    anno_df['binary_pred'] = binary_pred
    return anno_df, label_df

In [39]:
yanda_anno_file_df, yanda_df = find_matches(yanda_anno_file_df, yanda_df)
yukun_anno_file_df, yukun_df = find_matches(yukun_anno_file_df, yukun_df)

In [40]:
def complete_predictions(anno_df, label_df):
    """Build aligned ground-truth / prediction lists for scoring.

    The lists start with one entry per row of ``anno_df`` and are then extended
    with one entry per row of ``label_df`` whose ``matched`` flag is False.
    Those extra labels count as predicted changepoints with no ground truth:
    ground truth ``False`` / ``-1``, prediction ``True`` / the label's
    ``binary_impact_scalar``.

    Args:
        anno_df: frame with ``matched``, ``binary_pred``,
            ``changepoint_occurred`` and ``binary_impact_scalar`` columns.
        label_df: frame with ``matched`` and ``binary_impact_scalar`` columns.

    Returns:
        dict with keys ``changepoint_ground_truth``, ``changepoint_preds``,
        ``binary_ground_truth`` and ``binary_preds``, each a list of equal
        length.
    """
    # labels that were never paired with a ground-truth row
    unmatched = ~label_df['matched']
    extra_binary_preds = label_df.loc[unmatched, 'binary_impact_scalar'].astype(float).tolist()
    n_extra = len(extra_binary_preds)

    results = {}
    results['changepoint_ground_truth'] = anno_df['changepoint_occurred'].tolist() + [False] * n_extra
    results['changepoint_preds'] = anno_df['matched'].tolist() + [True] * n_extra
    results['binary_ground_truth'] = anno_df['binary_impact_scalar'].tolist() + [-1] * n_extra
    results['binary_preds'] = anno_df['binary_pred'].tolist() + extra_binary_preds
    return results

In [41]:
yanda_results = complete_predictions(yanda_anno_file_df, yanda_df)
yukun_results = complete_predictions(yukun_anno_file_df, yukun_df)

In [42]:
yanda_results

{'changepoint_ground_truth': [True,
  True,
  True,
  False,
  False,
  False,
  False,
  False],
 'changepoint_preds': [False, True, False, True, True, True, True, True],
 'binary_ground_truth': [1, 0, 1, -1, -1, -1, -1, -1],
 'binary_preds': [-1.0, 1.0, -1.0, 1.0, 1.0, 1.0, 1.0, 1.0]}

In [43]:
yukun_results

{'changepoint_ground_truth': [True, True, True, False, False, False],
 'changepoint_preds': [True, True, True, True, True, True],
 'binary_ground_truth': [1, 0, 1, -1, -1, -1],
 'binary_preds': [1.0, 0.0, 1.0, 0.0, 0.0, 1.0]}

In [44]:
from sklearn.metrics import classification_report

In [45]:
# Yanda changepoint preds
# zero_division=0 is passed for consistency with the other report cells: any
# undefined precision/recall is reported as 0 without emitting a warning.
print(classification_report(y_true=yanda_results['changepoint_ground_truth'], y_pred=yanda_results['changepoint_preds'], zero_division=0))

              precision    recall  f1-score   support

       False       0.00      0.00      0.00         5
        True       0.17      0.33      0.22         3

    accuracy                           0.12         8
   macro avg       0.08      0.17      0.11         8
weighted avg       0.06      0.12      0.08         8



In [53]:
# Yanda binary preds
print(classification_report(y_true=yanda_results['binary_ground_truth'], y_pred=yanda_results['binary_preds'], zero_division=0))

              precision    recall  f1-score   support

          -1       0.00      0.00      0.00       5.0
           0       0.00      0.00      0.00       1.0
           1       0.00      0.00      0.00       2.0

    accuracy                           0.00       8.0
   macro avg       0.00      0.00      0.00       8.0
weighted avg       0.00      0.00      0.00       8.0



In [54]:
# Yukun changepoint preds
print(classification_report(y_true=yukun_results['changepoint_ground_truth'], y_pred=yukun_results['changepoint_preds'], zero_division=0))

              precision    recall  f1-score   support

       False       0.00      0.00      0.00         3
        True       0.50      1.00      0.67         3

    accuracy                           0.50         6
   macro avg       0.25      0.50      0.33         6
weighted avg       0.25      0.50      0.33         6



In [55]:
# Yukun binary preds
print(classification_report(y_true=yukun_results['binary_ground_truth'], y_pred=yukun_results['binary_preds'], zero_division=0))

              precision    recall  f1-score   support

          -1       0.00      0.00      0.00         3
           0       0.33      1.00      0.50         1
           1       0.67      1.00      0.80         2

    accuracy                           0.50         6
   macro avg       0.33      0.67      0.43         6
weighted avg       0.28      0.50      0.35         6

