# Figures

This script creates the figures used for illustrative purposes in the paper.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

## Figure 1. Process of excluding videos

In [None]:
# Upload start times, expressed in hours after 2021-11-09 00:00 (one every 12 h)
r = np.arange(0, 48 + 1, 12)
# Calendar labels for the x-axis ticks, also spaced 12 hours apart
xlabs = pd.date_range(
    start='2021-11-09 00:00:00',
    end='2021-11-12 00:00:00',
    freq='12h',
)
# One row per illustrative video: its start hour and a constant 24-hour window
d = pd.DataFrame({'start': r, 'window': 24})

# Plot: each video is a horizontal bar spanning [start, start + window]
plt.barh(d.index, d['window'], left=d['start'])
# Shade hours 24-48 in red.
# NOTE(review): the label below is only displayed if a legend is drawn, and
# this cell never calls plt.legend() - confirm whether a legend is wanted.
plt.axvspan(24, 48, color='red', alpha=0.5, label='Rollout day')

# Aesthetics
plt.xlabel('Time')
# Ticks run from 0 to the end of the last window so they pair up with xlabs
plt.xticks(
    np.arange(0, r.max() + 24 + 1, 12),
    xlabs.strftime('%b-%d %H:%M'),
    rotation=45,
)
plt.ylabel('Type of video')
plt.yticks(
    np.arange(len(d)),
    ['Control'] + ['Uncertain'] * 3 + ['Treatment'],
)
plt.grid(axis='x')

# Save and show
# plt.savefig('../../fig/fig_ncr.png', dpi=200, bbox_inches='tight')
plt.show()

## Confusion matrices

In [None]:
# Load the labeled sample of comments
df = pd.read_csv('../../dat/comments_sample_labeled.csv')

# Negative: confusion matrix with 'ncr1Hand' as y_true and 'ncr1Vadr' as y_pred
t1 = pd.DataFrame(confusion_matrix(df['ncr1Hand'], df['ncr1Vadr']))

# Somewhat negative
# NOTE(review): this uses exactly the same two columns as t1, so t2 is an
# identical table. It looks like a copy-paste slip - confirm which columns
# hold the "somewhat negative" labels and substitute them here.
t2 = pd.DataFrame(confusion_matrix(df['ncr1Hand'], df['ncr1Vadr']))

Convert confusion-matrix counts to row-wise rates (each row divided by its row total)

In [None]:
# Divide every row of t1 by that row's total, then round to 3 decimals
t1.div(t1.sum(axis='columns'), axis='index').round(3)

In [None]:
# Divide every row of t2 by that row's own total, then round to 3 decimals.
# The divisor must come from t2 itself; using another table's row totals
# would yield rows that do not sum to 1 whenever the two tables differ.
t2.div(t2.sum(axis=1), axis=0).round(3)