In [1]:
import os
import json
from datetime import datetime as dt
import plotly.graph_objects as go
from tqdm.auto import tqdm

In [2]:
# Path to the directory of extracted puppet data (every entry is later loaded as JSON).
# Set the PUPPETS_DATA_DIR environment variable to run the notebook on another
# machine; the original location is kept as the default.
DATA_DIR = os.environ.get(
    'PUPPETS_DATA_DIR',
    '/home/mharoon/Desktop/HDD/Noyce/YouTube-Radicalizer/Analysis/puppets',
)
# os.listdir returns entries in arbitrary order; sort so runs are reproducible
PUPPETS = sorted(os.listdir(DATA_DIR))

In [None]:
# counter[ideology][day] -> number of sock puppets whose 'start_time' falls on that day
counter = {}
for puppet in tqdm(PUPPETS):
    fp = os.path.join(DATA_DIR, puppet)
    # JSON is UTF-8 by specification; do not depend on the platform default encoding
    with open(fp, encoding='utf-8') as f:
        data = json.load(f)

    # the ideology label is the part of the file name before the first comma
    ideology = puppet.split(',')[0]

    # normalise the timestamp to a calendar day ('YYYY-MM-DD')
    # NOTE(review): this value was previously labelled "end time" although it is
    # read from 'start_time' -- confirm which field the timeline should use
    day = dt.fromisoformat(data['start_time']).strftime('%Y-%m-%d')

    per_day = counter.setdefault(ideology, {})
    per_day[day] = per_day.get(day, 0) + 1

In [4]:
# Ideology labels, ordered from far left to far right, each written next to the
# colour it is paired with; the pairs are then split into two parallel lists.
ideologies, colors = (
    list(column)
    for column in zip(
        ('Far Left', 'blue'),
        ('Left', 'turquoise'),
        ('Moderate', 'purple'),
        ('Right', 'salmon'),
        ('Far Right', 'red'),
    )
)

In [9]:
# Build one trace per ideology: x = days, y = number of sock puppets counted on that day.
plot_data = []
for ideology, color in zip(ideologies, colors):
    # an ideology without any puppet files yields an empty trace instead of a KeyError
    counts = counter.get(ideology, {})
    # keys are 'YYYY-MM-DD' strings, so lexicographic order is chronological order
    dates = sorted(counts)
    values = [counts[day] for day in dates]

    plot_data.append(
        go.Scatter(x=dates, y=values, name=ideology, marker=dict(color=color))
    )

fig = go.Figure(plot_data)
fig.update_xaxes(dict(
    title='Date'
))
fig.update_yaxes(dict(
    title='Number of Sock Puppets'
))
fig.update_layout(dict(
    title='Timeline of Data Collection'
))

# write_image does not create missing directories; make sure the target folder exists
output_path = 'static/timeline.png'
os.makedirs(os.path.dirname(output_path), exist_ok=True)
fig.write_image(output_path, width=4*300, height=2*300, scale=1)