In [28]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

import json

In [29]:
from matplotlib import rc

# Global matplotlib defaults: 13 pt sans-serif text, LaTeX rendering off,
# and 11.69 x 8.27 inch figures (the dimensions of an A4 sheet in landscape).
rc('font', family='sans-serif', size=13.0)
rc('text', usetex=False)
rc('figure', figsize=(11.69, 8.27))

from matplotlib.font_manager import FontProperties

# Bold, 14 pt, sans-serif font properties kept under the name panel_label_font.
panel_label_font = FontProperties(family="sans-serif", weight="bold", size=14.0)

# Seaborn styles
sns.set_style("whitegrid")

In [24]:
# Read the CSV with "status" kept as text and "{}" / "None" cells treated as
# missing, index the rows by "id", parse both time columns (unparseable
# "date" values become NaT), and finally drop the rows without a date.
df = (
    pd.read_csv("paperbuzz.csv", dtype={'status': str}, na_values=["{}", "None"])
    .set_index("id")
    .assign(
        timestamp=lambda frame: pd.to_datetime(frame['timestamp']),
        date=lambda frame: pd.to_datetime(frame['date'], errors='coerce'),
    )
    .dropna(subset=['date'])
)

In [2]:
# Number of rows for each distinct value of the "status" column.
df['status'].value_counts()

200                47994
ReadTimeout           35
ConnectionError       16
500                    3
SSLError               1
Name: status, dtype: int64

In [37]:
# Parse every non-missing response lazily, then gather the distinct
# source_id values listed under "altmetrics_sources" (when that key exists).
parsed_responses = (json.loads(raw) for raw in df['response'].dropna())
sources = {
    entry['source_id']
    for payload in parsed_responses
    if 'altmetrics_sources' in payload
    for entry in payload['altmetrics_sources']
}

In [38]:
# Work on a copy so df itself stays untouched, then add one empty
# (None-filled) column per altmetrics source.
# Iterate in sorted order: `sources` is a set of strings, and its iteration
# order can differ from one kernel start to the next (string hashing is
# randomised per process), which would shuffle the column order of `results`
# and of every table derived from it.
results = df.copy()
for s in sorted(sources):
    results[s] = None

In [39]:
def extract_metrics(row):
    """Return `row` with one event count filled in per altmetrics source.

    `row['response']` is expected to be either missing (NaN/None) or a JSON
    string. When the decoded JSON contains an "altmetrics_sources" list, each
    entry's "events_count" is converted to float and stored under the key
    named by that entry's "source_id".

    The passed-in row is never modified: pandas does not support functions
    that mutate the object handed to them by `DataFrame.apply`, so the values
    are written to a copy. When there is nothing to write, the original row
    object is returned as-is.

    Parameters
    ----------
    row : pandas.Series or dict-like
        A record with a 'response' entry (e.g. a row from
        `DataFrame.apply(..., axis=1)`).

    Returns
    -------
    Same type as `row`
        The row including the extracted counts.

    Raises
    ------
    json.JSONDecodeError
        If the response is present but is not valid JSON.
    """
    response = row['response']
    if pd.isna(response):
        return row

    payload = json.loads(response)
    if 'altmetrics_sources' not in payload:
        return row

    # Copy before writing so the caller's object stays untouched.
    updated = row.copy()
    for source in payload['altmetrics_sources']:
        updated[source['source_id']] = float(source['events_count'])
    return updated

In [42]:
# Call extract_metrics once per row (axis='columns' is the named spelling of
# axis=1), then show summary statistics of the resulting frame.
results = results.apply(extract_metrics, axis='columns')
results.describe()

Unnamed: 0,crossref,twitter,newsfeed,wordpressdotcom,wikipedia,reddit,datacite
count,5.0,1873.0,8.0,13.0,61.0,4.0,226.0
mean,1.0,4.34063,1.75,1.153846,5.57377,1.0,5.212389
std,0.0,9.380452,1.035098,0.375534,9.385199,0.0,10.496096
min,1.0,1.0,1.0,1.0,1.0,1.0,1.0
25%,1.0,1.0,1.0,1.0,1.0,1.0,2.0
50%,1.0,2.0,1.5,1.0,2.0,1.0,2.0
75%,1.0,4.0,2.0,1.0,6.0,1.0,2.0
max,1.0,238.0,4.0,2.0,67.0,1.0,98.0
