In [None]:
# warnings
import warnings
warnings.filterwarnings('ignore')

# import modules
from twitter import *
from urllib.error import HTTPError
import numpy as np
import pandas as pd
import json
import time

# plots
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns

%matplotlib inline
# matplotlib 3.6 renamed the bundled seaborn style sheets to 'seaborn-v0_8-*'
# and the old names were removed later on. Prefer the new name when this
# install knows it and fall back to the legacy one otherwise.
_ticks_style = 'seaborn-v0_8-ticks'
if _ticks_style not in matplotlib.style.available:
    _ticks_style = 'seaborn-ticks'
matplotlib.style.use(_ticks_style)

In [None]:
# temp util
def timestamp_attrs(data, col='created_at', tz='America/Lima', passtz=True):
    '''
    Parse a timestamp column and expand it into calendar attribute columns.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding the raw timestamp column. It is modified in place.
    col : str
        Name of the column to parse; also used as prefix for the new columns.
    tz : str
        Time zone the parsed values are converted to when ``passtz`` is true.
    passtz : bool
        If true, values are parsed as UTC and converted to ``tz``. If false,
        values are parsed as they are and no conversion is applied.

    Returns
    -------
    pandas.DataFrame
        The same ``data`` object. ``{col}`` is overwritten with the
        ``'%Y-%m-%d'`` date string and these columns are added:
        ``{col}_timestamp``, ``{col}_year``, ``{col}_month``, ``{col}_day``,
        ``{col}_hour``, ``{col}_weekday`` (``dt.dayofweek``) and
        ``{col}_time_hour`` (``'%H:%M:%S'`` string).
    '''
    # `infer_datetime_format` is deprecated in pandas 2.x (format inference is
    # the default there), so it is no longer passed.
    if passtz:
        # vectorised conversion instead of a per-row Timestamp.tz_convert
        t = pd.to_datetime(data[col], utc=True).dt.tz_convert(tz)
    else:
        t = pd.to_datetime(data[col])

    data[f'{col}_timestamp'] = t
    # the original column is replaced by the plain date string
    data[f'{col}'] = t.dt.strftime('%Y-%m-%d')
    data[f'{col}_year'] = t.dt.year
    data[f'{col}_month'] = t.dt.month
    data[f'{col}_day'] = t.dt.day
    data[f'{col}_hour'] = t.dt.hour
    data[f'{col}_weekday'] = t.dt.dayofweek
    data[f'{col}_time_hour'] = t.dt.strftime('%H:%M:%S')

    return data

**`Twitter Auth`**

In [None]:
# read credentials
# NOTE(review): the JSON keys are passed straight to OAuth(**auth), so they
# must match its keyword arguments - confirm against the config file.
with open('../_config/twitter_auth.json') as f:
    # the context manager closes the file; no explicit close() is needed
    auth = json.load(f)

# Auth
Auth = OAuth(**auth)

# connection
cxn = Twitter(auth=Auth)

**`Get users`**

    path = 'C:/Users/Usuario/Downloads/1EDITtop_authorsSolidaridad_Hermandad.txt'
    data = [i.rstrip() for i in open(path, mode='r', encoding='utf-8')]

In [None]:
# Load the per-user table; `id_str` goes through `str` so the ids stay text.
path = 'C:/i/repositories/DigitalResearch/data/kpop/users_by_engagement_no_rts.csv'
data = pd.read_csv(
    path,
    converters={'id_str': str},
    encoding='utf-8',
)
data.shape

In [None]:
# shape of the de-duplicated id column, i.e. how many distinct accounts there are
data['id_str'].drop_duplicates().shape

In [None]:
# the five most repeated ids, to see whether accounts appear more than once
data['id_str'].value_counts().iloc[:5]

In [None]:
# distinct ids, in order of first appearance, as a plain Python list
data_ = data['id_str'].drop_duplicates().tolist()

In [None]:
# report how many distinct accounts are in the id list
print (f'Total n of accounts ---> {len(data_)}')

In [None]:
# Split the id list into consecutive chunks of `division` ids each.
# NOTE(review): 100 presumably mirrors the per-request id cap of users/lookup - confirm.
division = 100
ids = [
    data_[start:start + division]
    for start in range(0, len(data_), division)
]
print(f'Total n of batches ----> {len(ids)}')

In [None]:
# Download the user objects batch by batch.
all_ = []
for accounts in ids:
    try:
        users_lookup = cxn.users.lookup(user_id=','.join(accounts))
    except TwitterHTTPError as err:
        # users/lookup answers 404 when none of the requested ids resolves to
        # a user (e.g. a batch made only of deleted/suspended accounts).
        # Skip that batch instead of aborting the whole download; any other
        # HTTP failure is still raised.
        # `err.e` is the underlying urllib HTTPError kept by the twitter package.
        if err.e.code == 404:
            continue
        raise
    all_.extend(users_lookup)

In [None]:
# number of user objects collected across all batches
print (f'Total data downloaded ---> {len(all_)}')

**`Save data as json`**

In [None]:
# save data as json
save_path = 'C:/i/repositories/DigitalResearch/data/kpop/users_by_engagement_no_rts.json'
# ensure_ascii=False keeps non-ASCII characters readable in the output file
objects = json.dumps(all_, ensure_ascii=False, indent=2)
# the context manager closes the handle even if the write raises
with open(save_path, mode='w', encoding='utf-8') as writer:
    writer.write(objects)

In [None]:
# Requested ids that did not come back from the lookup.
# Iterate the id list `data_`: iterating the DataFrame `data` yields its
# column labels, not ids. The returned ids go into a set so each membership
# test is O(1) instead of rebuilding a list for every element.
found_ids = {user['id_str'] for user in all_}
miss = [account_id for account_id in data_ if account_id not in found_ids]
len(miss)

In [None]:
# Re-read the same CSV, this time routing the `id` column through `str`.
# NOTE(review): the first load of this file converts `id_str`, not `id` -
# confirm which id column the CSV actually has before relying on `data['id']`.
data = pd.read_csv(path, encoding='utf-8', converters={'id': str})
data.shape

In [None]:
# `author` values of the rows whose `id` is among the ids not returned by the lookup
miss_names = data.loc[data['id'].isin(miss), 'author'].tolist()
print(miss_names)

In [None]:
# Append one missing author per line; the context manager closes the handle
# even if a write fails.
# NOTE(review): no encoding is given, so the platform default is used, unlike
# the utf-8 used for the other files in this notebook - confirm that is intended.
with open('C:/i/repositories/DigitalResearch/data/sputnik_audience/other_link_sharing_twitter_missing.txt', mode='a') as writer:
    for author in miss_names:
        writer.write(f'{author}\n')

**`Plot creation date`**

In [None]:
# load the previously saved user objects
p = 'C:/i/repositories/DigitalResearch/data/bolivian_elections/Pro-MAS_top_authors_twitter_api.json'
with open(p, encoding='utf-8') as f:
    # the context manager closes the file; no explicit close() is needed
    dataset = json.load(f)

# total data
print(f'total accounts --> {len(dataset)}')

In [None]:
# tabulate the loaded records: one row per entry of `dataset`
df = pd.DataFrame(dataset)
df.shape

In [None]:
# Parse `created_at` as UTC, convert it to Bolivian local time and add the
# `created_at_*` attribute columns.
# NOTE: timestamp_attrs overwrites `created_at` with a 'YYYY-MM-DD' string, so
# re-running this cell re-parses that truncated date instead of the raw value.
df = timestamp_attrs(df, tz='America/La_Paz')

In [None]:
# shape check after the timestamp attribute columns were added
df.shape

In [None]:
# constant 1 per row, so that summing it per group gives a row count
df['counter'] = 1

In [None]:
# Count rows per creation date (`created_at` holds 'YYYY-MM-DD' strings at this point).
_key_ = 'created_at'
# Use the string alias 'sum': passing the builtin `sum` to agg is deprecated
# in recent pandas releases, while 'sum' keeps the current groupby behaviour.
df_gpo = df.groupby(_key_) \
    .agg({'counter': 'sum'}) \
    .reset_index()

# preview the per-date counts
df_gpo.head()

In [None]:
# turn the date strings back into datetimes so they can be filtered and plotted as dates
df_gpo[_key_] = pd.to_datetime(df_gpo[_key_])

In [None]:
# inspect the first date of the grouped frame (groupby sorts its keys, so presumably the earliest)
df_gpo[_key_].iloc[0]

In [None]:
# keep only the dates on or after 2019-08-01
df_gpo = df_gpo.loc[df_gpo[_key_] >= '2019-08-01']

In [None]:
fig, ax = plt.subplots(figsize=(16,8))

# palette
cm = sns.dark_palette((260, 75, 60), input='husl', n_colors=1, reverse=True, as_cmap=True)
# seaborn applies `palette` only when a `hue` variable is mapped, so with a
# single line it was ignored. Pass the colour explicitly instead: with
# reverse=True the colormap starts at the requested HUSL colour, i.e. cm(0.0).
sns.lineplot(x=_key_, y='counter', data=df_gpo, linewidth=1,
             ax=ax, alpha=0.5, legend=False, color=cm(0.0))

# one major tick per month
ax.xaxis.set_major_locator(mdates.MonthLocator())

# major tick labels: abbreviated month on the first line, year on the second
ax.xaxis.set_major_formatter(mdates.DateFormatter('%b\n%Y'))
ax.tick_params(direction='out', color='#878dac', labelsize=13, rotation=0, pad=15,
               labelcolor='#878dac')

ax.set_ylabel('Number of accounts', labelpad=35, fontsize=18)
ax.set_xlabel('Creation date', labelpad=35, fontsize=18)
# hide the axes frame; the trailing ';' suppresses the cell's repr output
plt.box(False);