##### Importing Project Libraries

In [1]:
import os
import datetime as dt
import pandas as pd
import csv
import numpy as np

In [2]:
# Make the project's 'src' directory importable so local modules can be loaded.
import sys
import pathlib # __file__ isn't recognized in Jupyter, so we need this to get the root path
# The project root is taken to be the parent of the kernel's current working
# directory, so this cell assumes the notebook runs from a direct subfolder.
path = pathlib.Path.cwd()
PROJ_ROOT = path.parent
src_dir = str(PROJ_ROOT / 'src')
PROJ_ROOT = str(PROJ_ROOT)  # kept as a plain string from here on
# Re-running this cell must not keep appending the same entry to sys.path.
if src_dir not in sys.path:
    sys.path.append(src_dir)
from data import make_dataset

### Select Usernames, Confirm that the core data is up to date

In [3]:
# NOTE: manual_usernames is assigned three times in a row. Each assignment
# replaces the previous one, so only the LAST list below is actually used.
manual_usernames = ['emily', '+vinny', 'jake', 'hayyogirl', 'Rabbit', 'username']
manual_usernames = ['inreverie', 'jake', 'username', 'rabbit',
       'hayyogirl', '+vinny', 'joeybars7', 'joeybars', 'rusty', 'upstate518',
       'fenderman123', 'philoiz', 'ceecee', 'anonysauce', 'keirmaley',
       'nepthys', 'plush.mary', 'jmeow', 'shanila', 'google', 'nimaghafari']
# Effective selection: this is the list passed to refresh_user_data below.
manual_usernames = ['doyle coleman', 'pinky618', 'davidt', 'vinnov10']
demo_username = 'vinnov10'
# manual_usernames = ['+vinny']
today = dt.date.today()  # reference date for the refresh and for the date window built later
# `usernames` (the helper's return value) drives every per-user loop further down.
usernames = make_dataset.refresh_user_data(manual_usernames, PROJ_ROOT, today)
# Re-pulls raw data and refreshes basic interim data files: users_df.pkl 
# and a bunch for each user: filename + username + '.pkl'
#   contacts_df_, day_comm_log_df_, week_comm_log_df_
#   locations_df_, loc_log_df_, day_loc_log_df_, week_loc_log_df_

Updating raw data for:
['doyle coleman', 'pinky618', 'davidt', 'vinnov10']
Updated raw data for users:
['pinky618', 'vinnov10', 'davidt', 'doyle coleman']
Updated raw data for:
['pinky618', 'vinnov10', 'davidt', 'doyle coleman']
Interim users_df not found, generating new


Defaulting to column, but this will raise an ambiguity error in a future version
  activity = user_loc_activity.sort_values('timestamp', ascending=True)
Defaulting to column, but this will raise an ambiguity error in a future version
  activity = user_loc_activity.sort_values('timestamp', ascending=True)


ValueError: Location based indexing can only have [integer, integer slice (START point is INCLUDED, END point is EXCLUDED), listlike of integers, boolean array] types

### Set Date Indices of Interest

In [None]:
# Monday-anchored weekly timestamps between 50 days ago and today (inclusive bounds).
date_indices = pd.date_range(
    start=today - dt.timedelta(days=50),
    end=today,
    freq='W-MON',
)
print(date_indices)

##### Establish interim data path, open some data files

In [None]:
from data import user_df_setup

# Pickle locations: the raw users table and the interim "full" users table.
raw_data_file_path = os.path.join(PROJ_ROOT, "data", "raw", "users_df.pkl")
interim_data_file_path = os.path.join(PROJ_ROOT, "data", "interim", "full_users_df.pkl")

# users_df comes from the project helper, which is given both paths
# (presumably it derives the interim table from the raw one - confirm in src/data).
users_df = user_df_setup.user_df_setup(raw_data_file_path, interim_data_file_path)
# users_df = pd.read_pickle(interim_data_file_path)

# The untouched raw table is loaded as well, for side-by-side inspection.
raw_users_df = pd.read_pickle(raw_data_file_path)

# raw_users_df
# raw_users_df[['username', 'timeCreated']].sort_values('timeCreated', ascending = False).head(20)
# all_users_df.sort_values('date_created', ascending = False).head(20)
# all_users_df.head(20)

##### Pulling communication, location data into user dictionaries

In [None]:
# Load the per-user interim pickles into dictionaries keyed by username.
interim_data_path = os.path.join(PROJ_ROOT,
                                 "data",
                                 "interim")
# NOTE: despite its name, raw_data_file_path holds the raw-data DIRECTORY here;
# it is not used again within this cell.
raw_data_file_path = os.path.join(PROJ_ROOT,
                                 "data",
                                 "raw")
# Communication data: daily log, weekly log and contacts, one entry per user.
daily_comm_dict = {}
weekly_comm_dict = {}
contacts_dict = {}

# Location data: weekly location log and locations table, one entry per user.
weekly_loc_log_dict = {}
locations_dict = {}

for username in usernames:
    # Placeholders reset on every iteration; within this cell they are only
    # ever overwritten by the reads below, never read back.
    daily_comm_df, weekly_comm_df, weekly_loc_log_df, locations_df = np.nan, np.nan, np.nan, np.nan
    # Optional file: the user only gets a weekly_comm_dict entry if the pickle exists.
    interim_comm_data_file_path = os.path.join(interim_data_path, 'week_comm_log_df_' + username + '.pkl')
    if os.path.isfile(interim_comm_data_file_path):
        weekly_comm_df = pd.read_pickle(interim_comm_data_file_path)
        weekly_comm_dict[username] = weekly_comm_df
        
    # Optional file: same pattern for the daily communication log
    # (the path variable from the weekly log is reused).
    interim_comm_data_file_path = os.path.join(interim_data_path, 'day_comm_log_df_' + username + '.pkl')
    if os.path.isfile(interim_comm_data_file_path):
        daily_comm_df = pd.read_pickle(interim_comm_data_file_path)
        daily_comm_dict[username] = daily_comm_df
    
    # Read without an existence check, unlike the logs above.
    # NOTE(review): confirm the contacts pickle is guaranteed to exist for every user.
    interim_contact_data_file_path = os.path.join(interim_data_path, 'contacts_df_' + username + '.pkl')
    contacts_df = pd.read_pickle(interim_contact_data_file_path)
    contacts_dict[username] = contacts_df
    
    # Optional file: weekly location log.
    interim_loc_data_file_path = os.path.join(interim_data_path, 'week_loc_log_df_' + username + '.pkl')
    if os.path.isfile(interim_loc_data_file_path):
        weekly_loc_log_df = pd.read_pickle(interim_loc_data_file_path)
        weekly_loc_log_dict[username] = weekly_loc_log_df
    
    # Read without an existence check (taken from the interim directory).
    # NOTE(review): confirm the locations pickle is guaranteed to exist for every user.
    locations_data_file_path = os.path.join(interim_data_path, 'locations_df_' + username + '.pkl')
    locations_df = pd.read_pickle(locations_data_file_path)
    locations_dict[username] = locations_df

In [None]:
# weekly_loc_log_dict['pinky618']
# weekly_comm_dict[usernames[0]]

##### Pulling notification dict

In [None]:
# notification_dict = make_dataset.make_notification_data(users_df, usernames, PROJ_ROOT)

## Plotting Communication

In [None]:
from visualization import generate_report_charts

# Directory handed to every chart helper below: <PROJ_ROOT>/notebooks/figures.
chart_path = os.path.join(PROJ_ROOT, "notebooks", "figures")

In [None]:
comm_days_line_chart_cols = ['total_comm_days', 'risky_comm_days', 'supportive_comm_days']

# Weekly communication-day line chart, one figure per user, restricted to the
# window spanned by date_indices.
for username in usernames:
    print(username)
    # weekly_comm_dict only holds users whose weekly pickle was found when the
    # dictionaries were loaded; skip the others instead of failing with KeyError.
    if username not in weekly_comm_dict:
        print('  no weekly communication data found, skipping')
        continue
    comm_days_line_chart_data = weekly_comm_dict[username][comm_days_line_chart_cols]
    # Two-sided boolean mask (both ends inclusive), matching the other chart
    # cells. Unlike a label slice it does not require a sorted index.
    in_window = ((comm_days_line_chart_data.index >= date_indices[0])
                 & (comm_days_line_chart_data.index <= date_indices[-1]))
    comm_days_line_chart_data = comm_days_line_chart_data[in_window]
    generate_report_charts.comm_days_line_chart([username], date_indices, comm_days_line_chart_data, chart_path, show=True)

In [None]:
# print(comm_days_line_chart_data[(comm_days_line_chart_data.index >= min(date_indices))])

In [None]:
comm_vol_bar_chart_cols = ['total_comm', 'risky_comm', 'neutral_comm', 'supportive_comm', 'unrated_comm']

# Weekly communication-volume bar chart, one figure per user, restricted to
# the window spanned by date_indices.
for username in usernames:
    print(username)
    # weekly_comm_dict only holds users whose weekly pickle was found when the
    # dictionaries were loaded; skip the others instead of failing with KeyError.
    if username not in weekly_comm_dict:
        print('  no weekly communication data found, skipping')
        continue
    comm_vol_bar_chart_data = weekly_comm_dict[username][comm_vol_bar_chart_cols]
    # Keep rows whose index falls inside the window (both ends inclusive).
    in_window = ((comm_vol_bar_chart_data.index >= date_indices[0])
                 & (comm_vol_bar_chart_data.index <= date_indices[-1]))
    comm_vol_bar_chart_data = comm_vol_bar_chart_data[in_window]
    generate_report_charts.comm_vol_bar_chart([username], date_indices, comm_vol_bar_chart_data, chart_path, show=True)

    
    

In [None]:
comm_vol_bar_chart_cols = ['total_comm', 'risky_comm', 'neutral_comm', 'supportive_comm', 'unrated_comm']
# Daily timestamps covering the same span as the weekly date_indices.
daily_date_indices = pd.date_range(date_indices[0], date_indices[-1], freq='D')

# Daily communication-volume bar chart, one figure per user.
for username in usernames:
    print(username)
    # daily_comm_dict only holds users whose daily pickle was found when the
    # dictionaries were loaded; skip the others instead of failing with KeyError.
    if username not in daily_comm_dict:
        print('  no daily communication data found, skipping')
        continue
    comm_vol_bar_chart_data = daily_comm_dict[username][comm_vol_bar_chart_cols]
    # Keep rows whose index falls inside the window (both ends inclusive).
    in_window = ((comm_vol_bar_chart_data.index >= date_indices[0])
                 & (comm_vol_bar_chart_data.index <= date_indices[-1]))
    comm_vol_bar_chart_data = comm_vol_bar_chart_data[in_window]
    generate_report_charts.comm_vol_bar_chart([username], daily_date_indices, comm_vol_bar_chart_data, chart_path, show=True)


In [None]:
comm_pie_chart_cols = ['risky_percent', 'neutral_percent', 'supportive_percent', 'unrated_percent']
# The pie chart is drawn for the second-to-last entry of date_indices
# (presumably the most recent complete week - confirm).
pie_chart_week = date_indices[-2]

for username in usernames:
    print(username)
    # weekly_comm_dict only holds users whose weekly pickle was found when the
    # dictionaries were loaded; skip the others instead of failing with KeyError.
    if username not in weekly_comm_dict:
        print('  no weekly communication data found, skipping')
        continue
    comm_pie_chart_data = weekly_comm_dict[username][comm_pie_chart_cols]
    # Only plot when the user actually has a row for that week.
    if pie_chart_week in comm_pie_chart_data.index:
        generate_report_charts.comm_pie_chart([username], pie_chart_week, comm_pie_chart_data.loc[pie_chart_week], chart_path, show=True)

### Locations

In [None]:
# Rebuild locations_dict from the RAW per-user location pickles. This rebinds
# the name, replacing any locations_dict created by an earlier cell.
locations_dict = {}
for username in usernames:
    raw_locations_file_path = os.path.join(PROJ_ROOT,
                             "data",
                             "raw",
                             'locations_df_' + username + '.pkl')
    if os.path.isfile(raw_locations_file_path):
        locations_df = pd.read_pickle(raw_locations_file_path)
        # Store only inside the branch: previously a missing file caused the
        # stale locations_df of a previous user/cell to be saved under this name.
        locations_dict[username] = locations_df
    else:
        print('No raw locations file found for ' + username + ', skipping')
# locations_dict[demo_username].head()

### Plotting Location Visits

In [None]:
loc_days_bar_chart_cols = ['days_w_risky_loc_visits']

# Weekly bar chart of days with risky location visits, one figure per user,
# restricted to the window spanned by date_indices.
for username in usernames:
    print(username)
    # weekly_loc_log_dict only holds users whose weekly location pickle was found
    # when the dictionaries were loaded; skip the others instead of raising KeyError.
    if username not in weekly_loc_log_dict:
        print('  no weekly location data found, skipping')
        continue
    loc_days_bar_chart_data = weekly_loc_log_dict[username][loc_days_bar_chart_cols]
    # Keep rows whose index falls inside the window (both ends inclusive).
    in_window = ((loc_days_bar_chart_data.index >= date_indices[0])
                 & (loc_days_bar_chart_data.index <= date_indices[-1]))
    loc_days_bar_chart_data = loc_days_bar_chart_data[in_window]
    generate_report_charts.loc_days_bar_chart([username], date_indices, loc_days_bar_chart_data, chart_path, show=True)
# loc_days_bar_chart_cols = ['days_w_risky_loc_visits']

### Contact Rating Distribution

In [None]:
# users_df.head
# contacts_dict['+vinny']

In [None]:
import matplotlib.pyplot as plt

# One stacked-vertically panel per user showing contact scores as bars,
# coloured by rating bucket. squeeze=False keeps axarr two-dimensional even
# for a single user, hence the axarr[count, 0] indexing below.
f, axarr = plt.subplots(len(usernames),figsize=(10,4*len(usernames)), sharex = True, sharey = True, squeeze=False)

for count, e in enumerate(usernames):  
    # Per-user score thresholds come from users_df (indexed by username).
    unrated_threshold = users_df.loc[e, 'unrated_threshold']
    risky_threshold = users_df.loc[e, 'risky_threshold']
    supportive_threshold = users_df.loc[e, 'supportive_threshold']
    
    contacts_df = contacts_dict[e].sort_values('score', ascending = False)
    # Bucket the contacts:
    #   unrated    - score below the unrated threshold and not explicitly 'risky'
    #   risky      - explicitly 'risky', or score in [unrated_threshold, risky_threshold]
    #   neutral    - score strictly between the risky and supportive thresholds
    #   supportive - score at or above the supportive threshold
    unrated = contacts_df['score'][(contacts_df['score'] < unrated_threshold)
                                  & (contacts_df['relationship'] != 'risky')]
    risky = contacts_df['score'][(contacts_df['relationship'] == 'risky')
                                | ((contacts_df['score'] >= unrated_threshold)
                                & (contacts_df['score'] <= risky_threshold))]
    neutral = contacts_df['score'][(contacts_df['score'] > risky_threshold)
                                & (contacts_df['score'] < supportive_threshold)]
    supportive = contacts_df['score'][(contacts_df['score'] >= supportive_threshold)]
    
    # Risky bars are drawn last, so they sit on top of any bar sharing an x position.
    axarr[count, 0].bar(supportive.index, supportive, 0.7, color = '#00cc00')
    axarr[count, 0].bar(neutral.index, neutral, 0.7, color = 'b')
    axarr[count, 0].bar(risky.index, risky, 0.7, color = '#ff6600')
    # Unrated contacts are counted in the title but not drawn.
    axarr[count, 0].set_title(e + ' - rated: ' + str(len(contacts_df) - len(unrated)) + ' unrated: ' + str(len(unrated)))

# Lay the figure out only after every panel has its bars and title; calling
# tight_layout() on the still-empty figure cannot account for them.
f.tight_layout()

In [None]:
# Ad-hoc inspection: show the communication-day columns of the weekly log for
# the hard-coded user 'davidt' (relies on comm_days_line_chart_cols from an earlier cell).
weekly_comm_dict['davidt'][comm_days_line_chart_cols]

In [None]:
import numpy as np

# Scratch check of label-based access: for every index label, print the row
# at that label followed by all rows whose label is strictly greater.
activity = weekly_comm_dict['davidt'][comm_days_line_chart_cols]
for label in activity.index:
    row_at_label = activity.loc[label]
    rows_after_label = activity[activity.index > label]
    print(row_at_label)
    print(rows_after_label)

In [None]:
import numpy as np

# Scratch check of position-based access: for every row position, print that
# row followed by all rows that come after it.
activity = weekly_comm_dict['davidt'][comm_days_line_chart_cols]
for position in range(len(activity)):
    current_row = activity.iloc[position]
    remaining_rows = activity.iloc[position + 1:]
    print(current_row)
    print(remaining_rows)