In [162]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [163]:
import models
import activitiesdf
from sklearn.metrics.pairwise import haversine_distances
import numpy as np
import plotly
import plotly.express as px
import pandas as pd
import geopandas as gpd

In [2]:
# Read the 'sweden' activities through the project helper and key every row
# by (userid, region, tweetid).
activity_keys = ['userid', 'region', 'tweetid']
activities = activitiesdf.read_csv('sweden').set_index(activity_keys)

In [190]:
# Load the tweets and index them by (userid, region).
# `parse_dates=True` was dropped: with no `index_col` it only asks pandas to
# parse the (default integer) index, so it never touched `createdat`.
tweets = pd.read_csv('./../../dbs/sweden/tweets_during_home_interval.csv').set_index(['userid', 'region'])
# Convert the timestamp column explicitly. `infer_datetime_format` was dropped:
# it is deprecated since pandas 2.0, where format inference is the default.
tweets['createdat'] = pd.to_datetime(tweets['createdat'])

In [191]:
# Number of tweets per user; 'userid' is an index level of `tweets`.
tweets.groupby(level='userid').size()

userid
5047                     7
5616                  2162
8981                  1012
32213                  495
392123                 686
                      ... 
789737143312740352       3
790301765744951296      54
794973407964229636     856
797211605494824960      19
806822309478727680      43
Length: 4733, dtype: int64

In [192]:
# Select a single user's tweets; indexing the first level of the
# (userid, region) index leaves a frame indexed by region only.
USER_ID = 794973407964229636
usertweets = tweets.loc[USER_ID]

# Keep only the rows labelled 'home' for that user.
is_home = usertweets['label'].eq('home')
home_location = usertweets[is_home]

In [193]:
# Show the first row of every region for the selected user
# ('region' is the index of `usertweets`).
usertweets.groupby(level='region').head(1)

Unnamed: 0_level_0,tweetid,createdat,latitude,longitude,month,weekday,hourofday,timezone,label
region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0,797842393877745664,2016-11-13 16:43:44+00:00,59.314261,18.076535,11,0,17,Europe/Stockholm,home
1,798607229481984000,2016-11-15 19:22:55+00:00,59.368500,17.967020,11,2,20,Europe/Stockholm,other
2,799325499235237888,2016-11-17 18:57:04+00:00,59.333283,18.067816,11,4,19,Europe/Stockholm,other
3,799653623592402945,2016-11-18 16:40:55+00:00,59.364570,17.974930,11,5,17,Europe/Stockholm,other
4,799919577647878145,2016-11-19 10:17:43+00:00,59.311860,18.057450,11,6,11,Europe/Stockholm,other
...,...,...,...,...,...,...,...,...,...
115,1099390625227493376,2019-02-23 19:28:45+00:00,59.300000,18.166700,2,6,20,Europe/Stockholm,other
116,1100746552581201921,2019-02-27 13:16:43+00:00,59.309151,18.051889,2,3,14,Europe/Stockholm,other
117,1104761332384055296,2019-03-10 15:10:01+00:00,59.292857,18.079165,3,0,16,Europe/Stockholm,other
118,1105497771421941760,2019-03-12 15:56:22+00:00,59.303260,18.098290,3,2,16,Europe/Stockholm,other


In [194]:
# Build the project's SongModel with fixed hyper-parameters, fit it on the
# selected user's tweets, and display the resulting exploration probability.
song_params = dict(p=0.66, gamma=0.6, zipf=-1.7)
m = models.SongModel(**song_params)
m.fit(usertweets)
m.exploration_prob

0.03732804009298748

In [195]:
def save_sample(day, timeslot, d, sample):
    """Append one flattened sample row to the list ``d``.

    Args:
        day: Day identifier stored in the first column.
        timeslot: Timeslot identifier stored in the second column.
        d: List that is mutated in place; one row is appended per call.
        sample: Indexable whose first item is the kind. For ``'region'`` the
            next three items are stored as region, latitude and longitude;
            for any other kind the next two items are stored as latitude and
            longitude, and the region column is filled with ``-1``.

    Returns:
        None. The row ``[day, timeslot, kind, region, latitude, longitude]``
        is appended to ``d``.
    """
    if sample[0] == 'region':
        tail = ['region', sample[1], sample[2], sample[3]]
    else:
        # Non-region samples carry no region id, so -1 is used as a placeholder.
        tail = ['point', -1, sample[1], sample[2]]
    d.append([day, timeslot, *tail])

# Simulate daily trajectories with the fitted model `m`.
# Every day starts at the user's first 'home' row (timeslot 0) and is followed
# by SLOTS_PER_DAY successive draws from m.sample(), each fed the previous one.
# NOTE(review): m.sample() is presumably stochastic and no seed is set here, so
# re-running this cell will likely give different trajectories -- confirm how
# `models` draws random numbers and seed it for reproducibility.
N_DAYS = 7 * 20
SLOTS_PER_DAY = 3

# The start point is identical for every day, so build it once outside the loop.
home_row = home_location.iloc[0]
home_start = ("region", home_location.index[0], home_row['latitude'], home_row['longitude'])

d = []
for day in range(N_DAYS):
    prev = home_start
    save_sample(day, 0, d, prev)
    # Sampled moves are numbered from 1: slot 0 is already used by the home
    # start, and numbering from 0 produced two rows with timeslot 0 per day.
    for timeslot in range(1, SLOTS_PER_DAY + 1):
        prev = m.sample(prev)
        save_sample(day, timeslot, d, prev)

# Collect the rows into a frame, then attach point geometry (lon/lat, EPSG:4326).
sampled = pd.DataFrame(
    d,
    columns=['day', 'timeslot', 'kind', 'region', 'latitude', 'longitude'],
)
sampled = gpd.GeoDataFrame(
    sampled,
    crs="EPSG:4326",
    geometry=gpd.points_from_xy(sampled.longitude, sampled.latitude),
)


In [196]:
# Animated map of the simulated trajectories: one animation frame per day,
# with the sampled points joined by a line on an OpenStreetMap base layer.
fig = px.line_mapbox(
    sampled,
    lat='latitude',
    lon='longitude',
    hover_data=['day', 'timeslot', 'region', 'kind'],
    animation_frame='day',
).update_layout(mapbox_style="open-street-map")
fig.show()

In [189]:
# Map of the selected user's observed tweets, coloured by region, on an
# OpenStreetMap base layer. reset_index() turns 'region' back into a column
# so it can be used for colour and hover.
scatter_options = dict(
    lat='latitude',
    lon='longitude',
    hover_data=['region', 'hourofday', 'weekday'],
    color='region',
)
fig = px.scatter_mapbox(usertweets.reset_index(), **scatter_options)
fig.update_layout(mapbox_style="open-street-map")
fig.show()