In [6]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly
import plotly.graph_objects as pgo
import geopandas as gpd
import activitiesdf
from datetime import timedelta
from sklearn.feature_extraction.text import CountVectorizer
import importlib
from sklearn.metrics.pairwise import haversine_distances
# Re-import the local `activitiesdf` module so that edits made to its source
# file are picked up without restarting the kernel.
importlib.reload(activitiesdf)

<module 'activitiesdf' from '/Users/ericwennerberg/source/chalmers/mscthesis/src/py/activitiesdf.py'>

In [7]:
# Read the 'sweden' activities through the project helper and index them by
# (userid, region, tweetid), so that one user's rows can be pulled out with a
# plain `.loc[userid]`.
activities = (
    activitiesdf
    .read_csv('sweden')
    .set_index(['userid', 'region', 'tweetid'])
)

In [120]:
# Select the rows of a single user (userid 11773412), order them by
# `createdat`, and move the remaining index levels back into ordinary columns.
u = (
    activities
    .loc[11773412]
    .sort_values('createdat')
    .reset_index()
)

In [121]:
# Derive the user's "gaps" with the project helper. The cells below read the
# columns `duration`, `region_origin` and `region_destination` from the result.
# NOTE(review): presumably one row per pair of consecutive activities — confirm
# against activitiesdf.gaps.
gaps = activitiesdf.gaps(u)

In [122]:
# Keep only the gaps that last less than 24 hours and whose origin and
# destination regions differ.
short_enough = gaps['duration'] < timedelta(hours=24)
changes_region = gaps['region_origin'] != gaps['region_destination']
gaps = gaps[short_enough & changes_region]

# Sankey diagram


In [123]:
# Node labels: every region that appears as an origin or a destination in `gaps`.
labels = pd.concat([gaps['region_origin'], gaps['region_destination']]).unique()

# Link weights: number of gaps per (origin, destination) pair, keeping only the
# pairs that occur more than once.
d = gaps.groupby(['region_origin', 'region_destination']).size()
d = d[d > 1]

# Sankey links refer to nodes by their position in `labels`. Build the lookup
# once and resolve both ends of every link directly; this keeps the indices
# genuine integers instead of filling an all-NaN placeholder Series label by
# label (such a placeholder cannot actually hold an integer dtype).
label_position = {label: position for position, label in enumerate(labels)}
idx_o = pd.Series(
    [label_position[origin] for origin, _ in d.index],
    index=d.index,
    dtype=int,
)
idx_d = pd.Series(
    [label_position[destination] for _, destination in d.index],
    index=d.index,
    dtype=int,
)

fig = pgo.Figure(data=[pgo.Sankey(
    node=dict(
        pad=15,
        thickness=20,
        line=dict(color="black", width=0.5),
        label=list(labels),
        color="blue",
    ),
    link=dict(
        source=idx_o.tolist(),
        target=idx_d.tolist(),
        value=d.tolist(),
    ),
)])
fig.update_layout(title_text="Gaps between regions", font_size=10)
fig.show()

# Region profile: row count, most popular hour, distance from home

In [124]:
# Per-region profile for the selected user: number of rows, the hour of day
# with the most rows, and the great-circle distance (km) between the region's
# first row and the first row labelled "home".
EARTH_RADIUS_KM = 6371.0088  # mean Earth radius; converts haversine radians to km

us = u.set_index(['region'])
home = us.query('label == "home"').head(1)

by_region = us.groupby('region')

# Rows per (region, hour), reduced to the single largest hour for each region
# and re-indexed by region alone.
hour_counts = us.groupby(['region', 'hourofday']).size()
busiest_hour = (
    hour_counts
    .groupby('region').nlargest(1).to_frame()
    .reset_index(1, drop=True)
    .reset_index('hourofday')['hourofday']
)

# `head(1)` keeps the regions in order of first appearance, which is the same
# order as `us.index.unique()`, so the distances line up positionally.
first_coords = by_region.head(1)[['latitude', 'longitude']].values
home_coords = home[['latitude', 'longitude']].values
km_from_home = haversine_distances(
    X=np.radians(first_coords),
    Y=np.radians(home_coords),
) * EARTH_RADIUS_KM

regions = pd.DataFrame(index=us.index.unique()).assign(
    size=by_region.size(),
    most_popular_time=busiest_hour,
    distance_home=km_from_home[:, 0],
)
px.scatter_matrix(regions)

# Chord diagram

In [125]:
# Coordinates of the first row of every region in `u`. The circular layout
# below does not use them; the variable is kept because it is a notebook global.
# (The scatter trace that used to be built from it was overwritten before it
# was ever drawn, so it has been dropped.)
positions = u.groupby('region').head(1)[['latitude', 'longitude']]

# Region pairs that occur more than once in `gaps`, with the number of
# occurrences stored in an explicitly named `count` column.
d = gaps.groupby(['region_origin', 'region_destination']).size()
d = d[d > 1].reset_index(name='count')

# Place every region that takes part in a link evenly around the unit circle.
labels = pd.concat([d['region_origin'], d['region_destination']]).unique()
regions = pd.DataFrame(index=labels)
# endpoint=False leaves out 2*pi, which would coincide with the angle 0.
p = np.linspace(0, 2*np.pi, regions.shape[0], endpoint=False)
regions = regions.assign(
    x=np.cos(p),
    y=np.sin(p),
)

# Attach the circle coordinates of both ends to every link. The second merge
# adds the suffixes, giving x_origin/y_origin and x_destination/y_destination.
d = d.merge(regions, left_on='region_origin', right_index=True)
d = d.merge(regions, left_on='region_destination', right_index=True, suffixes=('_origin', '_destination'))

nodes = pgo.Scatter(
    x=regions['x'],
    y=regions['y'],
    mode='markers',
    marker=dict(size=20),
    text=regions.index,
)

# One line trace per link; the line width encodes how often the pair occurs.
lines = [
    pgo.Scatter(
        x=[r['x_origin'], r['x_destination']],
        y=[r['y_origin'], r['y_destination']],
        mode='lines',
        line=dict(
            shape='spline',
            width=r['count'],
            color='blue',
            smoothing=0,
        ),
    )
    for _, r in d.iterrows()
]

pgo.Figure(
    data=lines+[nodes],
)

# Force-directed graph

In [126]:
import igraph

# Build a graph with one vertex per region and one edge per row of `gaps`
# (repeated region pairs therefore become parallel edges). Vertices are named
# by the region id rendered as an integer string.
g = igraph.Graph()
labels = pd.concat([gaps['region_origin'], gaps['region_destination']]).unique()
g.add_vertices(labels.astype(int).astype(str))

# Add all edges in a single call instead of one call per row.
edge_names = list(zip(
    gaps['region_origin'].astype(int).astype(str),
    gaps['region_destination'].astype(int).astype(str),
))
g.add_edges(edge_names)

# Compute the Kamada-Kawai layout once, on the finished graph. Doing this
# inside the edge loop recomputed it for every edge and left `layt` undefined
# when `gaps` had no rows.
layt = g.layout('kk')

# Vertex coordinates, in the same order as `labels`.
Xn = [layt[k][0] for k in range(len(labels))]
Yn = [layt[k][1] for k in range(len(labels))]

# Edge coordinates as one polyline per axis; the `None` entries break the line
# between consecutive edges so that all edges fit in a single trace.
Xe = []
Ye = []
for e in g.es:
    start, end = e.tuple
    Xe += [layt[start][0], layt[end][0], None]
    Ye += [layt[start][1], layt[end][1], None]

trace1 = pgo.Scatter(
    x=Xe,
    y=Ye,
    mode='lines',
    line=dict(color='rgb(210,210,210)', width=1),
    hoverinfo='none',
)
trace2 = pgo.Scatter(
    x=Xn,
    y=Yn,
    mode='markers',
    name='ntw',
    marker=dict(
        symbol='circle-dot',
        size=5,
        color='#6959CD',
        line=dict(color='rgb(50,50,50)', width=0.5),
    ),
    text=labels,
    hoverinfo='text',
)

plotly.offline.init_notebook_mode(connected=True)

# Shared axis settings: hide axis line, grid, tick labels and title.
axis = dict(
    showline=False,
    zeroline=False,
    showgrid=False,
    showticklabels=False,
    title='',
)

width = 800
height = 800
layout = pgo.Layout(
    showlegend=False,
    autosize=False,
    width=width,
    height=height,
    xaxis=pgo.layout.XAxis(axis),
    yaxis=pgo.layout.YAxis(axis),
    margin=pgo.layout.Margin(
        l=40,
        r=40,
        b=85,
        t=100,
    ),
    hovermode='closest',
)

data = [trace1, trace2]
fig = pgo.Figure(data=data, layout=layout)
fig.show()