# GPT Data Visualization

*Date: 27.02.2022*

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

Load the processed data. Change the path below if the results file is stored elsewhere.

In [None]:
# Load the pre-processed results; the pickle unpacks into six objects.
# NOTE: unpickling can execute arbitrary code -- only load files you trust.
(
    data_all,
    data_days,
    data_rt_raw,
    index_common,
    info_file,
    info_venue,
) = pd.read_pickle('results/data_processed.pckl')

Average the historical data element-wise over all tables in `data_all`; the result is used to draw the heatmap.

In [None]:
# Element-wise mean of all tables in data_all (presumably one table per week).
n_w = len(data_all)

# Start from a copy so the first table in data_all is left untouched.
data = data_all[0].copy()
for week_idx in range(1, n_w):
    data += data_all[week_idx]

# Turn the running sum into the mean.
data = data.div(n_w)

Draw the heatmap

In [None]:
# Heatmap of the averaged table: rows are labelled "Venues", columns are
# the hours of a week.
fig, ax = plt.subplots(1, 1, figsize=(8, 4))
sns.heatmap(data, cmap='coolwarm', ax=ax)

# Dashed red separators at every 24 h boundary between consecutive days.
for day in range(1, 7):
    ax.axvline(day * 24, color='r', linestyle='--', linewidth=2)

ax.set_xlabel('Time of a week (h)')
ax.set_ylabel('Venues')

# Label every 100th row on the y-axis.
row_ticks = range(0, len(data), 100)
ax.set_yticks(row_ticks)
ax.set_yticklabels(row_ticks)

# Two-level x tick labels: the hour of day every 8 h, plus a day name on a
# second line (leading "\n") placed just after the middle tick of each day.
high_level = ['\nMon', '\nTue', '\nWed', '\nThu', '\nFri', '\nSat', '\nSun']
tick_pos = []
tick_label = []
for slot in range(7 * 3):
    hour_of_week = slot * 8
    tick_pos.append(hour_of_week)
    tick_label.append(str(hour_of_week % 24))

    day, part = divmod(slot, 3)
    if part == 1:
        # The 0.5 offset keeps the day-name tick distinct from the hour tick.
        tick_pos.append(hour_of_week + 0.5)
        tick_label.append(high_level[day])

ax.set_xticks(tick_pos)
ax.set_xticklabels(tick_label, rotation=0, ha='center')

fig.savefig('figures/heatmap_hour_venue.pdf', bbox_inches='tight')

Separate the data into weekdays, Saturdays, and Sundays.

In [None]:
# Per-day profiles obtained by averaging each daily table over its rows.
# Lists with the "0" suffix use only the rows that have at least one
# non-zero value on that day.
day_time, day_time0 = [], []
data_weekday, data_weekday0 = [], []
data_sat, data_sat0 = [], []
data_sun, data_sun0 = [], []

for week in range(n_w):
    for weekday in range(7):  # seven days a week
        df = data_days[weekday][week]

        # Drop the rows that are zero in every column.
        all_zero_rows = (df == 0).all(axis=1)
        df0 = df.loc[~all_zero_rows]

        profile = df.mean(axis=0)
        profile0 = df0.mean(axis=0)

        day_time0.append(profile0)
        day_time.append(profile)

        if weekday < 5:
            # Weekday profiles get a fresh 0..n-1 index so that different
            # weekdays line up when they are concatenated later.
            data_weekday.append(profile.reset_index(drop=True))
            data_weekday0.append(profile0.reset_index(drop=True))
        elif weekday == 5:
            data_sat.append(profile)
            data_sat0.append(profile0)
        else:
            data_sun.append(profile)
            data_sun0.append(profile0)

Draw a heatmap with the time of day on the x-axis and the individual days on the y-axis.

In [None]:
# Heatmap with one row per entry of day_time (one entry per day) and the
# time of day along the x-axis.
fig, ax = plt.subplots(1, 1, figsize=(6, 4))
sns.heatmap(day_time, cmap='coolwarm', ax=ax)
ax.set(xlabel='Time of day (h)', ylabel='Day')
fig.savefig('figures/heatmap_hour_day.pdf', bbox_inches='tight')

Define a function to plot the mean daily GPT popularity (± one standard deviation) for weekdays, Saturdays, and Sundays.

In [None]:
def average_popularity(data_week_agg, data_sat_agg, data_sun_agg, fig_name):
    """Plot mean +/- std popularity curves for the three day types.

    Each ``*_agg`` argument is indexed with ``['mean']`` and ``['std']``;
    the shaded bands are drawn over ``range(24)``, so each is expected to
    hold 24 values. The figure is saved as ``figures/<fig_name>.pdf``.
    """
    fig, ax = plt.subplots(figsize=(5, 4))

    # Inward ticks on all four sides and a thicker frame.
    ax.tick_params(direction='in', top=True, right=True, which='both', width=1.5)
    for side in ('bottom', 'left', 'top', 'right'):
        ax.spines[side].set_linewidth(1.5)

    # (statistics, legend label, line colour, band colour) per day type.
    series = (
        (data_week_agg, 'Weekday', 'b', 'blue'),
        (data_sat_agg, 'Saturday', 'r', 'r'),
        (data_sun_agg, 'Sunday', 'g', 'g'),
    )

    # Mean curves first ...
    for stats, label, line_color, _ in series:
        ax.plot(stats['mean'], label=label, color=line_color)

    # ... then the +/- one standard deviation bands.
    hours = range(24)
    for stats, _, _, band_color in series:
        upper = stats['mean'] + stats['std']
        lower = stats['mean'] - stats['std']
        ax.fill_between(hours, upper, lower, facecolor=band_color, alpha=0.3)

    ax.legend()
    ax.set_xlabel('Time of day (h)')
    ax.set_ylabel('Popularity')
    fig.savefig('figures/'+fig_name+'.pdf', bbox_inches='tight')

Calculate the hourly mean and standard deviation across days, using the average over all POIs.

In [None]:
# Put the per-day profiles side by side (one column per day), then take the
# row-wise mean and std across days; the index is reset to 0..n-1.
data_week_agg, data_sat_agg, data_sun_agg = (
    pd.concat(profiles, axis=1).agg(['mean', 'std'], axis=1).reset_index(drop=True)
    for profiles in (data_weekday, data_sat, data_sun)
)
average_popularity(data_week_agg, data_sat_agg, data_sun_agg, 'average_pattern')

Calculate the hourly mean and standard deviation across days, using only the POIs with at least one non-zero value on that day.

In [None]:
# Same aggregation as for the full data, applied to the profiles built from
# the rows with at least one non-zero value: one column per day, then the
# row-wise mean and std across days.
data_week_agg0, data_sat_agg0, data_sun_agg0 = (
    pd.concat(profiles, axis=1).agg(['mean', 'std'], axis=1).reset_index(drop=True)
    for profiles in (data_weekday0, data_sat0, data_sun0)
)
average_popularity(data_week_agg0, data_sat_agg0, data_sun_agg0, 'average_pattern_nozeros')

Define a function to plot the weekly GPT time series (mean ± one standard deviation) for one venue category.

In [None]:
def draw_week_plot(data_c, c):
    """Plot a mean +/- std time series and save it as a PNG.

    ``data_c`` is indexed with ``['mean']`` and ``['std']``; the shaded band
    spans ``range(len(data_c))``. ``c`` is used as the legend label and in
    the output file name ``figures/TS_<c>.png``.
    """
    fig, ax = plt.subplots(figsize=(12, 3))

    # Inward ticks on all four sides and a thicker frame.
    ax.tick_params(direction='in', top=True, right=True, which='both', width=1.5)
    for side in ('bottom', 'left', 'top', 'right'):
        ax.spines[side].set_linewidth(1.5)

    mean_curve = data_c['mean']
    spread = data_c['std']
    ax.plot(mean_curve, color='b', label=c)

    # Band of +/- one standard deviation around the mean.
    steps = range(len(data_c))
    ax.fill_between(steps, mean_curve + spread, mean_curve - spread,
                    facecolor='blue', alpha=0.3)

    ax.legend()
    ax.set_xlabel('Time (h)')
    ax.set_ylabel('Popularity')
    fig.savefig('figures/TS_'+c+'.png')

In [None]:
# Keep only the venues whose (lat, lon) pair appears in index_common, then
# restore 'lat' and 'lon' as ordinary columns. The non-inplace calls avoid
# mutating a filtered frame in place.
info_venue = info_venue.set_index(['lat', 'lon'])
info_venue = info_venue[info_venue.index.isin(index_common)]
info_venue = info_venue.reset_index(drop=False)

# The ten most frequent venue types (value_counts sorts by descending count).
venue_count = info_venue['node_type'].value_counts()
venue_count = venue_count[:10]

for c in venue_count.index:
    # (lat, lon) index of the venues of this type. It does not depend on the
    # table being processed, so it is built once per type.
    venue_c = info_venue.loc[info_venue['node_type'] == c, ['lat', 'lon']].set_index(['lat', 'lon'])

    # Column-wise mean/std over this type's venues, one frame per table of
    # data_all. Collected in a list and concatenated once, instead of growing
    # a DataFrame inside the loop.
    per_week_stats = []
    for i in range(n_w):
        data_week = data_all[i]
        data_c = data_week[data_week.index.isin(venue_c.index)]
        per_week_stats.append(data_c.agg(['mean', 'std'], axis=0).reset_index(drop=True))

    # Chain the tables along the time axis: after the transpose there is one
    # row per time step and two columns (mean, std).
    data_c_agg = pd.concat(per_week_stats, axis=1).T.reset_index(drop=True)
    data_c_agg.columns = ['mean', 'std']

    draw_week_plot(data_c_agg, c)