# Looking at CPC consumption for top-three tiles

In [None]:
import geopandas as gpd
import pandas as pd
import numpy as np
from tqdm import tqdm
import datetime
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the map layer from the intermediate GeoPackage. Later cells use its
# `tile_id`, `code_com`, `name_com` and `cities` columns to attach commune
# information to the traffic records.
map_path = "../midsave/map_crime.gpkg"
map_df = gpd.read_file(map_path)

In [None]:
# Load the commune-level CPC table. `code_com` is forced to `str` so it can
# be compared with the string commune codes used further down
# (presumably this also preserves leading zeros - confirm against the CSV).
cpc_path = "../midsave/cpc_com.csv"
cpc = pd.read_csv(cpc_path, dtype={'code_com': str})

Run only for the cities that contain tiles with high Tor traffic per 1000.

In [None]:
# Cities whose daily traffic files are loaded; each name is used both as a
# directory name and as a file-name prefix when the files are read.
cities = [
    'Toulouse',
    'Dijon',
    'Orleans',
]

# Application(s) to load; the name also becomes the value column of the
# resulting table.
apps = [
    'Tor',
]

# Traffic direction suffix(es) of the input files
# ('DL' presumably stands for downlink - confirm against the data set docs).
traffic_dir = [
    'DL',
]

In [None]:
# Build `df`: hourly traffic per tile, stacked over every configured city,
# application and traffic direction, for 16 March - 31 May 2019.
#
# Each daily input file is read as space-separated rows made of a tile id
# followed by 96 values, one per 15-minute slot; the slots are summed into
# hourly totals before the days are stacked.
#
# Resulting columns: tile_id, date (YYYYMMDD string), hour (0-23), one value
# column per application, traffic_dir, cities.

# Column labels for the 96 quarter-hour slots ('00:00', '00:15', ..., '23:45').
# They are the same for every day, so they are built once instead of per file.
slot_labels = [f'{slot // 4:02d}:{15 * (slot % 4):02d}' for slot in range(96)]
columns = ['tile_id'] + slot_labels
key_cols = ['tile_id', 'date', 'hour']

# Frames are collected in lists and concatenated once. Growing a frame with
# repeated pd.concat onto an initially empty DataFrame copies all previous
# rows on every step and relies on pandas' deprecated treatment of empty
# entries when it determines the result dtypes.
city_frames = []

for city_str in cities:

    city_df = pd.DataFrame()

    for app_str in apps:
        for rate_str in traffic_dir:

            day_frames = []

            for month in range(3, 6):
                print("Month", month, "in", city_str, "for", app_str, rate_str)

                # The covered period starts on 16 March; April has 30 days,
                # March and May have 31.
                first_day = 16 if month == 3 else 1
                last_day = 30 if month == 4 else 31

                for day in tqdm(range(first_day, last_day + 1)):
                    day_str = f'2019{month:02d}{day:02d}'

                    day_df = pd.read_csv(
                        f'../Data/Netmob/{city_str}/{app_str}/{day_str}/{city_str}_{app_str}_{day_str}_{rate_str}.txt',
                        sep=" ",
                        names=columns,
                    )

                    # Wide (one column per slot) -> long (one row per tile and slot).
                    day_df = pd.melt(day_df, id_vars=['tile_id'], var_name='time', value_name=app_str)
                    day_df["date"] = day_str
                    day_df['hour'] = pd.to_datetime(day_df['time'], format='%H:%M').dt.hour

                    # Sum the four quarter-hour slots of each hour. The value
                    # column is named after the application, so it must be
                    # selected through `app_str` rather than a fixed 'Tor'.
                    day_df = (day_df
                              .groupby(key_cols)[app_str].sum()
                              .reset_index())

                    day_frames.append(day_df)

            combo_df = pd.concat(day_frames)

            if city_df.empty:
                city_df = combo_df
            else:
                # merge() already returns a fresh RangeIndex; an extra
                # reset_index() would add a stray 'index' column and fail on
                # the next merge.
                # NOTE(review): with several traffic directions for the same
                # application the value columns collide (pandas suffixes them
                # _x/_y) and `traffic_dir` below only records the last
                # direction - revisit before configuring more than one.
                city_df = city_df.merge(combo_df, on=key_cols, how='left')

            city_df["traffic_dir"] = rate_str
            city_df["cities"] = city_str

    if not city_df.empty:
        city_frames.append(city_df)

# Fall back to an empty frame when nothing was configured / loaded.
df = pd.concat(city_frames) if city_frames else pd.DataFrame()

In [None]:
# Preview the first five rows of the stacked hourly traffic table.
df.head(n=5)

# Plot timelines of communes

Communes of interest (the three communes with the highest CPC estimates, as derived in `2_compute_cpc.ipynb`): 31352, 21192, 45072

In [None]:
# Attach commune information (from the map layer) and the factor `c` (from
# the CPC table) to every hourly tile record, then keep only the three
# communes of interest.
# The original filter also listed "75116", which is not one of the three
# documented communes and is never plotted, so it is dropped here.
communes_of_interest = ["31352", "21192", "45072"]

top3 = (df
        .merge(map_df[['tile_id', 'code_com', 'name_com', 'cities']], on=['tile_id', 'cities'], how='left')
        .merge(cpc[['code_com', 'c']], on=['code_com'], how='left')
        .loc[lambda d: d['code_com'].isin(communes_of_interest)]
        # Explicit copy: later cells add columns to `top3`; without it pandas
        # raises SettingWithCopyWarning because the frame is a filtered slice.
        .copy())

In [None]:
# Combine the day string and the hour of day into a single timestamp per row
# so the records can be drawn on one continuous time axis.
hour_text = top3['hour'].astype(str)
stamp_text = top3['date'] + hour_text
top3['datehour'] = pd.to_datetime(stamp_text, format='%Y%m%d%H')

In [None]:
# Scale the hourly Tor traffic of each record by the factor `c` that was
# merged in from the CPC table.
top3['Tor_scaled'] = top3['Tor'].mul(top3['c'])

In [None]:
# Preview the first five rows of the filtered and scaled table.
top3.head(n=5)

In [None]:
# Timeline for commune 31352: scaled Tor traffic over time, one thin line per
# tile (tiles are distinguished by hue).
commune_rows = top3.loc[top3.code_com == "31352"]

fig, ax = plt.subplots(1, figsize=(12,2))
sns.lineplot(data=commune_rows, x="datehour", y="Tor_scaled", hue="tile_id", lw=0.1, ax=ax)

# Toggle the axes frame; a freshly created axes has it on, so this hides it.
ax.set_frame_on(not ax.get_frame_on())

# Replace the per-tile legend with an empty, frameless one. To show it
# instead, use: ax.legend(loc='upper left', title='Tiles', frameon=False)
ax.legend([], [], frameon=False)

ax.set_xlabel("Time stamp")
ax.set_ylabel("Normalized DL traffic")
fig.savefig('../viz/timeline_1_31352.png', dpi=300, bbox_inches="tight")

In [None]:
# Timeline for commune 21192: scaled Tor traffic over time, one thin line per
# tile (tiles are distinguished by hue).
commune_rows = top3.loc[top3.code_com == "21192"]

fig, ax = plt.subplots(1, figsize=(12,2))
sns.lineplot(data=commune_rows, x="datehour", y="Tor_scaled", hue="tile_id", lw=0.1, ax=ax)

# Toggle the axes frame; a freshly created axes has it on, so this hides it.
ax.set_frame_on(not ax.get_frame_on())

# Replace the per-tile legend with an empty, frameless one. To show it
# instead, use: ax.legend(loc='upper left', title='Tiles', frameon=False)
ax.legend([], [], frameon=False)

ax.set_xlabel("Time stamp")
ax.set_ylabel("Normalized DL traffic")
fig.savefig('../viz/timeline_2_21192.png', dpi=300, bbox_inches="tight")

In [None]:
# Timeline for commune 45072: scaled Tor traffic over time, one thin line per
# tile (tiles are distinguished by hue).
commune_rows = top3.loc[top3.code_com == "45072"]

fig, ax = plt.subplots(1, figsize=(12,2))
sns.lineplot(data=commune_rows, x="datehour", y="Tor_scaled", hue="tile_id", lw=0.1, ax=ax)

# Toggle the axes frame; a freshly created axes has it on, so this hides it.
ax.set_frame_on(not ax.get_frame_on())

# Replace the per-tile legend with an empty, frameless one. To show it
# instead, use: ax.legend(loc='upper left', title='Tiles', frameon=False)
ax.legend([], [], frameon=False)

ax.set_xlabel("Time stamp")
ax.set_ylabel("Normalized DL traffic")
fig.savefig('../viz/timeline_3_45072.png', dpi=300, bbox_inches="tight")