In [None]:
import geopandas as gpd
import pandas as pd
import numpy as np
from tqdm import tqdm
import datetime
import matplotlib.pyplot as plt
import seaborn as sns
from mpl_toolkits.axes_grid1 import make_axes_locatable

Consumption data

In [None]:
# Load the consumption table. `code_com` is forced to str
# (presumably to keep leading zeros in commune codes — TODO confirm).
cpc = pd.read_csv("../midsave/cpc_com.csv", dtype={'code_com': str})

In [None]:
# Sanity check: (rows, columns) of the consumption table.
cpc.shape

Commune-level map

In [None]:
# Read the COMMUNE layer of the ADMIN-EXPRESS-COG shapefile with geopandas.
# NOTE(review): the path suggests the 2019-09-24 delivery in WGS84 — confirm.
map_com = gpd.read_file("../external_data/ADMIN-EXPRESS-COG_2-0__SHP__FRA_2019-09-24/ADMIN-EXPRESS-COG/1_DONNEES_LIVRAISON_2019-09-24/ADE-COG_2-0_SHP_WGS84_FR/COMMUNE.shp")

In [None]:
# Peek at a single row to see which columns the shapefile provides.
map_com.head(1)

In [None]:
# Attach the consumption figures to the commune geometries.
# The shapefile's INSEE_COM column is renamed to `code_com` so it can serve as
# the join key; the inner join keeps only communes present in both tables.
map_df = (
    map_com
    .rename(columns={"INSEE_COM": "code_com"})
    [['code_com', 'geometry']]
    .merge(
        cpc[['cities', 'code_com', 'cpc_per_1000']],
        how='inner',
        on='code_com',
    )
)

In [None]:
# Peek at one merged row (code_com, geometry, cities, cpc_per_1000).
map_df.head(1)

In [None]:
# (rows, columns) after the inner merge.
map_df.shape

In [None]:
# Shape after keeping one row per commune code; if the row count equals that
# of `map_df.shape`, the merge produced no duplicated communes.
map_df.drop_duplicates(subset='code_com').shape

In [None]:
# Inspect the rows labelled "Paris", highest commune code first.
map_df[map_df['cities'] == "Paris"].sort_values('code_com', ascending=False)

In [None]:
# City labels to map; each must match a value of the `cities` column exactly.
cities = [
    'Bordeaux',
    'Clermont-Ferrand',
    'Dijon',
    'Grenoble',
    'Lille',
    'Lyon',
    'Mans',
    'Marseille',
    'Metz',
    'Montpellier',
    'Nancy',
    'Nantes',
    'Nice',
    'Orleans',
    'Paris',
    'Rennes',
    'Saint-Etienne',
    'Strasbourg',
    'Toulouse',
    'Tours',
]

In [None]:
# Draw one choropleth of `cpc_per_1000` per city and save it to ../viz/.
# All maps share the same colour limits (taken over the whole of `map_df`) so
# that colours are comparable from one city to the next.

# Loop-invariant settings, computed once. The limits are plain scalars:
# `map_df[['cpc_per_1000']].min()` would return a one-element Series, which
# only works as vmin/vmax through implicit coercion.
cpc_min = map_df['cpc_per_1000'].min()
cpc_max = map_df['cpc_per_1000'].max()
city_cmap = sns.cubehelix_palette(as_cmap=True)

for city_str in tqdm(cities):

    fig, ax = plt.subplots(1, 1, figsize=(10, 10))

    # Reserve a thin strip below the map for the horizontal colourbar.
    divider = make_axes_locatable(ax)
    cax = divider.append_axes("bottom", size="5%", pad=0.1)

    # A boolean mask (rather than an f-string `query`) also works for city
    # names that contain quote characters.
    city_df = map_df.loc[map_df['cities'] == city_str]

    city_df.plot('cpc_per_1000',
                 cmap=city_cmap,
                 ax=ax,
                 legend=True,
                 cax=cax,
                 legend_kwds={'orientation': 'horizontal'},
                 vmin=cpc_min,
                 vmax=cpc_max)

    cax.set_title('Normalized download traffic', loc='center', fontsize=10)

    ax.axis('off')

    fig.savefig(f'../viz/{city_str}.png', dpi=300, bbox_inches="tight")

    plt.show()

    # Explicitly release the figure so that backends which do not close
    # figures on show() do not accumulate one open figure per city.
    plt.close(fig)

# Heatmap generation

In [None]:
# Calendar order of the days, used to give `Day` an ordered categorical dtype
# so that sorting (and later pivoting) follows Monday..Sunday rather than
# alphabetical order.
weekdays = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]

# Load the table, convert `Day` to the ordered categorical, sort by it and
# renumber the rows from 0.
heatmap = (
    pd.read_csv("../midsave/heatmap.csv")
    .assign(Day=lambda df: pd.Categorical(df['Day'], categories=weekdays, ordered=True))
    .sort_values(by='Day')
    .reset_index(drop=True)
)

In [None]:
# Peek at one row of the sorted heatmap table.
heatmap.head(1)

#### Tor

In [None]:
# Day x Hour heatmap of the `Tor_scaled` column, saved to ../viz/heatmap_tor.png.
fig, ax = plt.subplots(1, figsize=(12, 2.5))

# pivot() is called with keywords: positional arguments were deprecated in
# pandas 1.5 and raise TypeError from pandas 2.0 on.
# `ax=ax` draws on the axes created above instead of the implicit current axes.
sns.heatmap(heatmap.pivot(index="Day", columns="Hour", values="Tor_scaled"),
            cmap=sns.cubehelix_palette(as_cmap=True),
            cbar_kws={'label': 'Normalized download traffic'},
            ax=ax)

fig.savefig('../viz/heatmap_tor.png', dpi=300, bbox_inches="tight")

#### Web Adult

In [None]:
# Day x Hour heatmap of the `Web_Adult` column, saved to ../viz/heatmap_wa.png.
fig, ax = plt.subplots(1, figsize=(12, 2.5))

# pivot() is called with keywords: positional arguments were deprecated in
# pandas 1.5 and raise TypeError from pandas 2.0 on.
# `ax=ax` draws on the axes created above instead of the implicit current axes.
sns.heatmap(heatmap.pivot(index="Day", columns="Hour", values="Web_Adult"),
            cmap=sns.cubehelix_palette(as_cmap=True),
            cbar_kws={'label': 'Normalized download traffic'},
            ax=ax)

fig.savefig('../viz/heatmap_wa.png', dpi=300, bbox_inches="tight")

#### YouTube

In [None]:
# Day x Hour heatmap of the `YouTube` column, saved to ../viz/heatmap_yt.png.
fig, ax = plt.subplots(1, figsize=(12, 2.5))

# pivot() is called with keywords: positional arguments were deprecated in
# pandas 1.5 and raise TypeError from pandas 2.0 on.
# `ax=ax` draws on the axes created above instead of the implicit current axes.
sns.heatmap(heatmap.pivot(index="Day", columns="Hour", values="YouTube"),
            cmap=sns.cubehelix_palette(as_cmap=True),
            cbar_kws={'label': 'Normalized download traffic'},
            ax=ax)

fig.savefig('../viz/heatmap_yt.png', dpi=300, bbox_inches="tight")

#### Top 10

In [None]:
# Replace `heatmap` with the contents of heatmap_10.csv, prepared the same way:
# `Day` becomes an ordered categorical (order given by `weekdays`, defined in
# an earlier cell), rows are sorted by it and renumbered from 0.
heatmap = (
    pd.read_csv("../midsave/heatmap_10.csv")
    .assign(Day=lambda df: pd.Categorical(df['Day'], categories=weekdays, ordered=True))
    .sort_values(by='Day')
    .reset_index(drop=True)
)

In [None]:
# Day x Hour heatmap of `Tor_scaled` for the currently loaded table,
# saved to ../viz/heatmap_tor_10.png.
fig, ax = plt.subplots(1, figsize=(12, 2.5))

# pivot() is called with keywords: positional arguments were deprecated in
# pandas 1.5 and raise TypeError from pandas 2.0 on.
# `ax=ax` draws on the axes created above instead of the implicit current axes.
sns.heatmap(heatmap.pivot(index="Day", columns="Hour", values="Tor_scaled"),
            cmap=sns.cubehelix_palette(as_cmap=True),
            cbar_kws={'label': 'Normalized download traffic'},
            ax=ax)

fig.savefig('../viz/heatmap_tor_10.png', dpi=300, bbox_inches="tight")

#### Number 1

In [None]:
# Replace `heatmap` with the contents of heatmap_1_31352.csv, prepared the same
# way: `Day` becomes an ordered categorical (order given by `weekdays`, defined
# in an earlier cell), rows are sorted by it and renumbered from 0.
heatmap = (
    pd.read_csv("../midsave/heatmap_1_31352.csv")
    .assign(Day=lambda df: pd.Categorical(df['Day'], categories=weekdays, ordered=True))
    .sort_values(by='Day')
    .reset_index(drop=True)
)

In [None]:
# Day x Hour heatmap of `Tor_scaled` for the currently loaded table,
# saved to ../viz/heatmap_1_31352.png.
fig, ax = plt.subplots(1, figsize=(12, 2.5))

# pivot() is called with keywords: positional arguments were deprecated in
# pandas 1.5 and raise TypeError from pandas 2.0 on.
# `ax=ax` draws on the axes created above instead of the implicit current axes.
sns.heatmap(heatmap.pivot(index="Day", columns="Hour", values="Tor_scaled"),
            cmap=sns.cubehelix_palette(as_cmap=True),
            cbar_kws={'label': 'Normalized download traffic'},
            ax=ax)

fig.savefig('../viz/heatmap_1_31352.png', dpi=300, bbox_inches="tight")

#### Number 2

In [None]:
# Replace `heatmap` with the contents of heatmap_2_21192.csv, prepared the same
# way: `Day` becomes an ordered categorical (order given by `weekdays`, defined
# in an earlier cell), rows are sorted by it and renumbered from 0.
heatmap = (
    pd.read_csv("../midsave/heatmap_2_21192.csv")
    .assign(Day=lambda df: pd.Categorical(df['Day'], categories=weekdays, ordered=True))
    .sort_values(by='Day')
    .reset_index(drop=True)
)

In [None]:
# Day x Hour heatmap of `Tor_scaled` for the currently loaded table,
# saved to ../viz/heatmap_2_21192.png.
fig, ax = plt.subplots(1, figsize=(12, 2.5))

# pivot() is called with keywords: positional arguments were deprecated in
# pandas 1.5 and raise TypeError from pandas 2.0 on.
# `ax=ax` draws on the axes created above instead of the implicit current axes.
sns.heatmap(heatmap.pivot(index="Day", columns="Hour", values="Tor_scaled"),
            cmap=sns.cubehelix_palette(as_cmap=True),
            cbar_kws={'label': 'Normalized download traffic'},
            ax=ax)

fig.savefig('../viz/heatmap_2_21192.png', dpi=300, bbox_inches="tight")

#### Number 3

In [None]:
# Replace `heatmap` with the contents of heatmap_3_45072.csv, prepared the same
# way: `Day` becomes an ordered categorical (order given by `weekdays`, defined
# in an earlier cell), rows are sorted by it and renumbered from 0.
heatmap = (
    pd.read_csv("../midsave/heatmap_3_45072.csv")
    .assign(Day=lambda df: pd.Categorical(df['Day'], categories=weekdays, ordered=True))
    .sort_values(by='Day')
    .reset_index(drop=True)
)

In [None]:
# Day x Hour heatmap of `Tor_scaled` for the currently loaded table,
# saved to ../viz/heatmap_3_45072.png.
fig, ax = plt.subplots(1, figsize=(12, 2.5))

# pivot() is called with keywords: positional arguments were deprecated in
# pandas 1.5 and raise TypeError from pandas 2.0 on.
# `ax=ax` draws on the axes created above instead of the implicit current axes.
sns.heatmap(heatmap.pivot(index="Day", columns="Hour", values="Tor_scaled"),
            cmap=sns.cubehelix_palette(as_cmap=True),
            cbar_kws={'label': 'Normalized download traffic'},
            ax=ax)

fig.savefig('../viz/heatmap_3_45072.png', dpi=300, bbox_inches="tight")