# Development of a Widget

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from ipywidgets import interact  # widget manipulation
from IPython.display import HTML

## 1) Data import


In [2]:
from download import download

# Daily measurements of the main pollutants in the Occitanie region (CSV).
url = "https://opendata.arcgis.com/datasets/2ab16a5fb61f42c1a689fd9cc466383f_0.csv"
# Local destination of the CSV; read back by the loading cell.
path_target = "datasets/Mesure_journaliere_Region_Occitanie_Polluants_Principaux.csv"
# replace=True is passed so the call re-fetches the file on every run.
download(url, path_target, replace=True)

Downloading data from https://opendata.arcgis.com/datasets/2ab16a5fb61f42c1a689fd9cc466383f_0.csv (1 byte)



file_sizes: 10.8MB [00:07, 1.38MB/s]                                            


Successfully downloaded file to datasets/Mesure_journaliere_Region_Occitanie_Polluants_Principaux.csv


'datasets/Mesure_journaliere_Region_Occitanie_Polluants_Principaux.csv'

In [3]:
# Load every measurement of the Occitanie dataset that was just downloaded.
occ = pd.read_csv(path_target)

# Quick look at the data: head() shows the first 5 rows by default.
occ.head()

Unnamed: 0,X,Y,nom_dept,nom_com,insee_com,nom_station,code_station,typologie,influence,nom_poll,id_poll_ue,valeur,unite,metrique,date_debut,date_fin,statut_valid,x_l93,y_l93,ObjectId
0,0.179722,43.6303,GERS,PEYRUSSE-VIEILLE,32317,Peyrusse Vieille Rural,FR50020,Rurale Nationale,Fond,PM10,5,15.9,ug.m-3,journaliere,2019-12-27T01:00:00.000Z,2019-12-28T00:00:00.000Z,t,472377,6285316,1
1,3.89861,43.61,HERAULT,MONTPELLIER,34172,Montpellier - Pompignane Trafic,FR50203,Urbaine,Trafic,PM10,5,16.0,ug.m-3,journaliere,2019-12-27T01:00:00.000Z,2019-12-28T00:00:00.000Z,t,772566,6279407,2
2,3.50303,44.522,LOZERE,MENDE,48095,Mende - Vernede urbain,FR50801,Urbaine,Fond,PM10,5,15.9,ug.m-3,journaliere,2019-12-27T01:00:00.000Z,2019-12-28T00:00:00.000Z,t,739976,6380453,3
3,3.50303,44.522,LOZERE,MENDE,48095,Mende - Vernede urbain,FR50801,Urbaine,Fond,NOX as NO2,9,27.6,ug.m-3,journaliere,2019-12-27T01:00:00.000Z,2019-12-28T00:00:00.000Z,t,739976,6380453,4
4,3.50303,44.522,LOZERE,MENDE,48095,Mende - Vernede urbain,FR50801,Urbaine,Fond,O3,7,34.2,ug.m-3,journaliere,2019-12-27T01:00:00.000Z,2019-12-28T00:00:00.000Z,t,739976,6380453,5


## 2) Data treatment

In [4]:
# Reduce each measurement's start timestamp to a monthly period ('M');
# the widgets below filter on this 'date' column.
occ['date'] = pd.to_datetime(occ['date_debut']).dt.to_period('M')


# References:
# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.dt.to_period.html
# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.to_datetime.html



Description des variables: 

   + X et Y : longitude, latitude
   + nom_dept : nom du département
   + nom_com : nom de la commune
   + insee_com: numéro commune par INSEE
   + nom_station : station qui récolte les données
   + code_station : code de la station
   + typologie : type de zone
   + influence : ce qui peut influencer la pollution 
   + nom_poll : nom du polluant
   + id_poll_ue : identifiant du polluant 
   + valeur : valeur mesurée du polluant
   + unite : unité
   + date_debut: année/mois/jour/heure
   + date_fin: idem
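   + statut_valid : ? 
   + x_l93 : coordonnée x, vraisemblablement en projection Lambert 93
   + y_l93 : coordonnée y, vraisemblablement en projection Lambert 93
   + ObjectId : identifiant de ligne (va de 1 à 55325)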

In [5]:
# Summary statistics (count, mean, std, quartiles, min/max) of the numeric columns:
# a quick sanity check worth keeping in mind when exploring a new dataset.
occ.describe()

Unnamed: 0,X,Y,insee_com,id_poll_ue,valeur,x_l93,y_l93,ObjectId
count,55325.0,55325.0,55325.0,55325.0,55325.0,55325.0,55325.0,55325.0
mean,2.208815,43.555469,40419.081862,531.236837,25.037945,635758.467275,6274265.0,27663.0
std,1.408065,0.43324,17258.013798,1684.047733,30.433983,113988.56255,47785.05,15971.096158
min,-0.051944,42.684,11069.0,1.0,-25.2,451389.0,6176047.0,1.0
25%,1.376,43.2303,31483.0,7.0,6.3,568918.0,6241231.0,13832.0
50%,1.84573,43.61,32317.0,8.0,13.6,608927.0,6279364.0,27663.0
75%,3.50483,43.6758,48095.0,38.0,32.4,740996.0,6286912.0,41494.0
max,4.69736,44.9254,81065.0,6001.0,815.6,836000.0,6425805.0,55325.0


In [6]:
occ['nom_com'].unique()  # communes present in the dataset

array(['PEYRUSSE-VIEILLE', 'MONTPELLIER', 'MENDE', 'PERPIGNAN', 'ALBI',
       'TOULOUSE', 'AGDE', 'SAZE', 'SAINT-GELY-DU-FESC', 'LOURDES',
       'CASTRES', 'TARBES', 'NIMES', 'LA CALMETTE', 'LUNEL-VIEL',
       'MIRAMONT-DE-COMMINGES', 'SAINT-GAUDENS', 'MONTGISCARD',
       'BELESTA-EN-LAURAGAIS', 'LATTES', 'SAINT-ESTEVE', 'BLAGNAC',
       'CORNEILHAN', 'BIARS-SUR-CERE', 'SAINT-LAURENT-DES-ARBRES',
       'RODEZ', 'CARCASSONNE', 'CAUNES-MINERVOIS', 'BESSIERES',
       'ROQUEREDONDE', 'VALLABREGUES'], dtype=object)

We only care about ozone: 

In [7]:
occ = occ.loc[occ['nom_poll'] == 'O3']  # keep the ozone rows only

### Which cities are available?

In [8]:
occ['nom_com'].unique()  # communes for which ozone data is available

array(['MENDE', 'SAZE', 'TOULOUSE', 'SAINT-GELY-DU-FESC', 'ALBI',
       'MONTPELLIER', 'LOURDES', 'CASTRES', 'TARBES', 'NIMES',
       'LA CALMETTE', 'PEYRUSSE-VIEILLE', 'MONTGISCARD',
       'BELESTA-EN-LAURAGAIS', 'LATTES', 'SAINT-ESTEVE', 'AGDE',
       'CORNEILHAN', 'MIRAMONT-DE-COMMINGES', 'RODEZ', 'CAUNES-MINERVOIS',
       'CARCASSONNE', 'PERPIGNAN', 'ROQUEREDONDE', 'BIARS-SUR-CERE',
       'VALLABREGUES'], dtype=object)

We have 26 different cities available with ozone data.

Warning: a single city can have several stations!

## 3) Widget Development

The following widget compares ozone pollution in Montpellier, Toulouse and Lourdes for the chosen month:

In [9]:
def poluted_cities0(month):
    """Show a boxen plot of the ozone values of three fixed stations for one month.

    Rows of the notebook-level ``occ`` frame are kept when their
    ``code_station`` is FR50030, FR50200 or FR50042 and their ``date`` equals
    ``month``; ``valeur`` is then plotted per ``nom_com``.

    Parameters
    ----------
    month
        Value compared against ``occ['date']`` (the widget passes one of the
        values found in that column).

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``. When no row matches, a
        message is printed and nothing is drawn.
    """
    station = 'FR50030', 'FR50200', 'FR50042'
    selected = occ['code_station'].isin(station) & (occ['date'] == month)
    df_villes = occ[selected]

    # Nothing recorded by these stations for that month: say so rather than
    # handing an empty frame to seaborn.
    if df_villes.empty:
        print(f"No ozone measurement for {month} at stations {', '.join(station)}.")
        return

    # Scope the dark theme to this figure instead of switching the global
    # matplotlib style for everything plotted afterwards.
    with plt.style.context('dark_background'):
        sns.catplot(x='nom_com', y='valeur',
                    data=df_villes,
                    height=3, aspect=2,
                    kind='boxen')
        plt.xlabel('Cities')
        plt.ylabel('O3')
        plt.title("Comparisons of ozone measurements over a month from 3 cities")
        # Fit the layout only once labels and title exist, so they are taken
        # into account.
        plt.tight_layout()
        plt.show()

In [10]:
# Offer the months in chronological order (unique() returns them in order of
# first appearance); the trailing semicolon hides the echoed function repr.
interact(poluted_cities0, month=sorted(occ.date.dropna().unique()));

interactive(children=(Dropdown(description='month', options=(Period('2019-12', 'M'), Period('2020-02', 'M'), P…

<function __main__.poluted_cities0(month)>

## 4) Second Widget development

The following widget again compares ozone pollution across three cities, but this time both the stations and the month can be chosen:

In [11]:
def poluted_cities(month, station_1='Montpellier Nord - Périurbain',
                    station_2='Lourdes-Lapaca Urbain',
                    station_3='Toulouse-Berthelot Urbain'):
    """Show a boxen plot of the ozone values of three chosen stations for one month.

    Rows of the notebook-level ``occ`` frame are kept when their
    ``nom_station`` is one of the three given names, their ``nom_poll`` is
    ``'O3'`` and their ``date`` equals ``month``; ``valeur`` is then plotted
    per ``nom_com``.

    Parameters
    ----------
    month
        Value compared against ``occ['date']``.
    station_1, station_2, station_3 : str
        Station names matched against ``occ['nom_station']``.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``. When no row matches, a
        message is printed and nothing is drawn.
    """
    stations = station_1, station_2, station_3

    keep = (
        occ['nom_station'].isin(stations)   # only the requested stations
        & (occ['nom_poll'] == 'O3')         # only ozone
        & (occ['date'] == month)            # only the requested month
    )
    df_station = occ.loc[keep, ['nom_com', 'nom_station', 'valeur', 'date']]

    # Nothing recorded by these stations for that month: say so rather than
    # handing an empty frame to seaborn.
    if df_station.empty:
        print(f"No ozone measurement for {month} at the selected stations.")
        return

    # Scope the dark theme to this figure instead of switching the global
    # matplotlib style for everything plotted afterwards.
    with plt.style.context('dark_background'):
        # NOTE(review): values are grouped by commune (nom_com), so two
        # selected stations located in the same commune share one box.
        sns.catplot(x='nom_com', y='valeur',
                    data=df_station,
                    height=3, aspect=2,
                    kind='boxen')
        plt.xlabel('Cities')
        plt.ylabel('O3')
        plt.title("Comparison of ozone measurements over a month from 3 cities")
        # Fit the layout only once labels and title exist, so they are taken
        # into account.
        plt.tight_layout()
        plt.show()

In [12]:
# Build each option list once, sorted so the dropdowns are easy to scan
# (unique() returns values in order of first appearance).
station_names = sorted(occ.nom_station.dropna().unique())
months = sorted(occ.date.dropna().unique())

# The trailing semicolon hides the echoed function repr.
interact(poluted_cities, station_1=station_names,
         station_2=station_names,
         station_3=station_names,
         month=months);

interactive(children=(Dropdown(description='month', options=(Period('2019-12', 'M'), Period('2020-02', 'M'), P…

<function __main__.poluted_cities2(month, station_1='Montpellier Nord - Périurbain', station_2='Lourdes-Lapaca Urbain', station_3='Toulouse-Berthelot Urbain')>