# Development of a Widget

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from ipywidgets import interact  # widget manipulation
from IPython.display import HTML

## 1)  Data importation:


In [2]:
from download import download

# We choose daily data
url = "https://opendata.arcgis.com/datasets/2ab16a5fb61f42c1a689fd9cc466383f_0.csv"
path_target = "datasets/Mesure_journaliere_Region_Occitanie_Polluants_Principaux.csv"
download(url, path_target, replace=True)

Downloading data from https://opendata.arcgis.com/datasets/2ab16a5fb61f42c1a689fd9cc466383f_0.csv (1 byte)



file_sizes: 10.8MB [00:01, 7.71MB/s]                                            


Successfully downloaded file to datasets/Mesure_journaliere_Region_Occitanie_Polluants_Principaux.csv


'datasets/Mesure_journaliere_Region_Occitanie_Polluants_Principaux.csv'

In [3]:
occ = pd.read_csv(path_target)
# all data of Occitanie

occ.head(n=5) # simpliest way too watch data

Unnamed: 0,X,Y,nom_dept,nom_com,insee_com,nom_station,code_station,typologie,influence,nom_poll,id_poll_ue,valeur,unite,metrique,date_debut,date_fin,statut_valid,x_l93,y_l93,ObjectId
0,3.89861,43.61,HERAULT,MONTPELLIER,34172,Montpellier - Pompignane Trafic,FR50203,Urbaine,Trafic,NO,38,56.8,ug.m-3,journaliere,2020-02-20T01:00:00.000Z,2020-02-21T00:00:00.000Z,t,772566,6279407,1001
1,2.52484,43.3233,AUDE,CAUNES-MINERVOIS,11081,Caunes-Minervois - rural,FR50806,Rurale Régionale,Fond,O3,7,65.9,ug.m-3,journaliere,2020-02-20T01:00:00.000Z,2020-02-21T00:00:00.000Z,t,661437,6247240,1002
2,3.50483,43.2878,HERAULT,AGDE,34003,Agathois-Piscénois - Périurbain,FR50210,Périurbaine,Fond,NO,38,2.5,ug.m-3,journaliere,2020-02-20T01:00:00.000Z,2020-02-21T00:00:00.000Z,t,740996,6243312,1003
3,0.179722,43.6303,GERS,PEYRUSSE-VIEILLE,32317,Peyrusse Vieille Rural,FR50020,Rurale Nationale,Fond,PM10,5,7.0,ug.m-3,journaliere,2020-02-20T01:00:00.000Z,2020-02-21T00:00:00.000Z,t,472377,6285316,1004
4,4.37422,43.8344,GARD,NIMES,30189,Nîmes Sud - urbain,FR50211,Urbaine,Fond,PM10,5,14.6,ug.m-3,journaliere,2020-02-20T01:00:00.000Z,2020-02-21T00:00:00.000Z,t,810536,6304894,1005


## 2) Data treatment

In [4]:
occ['date'] = pd.to_datetime(occ['date_debut']).dt.to_period('M')


#https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.dt.to_period.html
#https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.to_datetime.html



Description des variables: 

   + X et Y : lattitude, longitude
   + nom_dept : nom du département
   + nom_com : nom de la commune
   + insee_com: numéro commune par INSEE
   + nom_station : station qui récolte les données
   + code_station : code de la station
   + typologie : type de zone
   + influence : ce qui peut influencer la pollution 
   + nom_poll : nom du polluant
   + id_poll_ue : identifiant du polluant 
   + valeur : valeur mesurée du polluant
   + unite : unité
   + date_debut: année/mois/jour/heure
   + date_fin: idem
   + status_valid : ? 
   + x_l93 : ?
   + y_l93 : ?
   + objectld : ?

In [5]:
# variables description : do not forget that it's a possibility
occ.describe()

Unnamed: 0,X,Y,insee_com,id_poll_ue,valeur,x_l93,y_l93,ObjectId
count,54905.0,54905.0,54905.0,54905.0,54905.0,54905.0,54905.0,54905.0
mean,2.205786,43.553733,40434.129934,532.253055,24.947468,635512.908988,6274074.0,27453.0
std,1.407911,0.431496,17265.948034,1685.549645,30.402986,113977.683653,47592.26,15849.852602
min,-0.051944,42.684,11069.0,1.0,-8.5,451389.0,6176047.0,1.0
25%,1.376,43.2303,31483.0,7.0,6.2,568918.0,6241231.0,13727.0
50%,1.84573,43.61,32317.0,8.0,13.6,608927.0,6279364.0,27453.0
75%,3.50483,43.6431,48095.0,38.0,32.3,740996.0,6285316.0,41179.0
max,4.69736,44.9254,81065.0,6001.0,815.6,836000.0,6425805.0,54905.0


In [6]:
occ.nom_com.unique() # cities available 

array(['MONTPELLIER', 'CAUNES-MINERVOIS', 'AGDE', 'PEYRUSSE-VIEILLE',
       'NIMES', 'BESSIERES', 'LUNEL-VIEL', 'MENDE', 'ALBI', 'TOULOUSE',
       'CASTRES', 'LOURDES', 'TARBES', 'SAINT-GAUDENS',
       'SAINT-LAURENT-DES-ARBRES', 'LA CALMETTE', 'LATTES', 'PERPIGNAN',
       'SAINT-GELY-DU-FESC', 'SAZE', 'MIRAMONT-DE-COMMINGES',
       'CARCASSONNE', 'RODEZ', 'BIARS-SUR-CERE', 'BLAGNAC', 'CORNEILHAN',
       'BELESTA-EN-LAURAGAIS', 'MONTGISCARD', 'SAINT-ESTEVE',
       'ROQUEREDONDE', 'VALLABREGUES'], dtype=object)

We only care about ozone: 

In [7]:
occ = occ[occ['nom_poll'] == 'O3'] # only ozone

### Which cities are available?

In [8]:
occ.nom_com.unique() # cities with ozone data available 

array(['CAUNES-MINERVOIS', 'MENDE', 'LATTES', 'TOULOUSE', 'NIMES',
       'LA CALMETTE', 'ALBI', 'SAINT-GELY-DU-FESC', 'LOURDES', 'CASTRES',
       'TARBES', 'SAZE', 'PERPIGNAN', 'MONTPELLIER', 'CARCASSONNE',
       'RODEZ', 'CORNEILHAN', 'BELESTA-EN-LAURAGAIS', 'PEYRUSSE-VIEILLE',
       'MONTGISCARD', 'SAINT-ESTEVE', 'MIRAMONT-DE-COMMINGES', 'AGDE',
       'BIARS-SUR-CERE', 'ROQUEREDONDE', 'VALLABREGUES'], dtype=object)

We have 27 different cities available with ozone data

Warning, many stations available by city!

## 3) Widget Developement

The following widget compares ozone pollution in Montpellier, Toulouse and Lourdes for the choosen month:

In [9]:
def poluted_cities0(month):
    
    station = 'FR50030', 'FR50200', 'FR50042'
    df_villes = occ[occ['code_station'].isin(station)]
    
    df_villes = df_villes[df_villes.date == month]
    
    plt.style.use('dark_background')
    sns.catplot(x = 'nom_com', y = 'valeur', 
            data = df_villes,
            height = 3, aspect = 2,
            kind = 'boxen')
    plt.tight_layout()
    plt.xlabel('Cities')
    plt.ylabel('O3')
    plt.title("Comparisons of ozone measurements over a month from 3 cities")
    plt.show()

In [10]:
interact(poluted_cities0, month=occ.date.unique())

interactive(children=(Dropdown(description='month', options=(Period('2020-02', 'M'), Period('2020-01', 'M'), P…

<function __main__.poluted_cities0(month)>

## 4) Second Widget development

The following widget compares again pollution in three cities, but we can both choose the cities and the date:

In [11]:
def poluted_cities(month, station_1='Montpellier Nord - Périurbain', 
                    station_2='Lourdes-Lapaca Urbain', 
                    station_3='Toulouse-Berthelot Urbain'):
    
    stations = station_1, station_2, station_3
    
    df_station = occ[occ['nom_station'].isin(stations)] # only stations we ask
    df_station = df_station[df_station['nom_poll'] == 'O3'] # only ozone  
    df_station = df_station[df_station.date == month]
    df_station = df_station[['nom_com', 'nom_station', 'valeur', 'date']]
    
    plt.style.use('dark_background')
    sns.catplot(x = 'nom_com', y = 'valeur', 
            data = df_station,
            height = 3, aspect = 2,
            kind = 'boxen')
    plt.tight_layout()
    plt.xlabel('Cities')
    plt.ylabel('O3')
    plt.title("Comparison of ozone measurements over a month from 3 cities")
    plt.show()    

In [12]:
interact(poluted_cities, station_1=occ.nom_station.unique(), 
         station_2=occ.nom_station.unique(), 
         station_3=occ.nom_station.unique(), 
         month=occ.date.unique())

interactive(children=(Dropdown(description='month', options=(Period('2020-02', 'M'), Period('2020-01', 'M'), P…

<function __main__.poluted_cities(month, station_1='Montpellier Nord - Périurbain', station_2='Lourdes-Lapaca Urbain', station_3='Toulouse-Berthelot Urbain')>