In [24]:
import pandas as pd
import plotly.io as pio
import plotly.express as px
import plotly.offline as py
from plotly.offline import iplot 
import plotly.graph_objects as go

In [25]:
data = pd.read_csv('/kaggle/input/france-air-quality-data/france_air_quality_data_merged.csv')
data.head()


Unnamed: 0,Date,City,Specie,count,min,max,median,variance
0,2019-07-01,Nantes,wind speed,72,0.5,4.3,3.0,13.74
1,2019-07-01,Orléans,dew,72,11.5,15.0,13.0,8.87
2,2019-07-01,Nantes,co,45,0.1,0.1,0.1,0.0
3,2019-07-01,Rennes,temperature,48,13.7,21.0,16.5,58.59
4,2019-07-01,Nantes,dew,66,10.0,14.5,12.0,9.99


In [26]:
print(data["City"].unique())

['Nantes' 'Orléans' 'Rennes' 'Perpignan' 'Nice' 'Toulouse' 'Rouen'
 'Toulon' 'Saint-Étienne' 'Bordeaux' 'Clermont-Ferrand' 'Marseille'
 'Paris' 'Besançon' 'Lyon' 'Tours' 'Metz' 'Nancy' 'Strasbourg' 'Amiens'
 'Montpellier' 'Lille' 'Grenoble' 'Limoges' 'Nîmes' 'Dijon' 'Caen']


In [27]:
print(data["Specie"].unique())

['wind speed' 'dew' 'co' 'temperature' 'o3' 'pm10' 'no2' 'humidity'
 'wind gust' 'pm25' 'pressure' 'so2' 'precipitation']


In [28]:
my_cities = ['Bordeaux', 'Grenoble', 'Toulouse']
df_myCities = data.loc[data["City"].isin(my_cities)] 
df_myCities.head()

Unnamed: 0,Date,City,Specie,count,min,max,median,variance
7,2019-07-01,Toulouse,dew,54,18.0,19.0,19.0,2.14
12,2019-07-01,Bordeaux,wind speed,120,0.2,4.3,1.8,10.92
13,2019-07-01,Toulouse,wind gust,39,1.8,10.0,6.0,94.71
20,2019-07-01,Bordeaux,no2,117,0.5,19.2,3.2,150.32
21,2019-07-01,Bordeaux,pressure,120,1015.4,1021.1,1019.9,12.86


In [29]:
df_myCities = df_myCities.reset_index()[['Date','City','Specie','count']]
df_myCities = df_myCities[df_myCities.Specie.apply(lambda x: x == "pm25" or x == "pm10" or x == "no2" or x == "o3" or x == "so2" or x == "co")] 
df_myCities

Unnamed: 0,Date,City,Specie,count
3,2019-07-01,Bordeaux,no2,117
7,2019-07-01,Grenoble,o3,144
8,2019-07-01,Bordeaux,so2,2
9,2019-07-01,Grenoble,pm10,179
15,2019-07-01,Grenoble,so2,9
...,...,...,...,...
23772,2021-07-27,Grenoble,pm10,58
23773,2021-07-27,Bordeaux,no2,44
23775,2021-07-27,Bordeaux,o3,41
23776,2021-07-27,Bordeaux,pm10,42


### **Animated bar plot of pollutants for all cities**

In [30]:
fig = px.bar(df_myCities, x = 'Specie', y = "count", animation_frame = "Date", color = "City")
fig.show()

### **All pollutant counts by time for a city**

In [41]:
df_Bdx = df_myCities[data['City']== 'Bordeaux']
fig_Bdx = px.line(df_Bdx, x="Date", y="count", color="Specie", title="Pollutant counts by time in Bordeaux")
fig_Bdx.update_traces(mode="markers+lines")
fig_Bdx.show()


Boolean Series key will be reindexed to match DataFrame index.



In [42]:
df_Grn = df_myCities[data['City']== 'Grenoble']
fig_Grn = px.line(df_Grn, x="Date", y="count", color="Specie", title="Pollutant counts by time in Grenoble")
fig_Grn.update_traces(mode="markers+lines")
fig_Grn.show()


Boolean Series key will be reindexed to match DataFrame index.



In [43]:
df_Toul = df_myCities[data['City']== 'Toulouse']
fig_Toul = px.line(df_Toul, x="Date", y="count", color="Specie", title="Pollutant counts by time in Toulouse")
fig_Toul.update_traces(mode="markers+lines")
fig_Toul.show()


Boolean Series key will be reindexed to match DataFrame index.



### **Each pollutant counts by time for all three cities**

In [45]:
def generate_scatter_plots(input_df, name_specie):    
    df_specie = input_df[input_df['Specie'] == name_specie]
    fig = px.line(df_specie, x="Date", y="count", color='City', title= str(name_specie)+" counts by time")
    return fig

In [46]:
fig_pm25 = generate_scatter_plots(df_myCities,"pm25")  
fig_pm25.show()

In [47]:
fig_pm10 = generate_scatter_plots(df_myCities,"pm10")  
fig_pm10.show()

In [53]:
fig_so2 = generate_scatter_plots(df_myCities,"so2")  
fig_so2.show()

In [49]:
fig_no2 = generate_scatter_plots(df_myCities,"no2")  
fig_no2.show()

In [52]:
fig_o3 = generate_scatter_plots(df_myCities,"o3")  
fig_o3.show()

In [51]:
fig_co = generate_scatter_plots(df_myCities,"co")  
fig_co.show()

### Apparently there is no data available for "co" specie.

In [57]:
df_co = df_myCities[df_myCities['Specie']== 'co']
df_co

Unnamed: 0,Date,City,Specie,count


### **In progress: Generate scatter plots with monthly averages**

In [58]:
df_pm25 = df_myCities[df_myCities['Specie'] == 'pm25']


In [59]:
def create_df_monthly(input_df):
    by_month = pd.to_datetime(input_df['Date']).dt.to_period('M').value_counts().sort_index()
    by_month.index = pd.PeriodIndex(by_month.index)
    df_month = by_month.rename_axis('month').reset_index(name='counts')
    return df_month

In [60]:
df_monthly = create_df_monthly(df_pm25)
# df_monthly

In [61]:
def scatter_plot(input_df, name_specie):
    fig = go.Figure(data=go.Scatter(x=input_df['month'].astype(dtype=str), 
                        y=input_df['counts'],
                        marker_color='indianred', text="counts"))
    fig.update_layout({"title": str(name_specie) + ' counts by months',
                   "xaxis": {"title":"Months"},
                   "yaxis": {"title":"Total counts"},
                   "showlegend": False})
    fig.update_traces(mode="markers+lines", hovertemplate=None)

    return fig

In [62]:
fig_pm25 = scatter_plot(df_monthly, "pm25")
fig_pm25.show()