In [None]:
# Code for extracting the annual SOMO35 Score for each station in ATMO HAUTS DE FRANCE (Région: HAUTS DE FRANCE)

In [1]:
# Importing required packages
import pandas as pd

import os

# Base directory of the raw air-quality exports (relative to the notebook).
# The path is assembled from separate components instead of a single
# backslash-separated literal: in a normal string "\r" is parsed as a carriage
# return and a trailing \" escapes the closing quote, leaving the literal
# unterminated. The final "" component keeps the trailing path separator that
# the original value ended with.
base_path = os.path.join(
    ".",
    "data",
    "raw",
    "Données de la qualité de l'air (Indicateurs SOMO35 et ATMO)",
    "",
)


Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [3]:
# O3 ATMO HAUTS DE FRANCE
# Load the daily O3 export and derive the per-row SOMO35 subscore.

# File path to the data. Folder and file name are passed as separate
# components so that no backslash sits inside a string literal ("\E" is an
# invalid escape sequence) and the path also resolves on non-Windows systems.
O3_ATMO_HAUTS_DE_FRANCE_file_path = os.path.join(
    base_path,
    "O3 Max Horaire Journalier",
    "Export Max. journalier moy. hor. - O3 - ATMO HAUTS DE FRANCE.csv",
)

# The export is read with ';' as the field delimiter
df_O3 = pd.read_csv(O3_ATMO_HAUTS_DE_FRANCE_file_path, delimiter=';')

# Display the first few rows of the raw DataFrame
print(df_O3.head())

# Subscore = amount by which 'valeur' exceeds 70, floored at 0.
# Vectorised equivalent of max(x - 70, 0) applied to every row; missing
# values stay missing.
# NOTE(review): 70 is presumably the SOMO35 cut-off (35 ppb expressed in
# µg/m³) — confirm the unit of 'valeur' in the export.
df_O3['SOMO35 Subscore'] = (df_O3['valeur'] - 70).clip(lower=0)

# Display the DataFrame again, now including the new column
print(df_O3.head())


         Date de début          Date de fin             Organisme   code zas  \
0  2023/01/01 00:00:00  2023/01/01 23:59:59  ATMO HAUTS DE FRANCE  FR32ZAG01   
1  2023/01/01 00:00:00  2023/01/01 23:59:59  ATMO HAUTS DE FRANCE  FR32ZAG01   
2  2023/01/01 00:00:00  2023/01/01 23:59:59  ATMO HAUTS DE FRANCE  FR32ZRE01   
3  2023/01/01 00:00:00  2023/01/01 23:59:59  ATMO HAUTS DE FRANCE  FR32ZAG01   
4  2023/01/01 00:00:00  2023/01/01 23:59:59  ATMO HAUTS DE FRANCE  FR32ZRE01   

                  Zas code site              nom site     type d'implantation  \
0            ZAG BLDV   FR06001  Valenciennes Acacias                 Urbaine   
1            ZAG BLDV   FR06003        Douai Theuriet                 Urbaine   
2  ZR HAUTS-DE-FRANCE   FR06007      Maubeuge Joyeuse                 Urbaine   
3            ZAG BLDV   FR06011        Denain Villars             Périurbaine   
4  ZR HAUTS-DE-FRANCE   FR06133            Cartignies  Rurale près des villes   

  Polluant type d'influence  ...

In [4]:
# Group the daily rows by station once; the grouping is reused for both the
# annual score and the station metadata.
rows_by_site = df_O3.groupby('nom site')

# Annual SOMO35 score: total of the daily subscores of each 'nom site'
sum_by_nom = rows_by_site['SOMO35 Subscore'].sum()
print(sum_by_nom)

# One record per site carrying the site-specific information (e.g. code,
# latitude, longitude). groupby().first() takes the first non-null value of
# each column within the group.
first_row_per_nom_site = rows_by_site.first().reset_index()
print(first_row_per_nom_site)

# Attach the site information to the annual totals. Both inputs carry a
# 'SOMO35 Subscore' column, so the merge suffixes them: '_x' is the annual sum
# and '_y' the single-day value, which is dropped together with the per-day
# date columns.
result = pd.merge(
    left=sum_by_nom,
    right=first_row_per_nom_site,
    on='nom site',
    how='inner',
).drop(columns=['Date de fin', 'Date de début', 'SOMO35 Subscore_y'])
print(result)

nom site
Arrest                    4684.4
Bethune Stade             3753.4
Calais Parmentier         3966.3
Campagne les B.           3676.4
Cappelle la Grande        4358.1
Cartignies                4364.3
Denain Villars            4726.3
Douai Theuriet            4300.5
Faiencerie Creil          4665.3
Halluin stade             4009.9
Harnes Serres             4640.0
Marcq CTM                  296.0
Maubeuge Joyeuse          3603.3
Neuilly-Saint-Front       4024.9
Noeux-les-Mines           4634.6
Outreau                   3933.1
P. Roth St Quentin        3440.9
Roye                      3874.6
Saint Amand               3810.7
Salouel                   3850.5
Sangatte                  2125.2
St Omer Ribot             2617.5
St Pierre Amiens          2667.7
St Pol mer - cheminots    3912.6
St-Laurent-Blangy         4693.1
Tourcoing Houpline        4573.5
Valenciennes Acacias      4040.1
Wattignies                3976.0
Name: SOMO35 Subscore, dtype: float64
                  nom site   

In [5]:
# Write the final per-station annual SOMO35 table for ATMO HAUTS DE FRANCE to
# a CSV file in the current working directory, without the index column.
# NOTE(review): the file is written as latin1; any character outside that
# charset would make the export fail — confirm this encoding is what the
# consumers of the file expect.
result.to_csv(
    path_or_buf='df_SOMO35_ATMO_HAUTS_DE_FRANCE.csv',
    index=False,
    encoding='latin1',
)