In [None]:
# Code for extracting the annual SOMO35 Score for each station in ATMO NORMANDIE (Région: NORMANDIE)

In [2]:
# Importing required packages
import pandas as pd

import os

# Base directory holding the raw air-quality exports (SOMO35 and ATMO indicators).
# The components are joined with os.path.join instead of being written with
# backslashes: inside a regular string literal "\r" is a carriage return and a
# trailing backslash escapes the closing quote. The final empty component keeps
# a trailing path separator on the resulting value.
base_path = os.path.join(
    ".",
    "data",
    "raw",
    "Données de la qualité de l'air (Indicateurs SOMO35 et ATMO)",
    "",
)


Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [3]:
# O3 ATMO NORMANDIE

# Daily values above this level contribute to the SOMO35 subscore.
# NOTE(review): 70 is presumably 35 ppb expressed in µg/m³ — confirm against
# the 'unité de mesure' column of the export.
# NOTE(review): the file name suggests daily maxima of hourly means, whereas
# SOMO35 is usually defined on the daily maximum 8-hour mean — confirm that
# this input is the intended one.
SOMO35_THRESHOLD = 70

# Path to the O3 export. Folder and file name are passed as separate
# components so the literal contains no backslash ("\E" is not a valid escape
# sequence) and the platform's own separator is used.
O3_ATMO_NORMANDIE_file_path = os.path.join(
    base_path,
    "O3 Max Horaire Journalier",
    "Export Max. journalier moy. hor. - O3 - ATMO NORMANDIE.csv",
)

# The export is a semicolon-delimited CSV
df_O3 = pd.read_csv(O3_ATMO_NORMANDIE_file_path, delimiter=';')

# Preview of the raw records
print(df_O3.head())

# Daily exceedance over the threshold, floored at zero. Vectorised equivalent
# of max(valeur - threshold, 0); missing values stay missing.
df_O3['SOMO35 Subscore'] = (df_O3['valeur'] - SOMO35_THRESHOLD).clip(lower=0)

# Preview including the new 'SOMO35 Subscore' column
print(df_O3.head())


         Date de début          Date de fin       Organisme   code zas  \
0  2023/01/01 00:00:00  2023/01/01 23:59:59  ATMO NORMANDIE  FR28ZAR01   
1  2023/01/01 00:00:00  2023/01/01 23:59:59  ATMO NORMANDIE  FR28ZRE02   
2  2023/01/01 00:00:00  2023/01/01 23:59:59  ATMO NORMANDIE  FR28ZAR01   
3  2023/01/01 00:00:00  2023/01/01 23:59:59  ATMO NORMANDIE  FR28ZRE02   
4  2023/01/01 00:00:00  2023/01/01 23:59:59  ATMO NORMANDIE  FR28ZRE02   

            Zas code site              nom site type d'implantation Polluant  \
0  ZAR LE-HAVRE   FR05010  St Romain de Colbosc         Périurbaine       O3   
1  ZR NORMANDIE   FR05040    Honfleur Sce Tech.             Urbaine       O3   
2  ZAR LE-HAVRE   FR05074  Le Havre Ec. Herriot             Urbaine       O3   
3  ZR NORMANDIE   FR05082               Touques         Périurbaine       O3   
4  ZR NORMANDIE   FR05088                Fécamp         Périurbaine       O3   

  type d'influence  ... valeur valeur brute unité de mesure  taux de saisi

In [4]:
# Group the daily records by monitoring-site name once; both aggregations
# below reuse this grouping.
records_by_site = df_O3.groupby('nom site')

# Total of the daily subscores for each site
sum_by_nom = records_by_site['SOMO35 Subscore'].sum()
print(sum_by_nom)

# One row of descriptive columns per site (e.g. 'code site'); first() keeps the
# first non-null value of every column within each group.
first_row_per_nom_site = records_by_site.first().reset_index()
print(first_row_per_nom_site)

# Join the totals with the per-site description. Both inputs carry a
# 'SOMO35 Subscore' column, so the merge suffixes them: the summed total stays
# as 'SOMO35 Subscore_x', while the per-day '_y' copy and the two date columns
# are discarded.
columns_to_discard = ['Date de fin', 'Date de début', 'SOMO35 Subscore_y']
result = pd.merge(
    sum_by_nom,
    first_row_per_nom_site,
    on='nom site',
    how='inner',
).drop(columns=columns_to_discard)
print(result)

nom site
Alençon Météo-France        4800.4
Base loisirs Poses          5222.8
Bures-en-bray               4383.5
Caen Chemin-Vert            3869.6
Centre Hospitalier          4573.0
Cherbourg Hotel de Ville    4297.2
Dieppe Golf                 6232.9
Espace du Palais            4240.9
Evreux Centre               4599.6
Fécamp                      4887.1
Honfleur Sce Tech.          3853.3
IFS Caen sud                4764.3
La Coulonche MERA           5182.8
Le Havre Ec. Herriot        5368.4
Le Havre ville-haute        5745.7
Lisieux                     4355.6
Maison Parc Brotonne        6383.3
Mesnil Esnard               5478.3
Ouistreham                  5038.6
Saint-Lô Eglise             4650.1
St Romain de Colbosc        3909.0
Touques                     4277.3
Name: SOMO35 Subscore, dtype: float64
                    nom site        Date de début          Date de fin  \
0       Alençon Météo-France  2023/01/01 00:00:00  2023/01/01 23:59:59   
1         Base loisirs Poses  2023/

In [5]:
# Save the per-site annual SOMO35 table as a CSV file (relative path, Latin-1
# encoded, without the index column).
output_csv_name = 'df_SOMO35_ATMO_NORMANDIE.csv'
result.to_csv(output_csv_name, encoding='latin1', index=False)