In [None]:
# Code for extracting the annual SOMO35 score for each station in AIR PAYS DE LA LOIRE (Région: Pays de la Loire)

In [2]:
# Importing required packages
import pandas as pd

import os

# Base directory path (relative to the notebook's working directory).
# Built with os.path.join instead of a hand-written backslash literal: inside
# a normal string "\r" is a carriage return, "\d"/"\D" are invalid escapes and
# a trailing "\" escapes the closing quote. The final empty component keeps
# the trailing path separator of the original value.
base_path = os.path.join(
    ".",
    "data",
    "raw",
    "Données de la qualité de l'air (Indicateurs SOMO35 et ATMO)",
    "",
)


Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [3]:
# O3 AIR PAYS DE LA LOIRE
#
# Load the daily-maximum O3 export for the AIR PAYS DE LA LOIRE network and
# add a per-row SOMO35 subscore column to it.

# File path to the data. The sub-folder and the file name are passed as
# separate components so that no backslash sits inside a string literal
# ("\E" is an invalid escape sequence) and the OS-specific separator is used.
O3_AIR_PAYS_DE_LA_LOIRE_file_path = os.path.join(
    base_path,
    "O3 Max Horaire Journalier",
    "Export Max. journalier moy. hor. - O3 - AIR PAYS DE LA LOIRE.csv",
)

# The export is a semicolon-delimited CSV
df_O3 = pd.read_csv(O3_AIR_PAYS_DE_LA_LOIRE_file_path, delimiter=';')

# Display the first few rows of the DataFrame
print(df_O3.head())

# Per-row SOMO35 subscore: the amount by which 'valeur' exceeds 70, floored
# at 0. Vectorised equivalent of max(x - 70, 0); missing values remain NaN.
# NOTE(review): 70 is presumably the 35 ppb SOMO35 cut-off expressed in µg/m³,
# and SOMO35 is usually defined on the daily maximum 8-hour mean — confirm the
# unit and averaging period of 'valeur' in this export.
df_O3['SOMO35 Subscore'] = (df_O3['valeur'] - 70).clip(lower=0)

# Display the updated DataFrame
print(df_O3.head())


         Date de début          Date de fin             Organisme   code zas  \
0  2023/01/01 00:00:00  2023/01/01 23:59:59  AIR PAYS DE LA LOIRE  FR52ZRE01   
1  2023/01/01 00:00:00  2023/01/01 23:59:59  AIR PAYS DE LA LOIRE  FR52ZAG01   
2  2023/01/01 00:00:00  2023/01/01 23:59:59  AIR PAYS DE LA LOIRE  FR52ZAR03   
3  2023/01/01 00:00:00  2023/01/01 23:59:59  AIR PAYS DE LA LOIRE  FR52ZRE01   
4  2023/01/01 00:00:00  2023/01/01 23:59:59  AIR PAYS DE LA LOIRE  FR52ZAR01   

                        Zas code site       nom site type d'implantation  \
0       ZR PAYS-DE-LA-LOIRE   FR23078  SAINT EXUPERY             Urbaine   
1  ZAG NANTES-SAINT-NAZAIRE   FR23110      LEON BLUM             Urbaine   
2                 ZAR LAVAL   FR23123       MAZAGRAN             Urbaine   
3       ZR PAYS-DE-LA-LOIRE   FR23124    LA TARDIERE    Rurale nationale   
4          ZAR ANGERS-LOIRE   FR23150       APPENTIS             Urbaine   

  Polluant type d'influence  ... valeur valeur brute unité de 

In [5]:
# Aggregate the per-row subscores into one total per station ('nom site') and
# attach each station's descriptive columns to that total.
per_site = df_O3.groupby('nom site')

# Total 'SOMO35 Subscore' for every 'nom site'
sum_by_nom = per_site['SOMO35 Subscore'].sum()
print(sum_by_nom)

# One row per site with its site-specific information (e.g. code, latitude,
# longitude). GroupBy.first() keeps the first non-null value of each column
# within the group.
first_row_per_nom_site = per_site.first().reset_index()
print(first_row_per_nom_site)

# Join the totals with the site information on 'nom site'. Both inputs carry
# a 'SOMO35 Subscore' column, so the merge suffixes them: '_x' is the per-site
# total (kept) and '_y' is the single-row value (dropped with the date columns).
# NOTE(review): the total is therefore exported as 'SOMO35 Subscore_x', and
# other per-row columns of the first record (e.g. 'valeur') remain in `result`
# — confirm this is what downstream consumers expect.
result = pd.merge(
    sum_by_nom,
    first_row_per_nom_site,
    on='nom site',
    how='inner',
).drop(columns=['Date de fin', 'Date de début', 'SOMO35 Subscore_y'])
print(result)

nom site
APPENTIS               4948.4
CIM BOUTEILLERIE       4771.4
CLOS DES BEAUVAIS      5758.6
DELACROIX              5081.6
EPINETTES              5578.6
Fillé                  4984.7
GASPARD                5632.5
LA TARDIERE            3810.2
LEON BLUM              5432.3
MAZAGRAN               4620.3
SAINT EXUPERY          5583.0
SAINT-DENIS D'ANJOU    4676.0
SOURCES                4956.5
Name: SOMO35 Subscore, dtype: float64
               nom site        Date de début          Date de fin  \
0              APPENTIS  2023/01/01 00:00:00  2023/01/01 23:59:59   
1      CIM BOUTEILLERIE  2023/01/01 00:00:00  2023/01/01 23:59:59   
2     CLOS DES BEAUVAIS  2023/01/01 00:00:00  2023/01/01 23:59:59   
3             DELACROIX  2023/01/01 00:00:00  2023/01/01 23:59:59   
4             EPINETTES  2023/01/01 00:00:00  2023/01/01 23:59:59   
5                 Fillé  2023/01/01 00:00:00  2023/01/01 23:59:59   
6               GASPARD  2023/01/01 00:00:00  2023/01/01 23:59:59   
7          

In [6]:
# Save the final per-station SOMO35 table for AIR PAYS DE LA LOIRE as a CSV
# file in the current working directory (Latin-1 encoded, index not written).
result.to_csv(
    path_or_buf='df_SOMO35_AIR_PAYS_DE_LA_LOIRE.csv',
    index=False,
    encoding='latin1',
)