In [2]:
# Code for extracting the annual SOMO35 score for each station in AIR BREIZH (Région: Bretagne)

In [1]:
# Importing required packages
import pandas as pd

import os

# Base directory of the raw air-quality exports (SOMO35 and ATMO indicators).
# Built with os.path.join instead of a backslash-separated literal: inside a
# regular string "\r" is a carriage return and a trailing "\" escapes the
# closing quote, so the literal form does not yield the intended path.
# The final "" component keeps the trailing separator the original path ended with.
base_path = os.path.join(
    ".",
    "data",
    "raw",
    "Données de la qualité de l'air (Indicateurs SOMO35 et ATMO)",
    "",
)


Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [5]:
# O3 AIR BREIZH

# Path to the AIR BREIZH export of the daily maximum of hourly O3 means.
# Every path component is passed separately so that no backslash ends up
# inside a string literal ("\E" is an invalid escape sequence) and the path
# also resolves on non-Windows systems.
O3_AIR_BREIZH_file_path = os.path.join(
    base_path,
    "O3 Max Horaire Journalier",
    "Export Max. journalier moy. hor. - O3 - AIR BREIZH.csv",
)

# The export is semicolon-delimited.
df_O3 = pd.read_csv(O3_AIR_BREIZH_file_path, delimiter=';')

# Display the first few rows of the raw data
print(df_O3.head())

# Daily SOMO35 subscore: the part of 'valeur' above the threshold, 0 when the
# value is at or below it (missing values stay missing).
# NOTE(review): 70 is presumably 35 ppb of O3 expressed in µg/m³ — confirm the
# unit of 'valeur'.
# NOTE(review): SOMO35 is usually defined on the daily maximum 8-hour mean,
# while this file name refers to hourly means — confirm this is intended.
SOMO35_THRESHOLD = 70
df_O3['SOMO35 Subscore'] = (df_O3['valeur'] - SOMO35_THRESHOLD).clip(lower=0)

# Display the DataFrame again, now including the subscore column
print(df_O3.head())


         Date de début          Date de fin   Organisme   code zas  \
0  2023/01/01 00:00:00  2023/01/01 23:59:59  AIR BREIZH  FR53ZAR01   
1  2023/01/01 00:00:00  2023/01/01 23:59:59  AIR BREIZH  FR53ZAR01   
2  2023/01/01 00:00:00  2023/01/01 23:59:59  AIR BREIZH  FR53ZAG01   
3  2023/01/01 00:00:00  2023/01/01 23:59:59  AIR BREIZH  FR53ZRE01   
4  2023/01/01 00:00:00  2023/01/01 23:59:59  AIR BREIZH  FR53ZRE01   

           Zas code site              nom site type d'implantation Polluant  \
0    ZAR BREST   FR19012            Brest Mace             Urbaine       O3   
1    ZAR BREST   FR19016        BREST Plouzané         Périurbaine       O3   
2   ZAG RENNES   FR19018     MORDELLES BELLAIS         Périurbaine       O3   
3  ZR BRETAGNE   FR19020               Kergoff    Rurale nationale       O3   
4  ZR BRETAGNE   FR19032  Lorient B. Bissonnet             Urbaine       O3   

  type d'influence  ... valeur valeur brute unité de mesure  taux de saisie  \
0             Fond  ...  

In [6]:
# Group the daily rows once per monitoring site ('nom site').
stations = df_O3.groupby('nom site')

# Annual SOMO35 score: total of the daily subscores of each site.
sum_by_nom = stations['SOMO35 Subscore'].sum()
print(sum_by_nom)

# One descriptive record per site (code, coordinates, ...), with 'nom site'
# restored as a regular column. Note that first() takes the first non-null
# value of every column within each group.
first_row_per_nom_site = stations.first().reset_index()
print(first_row_per_nom_site)

# Join the totals with the site descriptions. Both inputs carry a
# 'SOMO35 Subscore' column, so merge suffixes them: '_x' is the annual total
# (kept) and '_y' is the value coming from first() (dropped together with the
# two date columns, which are meaningless for an annual figure).
result = pd.merge(
    left=sum_by_nom,
    right=first_row_per_nom_site,
    how='inner',
    on='nom site',
).drop(columns=['Date de fin', 'Date de début', 'SOMO35 Subscore_y'])
print(result)

nom site
BREST Plouzané          4879.3
Brest Mace              4484.3
Kergoff                 3522.8
Lorient B. Bissonnet    5152.1
MORDELLES BELLAIS       2961.4
Quimper Zola            4951.5
Rennes Thabor           4249.2
Saint Brieuc Balzac     2482.2
StMalo Rocabey          3921.3
Vannes UTA              4854.3
Name: SOMO35 Subscore, dtype: float64
               nom site        Date de début          Date de fin   Organisme  \
0        BREST Plouzané  2023/01/01 00:00:00  2023/01/01 23:59:59  AIR BREIZH   
1            Brest Mace  2023/01/01 00:00:00  2023/01/01 23:59:59  AIR BREIZH   
2               Kergoff  2023/01/01 00:00:00  2023/01/01 23:59:59  AIR BREIZH   
3  Lorient B. Bissonnet  2023/01/01 00:00:00  2023/01/01 23:59:59  AIR BREIZH   
4     MORDELLES BELLAIS  2023/01/01 00:00:00  2023/01/01 23:59:59  AIR BREIZH   
5          Quimper Zola  2023/01/01 00:00:00  2023/01/01 23:59:59  AIR BREIZH   
6         Rennes Thabor  2023/01/01 00:00:00  2023/01/01 23:59:59  AIR BREIZ

In [9]:
# Export the final table of annual SOMO35 scores for AIR BREIZH to a CSV file
# in the current working directory (row index omitted, Latin-1 encoded).
result.to_csv(
    path_or_buf='df_SOMO35_AIRBREIZH.csv',
    encoding='latin1',
    index=False,
)