In [None]:
# Code for extracting the annual SOMO35 Score for each station in QUALITAIR CORSE (Région: Corse)

In [1]:
# Importing required packages
import pandas as pd

import os

# Base directory holding the raw air-quality exports (SOMO35 and ATMO indicators).
# The path is assembled with os.path.join rather than written as one
# backslash-separated literal: inside a normal string "\r" is a carriage return
# and a trailing "\" escapes the closing quote, so such a literal either fails to
# parse or silently yields a wrong path. The empty last component keeps a trailing
# separator on the directory path.
base_path = os.path.join(
    ".",
    "data",
    "raw",
    "Données de la qualité de l'air (Indicateurs SOMO35 et ATMO)",
    "",
)


Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
# O3 QUALITAIR CORSE

# Daily O3 values above this threshold contribute to the SOMO35 subscore.
# NOTE(review): presumably expressed in µg/m³ (35 ppb ≈ 70 µg/m³) — confirm
# against the 'unité de mesure' column of the export.
# NOTE(review): SOMO35 is conventionally defined on the daily maximum 8-hour mean;
# this file is the daily maximum of hourly means — confirm it is the intended input.
SOMO35_THRESHOLD = 70

# File path to the data. Components are passed separately so that no backslash
# ends up inside a string literal ("\E" is an invalid escape sequence) and the
# path also resolves on non-Windows systems.
O3_QUALITAIR_CORSE_file_path = os.path.join(
    base_path,
    "O3 Max Horaire Journalier",
    "Export Max. journalier moy. hor. - O3 - QUALITAIR CORSE.csv",
)

# The export is a semicolon-delimited CSV
df_O3 = pd.read_csv(O3_QUALITAIR_CORSE_file_path, delimiter=';')

# Display the first few rows of the DataFrame
print(df_O3.head())

# Daily subscore = amount by which 'valeur' exceeds the threshold, floored at 0.
# Vectorized equivalent of max(x - threshold, 0) per row; missing values stay NaN.
df_O3['SOMO35 Subscore'] = (df_O3['valeur'] - SOMO35_THRESHOLD).clip(lower=0)

# Display the updated DataFrame
print(df_O3.head())


         Date de début          Date de fin        Organisme   code zas  \
0  2023/01/01 00:00:00  2023/01/01 23:59:59  QUALITAIR CORSE  FR94ZAR01   
1  2023/01/01 00:00:00  2023/01/01 23:59:59  QUALITAIR CORSE  FR94ZAR02   
2  2023/01/01 00:00:00  2023/01/01 23:59:59  QUALITAIR CORSE  FR94ZAR01   
3  2023/01/01 00:00:00  2023/01/01 23:59:59  QUALITAIR CORSE  FR94ZAR02   
4  2023/01/01 00:00:00  2023/01/01 23:59:59  QUALITAIR CORSE  FR94ZAR01   

           Zas code site             nom site type d'implantation Polluant  \
0  ZAR AJACCIO   FR41001      AJACCIO CANETTO             Urbaine       O3   
1   ZAR BASTIA   FR41002        BASTIA GIRAUD             Urbaine       O3   
2  ZAR AJACCIO   FR41003  AJACCIO PIATANICCIA         Périurbaine       O3   
3   ZAR BASTIA   FR41004     BASTIA LA MARANA         Périurbaine       O3   
4  ZAR AJACCIO   FR41007      AJACCIO SPOSATA         Périurbaine       O3   

  type d'influence  ... valeur valeur brute unité de mesure  taux de saisie  \
0

In [3]:
# Group the daily rows once per monitoring site; both aggregations below reuse it
site_groups = df_O3.groupby('nom site')

# Annual total per site: sum of the daily 'SOMO35 Subscore' values
sum_by_nom = site_groups['SOMO35 Subscore'].sum()
print(sum_by_nom)

# One row of descriptive columns per site (first non-null value of each column),
# with 'nom site' turned back into a regular column
first_row_per_nom_site = site_groups.first().reset_index()
print(first_row_per_nom_site)

# Attach the per-site totals to the site description. Both inputs carry a
# 'SOMO35 Subscore' column, so the merge suffixes them: '_x' is the summed total
# (kept) and '_y' is the single daily value from the description (dropped along
# with the date columns, which only describe one day).
merged_sites = pd.merge(sum_by_nom, first_row_per_nom_site, on='nom site', how='inner')
columns_to_discard = ['Date de fin', 'Date de début', 'SOMO35 Subscore_y']
result = merged_sites.drop(columns=columns_to_discard)
print(result)

nom site
AJACCIO CANETTO         4971.0
AJACCIO PIATANICCIA     6411.8
AJACCIO SPOSATA         7667.3
BASTIA GIRAUD           7647.0
BASTIA LA MARANA        7900.9
BASTIA MONTESORO       10165.3
VENACO                  7643.3
Name: SOMO35 Subscore, dtype: float64
              nom site        Date de début          Date de fin  \
0      AJACCIO CANETTO  2023/01/01 00:00:00  2023/01/01 23:59:59   
1  AJACCIO PIATANICCIA  2023/01/01 00:00:00  2023/01/01 23:59:59   
2      AJACCIO SPOSATA  2023/01/01 00:00:00  2023/01/01 23:59:59   
3        BASTIA GIRAUD  2023/01/01 00:00:00  2023/01/01 23:59:59   
4     BASTIA LA MARANA  2023/01/01 00:00:00  2023/01/01 23:59:59   
5     BASTIA MONTESORO  2023/01/01 00:00:00  2023/01/01 23:59:59   
6               VENACO  2023/01/01 00:00:00  2023/01/01 23:59:59   

         Organisme   code zas          Zas code site type d'implantation  \
0  QUALITAIR CORSE  FR94ZAR01  ZAR AJACCIO   FR41001             Urbaine   
1  QUALITAIR CORSE  FR94ZAR01  ZAR AJAC

In [4]:
# Export the final per-site annual SOMO35 table for QUALITAIR CORSE to a CSV file
# in the working directory (no index column, Latin-1 encoded).
output_csv_name = 'df_SOMO35_QUALITAIR_CORSE.csv'
result.to_csv(output_csv_name, encoding='latin1', index=False)