In [None]:
# Code for extracting the annual SOMO35 score for each station in AIRPARIF (Région: Île-de-France)

In [1]:
# Importing required packages
import pandas as pd

import os

# Base directory of the raw air-quality exports (SOMO35 and ATMO indicators).
# The path is assembled from its components instead of being written as one
# backslash-separated literal: in a regular string "\r" is a carriage return
# and a trailing "\" escapes the closing quote, so the hand-written Windows
# path is not a valid (or correct) string. The empty last component keeps the
# trailing separator, so code that appends a file name to base_path by plain
# concatenation keeps working.
base_path = os.path.join(
    ".",
    "data",
    "raw",
    "Données de la qualité de l'air (Indicateurs SOMO35 et ATMO)",
    "",
)


Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
# O3 AIRPARIF: load the daily O3 export and derive the per-day SOMO35
# subscore, i.e. the excess of each daily value over the threshold,
# floored at 0.

# Threshold subtracted from every daily value.
# NOTE(review): 70 is presumably 35 ppb expressed in µg/m³ — confirm that
# 'valeur' is reported in µg/m³ (see the 'unité de mesure' column).
SOMO35_THRESHOLD = 70

# File path to the data. Directory and file name are passed as separate
# components so no backslash ends up inside a string literal (the previous
# "...Journalier\Export..." only worked because "\E" is not a recognised
# escape sequence, which newer Python versions warn about).
O3_AIRPARIF_file_path = os.path.join(
    base_path,
    "O3 Max Horaire Journalier",
    "Export Max. journalier moy. hor. - O3 - AIRPARIF.csv",
)

# The export is a semicolon-delimited CSV
df_O3 = pd.read_csv(O3_AIRPARIF_file_path, delimiter=';')

# Display the first few rows of the raw DataFrame
print(df_O3.head())

# Fill in the Subscore column: max(valeur - threshold, 0), computed on the
# whole column at once. Missing values stay missing, exactly as with the
# element-wise max().
df_O3['SOMO35 Subscore'] = (df_O3['valeur'] - SOMO35_THRESHOLD).clip(lower=0)

# Display the updated DataFrame
print(df_O3.head())


         Date de début          Date de fin Organisme   code zas        Zas  \
0  2023/01/01 00:00:00  2023/01/01 23:59:59  AIRPARIF  FR11ZAG01  ZAG PARIS   
1  2023/01/01 00:00:00  2023/01/01 23:59:59  AIRPARIF  FR11ZAG01  ZAG PARIS   
2  2023/01/01 00:00:00  2023/01/01 23:59:59  AIRPARIF  FR11ZAG01  ZAG PARIS   
3  2023/01/01 00:00:00  2023/01/01 23:59:59  AIRPARIF  FR11ZAG01  ZAG PARIS   
4  2023/01/01 00:00:00  2023/01/01 23:59:59  AIRPARIF  FR11ZAG01  ZAG PARIS   

  code site           nom site type d'implantation Polluant type d'influence  \
0   FR04004        PARIS 18eme             Urbaine       O3             Fond   
1   FR04017  NEUILLY-SUR-SEINE             Urbaine       O3             Fond   
2   FR04023     CERGY-PONTOISE             Urbaine       O3             Fond   
3   FR04029         VERSAILLES         Périurbaine       O3             Fond   
4   FR04034    VITRY-SUR-SEINE             Urbaine       O3             Fond   

   ... valeur valeur brute unité de mesure  

In [3]:
# One grouping by station name is shared by both aggregations below
by_site = df_O3.groupby('nom site')

# Annual SOMO35 score: sum of the daily subscores of each station
sum_by_nom = by_site['SOMO35 Subscore'].sum()
print(sum_by_nom)

# Site-specific information (e.g. site code, zone): first() keeps, for every
# column, the first non-null value found within each station
first_row_per_nom_site = by_site.first().reset_index()
print(first_row_per_nom_site)

# Attach the site information to the annual sums. Both inputs carry a
# 'SOMO35 Subscore' column, so the merge suffixes them: '_x' is the annual
# sum, '_y' the subscore of the first observation.
merged = pd.merge(sum_by_nom, first_row_per_nom_site, on='nom site', how='inner')

# Drop the per-day date columns and the first-observation subscore.
# NOTE(review): the annual score is therefore exported under the name
# 'SOMO35 Subscore_x', and the remaining per-day columns (e.g. 'valeur')
# still hold first-observation values — confirm this is what consumers of
# the CSV expect.
result = merged.drop(columns=['Date de fin', 'Date de début', 'SOMO35 Subscore_y'])
print(result)

nom site
CERGY-PONTOISE          5683.6
CHAMPIGNY-SUR-MARNE     5100.9
LES ULIS                5859.2
LOGNES                  5360.6
MANTES-LA-JOLIE         4527.9
MELUN                   3777.3
MONTGERON               4906.3
NEUILLY-SUR-SEINE       4459.2
PARIS 13eme             4813.5
PARIS 18eme             4441.2
PARIS 1er Les Halles    4222.4
RAMBOUILLET             5720.2
TREMBLAY-EN-FRANCE      5071.3
VERSAILLES              3809.6
VILLEMOMBLE             4746.7
VITRY-SUR-SEINE         5617.6
Zone Rurale Est         4656.5
Zone Rurale NO          5360.2
Zone Rurale Nord        5910.4
Zone Rurale Nord-Est    4584.8
Zone Rurale SE          5358.2
Zone Rurale SO          5397.3
Zone rurale Sud         5454.8
Name: SOMO35 Subscore, dtype: float64
                nom site        Date de début          Date de fin Organisme  \
0         CERGY-PONTOISE  2023/01/01 00:00:00  2023/01/01 23:59:59  AIRPARIF   
1    CHAMPIGNY-SUR-MARNE  2023/01/01 00:00:00  2023/01/01 23:59:59  AIRPARIF   


In [4]:
# Save the final dataframe of the annual SOMO35 score in AIRPARIF as a CSV
# file in the current working directory. The file is written in Latin-1, so
# a character outside that charset would raise UnicodeEncodeError.
output_csv = 'df_SOMO35_AIRPARIF.csv'
result.to_csv(output_csv, index=False, encoding='latin1')