In [None]:
# Extracts the annual SOMO35 score for each ATMO SUD station (Provence-Alpes-Côte d'Azur region)

In [1]:
# Importing required packages
import pandas as pd

import os

# Base directory holding the raw air-quality exports (SOMO35 and ATMO indicators).
# The path is assembled with os.path.join instead of a backslash-separated literal so
# that no backslash is interpreted as an escape sequence (e.g. "\r" as a carriage
# return, or a final backslash swallowing the closing quote) and so that it resolves
# on any operating system. The trailing "" keeps a separator at the end of the path.
base_path = os.path.join(
    ".",
    "data",
    "raw",
    "Données de la qualité de l'air (Indicateurs SOMO35 et ATMO)",
    "",
)


Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
# O3 ATMO SUD

# Threshold subtracted from each daily value before summing into the SOMO35 score.
# Assumes 'valeur' is expressed in µg/m³ (where 70 corresponds to roughly 35 ppb of
# ozone) — TODO confirm against the unit column of the export.
SOMO35_THRESHOLD = 70

# File path to the data. Every path component is passed separately so that no
# backslash escape sequence ends up inside a string literal and the path is portable.
O3_ATMO_SUD_file_path = os.path.join(
    base_path,
    "O3 Max Horaire Journalier",
    "Export Max. journalier moy. hor. - O3 - ATMO SUD.csv",
)

# The export is a semicolon-delimited CSV
df_O3 = pd.read_csv(O3_ATMO_SUD_file_path, delimiter=';')

# Display the first few rows of the raw DataFrame
print(df_O3.head())

# Daily SOMO35 subscore: the excess of 'valeur' over the threshold, floored at 0.
# Vectorised equivalent of max(valeur - 70, 0) applied row by row; missing values
# stay missing.
# NOTE(review): the file name suggests daily maxima of hourly means, whereas SOMO35 is
# commonly defined on the daily maximum 8-hour mean — confirm this is the intended input.
df_O3['SOMO35 Subscore'] = (df_O3['valeur'] - SOMO35_THRESHOLD).clip(lower=0)

# Display the DataFrame again, now including the subscore column
print(df_O3.head())


         Date de début          Date de fin Organisme   code zas  \
0  2023/01/01 00:00:00  2023/01/01 23:59:59  ATMO SUD  FR93ZAG01   
1  2023/01/01 00:00:00  2023/01/01 23:59:59  ATMO SUD  FR93ZAG01   
2  2023/01/01 00:00:00  2023/01/01 23:59:59  ATMO SUD  FR93ZAG01   
3  2023/01/01 00:00:00  2023/01/01 23:59:59  ATMO SUD  FR93ZAG01   
4  2023/01/01 00:00:00  2023/01/01 23:59:59  ATMO SUD  FR93ZRE02   

                             Zas code site              nom site  \
0              ZAG MARSEILLE-AIX   FR02001         Berre l'Etang   
1              ZAG MARSEILLE-AIX   FR02004  Martigues P. Central   
2              ZAG MARSEILLE-AIX   FR02012                Istres   
3              ZAG MARSEILLE-AIX   FR02021      Sausset les Pins   
4  ZR PROVENCE-ALPES-COTE-D-AZUR   FR02022                 Arles   

  type d'implantation Polluant type d'influence  ... valeur valeur brute  \
0         Périurbaine       O3             Fond  ...   70.7       70.700   
1             Urbaine       O3

In [3]:
# Group the daily records by station name once and reuse the grouping below
records_by_site = df_O3.groupby('nom site')

# Annual total per station: sum of the daily 'SOMO35 Subscore' values
sum_by_nom = records_by_site['SOMO35 Subscore'].sum()

print(sum_by_nom)

# One row of descriptive columns per station (e.g., code, latitude, longitude).
# groupby().first() keeps the first non-null value of every column within each station.
first_row_per_nom_site = records_by_site.first().reset_index()
print(first_row_per_nom_site)

# Attach the station information to the per-station totals. Both inputs carry a
# 'SOMO35 Subscore' column, so the merge suffixes them: '_x' is the summed total and
# '_y' is the single-day value from the descriptive row, which is discarded together
# with the per-day date columns.
# NOTE(review): the total is therefore exported under the name 'SOMO35 Subscore_x' —
# confirm downstream consumers expect that header.
merged_sites = pd.merge(sum_by_nom, first_row_per_nom_site, on='nom site', how='inner')
columns_to_discard = ['Date de fin', 'Date de début', 'SOMO35 Subscore_y']
result = merged_sites.drop(columns=columns_to_discard)
print(result)

nom site
AIX PLATANES             9799.5
ANTIBES JEAN MOULIN      9288.1
APT                      7510.2
AUBAGNE LES PASSONS      9659.0
AVIGNON   MAIRIE         8122.5
Arles                    6566.3
BRIGNOLES               10104.7
Berre l'Etang           10091.9
CANNES BROUSSAILLES      9107.9
CARPENTRAS               9316.2
CIANS                    9641.3
Cheiron                 11674.4
ESTEREL                  8262.7
GAP COMMANDERIE          5413.7
Istres                   8428.1
LA VALETTE/LA GARDE      7151.7
Le Casset2              11062.4
MANOSQUE                 8766.3
MARSEILLE 5 AVENUES      7138.3
Martigues P. Central     9719.5
NICE ARSON               9181.9
NICE OUEST BOTANIQUE     9883.1
Obs Haute-Provence       9589.9
PLAN AUPS/STE  BAUME    10591.8
SALON                    8891.3
Sausset les Pins         9285.4
TOULON CLARET            8536.4
VALLEE HUVEAUNE          8915.4
Name: SOMO35 Subscore, dtype: float64
                nom site        Date de début          Da

In [4]:
# Export the per-station annual SOMO35 table for ATMO SUD to a CSV file
# (row index omitted, text encoded as latin1)
output_csv = 'df_SOMO35_ATMO_SUD.csv'
result.to_csv(output_csv, encoding='latin1', index=False)