In [None]:
# Compute the annual SOMO35 score for each monitoring station of ATMO AUVERGNE-RHÔNE-ALPES (region: AUVERGNE-RHÔNE-ALPES)

# Note: "ATMO ARA" is used below as the abbreviation for ATMO AUVERGNE-RHÔNE-ALPES

In [1]:
# Importing required packages
import pandas as pd

import os

# Base directory of the raw air-quality exports, relative to the working directory.
# The path is assembled with os.path.join instead of a backslash-separated literal:
# inside a normal string "\r" is a carriage return and a backslash placed right
# before the closing quote escapes it, leaving the literal unterminated.
# The final "" component keeps the trailing separator the original path ended with.
base_path = os.path.join(
    ".",
    "data",
    "raw",
    "Données de la qualité de l'air (Indicateurs SOMO35 et ATMO)",
    "",
)


Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
# O3 ATMO ARA

# Ozone level above which a daily value contributes to SOMO35.
# 35 ppb is conventionally taken as 70 µg/m³; assumes 'valeur' is in µg/m³ — TODO confirm.
SOMO35_THRESHOLD = 70

# File path to the data. Each path component is passed separately so that no
# backslash ends up inside a string literal ("\E" is an invalid escape sequence)
# and the separator is correct on every operating system.
O3_ATMO_ARA_file_path = os.path.join(
    base_path,
    "O3 Max Horaire Journalier",
    "Export Max. journalier moy. hor. - O3 - ATMO ARA.csv",
)

# The export is a semicolon-delimited CSV
df_O3 = pd.read_csv(O3_ATMO_ARA_file_path, delimiter=';')

# Display the first few rows of the DataFrame
print(df_O3.head())

# Daily subscore: the excess of 'valeur' over the threshold, floored at 0.
# Vectorised equivalent of max(x - 70, 0) applied row by row; missing values stay NaN.
# NOTE(review): SOMO35 is usually defined on the daily maximum 8-hour mean, while the
# file name suggests a daily maximum of hourly means — confirm this input is intended.
df_O3['SOMO35 Subscore'] = df_O3['valeur'].sub(SOMO35_THRESHOLD).clip(lower=0)

# Display the updated DataFrame
print(df_O3.head())


         Date de début          Date de fin                  Organisme  \
0  2023/01/01 00:00:00  2023/01/01 23:59:59  ATMO AUVERGNE-RHÔNE-ALPES   
1  2023/01/01 00:00:00  2023/01/01 23:59:59  ATMO AUVERGNE-RHÔNE-ALPES   
2  2023/01/01 00:00:00  2023/01/01 23:59:59  ATMO AUVERGNE-RHÔNE-ALPES   
3  2023/01/01 00:00:00  2023/01/01 23:59:59  ATMO AUVERGNE-RHÔNE-ALPES   
4  2023/01/01 00:00:00  2023/01/01 23:59:59  ATMO AUVERGNE-RHÔNE-ALPES   

    code zas                      Zas code site          nom site  \
0  FR84ZAG04     ZAG CLERMONT-FERRAND   FR07004       Montferrand   
1  FR84ZAG04     ZAG CLERMONT-FERRAND   FR07009      Jardin Lecoq   
2  FR84ZRE02  ZR AUVERGNE-RHONE-ALPES   FR07022   Paray le Fresil   
3  FR84ZRE02  ZR AUVERGNE-RHONE-ALPES   FR07031           Rageade   
4  FR84ZRE02  ZR AUVERGNE-RHONE-ALPES   FR07052  Aurillac-Lagarde   

  type d'implantation Polluant type d'influence  ... valeur valeur brute  \
0             Urbaine       O3             Fond  ...   72.0     

In [3]:
# Annual SOMO35 score per station: total of the daily subscores for each 'nom site'
by_site = df_O3.groupby('nom site')
sum_by_nom = by_site['SOMO35 Subscore'].sum()

print(sum_by_nom)

# One row of site-specific information per station (e.g., code, latitude, longitude);
# groupby.first() keeps the first non-null value of every column within each site
first_row_per_nom_site = by_site.first().reset_index()
print(first_row_per_nom_site)

# Attach the site information to the annual totals. Both inputs carry a
# 'SOMO35 Subscore' column, so the merge suffixes them: '_x' is the annual sum
# (kept under that name) and '_y' is the single daily value picked by first() (dropped).
# The per-day date columns are meaningless for an annual figure and are dropped too.
columns_to_drop = ['Date de fin', 'Date de début', 'SOMO35 Subscore_y']
merged = pd.merge(sum_by_nom, first_row_per_nom_site, on='nom site', how='inner')
result = merged.drop(columns=columns_to_drop)
print(result)

nom site
ANNEMASSE               6328.1
Albertville             6255.0
Aurillac-Lagarde        6124.0
Bourg-en-Bresse         7543.5
Bourgoin-Jallieu        5540.9
CHAMBERY LE HAUT        6763.7
CHAMONIX                3858.2
COTIERE AIN             6837.3
Champ sur Drac          7187.8
Drôme Rurale Sud-SND    6673.1
GAILLARD                6921.4
GERLAND                 4755.0
Grenoble Les Frenes     4812.4
Grenoble PeriurbSud     6355.2
Gresivaudan Periurb     7399.9
HAUT BEAUJOLAIS         5894.4
Jardin Lecoq            6328.7
LA TALAUDIERE           7448.3
LOVERCHY                4813.9
LYON Centre             4781.6
Le Puy-Causans          7069.2
Les Ménuires            9739.1
Montferrand             6072.5
Montluçon               5745.8
Moulins Centre          6416.2
NOVEL                   5505.0
PASSY                   6315.9
PASTEUR                 4923.2
Paray le Fresil         6129.5
Pays du Mezenc          7497.8
Plateau de Bonnevaux    8811.6
ROANNE                  5312.2

In [4]:
# Export the final dataframe of the annual SOMO35 score in ATMO AUVERGNE-RHÔNE-ALPES
# to a CSV file in the current working directory (latin1-encoded, without the index column)
output_csv = 'df_SOMO35_ATMO_ARA.csv'
result.to_csv(output_csv, encoding='latin1', index=False)