In [1]:
# Code for extracting the annual SOMO35 Score for each station in ATMO OCCITANIE (Région: OCCITANIE)

In [2]:
# Importing required packages
import pandas as pd

import os

# Base directory of the raw air-quality exports, relative to the working directory.
# Built with os.path.join instead of a backslash literal: inside a normal string
# "\r" is a carriage return and a trailing "\" escapes the closing quote.
# The empty last component keeps the trailing separator the original literal had.
base_path = os.path.join(
    ".",
    "data",
    "raw",
    "Données de la qualité de l'air (Indicateurs SOMO35 et ATMO)",
    "",
)


Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [3]:
# O3 ATMO OCCITANIE

# Daily values above this level contribute to the SOMO35 subscore.
# Presumably this is the 35 ppb ozone cutoff expressed in µg/m³ — TODO confirm
# against the unit reported in the export.
SOMO35_THRESHOLD = 70

# Path of the O3 export. Folder and file name are passed as separate components
# so that no backslash sits inside a string literal ("\E" is an invalid escape
# sequence) and the path separator is chosen by the operating system.
O3_ATMO_OCCITANIE_file_path = os.path.join(
    base_path,
    "O3 Max Horaire Journalier",
    "Export Max. journalier moy. hor. - O3 - ATMO OCCITANIE.csv",
)

# The export is a semicolon-delimited CSV
df_O3 = pd.read_csv(O3_ATMO_OCCITANIE_file_path, delimiter=';')

# Preview of the raw data
print(df_O3.head())

# Daily subscore: the excess of 'valeur' over the threshold, floored at 0.
# Vectorised equivalent of applying max(x - 70, 0) row by row; missing values
# remain NaN in both forms.
# NOTE(review): SOMO35 is usually computed from the daily maximum 8-hour running
# mean, while this file appears to hold the daily maximum of hourly means —
# confirm this is the intended input.
df_O3['SOMO35 Subscore'] = (df_O3['valeur'] - SOMO35_THRESHOLD).clip(lower=0)

# Preview including the new column
print(df_O3.head())


         Date de début          Date de fin       Organisme   code zas  \
0  2023/01/01 00:00:00  2023/01/01 23:59:59  ATMO OCCITANIE  FR76ZAG02   
1  2023/01/01 00:00:00  2023/01/01 23:59:59  ATMO OCCITANIE  FR76ZAG02   
2  2023/01/01 00:00:00  2023/01/01 23:59:59  ATMO OCCITANIE  FR76ZAG02   
3  2023/01/01 00:00:00  2023/01/01 23:59:59  ATMO OCCITANIE  FR76ZRE01   
4  2023/01/01 00:00:00  2023/01/01 23:59:59  ATMO OCCITANIE  FR76ZRE01   

               Zas code site                              nom site  \
0  ZAG MONTPELLIER   FR08016             Montpellier Prés d'Arènes   
1  ZAG MONTPELLIER   FR08017  Montpellier Périurbaine Sud (Lattes)   
2  ZAG MONTPELLIER   FR08018                   Montpellier St Gely   
3     ZR OCCITANIE   FR08022                    Agathois-piscénois   
4     ZR OCCITANIE   FR08023                  Biterrois-Narbonnais   

  type d'implantation Polluant type d'influence  ... valeur valeur brute  \
0             Urbaine       O3             Fond  ...   84.

In [4]:
# Annual score per station: add up the daily subscores within each 'nom site'
sum_by_nom = df_O3.groupby('nom site')['SOMO35 Subscore'].sum()
print(sum_by_nom)

# One descriptive row per station (code, coordinates, ...): the first entry of
# every column within each 'nom site' group, with the site name restored as a
# regular column
first_row_per_nom_site = df_O3.groupby('nom site').first().reset_index()
print(first_row_per_nom_site)

# Attach the station details to the annual totals.
# Both inputs carry a 'SOMO35 Subscore' column, so the merge suffixes them:
# the station total stays as 'SOMO35 Subscore_x', while the copy coming from
# the per-station rows ('SOMO35 Subscore_y') is discarded together with the
# date columns, which describe a single day only.
result = (
    sum_by_nom.to_frame()
    .merge(first_row_per_nom_site, on='nom site', how='inner')
    .drop(columns=['Date de fin', 'Date de début', 'SOMO35 Subscore_y'])
)
print(result)

nom site
Agathois-piscénois                      7506.3
BELESTA EN LAURAGAIS                    6105.4
Biterrois-Narbonnais                    7453.8
ECOLE M.JACQUIER                        6155.2
GAUDONVILLE                             6105.0
Gard Rhodanien2                         8741.3
LOURDES LAPACCA                         5620.5
Montauban                               5469.6
Montpellier Prés d'Arènes               4370.8
Montpellier Périurbaine Sud (Lattes)    8582.3
Montpellier St Gely                     8040.2
Nimes Gauzy                             7457.9
Nimes La Calmette                       7614.9
PEYRUSSE                                4742.0
Perpignan Les Carmes                    6911.6
Perpignan St Esteve                     7513.2
Rodez                                   6690.4
SICOVAL                                 6383.5
Stade Travet CASTRES                    7214.9
Tarbes lycée Dupuy                      5337.5
Toulouse Berthelot                      6164.4
Name

In [5]:
# Write the per-station annual SOMO35 table for ATMO OCCITANIE to a CSV file
# in the working directory (latin1-encoded, row index left out)
result.to_csv(
    'df_SOMO35_ATMO_OCCITANIE.csv',
    encoding='latin1',
    index=False,
)