In [None]:
# Code for extracting the annual SOMO35 Score for each station in ATMO GRAND EST (Région: GRAND EST)

In [1]:
# Importing required packages
import pandas as pd

import os

# Base directory of the raw air-quality data (SOMO35 and ATMO indicators).
# The path is assembled from separate components instead of one backslash-
# separated literal: in a normal string a trailing backslash escapes the closing
# quote and "\r" is read as a carriage return, which corrupts the path.
# os.path.join also inserts the separator of the current operating system.
base_path = os.path.join(".", "data", "raw", "Données de la qualité de l'air (Indicateurs SOMO35 et ATMO)")


Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
# O3 ATMO GRAND EST

# Path to the O3 CSV export. Each path component is passed to os.path.join
# separately so the string literals contain no backslashes (the original
# "...Journalier\Export..." relied on the invalid escape sequence "\E") and the
# separator matches the current operating system.
O3_ATMO_GRAND_EST_file_path = os.path.join(
    base_path,
    "O3 Max Horaire Journalier",
    "Export Max. journalier moy. hor. - O3 - ATMO GRAND EST.csv",
)

# The export is a semicolon-delimited CSV
df_O3 = pd.read_csv(O3_ATMO_GRAND_EST_file_path, delimiter=';')

# Display the first few rows of the raw DataFrame
print(df_O3.head())

# Cut-off subtracted from every 'valeur' entry before summing.
# NOTE(review): 70 is presumably in µg/m³ (about 35 ppb, the SOMO35 cut-off) —
# confirm against the 'unité de mesure' column of the export.
SOMO35_THRESHOLD = 70

# Daily subscore = amount by which 'valeur' exceeds the threshold, floored at 0.
# Vectorized equivalent of max(valeur - 70, 0) applied row by row; missing
# values stay missing.
df_O3['SOMO35 Subscore'] = (df_O3['valeur'] - SOMO35_THRESHOLD).clip(lower=0)

# Display the DataFrame with the new 'SOMO35 Subscore' column
print(df_O3.head())


         Date de début          Date de fin       Organisme   code zas  \
0  2023/01/01 00:00:00  2023/01/01 23:59:59  ATMO GRAND EST  FR44ZAG02   
1  2023/01/01 00:00:00  2023/01/01 23:59:59  ATMO GRAND EST  FR44ZAG02   
2  2023/01/01 00:00:00  2023/01/01 23:59:59  ATMO GRAND EST  FR44ZAG02   
3  2023/01/01 00:00:00  2023/01/01 23:59:59  ATMO GRAND EST  FR44ZRE01   
4  2023/01/01 00:00:00  2023/01/01 23:59:59  ATMO GRAND EST  FR44ZAR01   

            Zas code site           nom site type d'implantation Polluant  \
0      ZAG METZ   FR01011        Metz-Centre             Urbaine       O3   
1      ZAG METZ   FR01018      Scy-Chazelles         Périurbaine       O3   
2      ZAG METZ   FR01020  Thionville-Centre             Urbaine       O3   
3  ZR GRAND-EST   FR14008              REVIN    Rurale nationale       O3   
4     ZAR REIMS   FR14010            BETHENY         Périurbaine       O3   

  type d'influence  ... valeur valeur brute unité de mesure  taux de saisie  \
0            

In [3]:
# Annual score per station: total of the daily 'SOMO35 Subscore' values,
# indexed by 'nom site'
sum_by_nom = df_O3.groupby('nom site')['SOMO35 Subscore'].sum()
print(sum_by_nom)

# One row of site-specific information per station (e.g. code, latitude,
# longitude). GroupBy.first() takes the first non-null entry of each column
# within the group.
first_row_per_nom_site = df_O3.groupby('nom site').first().reset_index()
print(first_row_per_nom_site)

# Join the totals with the site information on 'nom site'. Both inputs carry a
# 'SOMO35 Subscore' column, so the merge suffixes them: '_x' is the annual
# total (kept) and '_y' is the single value from the site-information row,
# which is dropped together with the two date columns.
result = (
    pd.merge(sum_by_nom, first_row_per_nom_site, on='nom site', how='inner')
    .drop(columns=['Date de fin', 'Date de début', 'SOMO35 Subscore_y'])
)
print(result)

nom site
BETHENY                               5714.9
CHALONS                               5261.7
CHARLEVILLE                           5039.7
Capitainerie                          5548.3
Colmar Sud                            7667.9
Donon Vosges Moyennes 2               5855.0
Epinal                                4326.4
Haguenau-Place marché aux bestiaux    7011.8
Jean d Aulan                          4632.4
Jonville en Woevre                    6778.4
Metz-Centre                           4833.9
Mulhouse Est                          6636.8
Mulhouse Sud 2                        6314.7
Nancy-Brabois 2                       6336.1
Nancy-Charles III                     5135.1
Nord-Est Alsace                       6107.7
REVIN                                 4567.2
ST PARRES AUX TERTRE                  4681.6
STE SAVINE                               5.4
Schlucht                              5907.8
Scy-Chazelles                         6198.9
St DIZIER L. Michel                   5072.7
S

In [4]:
# Export the final DataFrame of annual SOMO35 scores for ATMO GRAND EST to a
# CSV file in the working directory (Latin-1 encoded, index column omitted)
result.to_csv(
    'df_SOMO35_ATMO_GRAND_EST.csv',
    encoding='latin1',
    index=False,
)