In [1]:
# Code for extracting the annual SOMO35 score for each station in LIG'AIR (Région: Centre-Val de Loire)

In [2]:
# Importing required packages
import pandas as pd

import os

# Base directory of the raw air-quality data (SOMO35 and ATMO indicators),
# relative to the notebook's working directory.
# The path is assembled with os.path.join instead of a hand-written Windows
# path: inside a normal string literal a backslash followed by "r" is a carriage
# return, and a backslash right before the closing quote escapes that quote.
# The final empty component keeps a trailing path separator on the result.
base_path = os.path.join(
    ".",
    "data",
    "raw",
    "Données de la qualité de l'air (Indicateurs SOMO35 et ATMO)",
    "",
)


Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [3]:
# O3 LIG'AIR

# Path to the LIG'AIR O3 export (daily maximum of hourly means, going by the file name).
# Folder and file name are separate components so that no backslash ends up
# inside a string literal (a backslash followed by "E" is an invalid escape
# sequence) and the path is not tied to the Windows separator.
O3_LIGAIR_file_path = os.path.join(
    base_path,
    "O3 Max Horaire Journalier",
    "Export Max. journalier moy. hor. - O3 - LIG'AIR.csv",
)

# Read the export; its fields are separated by ';'
df_O3 = pd.read_csv(O3_LIGAIR_file_path, delimiter=';')

# Display the first few rows of the DataFrame as loaded
print(df_O3.head())

# Daily SOMO35 subscore: the part of 'valeur' that exceeds 70, floored at 0.
# Vectorized form of max(x - 70, 0) applied row by row; missing values stay NaN.
# NOTE(review): 70 is presumably 35 ppb expressed in µg/m³ — confirm the unit of 'valeur'.
# NOTE(review): SOMO35 is usually defined on daily maximum 8-hour means — confirm that
# using the daily maximum of hourly means is intended here.
df_O3['SOMO35 Subscore'] = df_O3['valeur'].sub(70).clip(lower=0)

# Display the DataFrame again, now including the 'SOMO35 Subscore' column
print(df_O3.head())


         Date de début          Date de fin Organisme   code zas  \
0  2023/01/01 00:00:00  2023/01/01 23:59:59   LIG'AIR  FR24ZAG02   
1  2023/01/01 00:00:00  2023/01/01 23:59:59   LIG'AIR  FR24ZAG01   
2  2023/01/01 00:00:00  2023/01/01 23:59:59   LIG'AIR  FR24ZAG01   
3  2023/01/01 00:00:00  2023/01/01 23:59:59   LIG'AIR  FR24ZAG02   
4  2023/01/01 00:00:00  2023/01/01 23:59:59   LIG'AIR  FR24ZRE02   

                      Zas code site            nom site type d'implantation  \
0             ZAG ORLEANS   FR34017  Marigny-les-usages         Périurbaine   
1               ZAG TOURS   FR34024      Joué lès Tours             Urbaine   
2               ZAG TOURS   FR34026  Tours péri-urbaine         Périurbaine   
3             ZAG ORLEANS   FR34029      La_Source-CNRS             Urbaine   
4  ZR CENTRE-VAL-DE-LOIRE   FR34032             Leblanc             Urbaine   

  Polluant type d'influence  ... valeur valeur brute unité de mesure  \
0       O3             Fond  ...   64.5     

In [4]:
# Annual SOMO35 score per station: sum of the daily subscores grouped by 'nom site'.
# min_count=1 keeps the score as NaN for a station that has no valid daily value;
# a plain sum() would report 0.0 there, which is indistinguishable from a station
# that never exceeded the threshold.
sum_by_nom = df_O3.groupby('nom site')['SOMO35 Subscore'].sum(min_count=1)

print(sum_by_nom)

# Site-specific information (e.g. site code): groupby(...).first() takes, for every
# column, the first non-null value found for each station.
first_row_per_nom_site = df_O3.groupby('nom site').first().reset_index()
print(first_row_per_nom_site)

# Attach the site information to the annual sums, matching on 'nom site'.
# Both inputs carry a 'SOMO35 Subscore' column, so the merge suffixes them:
# the '_y' copy (a single daily value from the site table) and the two date columns
# are dropped, and the annual sum remains in the result as 'SOMO35 Subscore_x'.
result = pd.merge(sum_by_nom, first_row_per_nom_site, on='nom site', how = 'inner').drop(['Date de fin', 'Date de début', 'SOMO35 Subscore_y'], axis = 1)
print(result)

nom site
Blois nord            6109.5
Chateauroux Sud       5388.7
Dreux Centre          5570.1
Faverolles            4984.5
Fulbert               5433.3
Joué lès Tours        4648.1
La_Source-CNRS        5757.3
Leblanc               5111.0
Marigny-les-usages    4658.5
Montargis Fond           0.0
Montargis Fond 2      4540.5
Montierchaume         5208.2
Oysonville            5371.8
Tours péri-urbaine    5388.0
Verneuil              5614.4
Name: SOMO35 Subscore, dtype: float64
              nom site        Date de début          Date de fin Organisme  \
0           Blois nord  2023/01/01 00:00:00  2023/01/01 23:59:59   LIG'AIR   
1      Chateauroux Sud  2023/01/01 00:00:00  2023/01/01 23:59:59   LIG'AIR   
2         Dreux Centre  2023/01/01 00:00:00  2023/01/01 23:59:59   LIG'AIR   
3           Faverolles  2023/01/01 00:00:00  2023/01/01 23:59:59   LIG'AIR   
4              Fulbert  2023/01/01 00:00:00  2023/01/01 23:59:59   LIG'AIR   
5       Joué lès Tours  2023/01/01 00:00:00  2023/

In [5]:
# Export the final table of annual SOMO35 scores for LIG'AIR to a CSV file.
# The row index is left out and the file is written with Latin-1 encoding.
result.to_csv(
    'df_SOMO35_LIGAIR.csv',
    encoding='latin1',
    index=False,
)