In [2]:
# Code for extracting the daily ATMO score for each station in ATMO AUVERGNE-RHÔNE-ALPES (Région: AUVERGNE-RHÔNE-ALPES)

# Note: will use abbreviation ATMO ARA

In [3]:
# Importing required packages
import pandas as pd

import os

# Base directory path of the raw air-quality exports.
# Built from components with os.path.join: a trailing backslash inside a normal string literal
# escapes the closing quote, and "\r" would be read as a carriage return.
base_path = os.path.join(".", "data", "raw", "Données de la qualité de l'air (Indicateurs SOMO35 et ATMO)")

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [4]:
# PM10 ATMO ARA

# File path to the data. Folder and file name are separate os.path.join components so that
# no backslash sits inside a normal string literal ("\E" is an invalid escape sequence) and
# the path does not depend on the Windows separator.
PM10_ATMO_ARA_file_path = os.path.join(
    base_path,
    "PM10 Moyenne Journalière",
    "Export Moy. journalière - PM10 - ATMO ARA.csv",
)

# Read the semicolon-delimited export
df_PM10 = pd.read_csv(PM10_ATMO_ARA_file_path, delimiter=';')

# Display the first few rows of the DataFrame
print(df_PM10.head())

# ATMO sub-score bands for PM10 (Moyenne journalière): one (lower, upper) tuple per sub-score
ATMO_subscore_PM10 = {
    'Subscore': [1, 2, 3, 4, 5, 6],
    'Range': [(0.0, 20.0), (20.0, 40.0), (40.0, 50.0), (50.0, 100.0), (100.0, 150.0), (150.0,10000.0)]
}

# Convert the ATMO_subscore dictionary to a DataFrame
ATMO_subscore_df_PM10 = pd.DataFrame(ATMO_subscore_PM10)

# Expand every (lower, upper) tuple into the list of all 0.1 steps between the two bounds,
# both bounds included, so that membership can be tested with `in`.
ATMO_subscore_df_PM10['Range'] = ATMO_subscore_df_PM10['Range'].apply(
    lambda x: [round(i / 10, 1) for i in range(int(x[0] * 10), int(x[1] * 10) + 1)]
)

# Function to assign subscore based on value of PM10 measurement
def assign_subscore_PM10(valeur, subscore_table=None):
    """Return the ATMO sub-score of a PM10 value, or None when no band contains it.

    Parameters
    ----------
    valeur : float
        PM10 measurement to classify.
    subscore_table : pandas.DataFrame, optional
        Table with a 'Subscore' column and a 'Range' column. Each 'Range' entry is an
        ordered sequence of which only the first and last elements are read, as inclusive
        lower and upper bounds; a (lower, upper) tuple and the expanded 0.1-step list are
        therefore both accepted. Defaults to the notebook-level ``ATMO_subscore_df_PM10``.

    Returns
    -------
    int or None
        Sub-score of the first band, in table order, whose bounds contain ``valeur``.
        A value equal to a bound shared by two bands gets the earlier band. None is
        returned for NaN, for values outside every band and for non-numeric input.
    """
    if subscore_table is None:
        subscore_table = ATMO_subscore_df_PM10
    try:
        for subscore, band in zip(subscore_table['Subscore'], subscore_table['Range']):
            # Compare against the bounds instead of testing membership in the enumerated
            # list: values that are not exact 0.1 multiples are still classified, and no
            # long list has to be scanned for every measurement.
            if band[0] <= valeur <= band[-1]:
                return int(subscore)
    except TypeError:
        # Non-numeric input (e.g. None or text) cannot fall in any band
        return None
    return None  # Return None if no matching subscore found

# Score every PM10 measurement in 'valeur' and store the result in a new 'ATMO sub-score' column
df_PM10['ATMO sub-score'] = df_PM10['valeur'].map(assign_subscore_PM10)

# Show the first rows, now including the sub-score
print(df_PM10.head())


         Date de début          Date de fin                  Organisme  \
0  2023/01/01 00:00:00  2023/01/01 23:59:59  ATMO AUVERGNE-RHÔNE-ALPES   
1  2023/01/01 00:00:00  2023/01/01 23:59:59  ATMO AUVERGNE-RHÔNE-ALPES   
2  2023/01/01 00:00:00  2023/01/01 23:59:59  ATMO AUVERGNE-RHÔNE-ALPES   
3  2023/01/01 00:00:00  2023/01/01 23:59:59  ATMO AUVERGNE-RHÔNE-ALPES   
4  2023/01/01 00:00:00  2023/01/01 23:59:59  ATMO AUVERGNE-RHÔNE-ALPES   

    code zas                      Zas code site         nom site  \
0  FR84ZAG04     ZAG CLERMONT-FERRAND   FR07004      Montferrand   
1  FR84ZAG04     ZAG CLERMONT-FERRAND   FR07009     Jardin Lecoq   
2  FR84ZRE02  ZR AUVERGNE-RHONE-ALPES   FR07022  Paray le Fresil   
3  FR84ZRE02  ZR AUVERGNE-RHONE-ALPES   FR07028      Les Ancizes   
4  FR84ZRE02  ZR AUVERGNE-RHONE-ALPES   FR07031          Rageade   

  type d'implantation Polluant type d'influence  ... valeur valeur brute  \
0             Urbaine     PM10             Fond  ...   16.0    15.7250

In [5]:
# PM2.5 ATMO ARA

# File path to the data. Folder and file name are separate os.path.join components so that
# no backslash sits inside a normal string literal ("\E" is an invalid escape sequence) and
# the path does not depend on the Windows separator.
PM25_ATMO_ARA_file_path = os.path.join(
    base_path,
    "PM2.5 Moyenne Journalière",
    "Export Moy. journalière - PM2.5 - ATMO ARA.csv",
)

# Read the semicolon-delimited export
df_PM25 = pd.read_csv(PM25_ATMO_ARA_file_path, delimiter=';')

# Display the first few rows of the DataFrame
print(df_PM25.head())

# ATMO sub-score bands for PM2.5 (Moyenne journalière): one (lower, upper) tuple per sub-score
ATMO_subscore_PM25 = {
    'Subscore': [1, 2, 3, 4, 5, 6],
    'Range': [(0.0, 10.0), (10.0, 20.0), (20.0, 25.0), (25.0, 50.0), (50.0, 75.0), (75.0,10000.0)]
}

# Convert the ATMO_subscore dictionary to a DataFrame
ATMO_subscore_df_PM25 = pd.DataFrame(ATMO_subscore_PM25)

# Expand every (lower, upper) tuple into the list of all 0.1 steps between the two bounds,
# both bounds included, so that membership can be tested with `in`.
ATMO_subscore_df_PM25['Range'] = ATMO_subscore_df_PM25['Range'].apply(
    lambda x: [round(i / 10, 1) for i in range(int(x[0] * 10), int(x[1] * 10) + 1)]
)

# Function to assign subscore based on value of PM2.5 measurement
def assign_subscore_PM25(valeur, subscore_table=None):
    """Return the ATMO sub-score of a PM2.5 value, or None when no band contains it.

    Parameters
    ----------
    valeur : float
        PM2.5 measurement to classify.
    subscore_table : pandas.DataFrame, optional
        Table with a 'Subscore' column and a 'Range' column. Each 'Range' entry is an
        ordered sequence of which only the first and last elements are read, as inclusive
        lower and upper bounds; a (lower, upper) tuple and the expanded 0.1-step list are
        therefore both accepted. Defaults to the notebook-level ``ATMO_subscore_df_PM25``.

    Returns
    -------
    int or None
        Sub-score of the first band, in table order, whose bounds contain ``valeur``.
        A value equal to a bound shared by two bands gets the earlier band. None is
        returned for NaN, for values outside every band and for non-numeric input.
    """
    if subscore_table is None:
        subscore_table = ATMO_subscore_df_PM25
    try:
        for subscore, band in zip(subscore_table['Subscore'], subscore_table['Range']):
            # Compare against the bounds instead of testing membership in the enumerated
            # list: values that are not exact 0.1 multiples are still classified, and no
            # long list has to be scanned for every measurement.
            if band[0] <= valeur <= band[-1]:
                return int(subscore)
    except TypeError:
        # Non-numeric input (e.g. None or text) cannot fall in any band
        return None
    return None  # Return None if no matching subscore found

# Score every PM2.5 measurement in 'valeur' and store the result in a new 'ATMO sub-score' column
df_PM25['ATMO sub-score'] = df_PM25['valeur'].map(assign_subscore_PM25)

# Show the first rows, now including the sub-score
print(df_PM25.head())


         Date de début          Date de fin                  Organisme  \
0  2023/01/01 00:00:00  2023/01/01 23:59:59  ATMO AUVERGNE-RHÔNE-ALPES   
1  2023/01/01 00:00:00  2023/01/01 23:59:59  ATMO AUVERGNE-RHÔNE-ALPES   
2  2023/01/01 00:00:00  2023/01/01 23:59:59  ATMO AUVERGNE-RHÔNE-ALPES   
3  2023/01/01 00:00:00  2023/01/01 23:59:59  ATMO AUVERGNE-RHÔNE-ALPES   
4  2023/01/01 00:00:00  2023/01/01 23:59:59  ATMO AUVERGNE-RHÔNE-ALPES   

    code zas                      Zas code site                 nom site  \
0  FR84ZAG04     ZAG CLERMONT-FERRAND   FR07004              Montferrand   
1  FR84ZRE02  ZR AUVERGNE-RHONE-ALPES   FR07022          Paray le Fresil   
2  FR84ZRE02  ZR AUVERGNE-RHONE-ALPES   FR07031                  Rageade   
3  FR84ZAG04     ZAG CLERMONT-FERRAND   FR07034  Clermont-Esplanade Gare   
4  FR84ZRE02  ZR AUVERGNE-RHONE-ALPES   FR07052         Aurillac-Lagarde   

  type d'implantation Polluant type d'influence  ... valeur valeur brute  \
0             Urbaine 

In [6]:
# NO2 ATMO ARA

# File path to the data. Folder and file name are separate os.path.join components so that
# no backslash sits inside a normal string literal ("\E" is an invalid escape sequence) and
# the path does not depend on the Windows separator.
NO2_ATMO_ARA_file_path = os.path.join(
    base_path,
    "NO2 Max Horaire Journalier",
    "Export Max. journalier moy. hor. - NO2 - ATMO ARA.csv",
)

# Read the semicolon-delimited export
df_NO2 = pd.read_csv(NO2_ATMO_ARA_file_path, delimiter=';')

# Display the first few rows of the DataFrame
print(df_NO2.head())

# ATMO sub-score bands for NO2 (Max horaire journalier): one (lower, upper) tuple per sub-score
ATMO_subscore_NO2 = {
    'Subscore': [1, 2, 3, 4, 5, 6],
    'Range': [(0.0, 40.0), (40.0, 90.0), (90.0, 120.0), (120.0, 230.0), (230.0, 340.0), (340.0,10000.0)]
}

# Convert the ATMO_subscore dictionary to a DataFrame
ATMO_subscore_df_NO2 = pd.DataFrame(ATMO_subscore_NO2)

# Expand every (lower, upper) tuple into the list of all 0.1 steps between the two bounds,
# both bounds included, so that membership can be tested with `in`.
ATMO_subscore_df_NO2['Range'] = ATMO_subscore_df_NO2['Range'].apply(
    lambda x: [round(i / 10, 1) for i in range(int(x[0] * 10), int(x[1] * 10) + 1)]
)

# Function to assign subscore based on value of NO2 measurement
def assign_subscore_NO2(valeur, subscore_table=None):
    """Return the ATMO sub-score of an NO2 value, or None when no band contains it.

    Parameters
    ----------
    valeur : float
        NO2 measurement to classify.
    subscore_table : pandas.DataFrame, optional
        Table with a 'Subscore' column and a 'Range' column. Each 'Range' entry is an
        ordered sequence of which only the first and last elements are read, as inclusive
        lower and upper bounds; a (lower, upper) tuple and the expanded 0.1-step list are
        therefore both accepted. Defaults to the notebook-level ``ATMO_subscore_df_NO2``.

    Returns
    -------
    int or None
        Sub-score of the first band, in table order, whose bounds contain ``valeur``.
        A value equal to a bound shared by two bands gets the earlier band. None is
        returned for NaN, for values outside every band and for non-numeric input.
    """
    if subscore_table is None:
        subscore_table = ATMO_subscore_df_NO2
    try:
        for subscore, band in zip(subscore_table['Subscore'], subscore_table['Range']):
            # Compare against the bounds instead of testing membership in the enumerated
            # list: values that are not exact 0.1 multiples are still classified, and no
            # long list has to be scanned for every measurement.
            if band[0] <= valeur <= band[-1]:
                return int(subscore)
    except TypeError:
        # Non-numeric input (e.g. None or text) cannot fall in any band
        return None
    return None  # Return None if no matching subscore found

# Score every NO2 measurement in 'valeur' and store the result in a new 'ATMO sub-score' column
df_NO2['ATMO sub-score'] = df_NO2['valeur'].map(assign_subscore_NO2)

# Show the first rows, now including the sub-score
print(df_NO2.head())


         Date de début          Date de fin                  Organisme  \
0  2023/01/01 00:00:00  2023/01/01 23:59:59  ATMO AUVERGNE-RHÔNE-ALPES   
1  2023/01/01 00:00:00  2023/01/01 23:59:59  ATMO AUVERGNE-RHÔNE-ALPES   
2  2023/01/01 00:00:00  2023/01/01 23:59:59  ATMO AUVERGNE-RHÔNE-ALPES   
3  2023/01/01 00:00:00  2023/01/01 23:59:59  ATMO AUVERGNE-RHÔNE-ALPES   
4  2023/01/01 00:00:00  2023/01/01 23:59:59  ATMO AUVERGNE-RHÔNE-ALPES   

    code zas                      Zas code site                 nom site  \
0  FR84ZAG04     ZAG CLERMONT-FERRAND   FR07004              Montferrand   
1  FR84ZAG04     ZAG CLERMONT-FERRAND   FR07009             Jardin Lecoq   
2  FR84ZRE02  ZR AUVERGNE-RHONE-ALPES   FR07022          Paray le Fresil   
3  FR84ZAG04     ZAG CLERMONT-FERRAND   FR07034  Clermont-Esplanade Gare   
4  FR84ZAG04     ZAG CLERMONT-FERRAND   FR07039       Chamalières Europe   

  type d'implantation Polluant type d'influence  ... valeur valeur brute  \
0             Urbaine 

In [7]:
# O3 ATMO ARA

# File path to the data. Folder and file name are separate os.path.join components so that
# no backslash sits inside a normal string literal ("\E" is an invalid escape sequence) and
# the path does not depend on the Windows separator.
O3_ATMO_ARA_file_path = os.path.join(
    base_path,
    "O3 Max Horaire Journalier",
    "Export Max. journalier moy. hor. - O3 - ATMO ARA.csv",
)

# Read the semicolon-delimited export
df_O3 = pd.read_csv(O3_ATMO_ARA_file_path, delimiter=';')

# Display the first few rows of the DataFrame
print(df_O3.head())

# ATMO sub-score bands for O3 (Max horaire journalier): one (lower, upper) tuple per sub-score
ATMO_subscore_O3 = {
    'Subscore': [1, 2, 3, 4, 5, 6],
    'Range': [(0.0, 50.0), (50.0, 100.0), (100.0, 130.0), (130.0, 240.0), (240.0, 380.0), (380.0,10000.0)]
}

# Convert the ATMO_subscore dictionary to a DataFrame
ATMO_subscore_df_O3 = pd.DataFrame(ATMO_subscore_O3)

# Expand every (lower, upper) tuple into the list of all 0.1 steps between the two bounds,
# both bounds included, so that membership can be tested with `in`.
ATMO_subscore_df_O3['Range'] = ATMO_subscore_df_O3['Range'].apply(
    lambda x: [round(i / 10, 1) for i in range(int(x[0] * 10), int(x[1] * 10) + 1)]
)

# Function to assign subscore based on value of O3 measurement
def assign_subscore_O3(valeur, subscore_table=None):
    """Return the ATMO sub-score of an O3 value, or None when no band contains it.

    Parameters
    ----------
    valeur : float
        O3 measurement to classify.
    subscore_table : pandas.DataFrame, optional
        Table with a 'Subscore' column and a 'Range' column. Each 'Range' entry is an
        ordered sequence of which only the first and last elements are read, as inclusive
        lower and upper bounds; a (lower, upper) tuple and the expanded 0.1-step list are
        therefore both accepted. Defaults to the notebook-level ``ATMO_subscore_df_O3``.

    Returns
    -------
    int or None
        Sub-score of the first band, in table order, whose bounds contain ``valeur``.
        A value equal to a bound shared by two bands gets the earlier band. None is
        returned for NaN, for values outside every band and for non-numeric input.
    """
    if subscore_table is None:
        subscore_table = ATMO_subscore_df_O3
    try:
        for subscore, band in zip(subscore_table['Subscore'], subscore_table['Range']):
            # Compare against the bounds instead of testing membership in the enumerated
            # list: values that are not exact 0.1 multiples are still classified, and no
            # long list has to be scanned for every measurement.
            if band[0] <= valeur <= band[-1]:
                return int(subscore)
    except TypeError:
        # Non-numeric input (e.g. None or text) cannot fall in any band
        return None
    return None  # Return None if no matching subscore found

# Score every O3 measurement in 'valeur' and store the result in a new 'ATMO sub-score' column
df_O3['ATMO sub-score'] = df_O3['valeur'].map(assign_subscore_O3)

# Show the first rows, now including the sub-score
print(df_O3.head())


         Date de début          Date de fin                  Organisme  \
0  2023/01/01 00:00:00  2023/01/01 23:59:59  ATMO AUVERGNE-RHÔNE-ALPES   
1  2023/01/01 00:00:00  2023/01/01 23:59:59  ATMO AUVERGNE-RHÔNE-ALPES   
2  2023/01/01 00:00:00  2023/01/01 23:59:59  ATMO AUVERGNE-RHÔNE-ALPES   
3  2023/01/01 00:00:00  2023/01/01 23:59:59  ATMO AUVERGNE-RHÔNE-ALPES   
4  2023/01/01 00:00:00  2023/01/01 23:59:59  ATMO AUVERGNE-RHÔNE-ALPES   

    code zas                      Zas code site          nom site  \
0  FR84ZAG04     ZAG CLERMONT-FERRAND   FR07004       Montferrand   
1  FR84ZAG04     ZAG CLERMONT-FERRAND   FR07009      Jardin Lecoq   
2  FR84ZRE02  ZR AUVERGNE-RHONE-ALPES   FR07022   Paray le Fresil   
3  FR84ZRE02  ZR AUVERGNE-RHONE-ALPES   FR07031           Rageade   
4  FR84ZRE02  ZR AUVERGNE-RHONE-ALPES   FR07052  Aurillac-Lagarde   

  type d'implantation Polluant type d'influence  ... valeur valeur brute  \
0             Urbaine       O3             Fond  ...   72.0     

In [8]:
# SO2 ATMO ARA

# File path to the data. Folder and file name are separate os.path.join components so that
# no backslash sits inside a normal string literal ("\E" is an invalid escape sequence) and
# the path does not depend on the Windows separator.
SO2_ATMO_ARA_file_path = os.path.join(
    base_path,
    "SO2 Max Horaire Journalier",
    "Export Max. journalier moy. hor. - SO2 - ATMO ARA.csv",
)

# Read the semicolon-delimited export
df_SO2 = pd.read_csv(SO2_ATMO_ARA_file_path, delimiter=';')

# Display the first few rows of the DataFrame
print(df_SO2.head())

# ATMO sub-score bands for SO2 (Max horaire journalier): one (lower, upper) tuple per sub-score
ATMO_subscore_SO2 = {
    'Subscore': [1, 2, 3, 4, 5, 6],
    'Range': [(0.0, 100.0), (100.0, 200.0), (200.0, 350.0), (350.0, 500.0), (500.0, 750.0), (750.0,10000.0)]
}

# Convert the ATMO_subscore dictionary to a DataFrame
ATMO_subscore_df_SO2 = pd.DataFrame(ATMO_subscore_SO2)

# Expand every (lower, upper) tuple into the list of all 0.1 steps between the two bounds,
# both bounds included, so that membership can be tested with `in`.
ATMO_subscore_df_SO2['Range'] = ATMO_subscore_df_SO2['Range'].apply(
    lambda x: [round(i / 10, 1) for i in range(int(x[0] * 10), int(x[1] * 10) + 1)]
)

# Function to assign subscore based on value of SO2 measurement
def assign_subscore_SO2(valeur, subscore_table=None):
    """Return the ATMO sub-score of an SO2 value, or None when no band contains it.

    Parameters
    ----------
    valeur : float
        SO2 measurement to classify.
    subscore_table : pandas.DataFrame, optional
        Table with a 'Subscore' column and a 'Range' column. Each 'Range' entry is an
        ordered sequence of which only the first and last elements are read, as inclusive
        lower and upper bounds; a (lower, upper) tuple and the expanded 0.1-step list are
        therefore both accepted. Defaults to the notebook-level ``ATMO_subscore_df_SO2``.

    Returns
    -------
    int or None
        Sub-score of the first band, in table order, whose bounds contain ``valeur``.
        A value equal to a bound shared by two bands gets the earlier band. None is
        returned for NaN, for values outside every band and for non-numeric input.
    """
    if subscore_table is None:
        subscore_table = ATMO_subscore_df_SO2
    try:
        for subscore, band in zip(subscore_table['Subscore'], subscore_table['Range']):
            # Compare against the bounds instead of testing membership in the enumerated
            # list: values that are not exact 0.1 multiples are still classified, and no
            # long list has to be scanned for every measurement.
            if band[0] <= valeur <= band[-1]:
                return int(subscore)
    except TypeError:
        # Non-numeric input (e.g. None or text) cannot fall in any band
        return None
    return None  # Return None if no matching subscore found

# Apply the SO2 function to fill in the Subscore column.
# The SO2 values must be scored with assign_subscore_SO2: scoring them with the O3 function
# would compare them against the O3 bands and leave assign_subscore_SO2 unused.
df_SO2['ATMO sub-score'] = df_SO2['valeur'].apply(assign_subscore_SO2)

# Display the updated DataFrame
print(df_SO2.head())


         Date de début          Date de fin                  Organisme  \
0  2023/01/01 00:00:00  2023/01/01 23:59:59  ATMO AUVERGNE-RHÔNE-ALPES   
1  2023/01/01 00:00:00  2023/01/01 23:59:59  ATMO AUVERGNE-RHÔNE-ALPES   
2  2023/01/01 00:00:00  2023/01/01 23:59:59  ATMO AUVERGNE-RHÔNE-ALPES   
3  2023/01/01 00:00:00  2023/01/01 23:59:59  ATMO AUVERGNE-RHÔNE-ALPES   
4  2023/01/01 00:00:00  2023/01/01 23:59:59  ATMO AUVERGNE-RHÔNE-ALPES   

    code zas                      Zas code site             nom site  \
0  FR84ZAG02             ZAG GRENOBLE   FR15043  Grenoble Les Frenes   
1  FR84ZAG01                 ZAG LYON   FR20029         FEYZIN STADE   
2  FR84ZRE02  ZR AUVERGNE-RHONE-ALPES   FR27003   ROCHES DE CONDRIEU   
3  FR84ZRE02  ZR AUVERGNE-RHONE-ALPES   FR33302     St Germain/Rhône   
4  FR84ZRE02  ZR AUVERGNE-RHONE-ALPES   FR36007    Saint Bauzile CEC   

      type d'implantation Polluant type d'influence  ... valeur valeur brute  \
0                 Urbaine      SO2        

In [9]:
# Stack the five per-pollutant frames vertically with a fresh 0..n-1 index, and expose the
# per-pollutant sub-score under the column name 'ATMO Score'
combined_df_ATMO_ARA = pd.concat(
    [df_PM10, df_PM25, df_NO2, df_O3, df_SO2],
    axis=0,
    ignore_index=True,
).rename(columns={'ATMO sub-score': 'ATMO Score'})

# Sanity check: first five rows of the stacked frame
print(combined_df_ATMO_ARA.head())

         Date de début          Date de fin                  Organisme  \
0  2023/01/01 00:00:00  2023/01/01 23:59:59  ATMO AUVERGNE-RHÔNE-ALPES   
1  2023/01/01 00:00:00  2023/01/01 23:59:59  ATMO AUVERGNE-RHÔNE-ALPES   
2  2023/01/01 00:00:00  2023/01/01 23:59:59  ATMO AUVERGNE-RHÔNE-ALPES   
3  2023/01/01 00:00:00  2023/01/01 23:59:59  ATMO AUVERGNE-RHÔNE-ALPES   
4  2023/01/01 00:00:00  2023/01/01 23:59:59  ATMO AUVERGNE-RHÔNE-ALPES   

    code zas                      Zas code site         nom site  \
0  FR84ZAG04     ZAG CLERMONT-FERRAND   FR07004      Montferrand   
1  FR84ZAG04     ZAG CLERMONT-FERRAND   FR07009     Jardin Lecoq   
2  FR84ZRE02  ZR AUVERGNE-RHONE-ALPES   FR07022  Paray le Fresil   
3  FR84ZRE02  ZR AUVERGNE-RHONE-ALPES   FR07028      Les Ancizes   
4  FR84ZRE02  ZR AUVERGNE-RHONE-ALPES   FR07031          Rageade   

  type d'implantation Polluant type d'influence  ... valeur brute  \
0             Urbaine     PM10             Fond  ...    15.725000   
1       

In [24]:
# For every site and day, find the index label of the row with the highest 'ATMO Score'.
# Rows without a score are dropped first: a (site, day) group made only of missing scores
# has no valid label to return, and the .loc selection below would then fail.
idx_max_scores = (
    combined_df_ATMO_ARA
    .dropna(subset=['ATMO Score'])
    .groupby(['nom site', 'Date de fin'])['ATMO Score']
    .idxmax()
)

# Select those rows from the combined frame and renumber them from 0
final_df_ATMO_ARA = combined_df_ATMO_ARA.loc[idx_max_scores].reset_index(drop=True)

In [25]:
# Write the daily ATMO score table for ATMO AUVERGNE-RHÔNE-ALPES to a Latin-1 encoded CSV,
# without the index column
final_df_ATMO_ARA.to_csv(
    path_or_buf='final_df_ATMO_ARA.csv',
    encoding='latin1',
    index=False,
)

In [26]:
# Constructing indicator for ATMO AUVERGNE-RHÔNE-ALPES (ATMO ARA)

# File path to the data: the daily-score export is written with a bare file name, i.e. into
# the current working directory, so it is read back from there rather than from base_path.
ATMO_ATMO_ARA_file_path = "final_df_ATMO_ARA.csv"

# Use read_csv function from pandas; the file is comma-separated and Latin-1 encoded
df_ATMO = pd.read_csv(ATMO_ATMO_ARA_file_path, delimiter=',', encoding = 'latin1')

print(df_ATMO.head())

# Number of rows per site and per 'ATMO Score' value
counts = df_ATMO.groupby('nom site')['ATMO Score'].value_counts().reset_index(name='counts')

# Getting a df of the site-specific information e.g., code, latitude, longitude
# (first non-null value of every column for each site)
first_row_per_nom_site = df_ATMO.groupby('nom site').first().reset_index()

# Merge the site-specific information with the counts DataFrame.
# Both inputs carry an 'ATMO Score' column, so the merge suffixes them: '_x' is the score
# being counted and is kept, '_y' comes from the site-information frame and is dropped
# together with the date columns.
result = pd.merge(counts, first_row_per_nom_site, on='nom site', how = 'inner').drop(['Date de fin', 'Date de début', 'ATMO Score_y'], axis = 1)
print(result)

         Date de début          Date de fin                  Organisme  \
0  2023/01/01 00:00:00  2023/01/01 23:59:59  ATMO AUVERGNE-RHÔNE-ALPES   
1  2023/01/02 00:00:00  2023/01/02 23:59:59  ATMO AUVERGNE-RHÔNE-ALPES   
2  2023/01/03 00:00:00  2023/01/03 23:59:59  ATMO AUVERGNE-RHÔNE-ALPES   
3  2023/01/04 00:00:00  2023/01/04 23:59:59  ATMO AUVERGNE-RHÔNE-ALPES   
4  2023/01/05 00:00:00  2023/01/05 23:59:59  ATMO AUVERGNE-RHÔNE-ALPES   

    code zas       Zas code site         nom site type d'implantation  \
0  FR84ZAG01  ZAG LYON   FR20013  A7 SUD LYONNAIS         Périurbaine   
1  FR84ZAG01  ZAG LYON   FR20013  A7 SUD LYONNAIS         Périurbaine   
2  FR84ZAG01  ZAG LYON   FR20013  A7 SUD LYONNAIS         Périurbaine   
3  FR84ZAG01  ZAG LYON   FR20013  A7 SUD LYONNAIS         Périurbaine   
4  FR84ZAG01  ZAG LYON   FR20013  A7 SUD LYONNAIS         Périurbaine   

  Polluant type d'influence  ... valeur brute unité de mesure taux de saisie  \
0     PM10           Trafic  ...    

In [27]:
# Total number of counted rows per site ('nom site'), summed over every ATMO score
total_counts_by_nom = result.groupby('nom site')['counts'].sum()

# Keep only the rows whose ATMO score is 3 or higher
filtered_result = result.loc[result['ATMO Score_x'].ge(3)]

# Per-site number of counted rows with a score of 3 or higher
counts_greater_than_3_by_nom = filtered_result.groupby('nom site')['counts'].sum()

# Share of rows scoring 3 or higher at each site; a site with no such rows is missing from
# the numerator and therefore ends up with 0
ratio_counts_greater_than_3 = counts_greater_than_3_by_nom.div(total_counts_by_nom).fillna(0)

# Display the resulting Series
print(ratio_counts_greater_than_3)

# Attach the site-specific columns, drop the date and score columns, and publish the ratio
# under the column name 'indicator'
ratio_counts_greater_than_3_df = (
    pd.merge(ratio_counts_greater_than_3, first_row_per_nom_site, on='nom site', how='inner')
    .drop(columns=['Date de fin', 'Date de début', 'ATMO Score'])
    .rename(columns={'counts': 'indicator'})
)
print(ratio_counts_greater_than_3_df)

nom site
A7 SUD LYONNAIS         0.175342
A7 Salaise Ouest        0.084932
A7 Valence Est          0.075419
ANNECY Rocade           0.065753
ANNEMASSE               0.248619
                          ...   
Valence Périurb. Sud    0.320548
Valence Urb. Centre     0.169863
Vichy                   0.190083
Villefranche Centre     0.234160
Voiron Urbain           0.250000
Name: counts, Length: 74, dtype: float64
                nom site  indicator                  Organisme   code zas  \
0        A7 SUD LYONNAIS   0.175342  ATMO AUVERGNE-RHÔNE-ALPES  FR84ZAG01   
1       A7 Salaise Ouest   0.084932  ATMO AUVERGNE-RHÔNE-ALPES  FR84ZAR02   
2         A7 Valence Est   0.075419  ATMO AUVERGNE-RHÔNE-ALPES  FR84ZAR02   
3          ANNECY Rocade   0.065753  ATMO AUVERGNE-RHÔNE-ALPES  FR84ZAR01   
4              ANNEMASSE   0.248619  ATMO AUVERGNE-RHÔNE-ALPES  FR84ZAR01   
..                   ...        ...                        ...        ...   
69  Valence Périurb. Sud   0.320548  ATMO AUVERG

In [28]:
# Write the per-site ATMO ARA indicator to a Latin-1 encoded CSV, without the index column
ratio_counts_greater_than_3_df.to_csv(
    path_or_buf='indicator_ATMO_ARA.csv',
    encoding='latin1',
    index=False,
)

In [14]:
# -----------------------------------------------------------------------------------------------------------------------------------------------------
#                                                               Sanity Checks
# -----------------------------------------------------------------------------------------------------------------------------------------------------

In [15]:
# First five PM2.5 values, to check the column's dtype and precision
print(df_PM25.loc[:, 'valeur'].head(5))

0    7.8
1    6.2
2    6.6
3    8.3
4    6.7
Name: valeur, dtype: float64


In [16]:
# Summary statistics of the PM2.5 sub-scores (count, mean, spread, quartiles, min and max)
df_PM25.loc[:, 'ATMO sub-score'].describe()

count    11549.000000
mean         1.377089
std          0.735840
min          1.000000
25%          1.000000
50%          1.000000
75%          2.000000
max          5.000000
Name: ATMO sub-score, dtype: float64

In [17]:
# Print the first five PM2.5 values again (same check as the earlier sanity cell)
print(df_PM25.loc[:, 'valeur'].head(5))

0    7.8
1    6.2
2    6.6
3    8.3
4    6.7
Name: valeur, dtype: float64
