In [3]:
!pip install scipy
!pip install numpy
!pip install matplotlib
!pip install pandas
!pip install seaborn
!pip install zstandard



In [4]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
import zstandard as zstd

In [5]:
def read_zst_csv(file_path):
    with open(file_path, "rb") as f:
        dctx = zstd.ZstdDecompressor()
        with dctx.stream_reader(f) as reader:
            decompressed_data = pd.read_csv(reader)
    return decompressed_data

# Chargement des données

In [19]:
hydrobio_data = read_zst_csv("data/donnees_hydrobio.csv.zst")

In [18]:
with open("data/donnees_physicochimie.csv.zst", "rb") as f:
    dctx = zstd.ZstdDecompressor()
    with dctx.stream_reader(f) as reader:
        data = pd.read_csv(reader)

    pc_list_cols = data.columns
    pc_list_cat = pc_list_cols[
        pc_list_cols.str.startswith(("Lb", "Nom", "Mnemo", "Cd", "Sym", "Com"))
    ]

    pc_dict_cat = {col: "category" for col in pc_list_cat}

    with dctx.stream_reader(f) as reader:
        physicochimie_data = pd.read_csv(
            "data/donnees_physicochimie.csv.zst",
            sep=",",
            engine="c",
            escapechar="\\",
            dtype=pc_dict_cat,
            parse_dates=[7],
            iterator=False,
        )

  data = pd.read_csv(reader)


In [20]:
with open("data/stations_hb.csv.zst", "rb") as f:
    dctx = zstd.ZstdDecompressor()
    with dctx.stream_reader(f) as reader:
        station_data = pd.read_csv(reader, sep=';',escapechar = '\\')

station_data = station_data[station_data['CdStationMesureEauxSurface'].str.isnumeric()]

# Analyse des données

## données phyco-chimiques

In [25]:
physicochimie_data.info()
physicochimie_data.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8917443 entries, 0 to 8917442
Data columns (total 49 columns):
 #   Column                      Dtype         
---  ------                      -----         
 0   CdStationMesureEauxSurface  category      
 1   LbStationMesureEauxSurface  category      
 2   CdSupport                   category      
 3   LbSupport                   category      
 4   CdFractionAnalysee          category      
 5   LbFractionAnalysee          category      
 6   CdPrelevement               category      
 7   DatePrel                    datetime64[ns]
 8   HeurePrel                   object        
 9   CdParametre                 category      
 10  LbLongParamètre             category      
 11  RsAna                       float64       
 12  CdUniteMesure               category      
 13  SymUniteMesure              category      
 14  CdRqAna                     category      
 15  MnemoRqAna                  category      
 16  CdInsituAna       

Unnamed: 0,CdStationMesureEauxSurface,LbStationMesureEauxSurface,CdSupport,LbSupport,CdFractionAnalysee,LbFractionAnalysee,CdPrelevement,DatePrel,HeurePrel,CdParametre,...,CommentairesAna,ComResultatAna,CdRdd,NomRdd,CdProducteur,NomProducteur,CdPreleveur,NomPreleveur,CdLaboratoire,NomLaboratoire
0,5005600,La Béronne en aval de Melle,3,Eau,23,Eau brute,200561,2005-07-06,10:55:00,1305,...,,,400000779.0,Réseau de suivi de la qualité des eaux superfi...,25160178700053,ETABLISSEMENT PUBLIC TERRITORIAL DE BASSIN CHA...,22790001600014,DEPARTEMENT DES DEUX SEVRES,379,Direction Départementale de l'Agriculture et d...
1,5200115,Le Laurhibar à Saint Jean le Vieux,3,Eau,23,Eau brute,203218,2005-09-28,11:15:00,1313,...,,,500000249.0,Réseau de suivi de la qualité des eaux superfi...,18310006400033,AGENCE DE L'EAU ADOUR GARONNE,22640001800066,DEPARTEMENT DES PYRENEES ATLANTIQUES - CENTR R...,22640001800066,DEPARTEMENT DES PYRENEES ATLANTIQUES - CENTR R...
2,5001800,L'Arnoult en amont de Pont l'Abbé d'Arnoult,3,Eau,23,Eau brute,198487,2005-01-19,15:00:00,1301,...,,,,,18310006400033,AGENCE DE L'EAU ADOUR GARONNE,18310006400033,AGENCE DE L'EAU ADOUR GARONNE,18310006400033,AGENCE DE L'EAU ADOUR GARONNE
3,5001800,L'Arnoult en amont de Pont l'Abbé d'Arnoult,3,Eau,23,Eau brute,198487,2005-01-19,15:00:00,1302,...,,,,,18310006400033,AGENCE DE L'EAU ADOUR GARONNE,18310006400033,AGENCE DE L'EAU ADOUR GARONNE,18310006400033,AGENCE DE L'EAU ADOUR GARONNE
4,5001800,L'Arnoult en amont de Pont l'Abbé d'Arnoult,3,Eau,23,Eau brute,198487,2005-01-19,15:00:00,1303,...,,,,,18310006400033,AGENCE DE L'EAU ADOUR GARONNE,18310006400033,AGENCE DE L'EAU ADOUR GARONNE,18310006400033,AGENCE DE L'EAU ADOUR GARONNE


## données stations

In [26]:
station_data.info()
station_data.head()

<class 'pandas.core.frame.DataFrame'>
Index: 6472 entries, 0 to 6471
Data columns (total 39 columns):
 #   Column                                     Non-Null Count  Dtype  
---  ------                                     --------------  -----  
 0   CdStationMesureEauxSurface                 6472 non-null   object 
 1   LbStationMesureEauxSurface                 6472 non-null   object 
 2   DurStationMesureEauxSurface                1296 non-null   float64
 3   CoordXStationMesureEauxSurface             6472 non-null   float64
 4   CoordYStationMesureEauxSurface             6472 non-null   float64
 5   CdProjStationMesureEauxSurface             6472 non-null   int64  
 6   LibelleProjection                          6472 non-null   object 
 7   CodeCommune                                6470 non-null   object 
 8   LbCommune                                  6470 non-null   object 
 9   CodeDepartement                            6470 non-null   object 
 10  LbDepartement                

Unnamed: 0,CdStationMesureEauxSurface,LbStationMesureEauxSurface,DurStationMesureEauxSurface,CoordXStationMesureEauxSurface,CoordYStationMesureEauxSurface,CdProjStationMesureEauxSurface,LibelleProjection,CodeCommune,LbCommune,CodeDepartement,...,DateMAJInfosStationMesureEauxSurface,FinaliteStationMesureEauxSurface,LocPreciseStationMesureEauxSurface,CodeNatureStationMesureEauxSurface,LibelleNatureStationMesureEauxSurface,AltitudePointCaracteritisque,PkPointTronconEntiteHydroPrincipale,PremierMoisAnneeEtiage,SuperficieBassinVersantReel,SuperficieBassinVersantTopo
0,1000477,LA SLACK À RINXENT (62),,610228.78,7078879.9,26,RGF93 / Lambert 93,62711,RINXENT,62,...,2015-12-14 00:00:00,,Lieu-dit Ferme du Château. La Planche du Devin,M,Station de mesure Manuelle,0.0,980.48,6.0,,0.0
1,1000602,COLOGNE à BUIRE COURCELLES (80),,700318.4,6980033.6,26,RGF93 / Lambert 93,80150,BUIRE-COURCELLES,80,...,2015-12-14 00:00:00,,MOULIN DE BINARD PONT D 194 E,M,Station de mesure Manuelle,0.0,992.58,6.0,,0.0
2,1000605,L'OMIGNON À DEVISE (80),,700279.85,6973284.26,26,RGF93 / Lambert 93,80239,DEVISE,80,...,2015-12-14 00:00:00,,ROUTE DE L'EGLISE,M,Station de mesure Manuelle,0.0,993.69,6.0,,0.0
3,1001122,L'HELPE MAJEURE À EPPE SAUVAGE (59),,784224.69,7002720.08,26,RGF93 / Lambert 93,59198,EPPE-SAUVAGE,59,...,2011-06-07 00:00:00,,PONT DE LA D119,M,Station de mesure Manuelle,183.0,947.01,6.0,,
4,1001131,HELPE MINEURE à GRAND FAYT (59),,757716.99,7002035.12,26,RGF93 / Lambert 93,59270,GRAND-FAYT,59,...,2016-01-06 00:00:00,,ANCIEN MOULIN,M,Station de mesure Manuelle,0.0,987.6,6.0,,0.0


## données hydrobiologiques

In [27]:
hydrobio_data.info()
hydrobio_data.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 43535 entries, 0 to 43534
Data columns (total 21 columns):
 #   Column                              Non-Null Count  Dtype  
---  ------                              --------------  -----  
 0   Unnamed: 0                          43535 non-null  int64  
 1   CdStationMesureEauxSurface          43535 non-null  int64  
 2   LbStationMesureEauxSurface          43535 non-null  object 
 3   CdPointEauxSurf                     43115 non-null  float64
 4   DateDebutOperationPrelBio           43535 non-null  object 
 5   CdSupport                           43535 non-null  int64  
 6   LbSupport                           43535 non-null  object 
 7   DtProdResultatBiologique            14829 non-null  object 
 8   CdParametreResultatBiologique       43535 non-null  int64  
 9   LbLongParametre                     43535 non-null  object 
 10  ResIndiceResultatBiologique         43522 non-null  float64
 11  CdUniteMesure                       43535

Unnamed: 0.1,Unnamed: 0,CdStationMesureEauxSurface,LbStationMesureEauxSurface,CdPointEauxSurf,DateDebutOperationPrelBio,CdSupport,LbSupport,DtProdResultatBiologique,CdParametreResultatBiologique,LbLongParametre,...,CdUniteMesure,SymUniteMesure,CdRqIndiceResultatBiologique,MnemoRqAna,CdMethEval,RefOperationPrelBio,CdProducteur,NomProducteur,CdAccredRsIndiceResultatBiologique,MnAccredRsIndiceResultatBiologique
0,0,2000990,LE LERTZBACH À HEGENHEIM,1.0,2010-07-20,13,Macroinvertébrés aquatiques,,7613,Indice Invertébrés Multimétrique (I2M2),...,X,X,1,Résultat > seuil de quantification et < au seu...,,1814647,18570301400018,AGENCE DE L'EAU RHIN MEUSE,0.0,Inconnu
1,1,2001000,L'AUGRABEN À BARTENHEIM,2.0,2010-09-20,13,Macroinvertébrés aquatiques,,7613,Indice Invertébrés Multimétrique (I2M2),...,X,X,1,Résultat > seuil de quantification et < au seu...,,1814648,18570301400018,AGENCE DE L'EAU RHIN MEUSE,0.0,Inconnu
2,2,2001000,L'AUGRABEN À BARTENHEIM,2.0,2011-08-03,13,Macroinvertébrés aquatiques,,7613,Indice Invertébrés Multimétrique (I2M2),...,X,X,1,Résultat > seuil de quantification et < au seu...,,2148304,18570301400018,AGENCE DE L'EAU RHIN MEUSE,0.0,Inconnu
3,3,2001000,L'AUGRABEN À BARTENHEIM,2.0,2012-08-16,13,Macroinvertébrés aquatiques,,7613,Indice Invertébrés Multimétrique (I2M2),...,X,X,1,Résultat > seuil de quantification et < au seu...,,2147990,18570301400018,AGENCE DE L'EAU RHIN MEUSE,0.0,Inconnu
4,4,2001025,LE RIEDGRABEN À LANDSER,5.0,2010-07-20,13,Macroinvertébrés aquatiques,,7613,Indice Invertébrés Multimétrique (I2M2),...,X,X,1,Résultat > seuil de quantification et < au seu...,,1814649,18570301400018,AGENCE DE L'EAU RHIN MEUSE,0.0,Inconnu


### Analyse d'une station

In [51]:
analyse_station = physicochimie_data[physicochimie_data["CdStationMesureEauxSurface"] == "01000477"]                

In [69]:
date_prel = analyse_station[["DatePrel", "HeurePrel", "CdParametre"]]
date_prel = date_prel.groupby(["CdParametre"])

  date_prel = date_prel.groupby(["CdParametre"])


Unnamed: 0_level_0,DatePrel,HeurePrel
CdParametre,Unnamed: 1_level_1,Unnamed: 2_level_1
1177,2016-04-08,11:35:00
1295,2016-04-08,11:35:00
1301,2016-04-08,11:35:00
1302,2016-04-08,11:35:00
1303,2016-04-08,11:35:00
1305,2016-04-08,11:35:00
1311,2016-04-08,11:35:00
1312,2016-04-08,11:35:00
1313,2016-04-08,11:35:00
1319,2016-04-08,11:35:00


In [None]:
prelevement_counts = date_prel.size()

# Plot the counts
prelevement_counts.plot(kind="bar", figsize=(10, 6), color="skyblue")

# Add title and labels
plt.title("Number of Prélèvements per CdParametre")
plt.xlabel("CdParametre")
plt.ylabel("Number of Prélèvements")

# Show the plot
plt.show()