In [8]:
%load_ext watermark
import numpy as np
import pandas as pd
import datetime as dt
from myst_nb import glue

from plastock import add_table_to_page
from plastockconf import name_zones, name_frequentation, name_situation, particle_groups
from plastockconf import name_substrate, name_distance, table_css_styles_top, name_particles

# Store a single-space string under the key 'blank_caption' (display=False, so
# nothing is shown in this cell); the figure directive further down pulls it
# in as its caption text via {glue:text}.
glue('blank_caption', " ", display=False)

The watermark extension is already loaded. To reload it, use:
  %reload_ext watermark


In [6]:
# Style the time table read from benevolestemps.csv and export it as HTML
# for the report. The 'Plage' column becomes the (unnamed) row index.
d = (
    pd.read_csv('data/inprocess/benevolestemps.csv')
    .set_index('Plage')
    .rename_axis(None)
    .style.set_table_styles(table_css_styles_top)
)

# Write with an explicit encoding: without it open() falls back to the
# platform default, so non-ASCII labels could be written differently from
# one machine to another.
with open('resources/tables/benevolestemps.html', 'w', encoding='utf-8') as file:
    file.write(d.to_html())

In [11]:
# Style the substrate summary table read from summary-table-substrat.csv and
# export it as HTML. The 'Substrat' column becomes the (unnamed) row index.
d = (
    pd.read_csv('data/inprocess/summary-table-substrat.csv')
    .set_index('Substrat')
    .rename_axis(None)
    .style.set_table_styles(table_css_styles_top)
)

# NOTE(review): unlike benevolestemps.html, this output path has no '.html'
# extension. It is left unchanged because other pages may reference it as is;
# confirm whether the extension was dropped by mistake.
# Write with an explicit encoding: without it open() falls back to the
# platform default, so non-ASCII labels could be written differently from
# one machine to another.
with open('resources/tables/summary-table-substrat', 'w', encoding='utf-8') as file:
    file.write(d.to_html())

# Variables explicatives

```{figure} resources/maps/annex_map_regions.jpeg
---
name: annex_map
---
{glue:text}`blank_caption` 
```
{numref}`Figure {number}: {name} <annex_map>` Les résultats sont détaillés pour chaque attribut et chaque lieu. Les quatre sections de l'annexe sont séparées en fonction des macropolluants et des micropolluants. Un chapitre résume les résultats par plage et l'emplacement de l'échantillon sur chaque plage, et un chapitre est consacré aux attributs du site d'échantillonnage.

In [2]:
# Long-form survey records, indexed by beach ('Plage') with the index label
# removed and two column names given their accented spelling.
xwork_data = (
    pd.read_csv("data/inprocess/geo_long_form.csv")
    .set_index("Plage", drop=True)
    .rename_axis(None)
    .rename(columns={'frequentation':'fréquentation', 'echantillon':'échantillon'})
)

# First table of the page: numbering starts here and later cells increment it.
section, page, table_no = 'A', 0, 1
caption="Les points GPS sont ajoutés aux données de l'enquête et exportés au format .csv"

# Only the first rows are styled and shown.
t_one = xwork_data.head().style.set_table_styles(table_css_styles_top)

table_one = add_table_to_page(t_one, table_no, caption, section, page, " ")
glue('a0tablone', table_one, display=True)

Unnamed: 0,Échantillon,Orientation,Position,Substrat,Fréquentation,Situation,Distance,Objet,Compte,Lat,Lon
Amphion,74_Amp_1,NE,1,4,3,1,1,fibres,97,4640,653
Amphion,74_Amp_10,NNE,2,4,3,1,1,fibres,140,4640,653
Amphion,74_Amp_2,NNE,1,4,3,1,1,fibres,121,4640,653
Amphion,74_Amp_3,NE,1,4,3,1,1,fibres,31,4640,653
Amphion,74_Amp_4,NNE,1,4,3,1,1,fibres,179,4640,653


In [3]:
# Column codes of the raw counts, grouped the same way the summaries are.
fibres = ['fbr', 'fbb', 'fbj', 'fbt', 'fbn', 'fba']
fdure = ['frr', 'frb', 'frj', 'frt', 'frn', 'fra']
souple = ['fsr', 'fsb', 'fsj', 'fst', 'fsn', 'fsa']
fragments = [*fdure, *souple]

raw_counts = pd.read_csv("data/end_pipe/micro_results.csv")

# Row-wise totals: "added" is every particle in the sample, the other four
# columns are the per-group sums. All sums are taken from the raw columns.
c = raw_counts.assign(
    added=raw_counts[[*fragments, *fibres]].sum(axis=1),
    fibres=raw_counts[fibres].sum(axis=1),
    fdure=raw_counts[fdure].sum(axis=1),
    souple=raw_counts[souple].sum(axis=1),
    fragments=raw_counts[fragments].sum(axis=1),
).rename(columns={"Echantillon":"echantillon"})

# Identifier columns and the value columns that get melted.
id_vars = ['Plage', 'echantillon', 'orientation', 'position', 'substrat', 'frequentation', 'situation', 'distance']
value_vars = ["fibres", "fdure", "souple"]

# Wide frame limited to the identifiers and the group sums; the three coded
# attributes are cast to integers.
cAdded = c[[*id_vars, *value_vars, "fragments"]].astype(
    {"frequentation": "int", "situation": "int", "distance": "int"}
)

# Long form: one row per sample and particle group ("fragments" is not melted).
work_data = (
    pd.melt(cAdded, id_vars=id_vars, value_vars=value_vars, var_name="objet", value_name="particules")
    .astype({"particules": "int"})
    .rename(columns={'frequentation':'fréquentation', 'echantillon':'échantillon'})
)

# Beaches, samples and the overall particle total used by the summaries.
locations = c["Plage"].unique()
nlocations = len(locations)
samples = c["echantillon"].unique()
nsamples = len(samples)
tquantity = work_data["particules"].sum()

def add_weight_of_samples(data, samps: int=nsamples, tquantity: int=tquantity):
    """Add the share of particles and the share of samples to a summary frame.

    Parameters
    ----------
    data : DataFrame with a 'particules' column (particle counts) and an
        'échantillon' column (sample counts).
    samps : divisor for the 'échantillon' column; defaults to the number of
        samples known when the function is defined.
    tquantity : divisor for the 'particules' column; defaults to the particle
        total known when the function is defined.

    Returns
    -------
    A copy of ``data`` with two extra columns, '% particules' and
    '% échantillon'. The values are plain ratios, not multiplied by 100.
    The input frame is left untouched.
    """
    # Work on a copy so the caller's frame is not modified as a side effect.
    weighted = data.copy()
    weighted["% particules"] = weighted["particules"] / tquantity
    # Divide by the samps argument: the previous version ignored it and
    # always used the module-level nsamples.
    weighted["% échantillon"] = weighted["échantillon"] / samps

    return weighted

## Données mises en forme 

In [4]:
# Replace the coded attributes with their display labels on a copy of the
# long-form data; a code missing from a lookup raises a KeyError.
wdx = work_data.copy()
label_lookups = (
    ("position", name_zones),
    ("fréquentation", name_frequentation),
    ("situation", name_situation),
    ("distance", name_distance),
    ("substrat", name_substrate),
)
for column, lookup in label_lookups:
    wdx[column] = wdx[column].apply(lambda code, lookup=lookup: lookup[code])

# First rows only, indexed by beach with the index label removed.
t2 = wdx.head().set_index("Plage", drop=True).rename_axis(None)
t_two = t2.style.set_table_styles(table_css_styles_top)

# Next table number in the running sequence.
table_no += 1
section, page = 'A', 0
caption = 'Les données avec les étiquettes'

table_two = add_table_to_page(t_two, table_no, caption, section, page, " ")
glue('a0tabletwo', table_two, display=True)

Unnamed: 0,Échantillon,Orientation,Position,Substrat,Fréquentation,Situation,Distance,Objet,Particules
Amphion,74_Amp_1,NE,ligne d'eau,Cailloux,Elevée,Campagne,< 100 m,fibres,97
Amphion,74_Amp_10,NNE,plage seche,Cailloux,Elevée,Campagne,< 100 m,fibres,140
Amphion,74_Amp_2,NNE,ligne d'eau,Cailloux,Elevée,Campagne,< 100 m,fibres,121
Amphion,74_Amp_3,NE,ligne d'eau,Cailloux,Elevée,Campagne,< 100 m,fibres,31
Amphion,74_Amp_4,NNE,ligne d'eau,Cailloux,Elevée,Campagne,< 100 m,fibres,179


In [5]:
# Per-group aggregation: summed particle count and number of distinct samples.
agg_ = {"particules":"sum", "échantillon":"nunique"}

# One summary frame per grouping column; the names on the left are matched,
# in order, with the columns listed on the right.
(
    position_total,
    substrat_total,
    frequentation_total,
    situation_total,
    orient_total,
    particle_type_total,
    distance_total,
) = (
    work_data.groupby(column, as_index=False).agg(agg_)
    for column in (
        "position",
        "substrat",
        "fréquentation",
        "situation",
        "orientation",
        "objet",
        "distance",
    )
)

## Variables indépendantes

### Lieu d'échantillonnage

In [6]:
# Beach descriptions, indexed by beach name.
beaches = (
    pd.read_csv("data/end_pipe/asl_beaches.csv")
    .set_index("Plage", drop=True)
    .rename(columns={'frequentation':'fréquentation'})
)

# Swap the coded attributes for their display labels; a code missing from a
# lookup raises a KeyError.
for column, lookup in (
    ("fréquentation", name_frequentation),
    ("situation", name_situation),
    ("distance", name_distance),
):
    beaches[column] = beaches[column].apply(lambda code, lookup=lookup: lookup[code])

beaches.index.name = None
t3 = beaches.style.format(precision=2).set_table_styles(table_css_styles_top)

# Next table number in the running sequence.
table_no += 1
section, page = 'A', 0
caption = "Les plages et la description du lieu d'échantillonnage"

table_three = add_table_to_page(t3, table_no, caption, section, page, " ")
glue('a0tablethree', table_three, display=True)

Unnamed: 0,Fréquentation,Situation,Distance,Orientation,X,Y
Amphion,Elevée,Campagne,< 100 m,NE,4640,653
Anthy,Moyenne,Campagne,< 100 m,NNO,4635,640
Excenevex,Elevée,Campagne,< 100 m,NE,4635,636
Lugrin,Moyenne,Campagne,100 - 500 m,NNE,4640,667
Meillerie,Faible,Urbain,500 - 1000 m,N,4641,672
Saint-disdille,Moyenne,Campagne,< 100 m,N,4640,650
Tougues,Elevée,Campagne,100 - 500 m,NO,4632,626
Baby plage,Elevée,Urbain,100 - 500 m,NNE,4621,616
Hermance,Elevée,Urbain,100 - 500 m,OSO,4630,624
Port choiseul,Elevée,Urbain,100 - 500 m,NE,4629,617


### Position

In [7]:
# Share of particles and of samples for each sampling position.
# Work on a copy so position_total keeps its raw position codes and this cell
# can be executed more than once.
pos_total = add_weight_of_samples(position_total.copy())
pos_total['position'] = pos_total['position'].apply(lambda code: name_zones[code])
pos_total = pos_total.set_index('position', drop=True).rename_axis(None)
t4 = pos_total.style.set_table_styles(table_css_styles_top)

# Next table number in the running sequence.
table_no += 1
section = 'A'
page = 0

caption = "Les résultats regroupés par position"

table_four = add_table_to_page(t4, table_no, caption, section, page, " ")
glue('a0tablefour', table_four, display=True)

Unnamed: 0,Particules,Échantillon,% particules,% échantillon
Ligne d'eau,12'969,111,33,51
Plage seche,26'120,106,67,49


### Substrat

In [8]:
# Share of particles and of samples for each substrate.
# Work on a copy so substrat_total keeps its raw substrate codes and this cell
# can be executed more than once.
sub_total = add_weight_of_samples(substrat_total.copy())
sub_total['substrat'] = sub_total['substrat'].apply(lambda code: name_substrate[code])
sub_total = sub_total.set_index('substrat', drop=True).rename_axis(None)
t5 = sub_total.style.set_table_styles(table_css_styles_top)

# Next table number in the running sequence.
table_no += 1
section = 'A'
page = 0

caption = "Les résultats regroupés par substrat"

table_five = add_table_to_page(t5, table_no, caption, section, page, " ")
glue('a0tablefive', table_five, display=True)

Unnamed: 0,Particules,Échantillon,% particules,% échantillon
Sables fins,19'345,114,49,53
Sables grossiers,4'253,28,11,13
Graviers,9'732,29,25,13
Cailloux,5'759,46,15,21


### Fréquentation

In [9]:
# Share of particles and of samples for each frequentation level.
# Work on a copy so frequentation_total keeps its raw codes and this cell can
# be executed more than once.
freq_total = add_weight_of_samples(frequentation_total.copy())
freq_total['fréquentation'] = freq_total['fréquentation'].apply(lambda code: name_frequentation[code])
freq_total = freq_total.set_index('fréquentation', drop=True).rename_axis(None)
t6 = freq_total.style.set_table_styles(table_css_styles_top)

# Next table number in the running sequence.
table_no += 1
section = 'A'
page = 0

caption = "Les résultats regroupés par fréquentation"

table_six = add_table_to_page(t6, table_no, caption, section, page, " ")
glue('a0tablesix', table_six, display=True)

Unnamed: 0,Particules,Échantillon,% particules,% échantillon
Faible,587,8,2,4
Moyenne,9'910,37,25,17
Elevée,28'592,172,73,79


### Situation

In [10]:
# Share of particles and of samples for each situation.
# Work on a copy so situation_total keeps its raw codes and this cell can be
# executed more than once.
sit_total = add_weight_of_samples(situation_total.copy())
sit_total['situation'] = sit_total['situation'].apply(lambda code: name_situation[code])
sit_total = sit_total.set_index('situation', drop=True).rename_axis(None)
t7 = sit_total.style.set_table_styles(table_css_styles_top)

# Next table number in the running sequence.
table_no += 1
section = 'A'
page = 0

caption = "Les résultats regroupés par situation"

table_seven = add_table_to_page(t7, table_no, caption, section, page, " ")
glue('a0tableseven', table_seven, display=True)

Unnamed: 0,Particules,Échantillon,% particules,% échantillon
Campagne,26'098,140,67,65
Urbain,12'991,77,33,35


### Distance parking

In [11]:
# Share of particles and of samples for each distance class.
# Work on a copy so distance_total keeps its raw codes and this cell can be
# executed more than once.
dis_total = add_weight_of_samples(distance_total.copy())
dis_total['distance'] = dis_total['distance'].apply(lambda code: name_distance[code])
dis_total = dis_total.set_index('distance', drop=True).rename_axis(None)
t8 = dis_total.style.set_table_styles(table_css_styles_top)

# Next table number in the running sequence.
table_no += 1
section = 'A'
page = 0

caption = "Les résultats regroupés par distance du parking"

table_eight = add_table_to_page(t8, table_no, caption, section, page, " ")
glue('a0tableeight', table_eight, display=True)

Unnamed: 0,Particules,Échantillon,% particules,% échantillon
< 100 m,16'443,93,42,43
100 - 500 m,21'430,110,55,51
500 - 1000 m,76,2,0,1
> 1000 m,1'140,12,3,6


### Orientation de la plage

In [12]:
# Share of particles and of samples for each beach orientation.
# The orientation values are used as they are (no label lookup). The result
# gets its own name instead of overwriting dis_total, which holds the distance
# table, and a copy is used so orient_total stays intact when this cell is
# executed more than once.
ori_total = add_weight_of_samples(orient_total.copy())
ori_total = ori_total.set_index('orientation', drop=True).rename_axis(None)
t9 = ori_total.style.set_table_styles(table_css_styles_top)

# Next table number in the running sequence.
table_no += 1
section = 'A'
page = 0

caption = "Les résultats sont regroupés par orientation"

table_nine = add_table_to_page(t9, table_no, caption, section, page, " ", format_index='columns')
glue('a0tablenine', table_nine, display=True)

Unnamed: 0,Particules,Échantillon,% particules,% échantillon
E,40,1,0,0
ENE,271,2,1,1
ESE,399,4,1,2
N,2'380,14,6,6
NE,8'158,42,21,19
NNE,1'104,9,3,4
NNO,1'339,10,3,5
NO,5'675,25,15,12
O,1'487,7,4,3
ONO,724,6,2,3


## Variables dépendantes

### Fragments plastiques par forme

In [13]:
# Share of particles and of samples for each particle form.
# Work on a copy so particle_type_total keeps its raw group codes and this
# cell can be executed more than once.
par_total = add_weight_of_samples(particle_type_total.copy())
par_total['objet'] = par_total['objet'].apply(lambda code: particle_groups[code])
# The reindex fixes the row order of the displayed table.
par_total = (
    par_total
    .set_index('objet', drop=True)
    .reindex(["Fibre", "Particule rigide", "Particule souple"])
    .rename_axis(None)
)
t10 = par_total.style.set_table_styles(table_css_styles_top)

# Next table number in the running sequence.
table_no += 1
section = 'A'
page = 0

caption = "Les résultats sont regroupés par forme de particule"

table_ten = add_table_to_page(t10, table_no, caption, section, page, " ")
glue('a0tableten', table_ten, display=True)

Unnamed: 0,Particules,Échantillon,% particules,% échantillon
Fibre,29'909,217,77,100
Particule rigide,5'105,217,13,100
Particule souple,4'075,217,10,100


### Fragments plastiques par forme et couleur

In [14]:
# Column totals for every form/colour code listed in name_particles.
p_t_c = c[list(name_particles.keys())].sum(axis=0)

# Display label for each code, in the same order as the totals.
particle_labels = [name_particles[code] for code in p_t_c.index]

# The ratio is taken before the counts are cast to integers; the count column
# is then given its final name.
p_t_cdf = (
    pd.DataFrame({"compte": p_t_c.values}, index=particle_labels)
    .assign(**{"% particule": lambda frame: frame["compte"] / tquantity})
    .astype({"compte": "int"})
    .rename(columns={"compte":"particules"})
)
t11 = p_t_cdf.style.format(precision=2).set_table_styles(table_css_styles_top)

# Next table number in the running sequence.
table_no += 1
section, page = 'A', 0
caption = "Les résultats regroupés par forme de particule et par couleur"

table_eleven = add_table_to_page(t11, table_no, caption, section, page, " ")
glue('a0tableeleven', table_eleven, display=True)

Unnamed: 0,Particules,% particule
Fibre rouge,2'364,6
Fibre bleu,4'060,10
Fibre jaune,400,1
Fibre transparent,16'920,43
Fibre noire,5'892,15
Fibre autre,273,1
Particule rigide rouge,955,2
Particule rigide bleu,1'400,4
Particule rigide jaune,508,1
Particule rigide transparent,1'202,3


In [15]:
# Record the run environment: versions of the imported packages (--iversions),
# the git branch (-b) and the git repository (-r).
%watermark --iversions -b -r

Git repo: https://github.com/hammerdirt-analyst/plastock.git

Git branch: may5

numpy : 1.26.3
pandas: 2.0.3

