In [1]:
%load_ext watermark
import numpy as np
import pandas as pd
import datetime as dt
import matplotlib.pyplot as plt
import seaborn as sns
from myst_nb import glue 

from plastockmethods import name_the_zones, name_the_particles, frequentation_name, situation_name
from plastockmethods import particle_groups, name_the_substrate, name_the_distance, table_css_styles

def translate_describe(x, value_column, transpose: bool = False):
    """Turn the result of ``Series.describe()`` into a one-column, French-labelled frame.

    The ``count`` entry is discarded and ``mean`` / ``std`` are relabelled
    ``moyenne`` / ``écart-type``. Because those two entries are re-inserted,
    they end up after the min / quantile / max rows.

    Parameters
    ----------
    x : object exposing ``to_dict()`` (e.g. the Series returned by ``describe()``)
        Must contain the keys ``count``, ``mean`` and ``std``; a missing key
        raises ``KeyError``.
    value_column : str
        Name given to the single column of values.
    transpose : bool, default False
        When True the frame is returned transposed (statistics as columns).

    Returns
    -------
    pandas.DataFrame
        Indexed by statistic name (index name cleared), or its transpose.
    """
    stats = x.to_dict()
    del stats["count"]
    for english, french in (("mean", "moyenne"), ("std", "écart-type")):
        stats[french] = stats.pop(english)

    # Two positional columns (0 = label, 1 = value) are built first and then
    # turned into an index plus a named value column.
    table = (
        pd.DataFrame(stats.items())
        .set_index(0)
        .rename(columns={1: value_column})
    )
    table.index.name = None

    return table.T if transpose else table

# Glue a one-space string under the key 'blank_caption'; the {glue:figure}
# directives in the markdown cells reference it as their caption text.
glue('blank_caption', " ", display=False)

# Long-form microplastic results. Later cells read the columns "Plage",
# "echantillon", "objet", "position" and "compte" from this frame.
work_data = pd.read_csv("data/end_pipe/long_form_micro.csv")
# wide_form = pd.read_csv("data/end_pipe/aggregated_micro_results.csv")
# NOTE(review): beach_data is loaded but not referenced by any other cell visible in this notebook.
beach_data = pd.read_csv("data/end_pipe/asl_beaches.csv")

# Microplastiques plage et position

La position fait référence à l'emplacement de l'échantillon de microplastique sur la plage. La ligne d'eau correspond aux échantillons prélevés très près de l'endroit où l'eau rencontre le rivage. La plage sèche fait référence aux endroits de la plage qui sont plus éloignés de la ligne d'eau et qui sont généralement secs.

__Résultats agrégés par forme, position et échantillon__

In [2]:
# Number formatting shared by every table: two decimals, apostrophe as
# thousands separator, comma as decimal mark.
format_kwargs = {"precision": 2, "thousands": "'", "decimal": ","}


def add_table_to_page(table, table_no, caption, section, page, rule: bool = False):
    """Apply the common formatting and a numbered caption to a styled table.

    Parameters
    ----------
    table : object exposing ``format_index``, ``format`` and ``set_caption``
        In this notebook, a pandas Styler.
    table_no, section, page :
        Combined into the caption prefix ``Table {section}{page}-{table_no}:``.
    caption : str
        Text placed after the prefix.
    rule : bool, default False
        When truthy, the sentence explaining the red highlighting is appended
        to the caption.

    Returns
    -------
    The value returned by ``set_caption`` on the formatted table.
    """
    rules = 'Les éléments dont la moyenne des résultats est supérieure à la moyenne du projet sont en rouge.'

    numbered = f'Table {section}{page}-{table_no}: {caption}'
    full_caption = f'{numbered} {rules}' if rule else numbered

    # Title-case the column labels, then the row labels, then format the values.
    styled = table.format_index(str.title, axis=1)
    styled = styled.format_index(str.title, axis=0)
    styled = styled.format(**format_kwargs)
    return styled.set_caption(full_caption)
# Numbering shared by the tables of this page: captions read
# "Table {section}{page}-{table_no}: ...".
section, page, table_no = 'A', 1, 1
caption = 'Les données sont regroupées en trois groupes par échantillon: 1. fibres, 2. particules rigides, 3. particules souples.'
# Non-empty string; a later cell passes it as the (truthy) `rule` flag of add_table_to_page.
rule = 'Les attributs dont la moyenne des résultats est supérieure à la moyenne du projet sont en rouge.'

# First rows of a single sample, indexed by particle form, shown as an
# example of the long-form data.
work_data_sample = (
    work_data[work_data.echantillon == "74_Amp_2"]
    .head()
    .set_index('objet', drop=True)
)
work_data_sample.index.name = None
work_data_sample = work_data_sample.style.set_table_styles(table_css_styles)

table_one = add_table_to_page(work_data_sample, table_no, caption, section, page)
glue('tablea11', table_one, display=True)

Unnamed: 0,Plage,Echantillon,Orientation,Position,Substrat,Frequentation,Situation,Distance,Compte
Fibres,Amphion,74_Amp_2,NNE,1,4,3,1,1,121
Fdure,Amphion,74_Amp_2,NNE,1,4,3,1,1,3
Souple,Amphion,74_Amp_2,NNE,1,4,3,1,1,12


## Nombre cumulé d'échantillons et particules par plage

In [3]:
# Expose the counts under the column name "particules" used by the rest of the notebook.
work_data["particules"] = work_data["compte"]

value_column = "moyenne"

# Total number of particles for each sample of each beach.
sample_totals = work_data.groupby(["Plage", "echantillon"], as_index=False)["particules"].sum()
df2 = sample_totals.copy()

# Mean of the sample totals over the whole project.
average_of_sample_totals = df2["particules"].mean()

# Per beach: number of distinct samples and the mean sample total
# (stored under the column name held by value_column).
sample_averages = (
    sample_totals
    .groupby("Plage", as_index=False)
    .agg({"echantillon": "nunique", "particules": "mean"})
    .rename(columns={"particules": value_column})
    .set_index("Plage", drop=True)
)
sample_averages.index.name = None

In [4]:

# Per beach: number of distinct samples and total number of particles.
location_summary = work_data.groupby("Plage", as_index=False).agg({"echantillon": "nunique", "particules":"sum"})
# Share of all samples / of all particles contributed by each beach.
location_summary["% echantillon"] = location_summary.echantillon/location_summary.echantillon.sum()
location_summary["% particules"] = location_summary.particules/location_summary.particules.sum()

# Display copy indexed by beach name; location_summary itself keeps "Plage"
# as a column because the bar chart cell melts it.
df1 = location_summary.copy()
df1.set_index("Plage", inplace=True, drop=True)
df1.index.name = None

a_property =  {'color' : 'red'}
a_property2 = { 'color': 'dodgerblue'}

# Beaches whose share of the particles exceeds their share of the samples (shown in red).
select_values = df1["% echantillon"] < df1["% particules"]
test_one = df1.loc[select_values].index

caption1 = "Nombre cumulé d'échantillons et le total par plage. Les plages où le proportion du comptage est supérieur au proportion des échantillons sont en rouge."
caption2 = 'Les plages dont la moyenne des résultats est supérieure à la moyenne du projet sont en BLEU.'
caption = f'{caption1} {caption2}'

# Mean sample total of each beach, looked up by beach name.
df1["moyenne"] = df1.index.map(lambda x: sample_averages['moyenne'].loc[x])

# Beaches whose mean exceeds the project average (shown in blue). The
# threshold is the computed project mean rather than a hard-coded 180, so the
# highlighting always matches the caption and follows the data if it changes.
select_values2 = df1["moyenne"] > average_of_sample_totals
test_two = df1[select_values2].index

ttwo = df1.style.set_table_styles(table_css_styles).set_properties(subset=pd.IndexSlice[test_two, ["moyenne"]], **a_property2)
ttwo = ttwo.set_properties(subset = pd.IndexSlice[test_one,["% echantillon", "% particules"]], **a_property)

table_no +=1

table_two = add_table_to_page(ttwo, table_no, caption, section, page)
glue('tablea12', table_two, display=True)

Unnamed: 0,Echantillon,Particules,% Echantillon,% Particules,Moyenne
Amphion,10,1'250,5,3,12500
Anthy,6,685,3,2,11417
Aubonne,8,757,4,2,9462
Baby Plage,11,1'288,5,3,11709
Bouveret,12,2'114,6,5,17617
Clarens,6,415,3,1,6917
Crans,1,19,0,0,1900
Cully,2,128,1,0,6400
Excenevex,28,3'775,13,10,13482
Gland,1,40,0,0,4000


In [5]:
# Long form of the two share columns so both can be drawn side by side for each beach.
bar_plots = location_summary.melt(
    id_vars="Plage",
    value_vars=["% echantillon", "% particules"],
)

fig, ax = plt.subplots()
sns.barplot(x="Plage", y="value", hue="variable", data=bar_plots, ax=ax)
ax.legend(loc="upper left")
ax.tick_params(axis="x", labelrotation=90, labelright=True, labelsize=12)

# Store the figure for the {glue:figure} directive, then close it.
glue('fig-A11', fig, display=False)
plt.close()

```{glue:figure} fig-A11
---
name: fig-A11
---
{glue:text}`blank_caption` 
```

In [6]:
# Beach name followed by the attribute columns shown in the next table.
attribute_columns = ["Plage", "orientation", "position", "substrat", "frequentation", "situation", "distance"]
# Share of all samples that belong to the beaches flagged by select_values.
# NOTE(review): percent_of_samples is not referenced by any other cell visible in this notebook.
percent_of_samples = df1.loc[select_values].echantillon.sum()/location_summary.echantillon.sum()
# One row (the first encountered) for each flagged beach.
attributes_of_samples = work_data[work_data.Plage.isin(test_one)].drop_duplicates(["Plage"])


def translate_columns(data, columns, keys):
    """Replace the values of several columns through per-column lookups.

    ``columns[i]`` is translated with ``keys[i]``: each value ``v`` of the
    column becomes ``keys[i][v]``. A value missing from its lookup raises
    ``KeyError``.

    The frame is modified in place and the same object is returned.
    """
    for idx, name in enumerate(columns):

        def lookup(code, which=idx):
            # `which` pins the lookup to this column's position in `keys`
            return keys[which][code]

        data[name] = data[name].apply(lookup)

    return data

# One lookup per translated column, in the order of attribute_columns[2:]
# (position, substrat, frequentation, situation, distance).
attribute_keys = [name_the_zones, name_the_substrate, frequentation_name, situation_name, name_the_distance]
caption = "Les plages et leurs attributs où le proportion du comptage est supérieur au proportion des échantillons."

# translate_columns modifies attributes_of_samples in place and returns it,
# so display_df and attributes_of_samples are the same object from here on.
display_df = translate_columns(attributes_of_samples, attribute_columns[2:], attribute_keys)
display_df.set_index("Plage", inplace=True, drop=True)
display_df.index.name = None
# "Plage" is now the index, hence the slice starting at the second entry.
table_three = display_df[attribute_columns[1:]].style.set_table_styles(table_css_styles)


table_no += 1


table_three = add_table_to_page(table_three, table_no, caption, section, page)
glue('tablea13', table_three, display=True)

Unnamed: 0,Orientation,Position,Substrat,Frequentation,Situation,Distance
Lugrin,NNE,ligne-d'eau,galet,moyenne,campagne,100 - 500m
Port Choiseul,NE,ligne-d'eau,gravier,élévée,urbain,100 - 500m
Savonnière,NO,ligne-d'eau,galet,élévée,campagne,< 100m
Grangettes,NO,ligne-d'eau,sable fin,moyenne,campagne,100 - 500m
Pichette,SSE,ligne-d'eau,gravier,moyenne,campagne,< 100m
Préverenges,SO,ligne-d'eau,sable fin,élévée,campagne,100 - 500m


## Compte moyen par échantillon et plage

In [7]:
# Summary statistics of the sample totals, laid out as a single row.
summary_particuless = sample_totals["particules"].describe()

summary_particuless_display = translate_describe(summary_particuless, "particules", transpose=True)
summary_particuless_display = summary_particuless_display.style.set_table_styles(table_css_styles)

table_no += 1
caption = "Valeurs récapitulatives des totaux des échantillons."

table_four = add_table_to_page(summary_particuless_display, table_no, caption, section, page)
glue('tablea14', table_four, display=True)

Unnamed: 0,Min,25%,50%,75%,Max,Moyenne,Écart-Type
Particules,0,7000,12100,19800,"2'991,00",18013,26762


In [8]:
# Four views of the distribution of the sample totals on a 2 x 2 grid:
# scatter, box plot, histogram and empirical cumulative distribution.
# NOTE(review): every count axis is limited to 1500 while the summary table
# output above reports a maximum of 2'991, so the largest totals fall outside
# the plotted range - confirm this is intended.
fig, ax = plt.subplots(2,2, figsize=(7,7))

# Top left: one point per sample; the x tick labels (sample names) are hidden.
sns.scatterplot(data=sample_totals, x="echantillon", y="particules", ax=ax[0,0])
ax[0,0].tick_params(labelbottom=False, bottom=False)
ax[0,0].set_ylim(-1, 1500)
ax[0,0].set_title("Total par échantillon", loc="left")

# Top right: box plot of the same totals.
sns.boxplot(data=sample_totals, y="particules", dodge=False, width=.9, ax=ax[0,1])
ax[0,1].set_ylim(-1, 1500)
ax[0,1].set_title("boîte de Tukey", loc="left")

# Bottom left: histogram scaled to probabilities, with a KDE overlay.
sns.histplot(data=sample_totals, x="particules", ax=ax[1,0], stat="probability", kde=True)
ax[1,0].set_xlim(-1, 1500)
ax[1,0].yaxis.set_major_formatter('{x:.2f}')
ax[1,0].set_ylabel("probabilité")
ax[1,0].set_title("Histogramme", loc="left")

# Bottom right: empirical cumulative distribution function.
sns.ecdfplot(data=sample_totals, x="particules", ax=ax[1,1])
ax[1,1].set_xlim(-1, 1500)
ax[1,1].set_ylabel("probabilité")
ax[1,1].set_title("Fonction de répartition", loc="left")
plt.tight_layout()
# Store the figure for the {glue:figure} directive, then close it.
glue('fig-A12', fig, display=False)
plt.close()

```{glue:figure} fig-A12
---
name: fig-A12
---
{glue:text}`blank_caption` 
```

## Compte moyen par position

In [9]:
# Total per sample and position; the position code is replaced by its display name.
position_totals = work_data.groupby(["Plage","echantillon", "position"], as_index=False).particules.sum()
position_totals["position"] = position_totals.position.apply(lambda x: name_the_zones[x])

# Display names of the positions and of the particle forms. They are kept as
# notebook-level names because a later cell uses them to address pivoted columns.
position_columns = list(name_the_zones.values())
fragment_columns = list(particle_groups.values())

# Re-aggregate on the named position so that codes sharing one display name
# (if any) are counted together.
position_sample_totals = position_totals.groupby(["echantillon", "Plage", "position"], as_index=False).particules.sum()

# Summary statistics of the sample totals for each position.
summary_ligne_deau = position_sample_totals[position_sample_totals.position == "ligne-d'eau"].particules.describe()
summary_plage_seche = position_sample_totals[position_sample_totals.position == "plage-seche"].particules.describe()

ligne_deau_display =  translate_describe(summary_ligne_deau, "ligne d'eau")
plage_seche_display =  translate_describe(summary_plage_seche, "plage-seche")

# One row per position, one column per statistic.
position_summary = pd.concat([ligne_deau_display, plage_seche_display], axis=1).T.style.set_table_styles(table_css_styles)

table_no += 1
# The earlier draft caption (per-beach highlighting rule and project average)
# was overwritten before use and described a table that is no longer built,
# so it has been removed together with the commented-out code.
caption = "Le résumé des totaux d'échantillons par position sur la plage."

table_five = add_table_to_page(position_summary, table_no, caption, section, page)
glue('tablea15', table_five, display=True)

Unnamed: 0,Min,25%,50%,75%,Max,Moyenne,Écart-Type
Ligne D'Eau,1900,5350,8500,12900,88400,11684,12553
Plage-Seche,0,10500,17000,23825,"2'991,00",24642,34951


In [10]:
# Same four views as the previous figure, split by position on the beach.
# NOTE(review): the count axes are limited to 1000, below the maxima shown in
# the position summary table output - confirm the clipping is intended.
fig, ax = plt.subplots(2,2, figsize=(7,7))

# One total per sample and position (the beach is dropped from the grouping).
d_position = position_totals.groupby(["echantillon", "position"], as_index=False).particules.sum()

# Top left: one point per sample and position; x tick labels and legend are hidden.
sns.scatterplot(data=d_position, x="echantillon", y="particules", hue="position", ax=ax[0,0])
ax[0,0].tick_params(labelbottom=False, bottom=False)
ax[0,0].set_ylim(-1, 1000)
ax[0,0].set_title("Total par échantillon", loc="left")
ax[0,0].legend().remove()

# Top right: one box per position; the x axis already names the positions, so the legend is removed.
sns.boxplot(data=d_position, x="position", y="particules", hue="position", dodge=False, width=.9, ax=ax[0,1])
ax[0,1].set_ylim(-1, 1000)
ax[0,1].set_title("Boîte de Tukey", loc="left")
ax[0,1].set_xlabel("")
ax[0,1].legend().remove()

# Bottom left: stacked histogram scaled to probabilities, with KDE overlays.
sns.histplot(data=d_position, x="particules", hue="position", ax=ax[1,0], multiple="stack", stat="probability", kde=True)
ax[1,0].set_xlim(-10, 1000)
ax[1,0].yaxis.set_major_formatter('{x:.2f}')
ax[1,0].set_ylabel("probabilité")
ax[1,0].set_title("Histogramme", loc="left")



# Bottom right: empirical cumulative distribution per position.
sns.ecdfplot(data=d_position, x="particules", hue="position", ax=ax[1,1])
ax[1,1].set_xlim(-1, 1000)
ax[1,1].set_ylabel("probabilité")
ax[1,1].set_title("Fonction de répartition", loc="left")

plt.tight_layout()
# Store the figure for the {glue:figure} directive, then close it.
glue('fig-A13', fig, display=False)

plt.close()

```{glue:figure} fig-A13
---
name: fig-A13
---
{glue:text}`blank_caption` 
```

### Nombre moyen par forme et plage

In [11]:
# Total per sample and particle form.
forme_totals = work_data.groupby(["Plage", "echantillon", "objet"], as_index=False)["particules"].sum()
df3 = forme_totals.copy()

# Mean sample total of each form over the whole project, indexed by the form
# code and stored under the column name held by value_column.
average_of_sample_formes = (
    df3
    .groupby(["objet"], as_index=True)
    .agg({"particules": "mean"})
    .rename(columns={"particules": value_column})
)

In [12]:
# Summary statistics of the sample totals, one Series per particle form.
tex_dure = forme_totals.loc[forme_totals["objet"] == "fdure", "particules"].describe()
tex_souple = forme_totals.loc[forme_totals["objet"] == "souple", "particules"].describe()
tex_fibres = forme_totals.loc[forme_totals["objet"] == "fibres", "particules"].describe()

# French-labelled one-column frames, named after the form they describe.
rigid_display = translate_describe(tex_dure, "rigides")
souple_display = translate_describe(tex_souple, "souple")
fibres_display = translate_describe(tex_fibres, "fibres")

table_no += 1
caption = "Résultats par forme de particule."

# One row per form, one column per statistic.
forme_summary = pd.concat([rigid_display, souple_display, fibres_display], axis=1).T
forme_summary = forme_summary.style.set_table_styles(table_css_styles)

table_six = add_table_to_page(forme_summary, table_no, caption, section, page)
glue("table-six", table_six, display=True)

Unnamed: 0,Min,25%,50%,75%,Max,Moyenne,Écart-Type
Rigides,0,300,1100,2300,69600,2353,5812
Souple,0,0,400,1400,60400,1878,6014
Fibres,0,5400,9800,15600,"2'593,00",13783,21169


<!-- |Particules rigides|Particules souples|Fibres|
|:----------------:|:----------------:|:----------:|
|{glue:}`rigid_display`|{glue:}`souple_display`|{glue:}`fibres_display`| -->

In [13]:
# The average sample total by form of particle, for each beach.
forme_averages = df3.groupby(["Plage", "objet"], as_index=False).agg({"particules":"mean"})
forme_averages.rename(columns={"particules":value_column}, inplace=True)

# One row per beach and one column per form; the outer column level (the
# value column name) is dropped and the form codes are replaced by their labels.
forme_averages = forme_averages.pivot(index="Plage", columns="objet").droplevel(0, axis=1)
forme_averages.index.name = None
forme_averages.rename(columns=particle_groups, inplace=True)

# Identify, per form, the beaches whose average exceeds the project average for that form.
select_fdure_values = forme_averages["particule rigide"] > average_of_sample_formes.loc["fdure", value_column]
select_souple_values = forme_averages["particule souple"] > average_of_sample_formes.loc["souple", value_column]
select_fiber_vales = forme_averages["fibre"] > average_of_sample_formes.loc["fibres", value_column]

# Row labels (beaches) of the values that exceed the threshold.
test_three = forme_averages.loc[select_fdure_values].index
test_four = forme_averages.loc[select_souple_values].index
test_five = forme_averages.loc[select_fiber_vales].index

caption = "Le nombre moyen par échantillon par plage et forme."

sa = forme_averages.style.set_table_styles(table_css_styles)

# Highlight the values that exceed the threshold.
sa = sa.set_properties(subset = pd.IndexSlice[test_three,["particule rigide"]], **a_property)
sa = sa.set_properties(subset = pd.IndexSlice[test_four,["particule souple"]], **a_property)
sa = sa.set_properties(subset = pd.IndexSlice[test_five,["fibre"]], **a_property)

table_no += 1

# `rule` is the non-empty string defined with the first table; it is truthy,
# so add_table_to_page appends its sentence about red highlighting to the caption.
table_seven = add_table_to_page(sa, table_no, caption, section, page, rule)
glue("table-seven", table_seven, display=True)

objet,Particule Rigide,Fibre,Particule Souple
Amphion,790,11000,710
Anthy,917,9167,1333
Aubonne,1188,7012,1262
Baby Plage,1200,9809,700
Bouveret,1375,14725,1517
Clarens,633,5750,533
Crans,200,1700,0
Cully,550,5850,0
Excenevex,1939,9714,1829
Gland,800,2600,600


In [14]:
# Four views of the sample totals, split by particle form.
fig, ax = plt.subplots(2,2, figsize=(8,8))

# Relabel the form codes on a copy instead of overwriting df3["objet"]:
# the in-place version made this cell fail on a second run, because the
# labels written by the first run are no longer the codes the lookup expects.
formes_named = df3.assign(objet=df3["objet"].apply(lambda x: particle_groups[x]))

# Top left: one point per sample and form; x tick labels are hidden, the legend is kept.
sns.scatterplot(data=formes_named, x="echantillon", y="particules", hue="objet", ax=ax[0,0])
ax[0,0].tick_params(labelbottom=False, bottom=False)
ax[0,0].set_ylim(-1, 1000)
ax[0,0].set_title("Total par échantillon", loc="left")

# Top right: one box per form; the x axis already names the forms, so the legend is removed.
sns.boxplot(data=formes_named, x="objet", y="particules", hue="objet", dodge=False, width=.9, ax=ax[0,1])
ax[0,1].set_ylim(-1, 1000)
ax[0,1].set_title("Boîte de Tukey", loc="left")
ax[0,1].set_xlabel("")
ax[0,1].legend().remove()

# Bottom left: stacked histogram scaled to probabilities, with KDE overlays.
sns.histplot(data=formes_named, x="particules", hue="objet", ax=ax[1,0], multiple="stack", stat="probability", kde=True)
ax[1,0].set_xlim(-10, 600)
ax[1,0].yaxis.set_major_formatter('{x:.2f}')
ax[1,0].set_ylabel("probabilité")
ax[1,0].set_title("Histogramme", loc="left")

# Bottom right: empirical cumulative distribution per form.
sns.ecdfplot(data=formes_named, x="particules", hue="objet", ax=ax[1,1])
ax[1,1].set_xlim(-1, 600)
ax[1,1].set_ylabel("probabilité")
ax[1,1].set_title("Fonction de répartition", loc="left")

# A single layout pass is enough (the original called tight_layout twice).
plt.tight_layout()
# Store the figure for the {glue:figure} directive, then close it.
glue('fig-A14', fig, display=False)

plt.close()

```{glue:figure} fig-A14
---
name: fig-A14
---
{glue:text}`blank_caption` 
```

### Nombre moyen par forme et position

In [15]:
# Total per sample, position and form, with codes replaced by display names.
position_totals = work_data.groupby(["Plage", "echantillon", "position", "objet"], as_index=False)["particules"].sum()
position_totals["position"] = position_totals["position"].apply(lambda x: name_the_zones[x])
position_totals["objet"] = position_totals["objet"].apply(lambda x: particle_groups[x])

# Summary statistics at the water line, one Series per particle form.
leau_dure = position_totals.loc[
    (position_totals["position"] == "ligne-d'eau") & (position_totals["objet"] == "particule rigide"),
    "particules",
].describe()
leau_souple = position_totals.loc[
    (position_totals["position"] == "ligne-d'eau") & (position_totals["objet"] == "particule souple"),
    "particules",
].describe()
leau_fibre = position_totals.loc[
    (position_totals["position"] == "ligne-d'eau") & (position_totals["objet"] == "fibre"),
    "particules",
].describe()

# French-labelled one-column frames.
leau_dure_d = translate_describe(leau_dure, "rigides")
leau_souple_d = translate_describe(leau_souple, "souples")
leau_fibre_d = translate_describe(leau_fibre, "fibres")

# One row per form, one column per statistic.
formes_ldeau = pd.concat([leau_dure_d, leau_souple_d, leau_fibre_d], axis=1).T
formes_ldeau = formes_ldeau.style.set_table_styles(table_css_styles)

table_no += 1

caption = "Ligne d'eau: le nombre moyen de particules trouvées par échantillon selon la forme du paritcle."

table_eight = add_table_to_page(formes_ldeau, table_no, caption, section, page)
glue("table-eight", table_eight, display=True)

Unnamed: 0,Min,25%,50%,75%,Max,Moyenne,Écart-Type
Rigides,0,200,700,1400,11200,1224,1640
Souples,0,0,100,350,60400,1786,8122
Fibres,1700,3900,7200,11650,27300,8673,5866


In [16]:
# Summary statistics on the dry beach, one Series per particle form.
plage_dure = position_totals.loc[
    (position_totals["position"] == "plage-seche") & (position_totals["objet"] == "particule rigide"),
    "particules",
].describe()
plage_souple = position_totals.loc[
    (position_totals["position"] == "plage-seche") & (position_totals["objet"] == "particule souple"),
    "particules",
].describe()
plage_fibre = position_totals.loc[
    (position_totals["position"] == "plage-seche") & (position_totals["objet"] == "fibre"),
    "particules",
].describe()

# French-labelled one-column frames.
plage_dure_d = translate_describe(plage_dure, "rigides")
plage_souple_d = translate_describe(plage_souple, "souples")
plage_fibre_d = translate_describe(plage_fibre, "fibres")

# One row per form, one column per statistic.
formes_pseche = pd.concat([plage_dure_d, plage_souple_d, plage_fibre_d], axis=1).T
formes_pseche = formes_pseche.style.set_table_styles(table_css_styles)

table_no += 1

caption = "Plage seche: le nombre moyen de particules trouvées par échantillon selon la forme du paritcle."

table_nine = add_table_to_page(formes_pseche, table_no, caption, section, page)
glue("table-nine", table_nine, display=True)

Unnamed: 0,Min,25%,50%,75%,Max,Moyenne,Écart-Type
Rigides,0,800,1500,3075,69600,3534,7995
Souples,0,500,1100,2675,15300,1974,2296
Fibres,0,7850,12400,18700,"2'593,00",19134,28798


In [17]:

# Mean sample total for every beach / position / form combination.
df4 = position_totals.groupby(["Plage","position", "objet"], as_index=False).particules.mean()

# One row per beach; the columns form a three-level index
# ("particules", position, form). Combinations without any sample stay NaN
# and therefore never pass the ">" comparison below.
df4 = df4.pivot(index="Plage", columns=["position", "objet"])
df4.index.name = None


def select_a_set_of_values(data, column_one, search_index, search_value, average_of_sample_formes):
    """Return the rows of ``data`` whose ``column_one`` value exceeds a reference value.

    The reference is ``average_of_sample_formes.loc[search_index, search_value]``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame to filter.
    column_one : column label of ``data``
        Column compared with the reference (a tuple for multi-level columns).
    search_index, search_value :
        Row and column labels of the reference in ``average_of_sample_formes``.
    average_of_sample_formes : pandas.DataFrame
        Frame holding the reference values.
    """
    threshold = average_of_sample_formes.loc[search_index, search_value]
    return data.loc[data[column_one] > threshold]


sax = df4.style.set_table_styles(table_css_styles)

# For every position and particle form, colour the beaches whose mean exceeds
# the project-wide mean of that form. This replaces six copy-pasted
# selections; the form code addresses the reference table and the form label
# addresses the pivoted column.
for position_name in position_columns:
    for form_code, form_name in particle_groups.items():
        column = ("particules", position_name, form_name)
        if column not in df4.columns:
            # no sample exists for this position / form combination
            continue
        rows = select_a_set_of_values(df4, column, form_code, value_column, average_of_sample_formes).index
        sax = sax.set_properties(subset=pd.IndexSlice[rows, column], **a_property)

table_no += 1

# The previous caption was copied from the dry-beach table although this
# table covers both positions for each beach.
caption = "Le nombre moyen de particules par échantillon selon la plage, la position et la forme de la particule."

# rule=True appends the sentence explaining the red highlighting, as done for
# the per-beach table by form.
table_ten = add_table_to_page(sax, table_no, caption, section, page, rule=True)
glue("table-ten", table_ten, display=True)

Unnamed: 0_level_0,Particules,Particules,Particules,Particules,Particules,Particules
position,Ligne-D'Eau,Ligne-D'Eau,Ligne-D'Eau,Plage-Seche,Plage-Seche,Plage-Seche
objet,Fibre,Particule Rigide,Particule Souple,Fibre,Particule Rigide,Particule Souple
Amphion,10700,125,325,11200,1233.0,967.0
Anthy,4100,733,700,14233,1100.0,1967.0
Aubonne,4960,1260,60,10433,1067.0,3267.0
Baby Plage,6300,600,0,12733,1700.0,1283.0
Bouveret,7543,1514,29,24780,1180.0,3600.0
Clarens,4875,475,75,7500,950.0,1450.0
Crans,1700,200,0,,,
Cully,11700,1100,0,000,0.0,0.0
Excenevex,9086,443,700,9924,2438.0,2205.0
Gland,2600,800,600,,,


### Distributions



In [18]:
# Box plots of the per-sample totals: one group per position, one box per particle form.
fig, ax = plt.subplots(1)

sns.boxplot(data=position_totals, x="position", y="particules", hue="objet", ax=ax)
# sns.boxplot(data=position_totals, x="objet", y="particules", hue="objet",ax=ax[1])

# NOTE(review): the upper limit of 1000 is below the maxima reported in the
# summary table outputs above, so the largest values are not drawn - confirm intended.
ax.set_ylim(-10, 1000)
ax.set_xlabel("")

plt.tight_layout()
# Store the figure for the {glue:figure} directive, then close it.
glue('fig-A15', fig, display=False)

plt.close()

```{glue:figure} fig-A15
---
name: fig-A15
---
{glue:text}`blank_caption` 
```

In [19]:
# Rebuild the totals per sample and position (without the particle form),
# replacing the per-form frame that previously held this name.
position_totals = work_data.groupby(["Plage","echantillon", "position"], as_index=False).particules.sum()
position_totals["position"] = position_totals.position.apply(lambda x: name_the_zones[x])
# NOTE(review): df4 is reassigned here but not used by the rest of this cell
# nor by any later cell visible in this notebook.
df4 = position_totals.groupby(["Plage","position"], as_index=False).particules.mean()

# Box plots of the sample totals: one group per beach, one box per position.
fig, ax = plt.subplots(1, figsize=(8,5))

sns.boxplot(data=position_totals, x="Plage", y="particules", hue="position", ax=ax)

ax.set_xlabel("")
ax.tick_params(axis="x", labelrotation=90)

plt.tight_layout()
# Store the figure for the {glue:figure} directive, then close it.
glue('fig-A16', fig, display=False)

plt.close()

```{glue:figure} fig-A16
---
name: fig-A16
---
{glue:text}`blank_caption` 
```

In [20]:
# Record the provenance of this run: versions of the imported packages
# (--iversions), the git branch (-b) and the git repository URL (-r).
%watermark --iversions -b -r

Git repo: https://github.com/hammerdirt-analyst/plastock.git

Git branch: main

numpy     : 1.24.2
pandas    : 2.0.0
seaborn   : 0.12.2
matplotlib: 3.7.1

