In [1]:
%load_ext watermark
import numpy as np
import pandas as pd
import datetime as dt
import matplotlib.pyplot as plt
import seaborn as sns
from myst_nb import glue
from IPython.display import Markdown as md
from slugify import slugify

from plastockconf import name_zones, name_particles, name_frequentation, name_situation
from plastockconf import particle_groups, name_substrate, name_distance, table_css_styles

from plastock import attribute_summary, attribute_summary_test, attribute_summary_grid, add_table_to_page


glue('blank_caption', " ", display=False)
a_property =  {'color' : 'red'}
page = 4
table_no = 1

new_data = pd.read_csv("data/end_pipe/macro_current.csv")
beach_data = pd.read_csv("data/end_pipe/asl_beaches.csv").set_index('Plage')
n_beach_data = pd.read_csv("data/pstock_beaches_current.csv")

(macro-attributes)=
# Macro déchets plage et attribut

## Substrat

In [2]:
new_data = new_data[['Plage', 'Aire', 'Date','Code','Quantité','Substrat']].merge(beach_data[["frequentation", "situation", "distance", "orientation"]], left_on = "Plage", right_index=True)
# new_data.drop('Position', inplace=True, axis=1)

new_column_names = {
    "Substrat":"substrat",
    "Date":"date",
    "Code":"code",
    "Quantité":"quantité",
    "Aire":"area"
}

variables = ["substrat", "fréquentation", "situation", "distance", "orientation"]
groupby_cols = [ 'échantillon', 'Plage', 'slug', 'date', 'quantité', 'area', 'length', 'frequentation', 'situation', 'distance', 'orientation', 'substrat', 'code']
length_key = n_beach_data[["Plage","length"]].drop_duplicates("Plage").set_index("Plage")
work_data = new_data[["Plage",  'frequentation', 'situation', 'distance', 'orientation', *new_column_names.keys()]].copy()
work_data.rename(columns=new_column_names, inplace=True)
work_data["length"] = work_data.Plage.apply(lambda x: length_key.loc[x, "length"])
work_data["slug"] = work_data.Plage.apply(lambda x: slugify(x))
work_data["échantillon"] = list(zip(work_data.Plage, work_data['date']))
work_data = work_data.groupby(groupby_cols, as_index=False)['quantité'].sum()
work_data["fréquentation"] = work_data['frequentation']
work_data.drop('frequentation', inplace=True, axis=1)
work_data['date'] = pd.to_datetime(work_data["date"], format="mixed", dayfirst=True)
work_data.dropna(inplace=True)
work_data[variables[:-1]] = work_data[variables[:-1]].astype("int")
work_data["pcs/m2"] = work_data['quantité']/work_data.area

In [3]:
# columns = ['Plage', 'échantillon', 'orientation', 'position', 'substrat','fréquentation', 'situation', 'distance', "fibres", "fdure", "souple", "fragments", "added"]
column_display = {
    "count":"échantillons",
    "mean": "moyenne",
    "std": "écart type",
    "échantillon":"échantillon"
    
}

# ananlysis of substrat
voi = "substrat"
vals = "pcs/m2"
groupbys = ['échantillon', voi]
labels = name_substrate

event_total = work_data.groupby(["échantillon", voi], as_index=False)[vals].sum()

substrat_summary = attribute_summary(event_total, vals, voi, columns=column_display, labels=name_substrate)
event_total['échantillon'] = event_total["échantillon"].astype(str)

caption = 'Le résumé des résultats du nombre de déchets trouvées par m² dans chaque échantillon pour chaque substrat.'
section = 'A'
rule = 'Les attributs dont la moyenne des résultats est supérieure à la moyenne du projet sont en rouge.'

table_one = add_table_to_page(substrat_summary, table_no, caption, section, page, rule)
glue('tablea41', table_one, display=True)

Unnamed: 0,Échantillons,Moyenne,Écart type,Min,25%,50%,75%,Max
Cailloux,36,113,105,0,28,61,192,3
Graviers,16,62,63,0,20,46,64,2
Sables fins,27,259,244,0,91,156,437,8
Sables grossiers,31,69,82,0,29,40,63,3


In [4]:
attribute_summary_grid(event_total, vals, voi, 'fig-a41', labels=labels, xlim=10, ylim=10)

```{glue:figure} fig-a41
---
name: fig-a41
---
{glue:text}`blank_caption` 
```

### Les plages substrat = sables fins

In [5]:
md(', '.join(work_data[work_data.substrat.isin([1])].Plage.unique()))

Baby Plage, Bouveret, Excenevex, Grangettes, Préverenges, Rolle, Vidy

## Fréquentation

In [6]:
# ananlysis of fréquentation
voi = "fréquentation"
vals = "pcs/m2"
groupbys = ['échantillon', voi]
labels = name_frequentation

event_total = work_data.groupby(["échantillon", "Plage","date", voi], as_index=False)[vals].sum()
table_no += 1
caption = 'Le résumé des résultats du nombre de déchets trouvées par m²dans chaque échantillon pour chaque niveau de fréquentation.'
event_total['échantillon'] = event_total["échantillon"].astype(str)

substrat_summary = attribute_summary(event_total, vals, voi, columns=column_display, labels=labels)
table_two = add_table_to_page(substrat_summary, table_no, caption, section, page, rule)
glue('tablea42', table_two, display=True)

Unnamed: 0,Échantillons,Moyenne,Écart type,Min,25%,50%,75%,Max
Elevée,55,153,161,0,64,94,189,8
Faible,15,37,16,0,25,38,49,0
Moyenne,28,187,206,0,34,105,262,8


In [7]:
attribute_summary_grid(event_total, vals, voi, 'fig-a42', labels=labels, xlim=10, ylim=10)

```{glue:figure} fig-a42
---
name: fig-a42
---
{glue:text}`blank_caption` 
```

### Les plages fréquentation = moyenne

In [8]:
md(', '.join(work_data[work_data['fréquentation'].isin([2])].Plage.unique()))

Anthy, Aubonne, Gland, Grangettes, Lugrin, Pichette, Saint-Disdille

### Les plages fréquentation = élevée

In [9]:
md(', '.join(work_data[work_data['fréquentation'].isin([3])].Plage.unique()))

Amphion, Baby Plage, Bouveret, Clarens, Excenevex, Hermance, Lutry, Port Choiseul, Préverenges, Rolle, Savonnière, Tougues, Versoix, Vidy

## Situation

In [10]:
# ananlysis of situation
voi = "situation"
vals = "pcs/m2"
groupby = ['échantillon', voi]
labels = name_situation

event_total = work_data.groupby(["échantillon", "Plage","date", voi], as_index=False)[vals].sum()
table_no += 1

caption = 'Le résumé des résultats du nombre de déchets trouvées par m²dans chaque échantillon pour chaque situation: urbain, campagne.'
event_total['échantillon'] = event_total["échantillon"].astype(str)

sit = attribute_summary(event_total, vals, voi, columns=column_display, labels=labels)
table_three = add_table_to_page(sit, table_no, caption, section, page, rule)
glue('tablea43', table_three, display=True)

Unnamed: 0,Échantillons,Moyenne,Écart type,Min,25%,50%,75%,Max
Campagne,67,159,194,0,36,70,215,8
Urbain,31,114,93,0,57,94,147,4


In [11]:
attribute_summary_grid(event_total, vals, voi, 'fig-a43', labels=labels, xlim=10, ylim=10)

```{glue:figure} fig-a43
---
name: fig-a43
---
{glue:text}`blank_caption` 
```

### Les plages situation = campagne

In [12]:
md(', '.join(work_data[work_data.situation.isin([1])].Plage.unique()))

Amphion, Anthy, Aubonne, Bouveret, Crans, Cully, Excenevex, Gland, Grangettes, Lugrin, Pichette, Préverenges, Saint-Disdille, Savonnière, Tolochenaz, Tougues, Versoix

### Les plages situation = urbain

In [13]:
md(', '.join(work_data[work_data.situation.isin([2])].Plage.unique()))

Baby Plage, Clarens, Hermance, Lutry, Meillerie, Port Choiseul, Rolle, Vidy

## Distance

In [14]:
# ananlysis of distance
voi = "distance"
vals = "pcs/m2"
groupby = ['échantillon', voi]
labels = name_distance

event_total = work_data.groupby(["échantillon", "Plage","date", voi], as_index=False)[vals].sum()

table_no += 1

caption = 'Le résumé des résultats du nombre de déchets trouvées par m²dans chaque échantillon selon la distance du parking.'
event_total['échantillon'] = event_total["échantillon"].astype(str)
dist = attribute_summary(event_total, vals, voi, columns=column_display, labels=labels)
table_three = add_table_to_page(dist, table_no, caption, section, page, rule)
glue('tablea44', table_three, display=True)

Unnamed: 0,Échantillons,Moyenne,Écart type,Min,25%,50%,75%,Max
100 - 500 m,44,184,204,0,67,111,194,8
500 - 1000 m,7,43,15,0,38,40,52,0
< 100 m,39,142,140,0,38,78,215,5
> 1000 m,8,32,14,0,21,28,43,0


In [15]:
attribute_summary_grid(event_total, vals, voi, 'fig-a44', labels=labels, xlim=10, ylim=10)

```{glue:figure} fig-a44
---
name: fig-a44
---
{glue:text}`blank_caption` 
```

### Les plages distance = 100 - 500 m 

In [16]:
md(', '.join(work_data[work_data.distance.isin([2])].Plage.unique()))

Baby Plage, Bouveret, Grangettes, Hermance, Lugrin, Lutry, Port Choiseul, Préverenges, Tougues, Versoix, Vidy

### Les plages distance = < 100 m  

In [17]:
md(', '.join(work_data[work_data.distance.isin([1])].Plage.unique()))

Amphion, Anthy, Clarens, Cully, Excenevex, Gland, Pichette, Rolle, Saint-Disdille, Savonnière

## Orientation

In [18]:
# ananlysis of substrat
voi = "orientation"
vals = "pcs/m2"
groupby = ['échantillon', voi]

data_summary = attribute_summary_test(work_data, voi=voi, vals=vals)
data_summary[['count', 'max', 'min']] = data_summary[['count', 'max', 'min']].astype('int')
data_summary.rename(columns=column_display, inplace=True)

select_values = data_summary["moyenne"] > 1.45
test_one = data_summary.loc[select_values].index

select_values = data_summary["50%"] > .78
test_two = data_summary.loc[select_values].index

d_sum = data_summary.style.set_table_styles(table_css_styles)
d_sum = d_sum.set_properties(subset = pd.IndexSlice[test_one,["moyenne"]], **a_property)
d_sum = d_sum.set_properties(subset = pd.IndexSlice[test_two,["50%"]], **a_property)

table_no += 1

caption = 'Le résumé des résultats du nombre de déchets trouvées par m² dans chaque échantillon selon l\'orientation de la plage'

table_five = add_table_to_page(d_sum, table_no, caption, section, page, rule, format_index='columns')
glue('tablea45', table_five, display=True)

Unnamed: 0,Échantillons,Moyenne,Écart type,Min,25%,50%,75%,Max
E,4,232,138,0,142,228,319,3
ESE,4,60,8,0,56,59,64,0
N,8,150,117,0,45,144,246,3
NE,12,215,153,0,136,194,262,5
NNE,8,126,49,0,93,111,151,2
NNO,4,32,13,0,26,32,38,0
NO,12,235,277,0,51,77,475,8
ONO,8,256,318,0,37,68,442,8
OSO,4,26,31,0,9,14,31,0
SE,4,30,18,0,18,25,36,0


In [19]:
%watermark --iversions -b -r

Git repo: https://github.com/hammerdirt-analyst/plastock.git

Git branch: nov29

numpy     : 1.24.2
matplotlib: 3.7.1
pandas    : 2.0.0
seaborn   : 0.12.2

