In [1]:
%load_ext watermark
import numpy as np
import pandas as pd
import datetime as dt
from slugify import slugify

from plastockmethods import name_zones, name_particles, name_frequentation, name_situation
from plastockmethods import particle_groups, name_substrate, name_distance, table_css_styles

# micro data
work_data = pd.read_csv("data/end_pipe/long_form_micro.csv")
# Beach table read alongside the micro samples. It gets its own name because
# assigning it to `beach_data` (as before) meant the macro read below replaced
# it immediately, leaving this table unreachable.
micro_beach_data = pd.read_csv("data/end_pipe/asl_beaches.csv")

# macro data
new_data = pd.read_csv("data/macro_current.csv")
beach_data = pd.read_csv("data/pstock_beaches_current.csv")

# Source column name -> column name used in the working macro table.
new_column_names = {
    "Position": "position",
    "Substrat": "substrat",
    "Date": "date",
    "Code": "code",
    "Quantité": "quantite",
    "Aire": "area",
}

# One `length` value per beach, indexed by beach name.
length_key = beach_data[["Plage", "length"]].drop_duplicates("Plage").set_index("Plage")

# Keep only the columns of interest and rename them. `rename` returns a new
# frame, so `new_data` itself is left untouched.
work_datad = new_data[["Plage", *new_column_names]].rename(columns=new_column_names)

# Row-wise `.loc` lookup is kept on purpose: a beach missing from `length_key`
# raises a KeyError here, whereas `.map` would produce a NaN that the `dropna`
# below would silently discard.
work_datad["length"] = work_datad["Plage"].apply(lambda plage: length_key.loc[plage, "length"])
work_datad["slug"] = work_datad["Plage"].apply(slugify)

# The sample identifier pairs the beach slug with the date exactly as written
# in the file, so it has to be built before the date column is parsed.
# NOTE(review): dates are parsed with format="mixed"; if one survey date is
# spelled two different ways in the file it would yield two identifiers —
# confirm the raw date strings are consistent.
work_datad["echantillon"] = list(zip(work_datad["slug"], work_datad["date"]))
work_datad["date"] = pd.to_datetime(work_datad["date"], format="mixed", dayfirst=True)

# Drop incomplete rows first; only then can the two code columns be cast to int.
work_datad = work_datad.dropna().astype({"position": "int", "substrat": "int"})

# Total quantity per sample, beach, date, position and area, then the
# quantity divided by the area, stored as pcs/m².
event_total = work_datad.groupby(
    ["echantillon", "Plage", "date", "position", "area"], as_index=False
)["quantite"].sum()
event_total["pcs/m²"] = event_total["quantite"] / event_total["area"]

# location summary methods

Cela concerne: location_summary.ipynb, macro_summary.ipynb

## Données

### micro

In [2]:
# Preview the first rows of the micro data table.
work_data.head()

Unnamed: 0,Plage,echantillon,orientation,position,substrat,frequentation,situation,distance,objet,compte
0,Amphion,74_Amp_1,NE,1,4,3,1,1,fibres,97
1,Amphion,74_Amp_10,NNE,2,4,3,1,1,fibres,140
2,Amphion,74_Amp_2,NNE,1,4,3,1,1,fibres,121
3,Amphion,74_Amp_3,NE,1,4,3,1,1,fibres,31
4,Amphion,74_Amp_4,NNE,1,4,3,1,1,fibres,179


### macro

In [3]:
# Preview the first rows of the macro totals (quantity and pcs/m² per sample, date, position and area).
event_total.head()

Unnamed: 0,echantillon,Plage,date,position,area,quantite,pcs/m²
0,"(amphion, 01.02.2022)",Amphion,2022-02-01,1,98,67,0.683673
1,"(amphion, 01.02.2022)",Amphion,2022-02-01,2,342,796,2.327485
2,"(amphion, 03.05.2022)",Amphion,2022-05-03,1,98,19,0.193878
3,"(amphion, 03.05.2022)",Amphion,2022-05-03,2,342,383,1.119883
4,"(amphion, 18.10.2022)",Amphion,2022-10-18,1,98,246,2.510204


### Compte moyen par échantillon et plage

Cela concerne: Table A1-4, A1-5, A3-3, A3-4

#### Micro table A1-4

In [4]:
# table A1-4
# Expose the raw count column under the name used by the cells below.
work_data["particules"] = work_data["compte"]

# Total number of particles for each (beach, sample) pair.
sample_totals = (
    work_data
    .groupby(["Plage", "echantillon"], as_index=False)["particules"]
    .sum()
)
# Independent copy kept under a second name.
df2 = sample_totals.copy()

# Distribution of the per-sample totals.
sample_totals["particules"].describe()

count     217.000000
mean      180.133641
std       267.619523
min         0.000000
25%        70.000000
50%       121.000000
75%       198.000000
max      2991.000000
Name: particules, dtype: float64

#### Macro table A3-3

In [5]:
# table A3-3
value_column = "moyenne"

# Add up the pcs/m² values of each macro sample so there is a single value
# per (sample, date, beach).
sample_totalsd = (
    event_total
    .groupby(["echantillon", "date", "Plage"], as_index=False)["pcs/m²"]
    .sum()
)
# Independent copy kept under a second name.
df2d = sample_totalsd.copy()

# Distribution of the per-sample values.
sample_totalsd["pcs/m²"].describe()

count    98.000000
mean      1.447740
std       1.691432
min       0.049057
25%       0.380307
50%       0.783439
75%       1.890987
max       8.680434
Name: pcs/m², dtype: float64

### Compte moyen par position

#### Micro table A1-5

In [6]:
# table A1-5
# Particle total for every (beach, sample, position) combination.
position_totals = (
    work_data
    .groupby(["Plage", "echantillon", "position"], as_index=False)["particules"]
    .sum()
)

# `position_totals` already has exactly one row per (Plage, echantillon,
# position), so grouping it again on those same keys summed single rows and
# only changed the column and row order. State that reordering directly
# instead of running a second aggregation; the resulting frame is the same.
position_sample_keys = ["echantillon", "Plage", "position"]
position_sample_totals = (
    position_totals[[*position_sample_keys, "particules"]]
    .sort_values(position_sample_keys)
    .reset_index(drop=True)
)

# Distribution of the sample totals at position 1 (ligne d'eau) and
# position 2 (plage sèche).
summary_ligne_deau = position_sample_totals.loc[
    position_sample_totals["position"] == 1, "particules"
].describe()
summary_plage_seche = position_sample_totals.loc[
    position_sample_totals["position"] == 2, "particules"
].describe()

summary_ligne_deau

count    111.000000
mean     116.837838
std      125.527146
min       19.000000
25%       53.500000
50%       85.000000
75%      129.000000
max      884.000000
Name: particules, dtype: float64

In [7]:
# Display the summary statistics computed for position 2.
summary_plage_seche

count     106.000000
mean      246.415094
std       349.507132
min         0.000000
25%       105.000000
50%       170.000000
75%       238.250000
max      2991.000000
Name: particules, dtype: float64

#### Macro table A3-4

In [8]:
# table A3-4
# pcs/m² summed for every (beach, sample, date, position) combination.
position_totalsd = (
    event_total
    .groupby(["Plage", "echantillon", "date", "position"], as_index=False)["pcs/m²"]
    .sum()
)
# Mean of those values for each beach and position.
df4d = position_totalsd.groupby(["Plage", "position"], as_index=False)["pcs/m²"].mean()

# Distribution of the values at position 1 and at position 2.
summary_ligne_deaud = position_totalsd.loc[
    position_totalsd["position"] == 1, "pcs/m²"
].describe()
summary_plage_seched = position_totalsd.loc[
    position_totalsd["position"] == 2, "pcs/m²"
].describe()

summary_ligne_deaud

count    95.000000
mean      0.842366
std       1.261876
min       0.019048
25%       0.179830
50%       0.375000
75%       0.914443
max       7.595960
Name: pcs/m², dtype: float64

In [9]:
# Display the pcs/m² summary statistics computed for position 2.
summary_plage_seched

count    88.000000
mean      0.702884
std       0.735364
min       0.005263
25%       0.181847
50%       0.404547
75%       1.075806
max       3.069231
Name: pcs/m², dtype: float64

### Nombre moyen par forme et plage

Concernant table A1-6

In [10]:
# Particle total for every (beach, sample, particle form) combination.
forme_totals = (
    work_data
    .groupby(["Plage", "echantillon", "objet"], as_index=False)["particules"]
    .sum()
)
# Independent copy kept under a second name.
df3 = forme_totals.copy()
df3.head()

Unnamed: 0,Plage,echantillon,objet,particules
0,Amphion,74_Amp_1,fdure,0
1,Amphion,74_Amp_1,fibres,97
2,Amphion,74_Amp_1,souple,0
3,Amphion,74_Amp_10,fdure,38
4,Amphion,74_Amp_10,fibres,140


#### dur

In [11]:
# Distribution of the per-sample totals for the "fdure" form.
tex_dure = forme_totals.loc[forme_totals["objet"] == "fdure", "particules"].describe()

tex_dure

count    217.000000
mean      23.525346
std       58.124073
min        0.000000
25%        3.000000
50%       11.000000
75%       23.000000
max      696.000000
Name: particules, dtype: float64

#### souple

In [12]:
# Distribution of the per-sample totals for the "souple" form.
tex_souple = forme_totals.loc[forme_totals["objet"] == "souple", "particules"].describe()

tex_souple

count    217.000000
mean      18.778802
std       60.139706
min        0.000000
25%        0.000000
50%        4.000000
75%       14.000000
max      604.000000
Name: particules, dtype: float64

#### fibres

In [13]:
# Distribution of the per-sample totals for the "fibres" form.
tex_fibres = forme_totals.loc[forme_totals["objet"] == "fibres", "particules"].describe()

tex_fibres

count     217.000000
mean      137.829493
std       211.692897
min         0.000000
25%        54.000000
50%        98.000000
75%       156.000000
max      2593.000000
Name: particules, dtype: float64

### Nombre moyen par forme et position

Concernant table A1-8 et A1-9

#### ligne d'eau

__dur__

In [14]:
# Particle total for every (beach, sample, position, particle form) combination.
# NOTE: this rebinds `position_totals`, which the table A1-5 cell built without
# the `objet` key; the per-form summaries further down need this version.
position_totals = (
    work_data
    .groupby(["Plage", "echantillon", "position", "objet"], as_index=False)["particules"]
    .sum()
)
position_totals.head()

Unnamed: 0,Plage,echantillon,position,objet,particules
0,Amphion,74_Amp_1,1,fdure,0
1,Amphion,74_Amp_1,1,fibres,97
2,Amphion,74_Amp_1,1,souple,0
3,Amphion,74_Amp_10,2,fdure,38
4,Amphion,74_Amp_10,2,fibres,140


In [15]:
# Restrict to position 1 once, then summarise each particle form separately.
ligne_deau_totals = position_totals[position_totals["position"] == 1]

leau_dure = ligne_deau_totals.loc[ligne_deau_totals["objet"] == "fdure", "particules"].describe()
leau_souple = ligne_deau_totals.loc[ligne_deau_totals["objet"] == "souple", "particules"].describe()
leau_fibre = ligne_deau_totals.loc[ligne_deau_totals["objet"] == "fibres", "particules"].describe()

leau_dure

count    111.000000
mean      12.243243
std       16.400785
min        0.000000
25%        2.000000
50%        7.000000
75%       14.000000
max      112.000000
Name: particules, dtype: float64

__souple__

In [16]:
# Display the summary statistics for the "souple" form at position 1.
leau_souple

count    111.000000
mean      17.864865
std       81.222868
min        0.000000
25%        0.000000
50%        1.000000
75%        3.500000
max      604.000000
Name: particules, dtype: float64

__fibres__

In [17]:
# Display the summary statistics for the "fibres" form at position 1.
leau_fibre

count    111.000000
mean      86.729730
std       58.658169
min       17.000000
25%       39.000000
50%       72.000000
75%      116.500000
max      273.000000
Name: particules, dtype: float64

#### plage sèche

__dur__

In [18]:
# Restrict to position 2 once, then summarise each particle form separately.
plage_seche_totals = position_totals[position_totals["position"] == 2]

plage_dure = plage_seche_totals.loc[plage_seche_totals["objet"] == "fdure", "particules"].describe()
plage_souple = plage_seche_totals.loc[plage_seche_totals["objet"] == "souple", "particules"].describe()
plage_fibre = plage_seche_totals.loc[plage_seche_totals["objet"] == "fibres", "particules"].describe()

plage_dure

count    106.000000
mean      35.339623
std       79.953783
min        0.000000
25%        8.000000
50%       15.000000
75%       30.750000
max      696.000000
Name: particules, dtype: float64

__souple__

In [19]:
# Display the summary statistics for the "souple" form at position 2.
plage_souple

count    106.000000
mean      19.735849
std       22.959302
min        0.000000
25%        5.000000
50%       11.000000
75%       26.750000
max      153.000000
Name: particules, dtype: float64

__fibres__

In [20]:
# Display the summary statistics for the "fibres" form at position 2.
plage_fibre

count     106.000000
mean      191.339623
std       287.980783
min         0.000000
25%        78.500000
50%       124.000000
75%       187.000000
max      2593.000000
Name: particules, dtype: float64

In [21]:
# Report the versions of the imported packages together with the git branch and repository.
%watermark --iversions -b -r

Git repo: https://github.com/hammerdirt-analyst/plastock.git

Git branch: tests

numpy : 1.24.2
pandas: 2.0.0

