<a href="https://colab.research.google.com/github/JerKeller/MP-Ecotaxa/blob/main/variability.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Importing libraries

In [1]:
import pandas as pd
import numpy as np
import os
import sys
import subprocess
from plotnine import ggplot, aes, geom_point, geom_line, labs, ggtitle, scale_y_log10
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
from google.colab import drive

In [4]:
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
ecotaxa_export = '/content/drive/MyDrive/ecotaxa_export.tsv'
tsv_read = pd.read_csv(ecotaxa_export, sep='\t')

In [6]:
# Load data and extract columns needed
sub1 = tsv_read[['object_id', 'object_date', 'object_depth_min', 'object_depth_max', 'object_annotation_category',
                 'object_area', 'object_area_exc', 'object_major', 'object_minor', 'sample_tot_vol', 'process_particle_pixel_size_mm',
                 'acq_id']]

In [7]:
# Modify names of columns
sub1.columns = ['Label', 'Date', 'Min_depth', 'Max_depth', 'Taxa', 'area', 'areaExc', 'major', 'minor',
                'Tow_Vol', 'pixel_size_mm', 'acq_id']

# Remove not living organisms
sub2 = sub1[(sub1['Taxa'] != 'badfocus<artefact') & (sub1['Taxa'] != 'fiber<detritus') & 
            (sub1['Taxa'] != 'multiple<other') & (sub1['Taxa'] != 'detritus') & 
            (sub1['Taxa'] != 'egg sac<egg') & (sub1['Taxa'] != 't001') & 
            (sub1['Taxa'] != 't002') & (sub1['Taxa'] != 't003') & 
            (sub1['Taxa'] != 'othertocheck') & (sub1['Taxa'] != 'artefact') & 
            (sub1['Taxa'] != 'bubble') & (sub1['Taxa'] != 'dead<Copepoda')].copy()

In [23]:
var_data = sub2.loc[(sub2['acq_id'] == 'tot_n21_221221') | (sub2['acq_id'] == 'tot_n22_221221') | 
                (sub2['acq_id'] == 'tot_n23_221221') | (sub2['acq_id'] == 'tot_n24_221221') |
                (sub2['acq_id'] == 'tot_n25_221221') | (sub2['acq_id'] == 'tot_n26_221221') |
                (sub2['acq_id'] == 'tot_n27_221221') | (sub2['acq_id'] == 'tot_n28_221221') |
                (sub2['acq_id'] == 'tot_n29_221221') | (sub2['acq_id'] == 'tot_n40_230209') |
                (sub2['acq_id'] == 'tot_n41_230209') | (sub2['acq_id'] == 'tot_n42_230209')].copy()

In [24]:
# Convert date to datetime format
var_data['Date'] = pd.to_datetime(var_data['Date'], format='%Y%m%d')

Conversions from pixel to mm

In [25]:
var_data['Area_mm2'] = var_data['area'] * ((var_data['pixel_size_mm']) ** 2)
var_data['AreaExc_mm2'] = var_data['areaExc'] * ((var_data['pixel_size_mm']) ** 2)
var_data['Major_mm'] = var_data['major'] * var_data['pixel_size_mm']
var_data['Minor_mm'] = var_data['minor'] * var_data['pixel_size_mm']

In [26]:
var_data['V_mm3'] = (4 / 3) * np.pi * ((var_data['Major_mm'] / 2) * (var_data['Minor_mm'] / 2) * (var_data['Minor_mm'] / 2))

In [42]:
print(var_data)

                      Label       Date  Min_depth  Max_depth  \
8753     n21_221221_tot_1_1 2022-12-21       50.0      100.0   
8755     n21_221221_tot_1_3 2022-12-21       50.0      100.0   
8756     n21_221221_tot_1_4 2022-12-21       50.0      100.0   
8757     n21_221221_tot_1_5 2022-12-21       50.0      100.0   
8761     n21_221221_tot_1_9 2022-12-21       50.0      100.0   
...                     ...        ...        ...        ...   
18204  n42_230209_tot_1_279 2023-02-09        0.0       50.0   
18205  n42_230209_tot_1_280 2023-02-09        0.0       50.0   
18206  n42_230209_tot_1_281 2023-02-09        0.0       50.0   
18207  n42_230209_tot_1_282 2023-02-09        0.0       50.0   
18208  n42_230209_tot_1_284 2023-02-09        0.0       50.0   

                       Taxa     area  areaExc  major  minor  Tow_Vol  \
8753     Daphnia<Daphniidae   5828.0   5823.0   98.0   75.7   3.5342   
8755     Cyclops prealpinus   6954.0   6946.0  166.2   53.3   3.5342   
8756     Cyclop

In [50]:
grouped_data = var_data.groupby(['acq_id', 'Taxa', 'Date'])

In [51]:
species_count_data = grouped_data.agg({'Taxa': 'count', 'Tow_Vol': 'mean', 'Min_depth': 'first'}).rename(columns={'Taxa': 'Count', 'Tow_Vol': 'tv'})

In [58]:
print(species_count_data)

            acq_id                  Taxa       Date  Count      tv  Min_depth
0   tot_n21_221221    Cyclops prealpinus 2022-12-21     64  3.5342       50.0
1   tot_n21_221221    Daphnia<Daphniidae 2022-12-21     22  3.5342       50.0
2   tot_n21_221221          Eubosmina sp 2022-12-21      9  3.5342       50.0
3   tot_n21_221221  Eudiaptomus gracilis 2022-12-21     12  3.5342       50.0
4   tot_n21_221221     Leptodora kindtii 2022-12-21      1  3.5342       50.0
5   tot_n22_221221    Cyclops prealpinus 2022-12-21    154  2.1206       20.0
6   tot_n22_221221    Daphnia<Daphniidae 2022-12-21     10  2.1206       20.0
7   tot_n22_221221          Eubosmina sp 2022-12-21      5  2.1206       20.0
8   tot_n22_221221  Eudiaptomus gracilis 2022-12-21    133  2.1206       20.0
9   tot_n22_221221      nauplii<Copepoda 2022-12-21     28  2.1206       20.0
10  tot_n23_221221    Cyclops prealpinus 2022-12-21     36  1.4137        0.0
11  tot_n23_221221    Daphnia<Daphniidae 2022-12-21     28  1.41

In [52]:
species_count_data = species_count_data.reset_index()



In [54]:
var_data['Min_depth'] = var_data['Min_depth'].astype(int)

In [60]:
# Group species_count_data by Date and Min_depth
grouped_data = species_count_data.groupby(['Date', 'Min_depth'])

# Calculate the variance of the Count column within each group
count_var = grouped_data['Count'].var().reset_index().rename(columns={'Count': 'Count_var'})

# Merge species_count_data and count_var dataframes
merged_data = pd.merge(species_count_data, count_var, on=['Date', 'Min_depth'])

# Print the merged dataframe
print(merged_data)





            acq_id                  Taxa       Date  Count      tv  Min_depth  \
0   tot_n21_221221    Cyclops prealpinus 2022-12-21     64  3.5342       50.0   
1   tot_n21_221221    Daphnia<Daphniidae 2022-12-21     22  3.5342       50.0   
2   tot_n21_221221          Eubosmina sp 2022-12-21      9  3.5342       50.0   
3   tot_n21_221221  Eudiaptomus gracilis 2022-12-21     12  3.5342       50.0   
4   tot_n21_221221     Leptodora kindtii 2022-12-21      1  3.5342       50.0   
5   tot_n24_221221    Cyclops prealpinus 2022-12-21     33  3.5342       50.0   
6   tot_n24_221221    Daphnia<Daphniidae 2022-12-21      7  3.5342       50.0   
7   tot_n24_221221          Eubosmina sp 2022-12-21      4  3.5342       50.0   
8   tot_n24_221221  Eudiaptomus gracilis 2022-12-21      8  3.5342       50.0   
9   tot_n27_221221    Cyclops prealpinus 2022-12-21     25  3.5342       50.0   
10  tot_n27_221221    Daphnia<Daphniidae 2022-12-21      7  3.5342       50.0   
11  tot_n27_221221          

In [55]:
# Group var_data by Date and Min_depth
grouped_var = var_data.groupby(['Date', 'Min_depth'])

# Calculate the variance of the Count column within each group
count_var = grouped_var['Taxa'].count().groupby(['Date', 'Min_depth']).var().reset_index().rename(columns={'Taxa': 'Count_var'})

# Merge species_count_data and count_var dataframes
merged_data = pd.merge(species_count_data, count_var, on=['Date', 'Min_depth'])

# Print the merged dataframe
print(merged_data)

            acq_id                  Taxa       Date  Count      tv  Min_depth  \
0   tot_n21_221221    Cyclops prealpinus 2022-12-21     64  3.5342       50.0   
1   tot_n21_221221    Daphnia<Daphniidae 2022-12-21     22  3.5342       50.0   
2   tot_n21_221221          Eubosmina sp 2022-12-21      9  3.5342       50.0   
3   tot_n21_221221  Eudiaptomus gracilis 2022-12-21     12  3.5342       50.0   
4   tot_n21_221221     Leptodora kindtii 2022-12-21      1  3.5342       50.0   
5   tot_n24_221221    Cyclops prealpinus 2022-12-21     33  3.5342       50.0   
6   tot_n24_221221    Daphnia<Daphniidae 2022-12-21      7  3.5342       50.0   
7   tot_n24_221221          Eubosmina sp 2022-12-21      4  3.5342       50.0   
8   tot_n24_221221  Eudiaptomus gracilis 2022-12-21      8  3.5342       50.0   
9   tot_n27_221221    Cyclops prealpinus 2022-12-21     25  3.5342       50.0   
10  tot_n27_221221    Daphnia<Daphniidae 2022-12-21      7  3.5342       50.0   
11  tot_n27_221221          

In [40]:
# Group the species_count_data dataframe by date and depth
grouped_counts = species_count_data.groupby(['Date', 'Min_depth'])

# Calculate the variance of the Count column within each group
count_var = grouped_counts['Count'].var()

KeyError: ignored

In [37]:
print (species_count_data)

                                                Count      tv
acq_id         Taxa                 Date                     
tot_n21_221221 Cyclops prealpinus   2022-12-21     64  3.5342
               Daphnia<Daphniidae   2022-12-21     22  3.5342
               Eubosmina sp         2022-12-21      9  3.5342
               Eudiaptomus gracilis 2022-12-21     12  3.5342
               Leptodora kindtii    2022-12-21      1  3.5342
tot_n22_221221 Cyclops prealpinus   2022-12-21    154  2.1206
               Daphnia<Daphniidae   2022-12-21     10  2.1206
               Eubosmina sp         2022-12-21      5  2.1206
               Eudiaptomus gracilis 2022-12-21    133  2.1206
               nauplii<Copepoda     2022-12-21     28  2.1206
tot_n23_221221 Cyclops prealpinus   2022-12-21     36  1.4137
               Daphnia<Daphniidae   2022-12-21     28  1.4137
               Eubosmina sp         2022-12-21      1  1.4137
               Eudiaptomus gracilis 2022-12-21    160  1.4137
        

In [29]:
concentration_data = species_count_data.copy()
concentration_data['Concentration'] = concentration_data['Count'] / concentration_data['tv']
concentration_data.reset_index(inplace=True)

In [38]:
variation_data = pd.merge(concentration_data, grouped_data[['acq_id', 'Min_depth']], on='acq_id')

TypeError: ignored

In [36]:
print(variation_data)

               acq_id                Taxa       Date  Count      tv  \
0      tot_n21_221221  Cyclops prealpinus 2022-12-21     64  3.5342   
1      tot_n21_221221  Cyclops prealpinus 2022-12-21     64  3.5342   
2      tot_n21_221221  Cyclops prealpinus 2022-12-21     64  3.5342   
3      tot_n21_221221  Cyclops prealpinus 2022-12-21     64  3.5342   
4      tot_n21_221221  Cyclops prealpinus 2022-12-21     64  3.5342   
...               ...                 ...        ...    ...     ...   
11715  tot_n42_230209    nauplii<Copepoda 2023-02-09     10  3.5342   
11716  tot_n42_230209    nauplii<Copepoda 2023-02-09     10  3.5342   
11717  tot_n42_230209    nauplii<Copepoda 2023-02-09     10  3.5342   
11718  tot_n42_230209    nauplii<Copepoda 2023-02-09     10  3.5342   
11719  tot_n42_230209    nauplii<Copepoda 2023-02-09     10  3.5342   

       Concentration  Min_depth  
0          18.108766       50.0  
1          18.108766       50.0  
2          18.108766       50.0  
3          