<a href="https://colab.research.google.com/github/JerKeller/MP-Ecotaxa/blob/main/ecotaxa_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# Importing libraries
import pandas as pd
import numpy as np
import os
import sys
import subprocess

In [8]:
ecotaxa_export = '/content/ecotaxa_export.tsv'
tsv_read = pd.read_csv(ecotaxa_export, sep='\t')

In [10]:
# Load data and extract columns needed
sub1 = tsv_read[['object_id', 'object_date', 'object_depth_min', 'object_depth_max', 'object_annotation_category',
                 'object_area', 'object_area_exc', 'object_major', 'object_minor', 'sample_tot_vol', 'process_particle_pixel_size_mm',
                 'acq_id']]

In [11]:
# Modify names of columns
sub1.columns = ['Label', 'Date', 'Min_depth', 'Max_depth', 'Taxa', 'area', 'areaExc', 'major', 'minor',
                'Tow_Vol', 'pixel_size_mm', 'acq_id']

# Remove not living organisms
sub2 = sub1[(sub1['Taxa'] != 'badfocus<artefact') & (sub1['Taxa'] != 'fiber<detritus') & 
            (sub1['Taxa'] != 'multiple<other') & (sub1['Taxa'] != 'detritus') & 
            (sub1['Taxa'] != 'egg sac<egg') & (sub1['Taxa'] != 't001') & 
            (sub1['Taxa'] != 't002') & (sub1['Taxa'] != 't003') & 
            (sub1['Taxa'] != 'othertocheck') & (sub1['Taxa'] != 'artefact') & 
            (sub1['Taxa'] != 'bubble') & (sub1['Taxa'] != 'dead<Copepoda')].copy()


In [12]:
sub2watercol = sub2[~sub2['acq_id'].isin(['tot_n10_221027', 'tot_n9_221013', 'tot_n14_221103',
                                           'tot_n19_221110', 'tot_n24_221221', 'tot_n25_221221',
                                           'tot_n26_221221', 'tot_n27_221221', 'tot_n28_221221',
                                           'tot_n29_221221'])].copy()

In [13]:
# Convert date to datetime format
sub2watercol['Date'] = pd.to_datetime(sub2watercol['Date'], format='%Y%m%d')

In [1]:







#######################Equations to compute Biovolume##########################

# Area (mm2) = object_area x (process_particle_pixel_size_mm)2
# Area excluded (mm2) = object_area_exc x (process_particle_pixel_size_mm)2
# Major (mm) = object_major x process_particle_pixel_size_mm
# Minor (mm) = object_minor x process_particle_pixel_size_mm

import pandas as pd
import numpy as np

# Conversions from pixel to mm
sub2watercol['Area_mm2'] = sub2watercol['area'] * ((sub2watercol['pixel_size_mm']) ** 2)
sub2watercol['AreaExc_mm2'] = sub2watercol['areaExc'] * ((sub2watercol['pixel_size_mm']) ** 2)
sub2watercol['Major_mm'] = sub2watercol['major'] * sub2watercol['pixel_size_mm']
sub2watercol['Minor_mm'] = sub2watercol['minor'] * sub2watercol['pixel_size_mm']

# Concentration = Number of individuals in the sample/m3
# Concentration = nb. ind./m3 = (object_annotation_category x acq_sub_part) / sample_tot_vol

# Conversions from pixel to mm
sub2['Area_mm2'] = sub2['area'] * ((sub2watercol['pixel_size_mm']) ** 2)
sub2['AreaExc_mm2'] = sub2['areaExc'] * ((sub2watercol['pixel_size_mm']) ** 2)
sub2['Major_mm'] = sub2['major'] * sub2watercol['pixel_size_mm']
sub2['Minor_mm'] = sub2['minor'] * sub2watercol['pixel_size_mm']

#######################Biovolume##########################

###Plain biovolume###

# Radius of a circle = r_mm = √ (Area (mm2) / ∏)
sub2['r_mm'] = np.sqrt(sub2['Area_mm2'] / np.pi)
# Spherical Volume = V_mm3 = 4/3 x ∏ x r3
sub2['V_mm3_plain'] = (4 / 3) * np.pi * ((sub2['r_mm']) ** 3)
# Biovolume = Bv_mm3/m3 = (Spherical Volume x acq_sub_part) / sample_tot_vol
sub2['Biovolume_plain'] = sub2['V_mm3_plain'] / sub2['Tow_Vol']

###Ellipsoide biovolume###

# Ellipsoidal Volume = V (mm3) = 4/3 x ∏ x [ (Major(mm)/2) x (Minor(mm)/2) x (Minor(mm)/2) ]
sub2watercol['V_mm3'] = (4 / 3) * np.pi * ((sub2watercol['Major_mm'] / 2) * (sub2watercol['Minor_mm'] / 2) * (sub2watercol['Minor_mm'] / 2))

# Creates a factor which allows binning by the log of the biovolume (calculated in the previous line).
sub2watercol['logvol'] = np.log10(sub2watercol['V_mm3'])

# Biovolume = Bv (mm3/m3) = (Spherical Volume x acq_sub_part) / sample_tot_vol
# sub2watercol['Biovolume_ellips_mm3_m2'] = sub2watercol['V_mm3'] / sub2watercol['Tow_Vol']

# Grouper les biovolumes par dates et espèces
# grouped_data_biovol <- aggregate(Biovolume_ellips_mm3_m2 ~ Date + Taxa, data = sub2, FUN = sum)

# Group the data by date and taxa
grouped_data = sub2watercol.groupby(['acq_id', 'Taxa', 'Date'])

# Count the number of occurrences for each species on each date
species_count



SyntaxError: ignored