### RULES
Requires format of skipped line between different phylums (ex. empty row above Diatom, Dinoflagellate, etc.)

Assumed all {Ochromonas, } are mixotrophs.

1. assume everything after "Unknown flagellates" is irrelevant (to be deleted)
2. diatoms are NOT mixotrophs
3. remove all "[name]-like" (without genus specified)
4. remove all "[genus name] spp." AND "[genus name] sp."
5. check "cysts of"

Status Key--  
Confirmed := explicitly in the Mixotroph Database  
Unsure (sp. in mdb) := genus in Mixotroph Database lists "[genus name] sp." (ex. Ochromonas sp. for Ochromonas danica)  
Unsure (inexact name):= LIS name is in a longer Mixotroph Database name or vice versa (ex. Chattonella marina in Chattonella marina var. ovata)   

In [1]:
import pandas as pd
import numpy as np
from constants import *
import warnings

warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)
pd.set_option("future.no_silent_downcasting", True)
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option("future.no_silent_downcasting", True)

class Block:
    def __init__(self, ind, df):
        self.ind = ind
        self.df = df

confirmed_genus_before=["Ochromonas"]
confirmed_species_before=['Chattonella marina']

csv_name = "2016-final.xlsx - Total counts"
lis = pd.read_csv(f"inputs/{csv_name}.csv")
orig_header = lis.columns 

mdb = pd.read_csv(MDB_PATH)
mdb.columns = mdb.iloc[1]
mdb = mdb.drop([0, 1]).reset_index(drop=True)
mdb['Species Name'] = mdb['Species Name'].str.replace(r'sp$', 'sp.', regex=True) # edit so that species ending in "sp" now end in "sp."


mdb_volume = mdb.loc[:, ['size class', 'L (μm)', 'W (μm) or diameter (μm)']]
# Step 1: Handle # - # and ~# - #, calculate the average
mdb_volume[['L (μm)', 'W (μm) or diameter (μm)']] = mdb_volume[['L (μm)', 'W (μm) or diameter (μm)']].replace(r'[~]?(\d+)-(\d+)', lambda x: str((float(x.group(1)) + float(x.group(2))) / 2), regex=True)
# Step 2: Remove ~, ≤ symbols, and keep only the numbers
mdb_volume[['L (μm)', 'W (μm) or diameter (μm)']] = mdb_volume[['L (μm)', 'W (μm) or diameter (μm)']].replace(r'[~≤]', '', regex=True)
# Step 3: Replace cells with non-numeric values with NaN
mdb_volume[['L (μm)', 'W (μm) or diameter (μm)']] = mdb_volume[['L (μm)', 'W (μm) or diameter (μm)']].apply(pd.to_numeric, errors='coerce')
# Step 4: Drop rows where either 'length' or 'width' contain both numbers and strings
mdb_volume = mdb_volume.dropna(subset=['L (μm)', 'W (μm) or diameter (μm)'])
# Calculate sphere volume (V = (4/3) * π * (d/2)^3) when length and width are equal
mdb_volume['Volume'] = np.where(mdb_volume['L (μm)'] == mdb_volume['W (μm) or diameter (μm)'], (4/3) * np.pi * (mdb_volume['W (μm) or diameter (μm)'] / 2) ** 3, np.nan)
# Calculate oblate spheroid volume where length and width are numeric and not equal
mdb_volume['Volume'] = np.where((mdb_volume['L (μm)'].notna()) & (mdb_volume['W (μm) or diameter (μm)'].notna()) & (mdb_volume['L (μm)'] != mdb_volume['W (μm) or diameter (μm)']), (4/3) * np.pi * ((mdb_volume['W (μm) or diameter (μm)'] / 2) ** 2) * (mdb_volume['L (μm)'] / 2), mdb_volume['Volume'])
# Get average volumes for each size class
mdb_volume = mdb_volume.drop(['L (μm)', 'W (μm) or diameter (μm)'], axis=1).groupby('size class', as_index=False).mean()

In [2]:
## clean_lis

phylum_ind = lis[lis.iloc[:, 0] == "Phylum"].index[0]
lis.columns = lis.iloc[phylum_ind]  # reset column headers
lis = lis.iloc[phylum_ind+2:].reset_index(drop=True)

# remove rows after unknown flagellates
unknown_flagellates_ind = lis[lis["Phylum"] == "Unknown flagellates"].index[0] 
lis = lis.iloc[:unknown_flagellates_ind]
lis = lis.iloc[:lis.last_valid_index()+1]  # remove trailing nan rows

# remove rows that contain "TOTAL"
lis = lis[~lis["Phylum"].str.contains("TOTAL", na=False)].reset_index(drop=True)  

# construct correct phylum column
actual_phylum_ind = lis[lis["Species"].isna() & lis["Phylum"].isna()].index + 1
lis = lis.rename(columns={"Phylum": "Genus"}) # rename phylum column to genus
lis.insert(0, 'Phylum', lis["Genus"].iloc[actual_phylum_ind])  # reconstruct phylum column
lis['Phylum'] = lis['Phylum'].ffill()  # forwardfill phylum

lis['Genus'] = lis['Species'].str.split().str[0]  # fill genus using first word of species name

lis = lis.dropna(subset=['Species']).reset_index(drop=True) # delete rows with na in Species column

# # ensure numerical values are floats and not strings
lis = lis.fillna(0)

SPECIES_COL = lis.columns.get_loc("Species")
lis.iloc[:, SPECIES_COL+1:] = lis.iloc[:, SPECIES_COL+1:].replace(",| ", "", regex=True).replace("", 0).astype(float).astype(int)

# add totals for each row
lis['Totals'] = lis.loc[:, ~lis.columns.isin(['Status', 'Phylum', 'Genus', 'Species'])].sum(axis=1)
lis = pd.concat([lis.iloc[:, :3], lis.iloc[:, -1:], lis.iloc[:, 3:-1]], axis=1)

lis

Unnamed: 0,Phylum,Genus,Species,Totals,1/8/16,1/8/16.1,1/8/16.2,1/8/16.3,1/8/16.4,1/8/16.5,1/8/16.6,1/8/16.7,1/8/16.8,1/7/16,1/8/16.9,1/8/16.10,1/8/16.11,1/8/16.12,1/8/16.13,1/8/16.14,1/8/16.15,1/8/16.16,1/8/16.17,1/7/16.1,2/1/16,2/1/16.1,2/1/16.2,2/1/16.3,2/2/16,2/2/16.1,2/2/16.2,2/4/16,2/4/16.1,2/4/16.2,2/1/16.4,2/1/16.5,2/1/16.6,2/1/16.7,2/2/16.3,2/2/16.4,2/2/16.5,2/4/16.3,2/4/16.4,2/4/16.5,3/8/16,3/8/16.1,3/8/16.2,3/8/16.3,3/9/16,3/9/16.1,3/9/16.2,3/10/16,3/10/16.1,3/10/16.2,3/8/16.4,3/8/16.5,3/8/16.6,3/8/16.7,3/9/16.3,3/9/16.4,3/9/16.5,3/10/16.3,3/10/16.4,3/10/16.5,4/4/16,4/4/16.1,4/4/16.2,4/4/16.3,4/4/16.4,4/6/16,4/6/16.1,4/13/16,4/13/16.1,4/13/16.2,4/4/16.5,4/4/16.6,4/4/16.7,4/4/16.8,4/4/16.9,4/6/16.2,4/6/16.3,4/13/16.3,4/13/16.4,4/13/16.5,5/12/16,5/12/16.1,5/12/16.2,5/12/16.3,5/12/16.4,5/18/16,5/18/16.1,5/11/16,5/11/16.1,5/11/16.2,5/12/16.5,5/12/16.6,5/12/16.7,5/12/16.8,5/12/16.9,5/18/16.2,5/18/16.3,5/11/16.3,5/11/16.4,5/11/16.5,6/10/16,6/10/16.1,6/10/16.2,6/10/16.3,6/14/16,6/14/16.1,6/14/16.2,6/14/16.3,6/8/16,6/8/16.1,6/10/16.4,6/10/16.5,6/10/16.6,6/10/16.7,6/14/16.4,6/14/16.5,6/14/16.6,6/14/16.7,6/8/16.2,6/8/16.3,7/6/16,7/6/16.1,7/6/16.2,7/6/16.3,7/7/16,7/7/16.1,7/7/16.2,7/5/16,7/5/16.1,7/5/16.2,7/6/16.4,7/6/16.5,7/6/16.6,7/6/16.7,7/7/16.3,7/7/16.4,7/7/16.5,7/5/16.3,7/5/16.4,7/5/16.5,8/3/16,8/3/16.1,8/3/16.2,8/4/16,8/4/16.1,8/4/16.2,8/4/16.3,8/1/16,8/1/16.1,8/1/16.2,8/3/16.3,8/3/16.4,8/3/16.5,8/4/16.4,8/4/16.5,8/4/16.6,8/4/16.7,8/1/16.3,8/1/16.4,8/1/16.5,8/30/16,8/30/16.1,8/30/16.2,8/30/16.3,8/30/16.4,8/31/16,8/31/16.1,8/29/16,8/29/16.1,8/29/16.2,8/30/16.5,8/30/16.6,8/30/16.7,8/30/16.8,8/30/16.9,8/31/16.2,8/31/16.3,8/29/16.3,8/29/16.4,8/29/16.5,10/5/16,10/5/16.1,10/5/16.2,10/5/16.3,10/4/16,10/4/16.1,10/4/16.2,10/3/16,10/3/16.1,10/3/16.2,10/5/16.4,10/5/16.5,10/5/16.6,10/5/16.7,10/4/16.3,10/4/16.4,10/4/16.5,10/3/16.3,10/3/16.4,10/3/16.5,11/9/16,11/9/16.1,11/9/16.2,11/9/16.3,11/8/16,11/8/16.1,11/8/16.2,11/7/16,11/7/16.1,11/7/16.2,11/9/16.4,11/9/16.5,11/9/16.6,11/9/16.7,11/8/16.3,11/8/16.4,11/8/16.5,11/7/16.3,11/7/16.4,11/7/16.5,12/5/16,12/5/16.1,12/5/16.2,12/5/16.3,12/6/16,12/6/16.1,12/6/16.2,11/30/16,11/30/16.1,11/30/16.2,12/5/16.4,12/5/16.5,12/5/16.6,12/5/16.7,12/6/16.3,12/6/16.4,12/6/16.5,11/30/16.3,11/30/16.4,11/30/16.5
0,Diatom,Achnanthes,Achnanthes spp.,191136,1452,1452,1452,1452,1452,1452,0,1452,1452,0,0,0,0,1452,2904,0,2904,4400,0,0,2904,0,0,0,0,1452,0,0,1452,0,0,0,0,0,0,0,0,1452,0,0,0,0,0,0,0,8800,0,1452,4400,4400,4400,0,0,0,0,0,2904,1452,0,0,0,0,1452,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1452,1452,0,0,0,0,0,0,0,0,1452,0,0,0,0,0,0,0,4400,4400,0,0,1452,1452,0,0,4400,1452,0,1452,1452,4400,8800,1452,2904,2904,1452,2904,8800,13200,1452,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8800,0,1452,0,0,0,0,0,0,0,1452,0,0,0,2904,0,1452,1452,0,0,8800,0,1452,0,0,0,0,0,0,0,0,0,0,0,1452,0,0,0,1452,0,0,0,0,0,1452,1452,1452,1452,0,0,0,0,0,0,1452,0,0,1452,0,1452,0,0,0,0,0,1452,0,0,0,0,0,0,2904,0,0,0,0,0,0,1452,0,0,0,2904,0,0,0,0,0,0,1452,0,0,0,2904,0,0,0,0,0,1452,0,2904
1,Diatom,Actinocyclus,Actinocyclus spp.,32076,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4400,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1452,1452,1452,0,0,0,0,4400,1452,1452,1452,2904,0,1452,4400,0,0,0,0,0,0,0,2904,0,0,0,0,0,0,0,0,1452,0,0,1452,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Diatom,Actinoptychus,Actinoptychus senarius,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Diatom,Actinoptychus,Actinoptychus undulatus,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Diatom,Amphiprora,Amphiprora spp.,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5,Diatom,Amphora,Amphora spp.,197208,1452,0,0,0,1452,0,0,0,0,0,2904,0,1452,1452,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1452,0,0,0,0,0,0,0,0,1452,8800,0,0,0,0,0,0,0,1452,0,0,0,0,1452,1452,0,0,0,0,0,0,0,0,35200,5852,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1452,0,0,8800,4400,0,0,0,0,0,0,0,0,0,0,0,1452,0,0,0,0,0,0,0,0,0,1452,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1452,0,0,0,1452,0,0,1452,0,4400,1452,0,0,1452,0,1452,0,2904,0,1452,2904,2904,0,1452,0,0,0,0,8800,22000,2904,0,1452,2904,0,1452,1452,1452,13200,4400,2904,2904,1452,0,0,0,0,1452,1452,2904,0,0,1452,1452,0,0,2904,5852,0,1452,2904,0,0,0
6,Diatom,Asterionellopsis,Asterionellopsis glacialis (Asterionella glaci...,3490124,0,0,1452,0,0,0,0,0,1452,1452,0,0,1452,1452,0,0,1452,0,17600,0,1452,0,2904,0,0,1452,0,1452,2904,52800,0,4400,1452,0,1452,0,0,0,10252,35200,48400,13200,0,17600,0,44000,0,532400,171600,96800,118800,70400,114400,26400,48400,127600,66000,110000,281600,145200,127600,325600,184800,8800,8800,1452,0,13200,35200,26400,61600,0,1452,4400,13200,2904,8800,61600,0,0,4400,0,0,0,0,0,0,1452,4400,1452,13200,14652,1452,1452,1452,1452,0,8800,17600,22000,1452,0,0,0,0,0,0,0,13200,1452,0,0,0,1452,0,13200,1452,1452,13200,8800,0,0,0,0,0,0,0,0,0,4400,0,0,0,1452,0,0,8800,26400,0,8800,0,0,0,0,0,0,0,35200,0,105600,0,0,0,0,0,0,0,61600,0,22000,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1452,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1452,0,0,0,0,0,0,0,0,0,0,0,0,0,1452,0,0,0,0,0,1452,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
7,Diatom,Asteroplanus,Asteroplanus karianus,2904,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2904,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
8,Diatom,Bacillaria,Bacillaria paxillifer,123024,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8800,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,13200,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10252,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,13200,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7304,0,0,0,0,0,0,0,0,0,0,17600,0,0,0,0,0,10252,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10252,0,0,0,0,0,0,0,5852,7304,0,0,0,0,0,0,0,0,2904,0,0,0,0,0,0,0,7304,0,0,8800
9,Diatom,Bacteriastrum,Bacteriastrum spp.,1452,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1452,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [3]:
## classify_lis

# add Status column
lis = lis.copy()
lis.insert(0, 'Status', None)

# store blocks of known mixotroph genuses (given beforehand) 
known_blocks = []
for genus in confirmed_genus_before:
    ind = lis[lis["Species"].str.contains(genus)].index
    df = lis.iloc[ind]
    known_blocks.append(Block(ind, df))
for species in confirmed_species_before:
    ind = lis[lis["Species"].str.contains(species)].index
    df = lis.iloc[ind]
    known_blocks.append(Block(ind, df))

# remove based on hard coded rules (NOT RESETTING INDEX IN ORDER TO ADD CONFIRMED_BEFORE GENUSES BACK CORRECTLY)
lis = lis[~lis["Species"].str.contains("unknown|other|cysts")]
lis = lis[~lis["Species"].str.contains("-like")] # remove species ending with "-like"
lis = lis[~lis["Species"].str.contains("sp.|spp.")]  # remove all sp. / spp.

# add back stored blocks of known mixotrophs and mark as Yes
for known_block in known_blocks:
    lis = pd.concat([lis, known_block.df]).sort_index().drop_duplicates()
    lis.loc[known_block.ind, "Status"] = "Yes"

# check if (in none status) direct match and mark all Trues as "Yes"
filtered = lis[lis['Status'].isnull()]["Species"].isin(mdb['Species Name'])
lis.loc[filtered[filtered].index, "Status"] = "Yes"
        
# check (in remaining none status) if the genus has sp. and mark all Trues as "Unsure (sp. in mdb)"
genus_to_check = lis[lis['Status'].isnull()]['Species'].str.split().str[0].drop_duplicates() + " sp."
filtered = genus_to_check.isin(mdb['Species Name'])
lis.loc[filtered[filtered].index, "Status"] = "Unsure (sp. mdb)"

# check (in remaining none status) if the name is contained in the mdb and vice versa and mark all Trues as "Unsure (inexact name)"
filtered = lis[lis['Status'].isnull()]["Species"].apply(lambda x: mdb["Species Name"].str.contains(x, regex=False).any())
lis.loc[filtered[filtered].index, "Status"] = "Unsure (inexact name)"
        
pattern = '|'.join(mdb['Species Name'])
filtered = lis[lis['Status'].isnull()]["Species"].str.contains(pattern, regex=True)
lis.loc[filtered[filtered].index, "Status"] = "Unsure (inexact name)"
        
# replace None's in Status with No's 
lis["Status"] = lis["Status"].replace(np.nan, 'No')

lis = lis.reset_index(drop=True)

lis

Unnamed: 0,Status,Phylum,Genus,Species,Totals,1/8/16,1/8/16.1,1/8/16.2,1/8/16.3,1/8/16.4,1/8/16.5,1/8/16.6,1/8/16.7,1/8/16.8,1/7/16,1/8/16.9,1/8/16.10,1/8/16.11,1/8/16.12,1/8/16.13,1/8/16.14,1/8/16.15,1/8/16.16,1/8/16.17,1/7/16.1,2/1/16,2/1/16.1,2/1/16.2,2/1/16.3,2/2/16,2/2/16.1,2/2/16.2,2/4/16,2/4/16.1,2/4/16.2,2/1/16.4,2/1/16.5,2/1/16.6,2/1/16.7,2/2/16.3,2/2/16.4,2/2/16.5,2/4/16.3,2/4/16.4,2/4/16.5,3/8/16,3/8/16.1,3/8/16.2,3/8/16.3,3/9/16,3/9/16.1,3/9/16.2,3/10/16,3/10/16.1,3/10/16.2,3/8/16.4,3/8/16.5,3/8/16.6,3/8/16.7,3/9/16.3,3/9/16.4,3/9/16.5,3/10/16.3,3/10/16.4,3/10/16.5,4/4/16,4/4/16.1,4/4/16.2,4/4/16.3,4/4/16.4,4/6/16,4/6/16.1,4/13/16,4/13/16.1,4/13/16.2,4/4/16.5,4/4/16.6,4/4/16.7,4/4/16.8,4/4/16.9,4/6/16.2,4/6/16.3,4/13/16.3,4/13/16.4,4/13/16.5,5/12/16,5/12/16.1,5/12/16.2,5/12/16.3,5/12/16.4,5/18/16,5/18/16.1,5/11/16,5/11/16.1,5/11/16.2,5/12/16.5,5/12/16.6,5/12/16.7,5/12/16.8,5/12/16.9,5/18/16.2,5/18/16.3,5/11/16.3,5/11/16.4,5/11/16.5,6/10/16,6/10/16.1,6/10/16.2,6/10/16.3,6/14/16,6/14/16.1,6/14/16.2,6/14/16.3,6/8/16,6/8/16.1,6/10/16.4,6/10/16.5,6/10/16.6,6/10/16.7,6/14/16.4,6/14/16.5,6/14/16.6,6/14/16.7,6/8/16.2,6/8/16.3,7/6/16,7/6/16.1,7/6/16.2,7/6/16.3,7/7/16,7/7/16.1,7/7/16.2,7/5/16,7/5/16.1,7/5/16.2,7/6/16.4,7/6/16.5,7/6/16.6,7/6/16.7,7/7/16.3,7/7/16.4,7/7/16.5,7/5/16.3,7/5/16.4,7/5/16.5,8/3/16,8/3/16.1,8/3/16.2,8/4/16,8/4/16.1,8/4/16.2,8/4/16.3,8/1/16,8/1/16.1,8/1/16.2,8/3/16.3,8/3/16.4,8/3/16.5,8/4/16.4,8/4/16.5,8/4/16.6,8/4/16.7,8/1/16.3,8/1/16.4,8/1/16.5,8/30/16,8/30/16.1,8/30/16.2,8/30/16.3,8/30/16.4,8/31/16,8/31/16.1,8/29/16,8/29/16.1,8/29/16.2,8/30/16.5,8/30/16.6,8/30/16.7,8/30/16.8,8/30/16.9,8/31/16.2,8/31/16.3,8/29/16.3,8/29/16.4,8/29/16.5,10/5/16,10/5/16.1,10/5/16.2,10/5/16.3,10/4/16,10/4/16.1,10/4/16.2,10/3/16,10/3/16.1,10/3/16.2,10/5/16.4,10/5/16.5,10/5/16.6,10/5/16.7,10/4/16.3,10/4/16.4,10/4/16.5,10/3/16.3,10/3/16.4,10/3/16.5,11/9/16,11/9/16.1,11/9/16.2,11/9/16.3,11/8/16,11/8/16.1,11/8/16.2,11/7/16,11/7/16.1,11/7/16.2,11/9/16.4,11/9/16.5,11/9/16.6,11/9/16.7,11/8/16.3,11/8/16.4,11/8/16.5,11/7/16.3,11/7/16.4,11/7/16.5,12/5/16,12/5/16.1,12/5/16.2,12/5/16.3,12/6/16,12/6/16.1,12/6/16.2,11/30/16,11/30/16.1,11/30/16.2,12/5/16.4,12/5/16.5,12/5/16.6,12/5/16.7,12/6/16.3,12/6/16.4,12/6/16.5,11/30/16.3,11/30/16.4,11/30/16.5
0,No,Diatom,Actinoptychus,Actinoptychus senarius,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,No,Diatom,Actinoptychus,Actinoptychus undulatus,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,No,Diatom,Asterionellopsis,Asterionellopsis glacialis (Asterionella glaci...,3490124,0,0,1452,0,0,0,0,0,1452,1452,0,0,1452,1452,0,0,1452,0,17600,0,1452,0,2904,0,0,1452,0,1452,2904,52800,0,4400,1452,0,1452,0,0,0,10252,35200,48400,13200,0,17600,0,44000,0,532400,171600,96800,118800,70400,114400,26400,48400,127600,66000,110000,281600,145200,127600,325600,184800,8800,8800,1452,0,13200,35200,26400,61600,0,1452,4400,13200,2904,8800,61600,0,0,4400,0,0,0,0,0,0,1452,4400,1452,13200,14652,1452,1452,1452,1452,0,8800,17600,22000,1452,0,0,0,0,0,0,0,13200,1452,0,0,0,1452,0,13200,1452,1452,13200,8800,0,0,0,0,0,0,0,0,0,4400,0,0,0,1452,0,0,8800,26400,0,8800,0,0,0,0,0,0,0,35200,0,105600,0,0,0,0,0,0,0,61600,0,22000,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1452,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1452,0,0,0,0,0,0,0,0,0,0,0,0,0,1452,0,0,0,0,0,1452,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,No,Diatom,Asteroplanus,Asteroplanus karianus,2904,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2904,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,No,Diatom,Bacillaria,Bacillaria paxillifer,123024,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8800,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,13200,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10252,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,13200,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7304,0,0,0,0,0,0,0,0,0,0,17600,0,0,0,0,0,10252,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10252,0,0,0,0,0,0,0,5852,7304,0,0,0,0,0,0,0,0,2904,0,0,0,0,0,0,0,7304,0,0,8800
5,No,Diatom,Cerataulina,Cerataulina pelagica,10459812,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,30800,0,0,4400,0,8800,8800,0,1452,0,0,0,0,0,0,0,0,0,88000,13200,22000,8800,13200,8800,0,22000,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4400,4400,22000,88000,13200,0,0,0,0,4400,1452,0,4400,1452,26400,0,0,0,1452,1056000,836000,704000,70400,8800,22000,180400,1447600,0,1298000,220000,30800,0,30800,17600,30800,154000,2684000,0,699600,0,0,0,0,0,0,0,0,0,1452,0,0,0,0,0,0,2904,0,0,0,44000,96800,44000,26400,8800,2904,0,0,8800,8800,0,1452,0,0,0,0,0,0,8800,5852,8800,1452,13200,2904,4400,8800,5852,26400,48400,8800,0,1452,0,0,0,1452,13200,13200,8800,1452,13200,22000,17600,4400,0,0,0,0,1452,0,4400,17600,8800,0,0,1452,2904,0,1452,0,17600,0,0,0,13200,1452,2904,0,2904,1452,0,0,0,0,0,0,1452,0,0,1452
6,No,Diatom,Chaetoceros,Chaetoceros compressus,1452,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1452,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
7,No,Diatom,Chaetoceros,Chaetoceros constrictus,30800,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,30800,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
8,No,Diatom,Chaetoceros,Chaetoceros curvisetus,114400,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,35200,22000,26400,30800,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
9,No,Diatom,Chaetoceros,Chaetoceros danicus,125840,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1452,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1452,4400,0,0,0,0,0,0,13200,1452,8800,8800,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1452,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1452,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2904,0,0,0,0,0,0,0,0,0,0,0,1452,1452,2904,2904,4400,0,0,0,4400,1452,0,30800,0,0,8800,0,2904,8800,7304,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2904,0,0,0,0


In [4]:
## only_mixotrophs
# drop all rows with Status = "No"
lis = lis[lis["Status"] != "No"].reset_index(drop=True)

# merge additional columns from mdb
lis = pd.merge(lis, mdb[['Species Name', 'MFT', 'Evidence of mixoplankton activity', 'size class', 'L (μm)', 'W (μm) or diameter (μm)']], left_on='Species', right_on='Species Name', how='left').drop(columns=['Species Name']).reset_index(drop=True) 
lis = pd.concat([lis.iloc[:, :4], lis.iloc[:, -5:], lis.iloc[:, 4:-5]], axis=1)
lis[['MFT', 'Evidence of mixoplankton activity', 'size class', 'L (μm)', 'W (μm) or diameter (μm)']] = lis[['MFT', 'Evidence of mixoplankton activity', 'size class', 'L (μm)', 'W (μm) or diameter (μm)']].fillna("")

# clean length/width columns
lis[['L (μm)', 'W (μm) or diameter (μm)']] = lis[['L (μm)', 'W (μm) or diameter (μm)']].replace(r'[^\d\-]', '', regex=True).apply(lambda col: col.str.split('-').apply(lambda x: (float(x[0]) + float(x[1])) / 2 if len(x) == 2 else float(x[0]) if x[0] else None))

# calculate sphere volume (V = (4/3) * π * (d/2)^3) when length and width are equal
lis['Volume'] = np.where(lis['L (μm)'] == lis['W (μm) or diameter (μm)'], (4/3) * np.pi * (lis['W (μm) or diameter (μm)'] / 2) ** 3, np.nan)

# calculate oblate spheroid volume where length and width are numeric and not equal
lis['Volume'] = np.where((lis['L (μm)'].notna()) & (lis['W (μm) or diameter (μm)'].notna()) & (lis['L (μm)'] != lis['W (μm) or diameter (μm)']), (4/3) * np.pi * ((lis['W (μm) or diameter (μm)'] / 2) ** 2) * (lis['L (μm)'] / 2), lis['Volume'])

# manually add values for additional columns from mdb for confirmed_befores
lis.loc[(lis['Genus'] == 'Ochromonas'), ['MFT', 'Evidence of mixoplankton activity', 'size class']] = ['CM', 'uptake of eubacteria', 'nano']
lis.loc[(lis['Species'] == 'Chattonella marina'), ['MFT', 'Evidence of mixoplankton activity', 'size class']] = ['CM', 'uptake of eubacteria', 'micro']

# fill unknown volumes with averages from mdb_volume based on size class and convert
lis['Volume'] = lis['Volume'].fillna(lis.merge(mdb_volume, on='size class', how='left')['Volume_y']) 
lis['Total Biomass (pgC)'] = (((lis['Volume'])**0.939) * 0.216) * lis['Totals']
lis = lis.rename(columns={'Volume':'Volume (µm³/cell)'})

lis = pd.concat([lis.iloc[:, :9], lis.iloc[:, -2:], lis.iloc[:, 9:-2]], axis=1)

# remove Status, length, and width columns
lis = lis.drop(columns=["Status", 'L (μm)', 'W (μm) or diameter (μm)'])

lis     

Unnamed: 0,Phylum,Genus,Species,MFT,Evidence of mixoplankton activity,size class,Volume (µm³/cell),Total Biomass (pgC),Totals,1/8/16,1/8/16.1,1/8/16.2,1/8/16.3,1/8/16.4,1/8/16.5,1/8/16.6,1/8/16.7,1/8/16.8,1/7/16,1/8/16.9,1/8/16.10,1/8/16.11,1/8/16.12,1/8/16.13,1/8/16.14,1/8/16.15,1/8/16.16,1/8/16.17,1/7/16.1,2/1/16,2/1/16.1,2/1/16.2,2/1/16.3,2/2/16,2/2/16.1,2/2/16.2,2/4/16,2/4/16.1,2/4/16.2,2/1/16.4,2/1/16.5,2/1/16.6,2/1/16.7,2/2/16.3,2/2/16.4,2/2/16.5,2/4/16.3,2/4/16.4,2/4/16.5,3/8/16,3/8/16.1,3/8/16.2,3/8/16.3,3/9/16,3/9/16.1,3/9/16.2,3/10/16,3/10/16.1,3/10/16.2,3/8/16.4,3/8/16.5,3/8/16.6,3/8/16.7,3/9/16.3,3/9/16.4,3/9/16.5,3/10/16.3,3/10/16.4,3/10/16.5,4/4/16,4/4/16.1,4/4/16.2,4/4/16.3,4/4/16.4,4/6/16,4/6/16.1,4/13/16,4/13/16.1,4/13/16.2,4/4/16.5,4/4/16.6,4/4/16.7,4/4/16.8,4/4/16.9,4/6/16.2,4/6/16.3,4/13/16.3,4/13/16.4,4/13/16.5,5/12/16,5/12/16.1,5/12/16.2,5/12/16.3,5/12/16.4,5/18/16,5/18/16.1,5/11/16,5/11/16.1,5/11/16.2,5/12/16.5,5/12/16.6,5/12/16.7,5/12/16.8,5/12/16.9,5/18/16.2,5/18/16.3,5/11/16.3,5/11/16.4,5/11/16.5,6/10/16,6/10/16.1,6/10/16.2,6/10/16.3,6/14/16,6/14/16.1,6/14/16.2,6/14/16.3,6/8/16,6/8/16.1,6/10/16.4,6/10/16.5,6/10/16.6,6/10/16.7,6/14/16.4,6/14/16.5,6/14/16.6,6/14/16.7,6/8/16.2,6/8/16.3,7/6/16,7/6/16.1,7/6/16.2,7/6/16.3,7/7/16,7/7/16.1,7/7/16.2,7/5/16,7/5/16.1,7/5/16.2,7/6/16.4,7/6/16.5,7/6/16.6,7/6/16.7,7/7/16.3,7/7/16.4,7/7/16.5,7/5/16.3,7/5/16.4,7/5/16.5,8/3/16,8/3/16.1,8/3/16.2,8/4/16,8/4/16.1,8/4/16.2,8/4/16.3,8/1/16,8/1/16.1,8/1/16.2,8/3/16.3,8/3/16.4,8/3/16.5,8/4/16.4,8/4/16.5,8/4/16.6,8/4/16.7,8/1/16.3,8/1/16.4,8/1/16.5,8/30/16,8/30/16.1,8/30/16.2,8/30/16.3,8/30/16.4,8/31/16,8/31/16.1,8/29/16,8/29/16.1,8/29/16.2,8/30/16.5,8/30/16.6,8/30/16.7,8/30/16.8,8/30/16.9,8/31/16.2,8/31/16.3,8/29/16.3,8/29/16.4,8/29/16.5,10/5/16,10/5/16.1,10/5/16.2,10/5/16.3,10/4/16,10/4/16.1,10/4/16.2,10/3/16,10/3/16.1,10/3/16.2,10/5/16.4,10/5/16.5,10/5/16.6,10/5/16.7,10/4/16.3,10/4/16.4,10/4/16.5,10/3/16.3,10/3/16.4,10/3/16.5,11/9/16,11/9/16.1,11/9/16.2,11/9/16.3,11/8/16,11/8/16.1,11/8/16.2,11/7/16,11/7/16.1,11/7/16.2,11/9/16.4,11/9/16.5,11/9/16.6,11/9/16.7,11/8/16.3,11/8/16.4,11/8/16.5,11/7/16.3,11/7/16.4,11/7/16.5,12/5/16,12/5/16.1,12/5/16.2,12/5/16.3,12/6/16,12/6/16.1,12/6/16.2,11/30/16,11/30/16.1,11/30/16.2,12/5/16.4,12/5/16.5,12/5/16.6,12/5/16.7,12/6/16.3,12/6/16.4,12/6/16.5,11/30/16.3,11/30/16.4,11/30/16.5
0,Dinoflagellate,Akashiwo,Akashiwo sanguinea,CM,"ingestion of ciliates, Isochrysis, Cryptophyte...",micro,58643.06,18828069.502962,2904,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1452,0,0,0,0,0,1452,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Dinoflagellate,Amphidinium,Amphidinium carterae,CM,ingestion of Skeletonema costatum,nano,7422.013,15604550.754418,16764,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1452,0,0,0,0,0,0,0,0,0,0,0,0,0,8800,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1452,0,0,1452,0,0,0,0,0,1452,0,0,0,0,0,0,0,0,0,0,1452,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,704,0,0,0,0,0,0,0,0,0
2,Dinoflagellate,Dinophysis,Dinophysis acuminata,pSNCM,photosynthetic Dinophysis spp. obtain plastids...,micro,25735.93,257891929.305965,86196,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1452,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1452,0,0,0,0,0,0,0,0,0,8800,4400,1452,1452,0,0,0,1452,0,0,0,0,0,0,0,1452,1452,0,0,4400,2904,1452,1452,10252,13200,0,4400,0,0,4400,1452,0,0,0,1452,0,4400,0,0,2904,1452,2904,2904,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1452,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1452,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Dinoflagellate,Dinophysis,Dinophysis miles,pSNCM*,This species retains chloroplasts from cryptop...,micro,301592.9,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Dinoflagellate,Dinophysis,Dinophysis norvegica,pSNCM,photosynthetic Dinophysis spp. obtain plastids...,micro,32070.43,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5,Dinoflagellate,Gambierdiscus,Gambierdiscus toxicus,CM,"presence of feeding vacuoles, unknown prey",micro,217309.2,647047708.998784,29172,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1452,1452,0,1452,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2904,1452,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1452,0,0,0,0,0,0,0,1452,0,0,0,0,0,1452,8800,0,0,1452,0,0,0,0,0,0,0,0,0,0,0,4400,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1452,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
6,Dinoflagellate,Gonyaulax,Gonyaulax polygramma,CM,"ingestion of cryptophyte species, Amphidinium ...",micro,1948311.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
7,Dinoflagellate,Heterocapsa,Heterocapsa circularisquama,CM,bacteria in food vacuoles,nano,1840777.0,252419002007.8688,1530496,13200,88000,70400,0,0,0,8800,13200,0,8800,8800,8800,13200,0,1452,1452,5852,4400,0,0,17600,0,8800,0,0,1452,13200,48400,0,0,17600,1452,8800,1452,0,0,0,0,0,0,2904,0,8800,2904,1452,8800,8800,8800,0,0,8800,0,8800,0,0,4400,1452,0,0,0,8800,13200,22000,0,0,1452,0,0,0,0,8800,0,0,0,0,0,0,0,0,0,8800,13200,1452,0,0,0,2904,8800,0,0,0,0,0,1452,0,0,0,0,0,0,176000,44000,13200,22000,0,4400,0,0,0,8800,4400,8800,0,0,0,4400,0,1452,2904,1452,57200,136400,66000,0,8800,17600,66000,1452,0,1452,22000,8800,0,0,0,0,0,4400,0,0,26400,4400,26400,22000,13200,0,0,0,0,0,26400,0,0,0,0,0,0,0,0,0,0,1452,0,0,0,26400,0,0,0,1452,0,1452,0,1452,1452,1452,0,0,0,0,8800,0,2904,0,2904,0,0,0,17600,1452,17600,4400,1452,2904,8800,35200,0,0,0,13200,0,0,0,0,0,0,0,1452,0,0,0,8800,4400,0,1452,0,0,0,1452,0,1452,0,13200,2904,0,0,13200,2904,0,1452,0,1452,0,0,0,0,0,0,0,1452
8,Dinoflagellate,Noctiluca,Noctiluca scintillans,eSNCM,endosymbionts,meso,523598800.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
9,Dinoflagellate,Prorocentrum,Prorocentrum lima,CM,"presence of feeding vacuoles, unknown prey",micro,570213.8,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [5]:
## calc_totals
totals = lis.groupby('Phylum', as_index=False, sort=False).sum()

# empty text-containing columns
totals = totals.drop(columns=["Status"], axis=1, errors='ignore')
totals["Genus"] = ""
totals["Species"] = ""
totals["MFT"] = ""
totals["Evidence of mixoplankton activity"] = ""
totals["size class"] = ""
totals['Volume (µm³/cell)'] = ""

# rename to TOTAL "   "
totals["Phylum"] = totals["Phylum"].str.upper().apply(lambda x: "TOTAL " + x + "S")

totals

Unnamed: 0,Phylum,Genus,Species,MFT,Evidence of mixoplankton activity,size class,Volume (µm³/cell),Total Biomass (pgC),Totals,1/8/16,1/8/16.1,1/8/16.2,1/8/16.3,1/8/16.4,1/8/16.5,1/8/16.6,1/8/16.7,1/8/16.8,1/7/16,1/8/16.9,1/8/16.10,1/8/16.11,1/8/16.12,1/8/16.13,1/8/16.14,1/8/16.15,1/8/16.16,1/8/16.17,1/7/16.1,2/1/16,2/1/16.1,2/1/16.2,2/1/16.3,2/2/16,2/2/16.1,2/2/16.2,2/4/16,2/4/16.1,2/4/16.2,2/1/16.4,2/1/16.5,2/1/16.6,2/1/16.7,2/2/16.3,2/2/16.4,2/2/16.5,2/4/16.3,2/4/16.4,2/4/16.5,3/8/16,3/8/16.1,3/8/16.2,3/8/16.3,3/9/16,3/9/16.1,3/9/16.2,3/10/16,3/10/16.1,3/10/16.2,3/8/16.4,3/8/16.5,3/8/16.6,3/8/16.7,3/9/16.3,3/9/16.4,3/9/16.5,3/10/16.3,3/10/16.4,3/10/16.5,4/4/16,4/4/16.1,4/4/16.2,4/4/16.3,4/4/16.4,4/6/16,4/6/16.1,4/13/16,4/13/16.1,4/13/16.2,4/4/16.5,4/4/16.6,4/4/16.7,4/4/16.8,4/4/16.9,4/6/16.2,4/6/16.3,4/13/16.3,4/13/16.4,4/13/16.5,5/12/16,5/12/16.1,5/12/16.2,5/12/16.3,5/12/16.4,5/18/16,5/18/16.1,5/11/16,5/11/16.1,5/11/16.2,5/12/16.5,5/12/16.6,5/12/16.7,5/12/16.8,5/12/16.9,5/18/16.2,5/18/16.3,5/11/16.3,5/11/16.4,5/11/16.5,6/10/16,6/10/16.1,6/10/16.2,6/10/16.3,6/14/16,6/14/16.1,6/14/16.2,6/14/16.3,6/8/16,6/8/16.1,6/10/16.4,6/10/16.5,6/10/16.6,6/10/16.7,6/14/16.4,6/14/16.5,6/14/16.6,6/14/16.7,6/8/16.2,6/8/16.3,7/6/16,7/6/16.1,7/6/16.2,7/6/16.3,7/7/16,7/7/16.1,7/7/16.2,7/5/16,7/5/16.1,7/5/16.2,7/6/16.4,7/6/16.5,7/6/16.6,7/6/16.7,7/7/16.3,7/7/16.4,7/7/16.5,7/5/16.3,7/5/16.4,7/5/16.5,8/3/16,8/3/16.1,8/3/16.2,8/4/16,8/4/16.1,8/4/16.2,8/4/16.3,8/1/16,8/1/16.1,8/1/16.2,8/3/16.3,8/3/16.4,8/3/16.5,8/4/16.4,8/4/16.5,8/4/16.6,8/4/16.7,8/1/16.3,8/1/16.4,8/1/16.5,8/30/16,8/30/16.1,8/30/16.2,8/30/16.3,8/30/16.4,8/31/16,8/31/16.1,8/29/16,8/29/16.1,8/29/16.2,8/30/16.5,8/30/16.6,8/30/16.7,8/30/16.8,8/30/16.9,8/31/16.2,8/31/16.3,8/29/16.3,8/29/16.4,8/29/16.5,10/5/16,10/5/16.1,10/5/16.2,10/5/16.3,10/4/16,10/4/16.1,10/4/16.2,10/3/16,10/3/16.1,10/3/16.2,10/5/16.4,10/5/16.5,10/5/16.6,10/5/16.7,10/4/16.3,10/4/16.4,10/4/16.5,10/3/16.3,10/3/16.4,10/3/16.5,11/9/16,11/9/16.1,11/9/16.2,11/9/16.3,11/8/16,11/8/16.1,11/8/16.2,11/7/16,11/7/16.1,11/7/16.2,11/9/16.4,11/9/16.5,11/9/16.6,11/9/16.7,11/8/16.3,11/8/16.4,11/8/16.5,11/7/16.3,11/7/16.4,11/7/16.5,12/5/16,12/5/16.1,12/5/16.2,12/5/16.3,12/6/16,12/6/16.1,12/6/16.2,11/30/16,11/30/16.1,11/30/16.2,12/5/16.4,12/5/16.5,12/5/16.6,12/5/16.7,12/6/16.3,12/6/16.4,12/6/16.5,11/30/16.3,11/30/16.4,11/30/16.5
0,TOTAL DINOFLAGELLATES,,,,,,,253438052522.4908,1666984,13200,88000,70400,0,0,0,8800,13200,0,8800,8800,8800,13200,0,1452,1452,5852,4400,0,0,17600,0,8800,0,0,1452,13200,48400,0,0,17600,1452,8800,1452,0,0,0,0,0,0,2904,0,8800,2904,1452,8800,8800,8800,0,0,8800,0,8800,0,0,4400,1452,0,0,0,8800,13200,22000,0,0,1452,1452,0,0,0,8800,0,0,0,0,0,0,0,0,0,8800,14652,2904,0,1452,0,2904,8800,0,0,0,1452,0,1452,0,0,0,0,0,0,176000,52800,17600,23452,4356,5852,0,0,1452,8800,4400,8800,0,0,0,4400,1452,2904,2904,1452,63052,139304,67452,1452,19052,30800,66000,5852,0,4356,26400,10252,0,0,0,1452,0,10252,0,0,29304,5852,29304,36608,22000,0,0,1452,0,1452,26400,0,0,0,0,0,0,0,0,4400,0,1452,0,0,0,26400,0,0,0,1452,0,1452,1452,1452,1452,1452,0,0,0,0,8800,0,2904,1452,2904,0,1452,0,17600,1452,17600,4400,2904,2904,8800,35200,1452,0,0,13200,0,1452,0,1452,0,0,0,1452,0,0,0,8800,4400,0,1452,0,0,0,1452,0,1452,0,13200,2904,0,0,13200,2904,0,1452,704,1452,0,0,0,0,0,0,0,1452
1,TOTAL RAPHIDOPHYTES,,,,,,,367007354.697494,674652,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1452,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,13200,0,0,0,0,0,0,0,0,0,0,0,0,132000,0,52800,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,44000,0,0,0,0,0,0,0,0,0,0,0,0,0,0,431200,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,TOTAL OCHROPHYTE (DIATOMS EXCLUDED)S,,,,,,,511650258.044051,6271452,8800,0,0,0,13200,0,8800,0,30800,0,0,88000,88000,13200,0,0,22000,22000,0,8800,0,0,0,0,0,22000,88000,8800,0,8800,0,0,0,0,0,35200,44000,0,0,0,0,17600,8800,0,8800,8800,0,8800,0,0,0,22000,0,22000,0,22000,0,66000,0,0,0,88000,22000,132000,13200,0,0,17600,0,8800,22000,176000,35200,44000,44000,0,0,0,0,0,88000,88000,0,440000,22000,88000,8800,0,0,0,176000,88000,132000,176000,8800,132000,44000,132000,88000,44000,44000,66000,22000,44000,0,0,48400,83600,44000,22000,0,0,0,22000,0,0,17600,17600,44000,0,22000,26400,66000,88000,0,0,101200,176000,0,52800,396000,44000,176000,0,22000,88000,0,22000,0,0,52800,44000,308000,88000,0,0,0,4400,0,0,35200,88000,22000,132000,44000,44000,0,0,0,22000,0,0,0,0,0,0,0,22000,0,0,0,22000,0,0,8800,0,22000,0,0,0,0,22000,0,0,0,0,0,88000,0,0,0,0,0,17600,0,0,0,0,0,0,22000,0,0,44000,8800,13200,0,22000,0,0,22000,0,0,0,0,0,1452,8800,13200,22000,0,22000,0,22000,17600,22000,13200,0,0,0,0,13200,0,22000,0,66000,0,0,0,0
3,TOTAL HAPTOPHYTES,,,,,,,280493442.499737,32252,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,22000,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8800,0,0,0,0,0,0,1452,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [6]:
## add_multiheader
lis = lis.copy()
needed_cols = pd.Series(np.full(len(lis.columns) - len(orig_header), None))  # create series of "None"'s to be concatted
original_headers = pd.concat([needed_cols, orig_header.to_series()], ignore_index=True)  # concat "None"'s so lines up correctly
lis.columns = pd.MultiIndex.from_arrays([original_headers, lis.columns])

# Isolate Station/Date columns
species_columns = lis.columns.get_level_values(1).isin(['Status', 'Phylum', 'Genus', 'Species', 'MFT', 'Evidence of mixoplankton activity', 'size class', 'Totals', 'Volume (µm³/cell)', 'Total Biomass (pgC)'])
removed_columns = lis.loc[:, species_columns].copy()

lis = lis.loc[:, ~species_columns]
lis.columns = pd.MultiIndex.from_tuples(lis.columns, names=['Station', 'Date'])

# Extract the 'Date' level and convert to Series
date_level = lis.columns.get_level_values('Date').to_series()

# Extract month numbers and day numbers from date strings
date_parts = date_level.str.extract(r'(\d{1,2})/(\d{1,2})/(\d{2})', expand=False)
month_numbers = pd.to_numeric(date_parts[0], errors='coerce')
day_numbers = pd.to_numeric(date_parts[1], errors='coerce')

# Map for month names
month_dict = {1: 'January', 2: 'February', 3: 'March', 4: 'April', 5: 'May', 6: 'June', 7: 'July', 8: 'August', 9: 'September', 10: 'October', 11: 'November', 12: 'December'}

# Adjust month if day >= 26
month_numbers = month_numbers + (day_numbers >= 26).astype(int)
month_names = month_numbers.map(month_dict).fillna(date_level.str.extract(r'(\b\w+\b)')[0]).fillna('Unknown')

# Normalize station names and create new MultiIndex
station_level = lis.columns.get_level_values('Station').str.split(' ').str[0]
lis.columns = pd.MultiIndex.from_arrays([month_names, station_level, date_level], names=['Month', 'Station', 'Date'])

# Add back initially removed columns
needed_cols = pd.Series(np.full(len(removed_columns.columns), None))
removed_columns.columns = pd.MultiIndex.from_arrays([needed_cols, removed_columns.columns.get_level_values(0), removed_columns.columns.get_level_values(1)])
lis = pd.concat([removed_columns, lis], axis=1)

lis

Unnamed: 0_level_0,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,January,January,January,January,January,January,January,January,January,January,January,January,January,January,January,January,January,January,January,January,February,February,February,February,February,February,February,February,February,February,February,February,February,February,February,February,February,February,February,February,March,March,March,March,March,March,March,March,March,March,March,March,March,March,March,March,March,March,March,March,April,April,April,April,April,April,April,April,April,April,April,April,April,April,April,April,April,April,April,April,May,May,May,May,May,May,May,May,May,May,May,May,May,May,May,May,May,May,May,May,June,June,June,June,June,June,June,June,June,June,June,June,June,June,June,June,June,June,June,June,July,July,July,July,July,July,July,July,July,July,July,July,July,July,July,July,July,July,July,July,August,August,August,August,August,August,August,August,August,August,August,August,August,August,August,August,August,August,August,August,September,September,September,September,September,September,September,September,September,September,September,September,September,September,September,September,September,September,September,September,October,October,October,October,October,October,October,October,October,October,October,October,October,October,October,October,October,October,October,October,November,November,November,November,November,November,November,November,November,November,November,November,November,November,November,November,November,November,November,November,December,December,December,December,December,December,December,December,December,December,December,December,December,December,December,December,December,December,December,December
Unnamed: 0_level_1,NaN,NaN,NaN,NaN,NaN,NaN,NaN,"(Note: S: surface water sample, B: bottom water sample",cell abundance (cells/L),A4S,B3S,C1S,D3S,E1S,F2S,H4S,I2S,J2S,K2S,A4B,B3B,C1B,D3B,E1B,F2B,H4B,I2B,J2B,K2B,A4S,B3S,C1S,D3S,E1S,F2S,H4S,I2S,J2S,K2S,A4B,B3B,C1B,D3B,E1B,F2B,H4B,I2B,J2B,K2B,A4S,B3S,C1S,D3S,E1S,F2S,H4S,I2S,J2S,K2S,A4B,B3B,C1B,D3B,E1B,F2B,H4B,I2B,J2B,K2B,A4S.1,B3S,C1S,D3S,E1S,F2S,H4S,I2S,J2S,K2S,A4B.1,B3B.1,C1B.1,D3B,E1B,F2B,H4B,I2B,J2B,K2B,A4S,B3S,C1S,D3S,E1S,F2S,H4S,I2S.1,J2S,K2S,A4B,B3B,C1B,D3B,E1B,F2B,H4B,I2B,J2B,K2B,A4S,B3S,C1S,D3S,E1S,F2S,H4S,I2S,J2S,K2S,A4B,B3B,C1B,D3B,E1B,F2B,H4B,I2B,J2B,K2B,A4S,B3S,C1S,D3S,E1S,F2S,H4S.1,I2S,J2S,K2S,A4B,B3B.2,C1B.2,D3B.1,E1B.1,F2B.1,H4B.1,I2B,J2B,K2B,A4S,B3S,C1S.1,D3S,E1S,F2S,H4S,I2S.2,J2S,K2S.1,A4B,B3B.3,C1B,D3B,E1B,F2S.1,H4B.2,I2B,J2B,K2B,A4S,B3S,C1S.2,D3S.1,E1S.1,F2S.2,H4S,I2S.3,J2S.1,K2S.2,A4B,B3B,C1B,D3B,E1B,F2B,H4B,I2B.1,J2B,K2B.1,A4S.2,B3S.1,C1S.3,D3S.2,E1S.2,F2S.3,H4S.2,I2S.4,J2S.2,K2S.3,A4B,B3B,C1B.3,D3B.2,E1B.2,F2B.2,H4B.3,I2B.2,J2B.1,K2B.2,A4S,B3S,C1S,D3S.3,E1S.3,F2S.4,H4S.3,I2S,J2S,K2S,A4B,B3B,C1B,D3B.3,E1B,F2B.3,H4B,I2B,J2B,K2B,A4S,B3S.2,C1S.4,D3S.4,E1S.4,F2S,H4S,I2S,J2S,K2S,A4B,B3B.4,C1B,D3B,E1B.3,F2B,H4B,I2B,J2B,K2B
Unnamed: 0_level_2,Phylum,Genus,Species,MFT,Evidence of mixoplankton activity,size class,Volume (µm³/cell),Total Biomass (pgC),Totals,1/8/16,1/8/16,1/8/16,1/8/16,1/8/16,1/8/16,1/8/16,1/8/16,1/8/16,1/7/16,1/8/16,1/8/16,1/8/16,1/8/16,1/8/16,1/8/16,1/8/16,1/8/16,1/8/16,1/7/16,2/1/16,2/1/16,2/1/16,2/1/16,2/2/16,2/2/16,2/2/16,2/4/16,2/4/16,2/4/16,2/1/16,2/1/16,2/1/16,2/1/16,2/2/16,2/2/16,2/2/16,2/4/16,2/4/16,2/4/16,3/8/16,3/8/16,3/8/16,3/8/16,3/9/16,3/9/16,3/9/16,3/10/16,3/10/16,3/10/16,3/8/16,3/8/16,3/8/16,3/8/16,3/9/16,3/9/16,3/9/16,3/10/16,3/10/16,3/10/16,4/4/16,4/4/16,4/4/16,4/4/16,4/4/16,4/6/16,4/6/16,4/13/16,4/13/16,4/13/16,4/4/16,4/4/16,4/4/16,4/4/16,4/4/16,4/6/16,4/6/16,4/13/16,4/13/16,4/13/16,5/12/16,5/12/16,5/12/16,5/12/16,5/12/16,5/18/16,5/18/16,5/11/16,5/11/16,5/11/16,5/12/16,5/12/16,5/12/16,5/12/16,5/12/16,5/18/16,5/18/16,5/11/16,5/11/16,5/11/16,6/10/16,6/10/16,6/10/16,6/10/16,6/14/16,6/14/16,6/14/16,6/14/16,6/8/16,6/8/16,6/10/16,6/10/16,6/10/16,6/10/16,6/14/16,6/14/16,6/14/16,6/14/16,6/8/16,6/8/16,7/6/16,7/6/16,7/6/16,7/6/16,7/7/16,7/7/16,7/7/16,7/5/16,7/5/16,7/5/16,7/6/16,7/6/16,7/6/16,7/6/16,7/7/16,7/7/16,7/7/16,7/5/16,7/5/16,7/5/16,8/3/16,8/3/16,8/3/16,8/4/16,8/4/16,8/4/16,8/4/16,8/1/16,8/1/16,8/1/16,8/3/16,8/3/16,8/3/16,8/4/16,8/4/16,8/4/16,8/4/16,8/1/16,8/1/16,8/1/16,8/30/16,8/30/16,8/30/16,8/30/16,8/30/16,8/31/16,8/31/16,8/29/16,8/29/16,8/29/16,8/30/16,8/30/16,8/30/16,8/30/16,8/30/16,8/31/16,8/31/16,8/29/16,8/29/16,8/29/16,10/5/16,10/5/16,10/5/16,10/5/16,10/4/16,10/4/16,10/4/16,10/3/16,10/3/16,10/3/16,10/5/16,10/5/16,10/5/16,10/5/16,10/4/16,10/4/16,10/4/16,10/3/16,10/3/16,10/3/16,11/9/16,11/9/16,11/9/16,11/9/16,11/8/16,11/8/16,11/8/16,11/7/16,11/7/16,11/7/16,11/9/16,11/9/16,11/9/16,11/9/16,11/8/16,11/8/16,11/8/16,11/7/16,11/7/16,11/7/16,12/5/16,12/5/16,12/5/16,12/5/16,12/6/16,12/6/16,12/6/16,11/30/16,11/30/16,11/30/16,12/5/16,12/5/16,12/5/16,12/5/16,12/6/16,12/6/16,12/6/16,11/30/16,11/30/16,11/30/16
0,Dinoflagellate,Akashiwo,Akashiwo sanguinea,CM,"ingestion of ciliates, Isochrysis, Cryptophyte...",micro,58643.06,18828069.502962,2904,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1452,0,0,0,0,0,1452,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Dinoflagellate,Amphidinium,Amphidinium carterae,CM,ingestion of Skeletonema costatum,nano,7422.013,15604550.754418,16764,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1452,0,0,0,0,0,0,0,0,0,0,0,0,0,8800,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1452,0,0,1452,0,0,0,0,0,1452,0,0,0,0,0,0,0,0,0,0,1452,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,704,0,0,0,0,0,0,0,0,0
2,Dinoflagellate,Dinophysis,Dinophysis acuminata,pSNCM,photosynthetic Dinophysis spp. obtain plastids...,micro,25735.93,257891929.305965,86196,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1452,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1452,0,0,0,0,0,0,0,0,0,8800,4400,1452,1452,0,0,0,1452,0,0,0,0,0,0,0,1452,1452,0,0,4400,2904,1452,1452,10252,13200,0,4400,0,0,4400,1452,0,0,0,1452,0,4400,0,0,2904,1452,2904,2904,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1452,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1452,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Dinoflagellate,Dinophysis,Dinophysis miles,pSNCM*,This species retains chloroplasts from cryptop...,micro,301592.9,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Dinoflagellate,Dinophysis,Dinophysis norvegica,pSNCM,photosynthetic Dinophysis spp. obtain plastids...,micro,32070.43,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5,Dinoflagellate,Gambierdiscus,Gambierdiscus toxicus,CM,"presence of feeding vacuoles, unknown prey",micro,217309.2,647047708.998784,29172,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1452,1452,0,1452,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2904,1452,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1452,0,0,0,0,0,0,0,1452,0,0,0,0,0,1452,8800,0,0,1452,0,0,0,0,0,0,0,0,0,0,0,4400,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1452,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
6,Dinoflagellate,Gonyaulax,Gonyaulax polygramma,CM,"ingestion of cryptophyte species, Amphidinium ...",micro,1948311.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
7,Dinoflagellate,Heterocapsa,Heterocapsa circularisquama,CM,bacteria in food vacuoles,nano,1840777.0,252419002007.8688,1530496,13200,88000,70400,0,0,0,8800,13200,0,8800,8800,8800,13200,0,1452,1452,5852,4400,0,0,17600,0,8800,0,0,1452,13200,48400,0,0,17600,1452,8800,1452,0,0,0,0,0,0,2904,0,8800,2904,1452,8800,8800,8800,0,0,8800,0,8800,0,0,4400,1452,0,0,0,8800,13200,22000,0,0,1452,0,0,0,0,8800,0,0,0,0,0,0,0,0,0,8800,13200,1452,0,0,0,2904,8800,0,0,0,0,0,1452,0,0,0,0,0,0,176000,44000,13200,22000,0,4400,0,0,0,8800,4400,8800,0,0,0,4400,0,1452,2904,1452,57200,136400,66000,0,8800,17600,66000,1452,0,1452,22000,8800,0,0,0,0,0,4400,0,0,26400,4400,26400,22000,13200,0,0,0,0,0,26400,0,0,0,0,0,0,0,0,0,0,1452,0,0,0,26400,0,0,0,1452,0,1452,0,1452,1452,1452,0,0,0,0,8800,0,2904,0,2904,0,0,0,17600,1452,17600,4400,1452,2904,8800,35200,0,0,0,13200,0,0,0,0,0,0,0,1452,0,0,0,8800,4400,0,1452,0,0,0,1452,0,1452,0,13200,2904,0,0,13200,2904,0,1452,0,1452,0,0,0,0,0,0,0,1452
8,Dinoflagellate,Noctiluca,Noctiluca scintillans,eSNCM,endosymbionts,meso,523598800.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
9,Dinoflagellate,Prorocentrum,Prorocentrum lima,CM,"presence of feeding vacuoles, unknown prey",micro,570213.8,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [7]:
## make_pretty
# add in line skips
totals = lis.set_index(lis.groupby(['Phylum']).tail(1).index + 0.1)
empty_df = pd.DataFrame("", index=lis.groupby(['Phylum']).tail(1).index+0.2, columns=totals.columns)
totals = pd.concat([totals, empty_df]).sort_index()

with_totals = pd.concat([lis, totals]).sort_index().reset_index(drop=True)  # add totals w/ line skips
        
# with_headers = add_multiheader(with_totals)

with_totals

KeyError: 'Phylum'