# Loading Functions

In [24]:
# !pip3 install pandas==2.0.0
# !pip3 install seaborn
# !pip3 install numpy==1.20.3

In [25]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import matplotlib.dates as mdates
import matplotlib.cbook as cbook

## Creating Covariable data set
### Datetime is in UTC

In [26]:
# Location of the covariate table on disk.
covari_path = '/Users/cristianswift/Desktop/Spring-Quarter-2022-2023/SeniorThesis/data/covari_cmap.csv'

# Read the CSV (the first column is handed to the date parser and the literal
# "NaN" is treated as missing), keep only fully populated rows, and renumber
# the index from zero.
covari = (
    pd.read_csv(covari_path, parse_dates=[0], na_values="NaN")
    .dropna()
    .reset_index(drop=True)
)

### For N nutrients, only about 1/10th of the data is not null

## Creating Real Time data set that includes CMAP and Underway data

### Loading in realtime data that has colocalized CMAP features

In [27]:
realtime_cmap_path = '/Users/cristianswift/Desktop/Spring-Quarter-2022-2023/SeniorThesis/data/modifed/realtime_cmap.csv'
# Read the colocalized SeaFlow/CMAP table into a DataFrame.
# The timestamp column is selected by name: a positional parse_dates=[0] would
# target the first CSV column, which the preview shows is `population`, leaving
# `time` as plain strings.
realtime_cmap = pd.read_csv(realtime_cmap_path, parse_dates=['time'])
# Take a peek at the first three rows.
realtime_cmap.head(3)


Unnamed: 0,population,time,lat,lon,abundance_cells_per_microliter,diameter_micrometer,depth,CMAP_NO3_tblPisces_Forecast_cl1,CMAP_PO4_tblPisces_Forecast_cl1,CMAP_Fe_tblPisces_Forecast_cl1,CMAP_Si_tblPisces_Forecast_cl1,CMAP_chl_tblPisces_Forecast_cl1
0,picoeuk,2023-02-25T05:40:00,21.3067,-157.0366,14.303231,1.55786,0.0,0.000562,0.212505,0.000433,2.092356,0.072018
1,picoeuk,2023-02-25T05:50:00,21.3142,-157.0321,20.716981,1.576,0.0,0.000562,0.212505,0.000433,2.092356,0.072018
2,picoeuk,2023-02-25T06:00:00,21.3123,-157.0168,22.590023,1.606258,0.0,0.000562,0.212505,0.000433,2.092356,0.072018


### Calculating Biomass
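Biomass is computed per observation from the measured cell diameter $d$: the cell is treated as a sphere of volume $V = \tfrac{4}{3}\pi (d/2)^3$, its carbon quota is taken as $0.261\,V^{0.86}$, and biomass is that quota multiplied by the cell abundance.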

In [28]:
import math

# Step 1: volume of a sphere whose diameter is the measured plankton diameter.
plankton_volume = (
    realtime_cmap['diameter_micrometer']
    .div(2)
    .pow(3)
    .mul((4/3) * math.pi)
)
# Step 2: carbon quota from volume via the power law 0.261 * V**0.86.
carbon_quota = plankton_volume.pow(0.86).mul(0.261)
# Step 3: biomass per measurement = abundance * carbon quota.
# NOTE(review): no factor of 1000 is applied here, so the value is relative to
# the abundance column's unit (cells per microliter, going by its name), not
# per liter -- confirm the intended units before comparing with per-liter data.
realtime_cmap['biomass'] = carbon_quota.mul(realtime_cmap['abundance_cells_per_microliter'])

### Loading in cruise underway CTD data

In [29]:
underway_path = '/Users/cristianswift/Desktop/Spring-Quarter-2022-2023/SeniorThesis/data/TN413-underway.tab'
# Build the cleaned underway frame in a single pass:
#   1. read the delimited file, skipping its first 6 lines;
#   2. discard the unneeded conductivity and par columns;
#   3. trim the last character from every timestamp string
#      (presumably a trailing timezone marker -- TODO confirm);
#   4. drop every row that still has a missing value in any column;
#   5. parse the trimmed strings into datetime64[ns].
underway = (
    pd.read_csv(underway_path, delimiter='	', skiprows=6)
    .drop(columns=['conductivity', 'par'])
    .assign(time=lambda frame: frame['time'].str[:-1])
    .dropna()
    .assign(time=lambda frame: pd.to_datetime(frame['time']))
)
# Persist the cleaned table (without the index) for reuse outside this notebook.
underway.to_csv('/Users/cristianswift/Desktop/Spring-Quarter-2022-2023/SeniorThesis/data/underway.csv', index=False)

In [30]:
# (rows, columns) of the cleaned underway frame.
underway.shape

(21438, 5)

In [31]:
# Preview the first four rows of the cleaned underway frame.
underway.head(4)

Unnamed: 0,time,lat,lon,temp,salinity
339,2023-02-25 04:20:02,21.2259,-157.0856,26.5682,34.8463
340,2023-02-25 04:21:02,21.2251,-157.0827,25.0258,35.009
341,2023-02-25 04:22:02,21.2243,-157.0801,24.5832,34.9939
342,2023-02-25 04:23:02,21.2236,-157.0774,24.396,34.9996


## Joining realtime_cmap and underway data into one dataframe

### First we need to average the underway data frame to a 10 min resolution

In [33]:
# Coerce the key column to datetime so it can be used for time-based bucketing.
underway['time'] = pd.to_datetime(underway['time'])
# Average into 10-minute bins keyed on `time`; numeric_only=True leaves any
# non-numeric column out of the result.
# NOTE(review): an earlier note here said this drops a phytoplankton population
# column that must be recalculated; the preview of `underway` shows no such
# column -- confirm that note is stale.
underway_avg = (
    underway
    .set_index('time')
    .resample('10min')
    .mean(numeric_only=True)
)
underway_avg.shape

(2440, 4)

### Now we can merge the underway CTD and SeaFlow/CMAP data into one dataframe called TN413, named after the cruise that collected the observational data.

In [49]:
# The join key must be datetime64 on both sides before matching.
realtime_cmap['time'] = pd.to_datetime(realtime_cmap['time'])
# Inner-join the 10-minute underway averages onto the SeaFlow/CMAP rows by timestamp.
# lat/lon are removed from the underway side first so that only realtime_cmap's
# position columns appear in the result.
tn413 = realtime_cmap.merge(
    underway_avg.drop(columns=['lat', 'lon']),
    how='inner',
    on='time',
)

In [48]:
# Preview the first four rows of the merged frame.
tn413.head(4)

Unnamed: 0,population,time,lat,lon,abundance_cells_per_microliter,diameter_micrometer,depth,CMAP_NO3_tblPisces_Forecast_cl1,CMAP_PO4_tblPisces_Forecast_cl1,CMAP_Fe_tblPisces_Forecast_cl1,CMAP_Si_tblPisces_Forecast_cl1,CMAP_chl_tblPisces_Forecast_cl1,biomass,temp,salinity
0,picoeuk,2023-02-25 05:40:00,21.3067,-157.0366,14.303231,1.55786,0.0,0.000562,0.212505,0.000433,2.092356,0.072018,6.716405,24.03829,34.93311
1,picoeuk,2023-02-25 05:50:00,21.3142,-157.0321,20.716981,1.576,0.0,0.000562,0.212505,0.000433,2.092356,0.072018,10.023072,24.0135,34.9232
2,prochloro,2023-02-25 05:50:00,21.3141,-157.031633,64.269941,0.671474,0.0,0.000562,0.212505,0.000433,2.092356,0.072018,3.441297,24.0135,34.9232
3,picoeuk,2023-02-25 06:00:00,21.3123,-157.0168,22.590023,1.606258,0.0,0.000562,0.212505,0.000433,2.092356,0.072018,11.478879,23.98739,34.93976


### Renaming columns of both the tn413 and covari dataframes so they are simpler and consistent with each other.

In [40]:
# Give tn413 and covari the same short column names for the model, then save both.

# tn413: remove incomplete rows and the `depth` column, then shorten the names.
# errors='ignore' keeps this cell re-runnable: tn413 is reassigned to its own
# output, so on a second execution `depth` has already been removed.
tn413 = (
    tn413
    .dropna()
    .drop(columns=['depth'], errors='ignore')
    .rename(columns={
        'time': 'date',
        'population': 'PopulationName',
        'CMAP_NO3_tblPisces_Forecast_cl1': 'NO3NO2',
        'CMAP_PO4_tblPisces_Forecast_cl1': 'PO4',
        'CMAP_Fe_tblPisces_Forecast_cl1': 'Fe',
        'CMAP_Si_tblPisces_Forecast_cl1': 'SiO4',
        'CMAP_chl_tblPisces_Forecast_cl1': 'Satellite_CHL',
        'salinity': 'salin',
        'abundance_cells_per_microliter': 'cell_abundance',
        'diameter_micrometer': 'cell_diameter',
    })
)

# covari: same treatment, using the NRT-suffixed CMAP column names.
# Keys that are absent from the frame are skipped by rename.
# NOTE(review): both the Forecast_cl1 and the NRT chlorophyll columns map to
# 'Satellite_CHL'; if covari ever contained both, the result would have two
# columns with that name -- confirm only one of them is present.
covari = (
    covari
    .dropna()
    .rename(columns={
        'time': 'date',
        'population': 'PopulationName',
        'CMAP_NO3_tblPisces_NRT': 'NO3NO2',
        'CMAP_PO4_tblPisces_NRT': 'PO4',
        'CMAP_Fe_tblPisces_NRT': 'Fe',
        'CMAP_Si_tblPisces_NRT': 'SiO4',
        'CMAP_chl_tblPisces_Forecast_cl1': 'Satellite_CHL',
        'salinity': 'salin',
        'abundance_cells_per_microliter': 'cell_abundance',
        'CMAP_chl_tblPisces_NRT': 'Satellite_CHL',
    })
)

# Collapse the two eukaryote size classes into a single 'non_pro_syn' label.
# This is done once, before saving, so the CSV and the in-memory frame agree.
covari['PopulationName'] = covari['PopulationName'].replace(
    {'nanoeukaryotes (2-5µm)': 'non_pro_syn', 'picoeukaryotes (< 2µm)': 'non_pro_syn'}
)

# Write both cleaned tables to disk.
# NOTE(review): covari is written WITH its index while tn413 is written without
# one; left unchanged so existing readers of covari_clean.csv keep working --
# confirm whether index=False is wanted here as well.
covari.to_csv('/Users/cristianswift/Desktop/Spring-Quarter-2022-2023/SeniorThesis/data/covari_clean.csv')
tn413.to_csv('/Users/cristianswift/Desktop/Spring-Quarter-2022-2023/SeniorThesis/data/TN413_merged_10min.csv', index=False)