# Loading Functions

In [2]:
import pandas as pd
import numpy as np


## Creating Real Time data set
### Datetime is in UTC

In [37]:
realtime_path = '/Users/cristianswift/Desktop/Spring-Quarter-2022-2023/SeniorThesis/data/SeaFlow_realtime_TN413.csv'

#changing column naes so that the date is just called data
realtime_cols = ['time',
 'lat',
 'lon',
 'population',
 'abundance_cells_per_microliter',
 'diameter_micrometer']

#reading in the csv to a pandas df
realtime = pd.read_csv(realtime_path, names=realtime_cols)
# getting rid of the first row since it has the column names repeated
realtime = realtime.tail(-1)
realtime

Unnamed: 0,time,lat,lon,population,abundance_cells_per_microliter,diameter_micrometer
1,2023-02-25T05:45:57Z,21.3067,-157.0366,picoeuk,14.30323085044253,1.55786005995454
2,2023-02-25T05:51:57Z,21.3148,-157.0362,prochloro,59.90896889937337,0.663476451861207
3,2023-02-25T05:54:58Z,21.3142,-157.0321,picoeuk,20.71698119210922,1.5760000524204352
4,2023-02-25T05:54:58Z,21.3142,-157.0321,prochloro,64.44967710586306,0.670356077823434
5,2023-02-25T05:57:58Z,21.3133,-157.0266,prochloro,68.45117621283211,0.6805887892801286
...,...,...,...,...,...,...
10570,2023-03-11T08:30:41Z,-19.4207,-181.8025,synecho,0.25541483661504516,1.07457158316671
10571,2023-03-11T08:33:41Z,-19.4208,-181.792,picoeuk,4.370431648746329,1.985346246474965
10572,2023-03-11T08:36:41Z,-19.4207,-181.7817,picoeuk,4.427190501327449,2.01327502068565
10573,2023-03-11T08:36:41Z,-19.4207,-181.7817,prochloro,92.26151487061243,0.572558873170014


## Averaging data over 10 minute resolution

In [38]:
realtime['abundance_cells_per_microliter'] = pd.to_numeric(realtime['abundance_cells_per_microliter'])

realtime = realtime.dropna(subset=['time','lat', 'lon']).reset_index().drop('index',axis=1)
realtime['time'] = realtime['time'].str[:-1]
realtime['lat'] = realtime['lat'].astype(float)
realtime['lon'] = realtime['lon'].astype(float)
realtime['abundance_cells_per_microliter'] = realtime['abundance_cells_per_microliter'].astype(float)
realtime['diameter_micrometer'] = realtime['diameter_micrometer'].astype(float)
realtime['depth'] = 0

print(realtime.dtypes)
realtime.head(4)

time                               object
lat                               float64
lon                               float64
population                         object
abundance_cells_per_microliter    float64
diameter_micrometer               float64
depth                               int64
dtype: object


Unnamed: 0,time,lat,lon,population,abundance_cells_per_microliter,diameter_micrometer,depth
0,2023-02-25T05:45:57,21.3067,-157.0366,picoeuk,14.303231,1.55786,0
1,2023-02-25T05:51:57,21.3148,-157.0362,prochloro,59.908969,0.663476,0
2,2023-02-25T05:54:58,21.3142,-157.0321,picoeuk,20.716981,1.576,0
3,2023-02-25T05:54:58,21.3142,-157.0321,prochloro,64.449677,0.670356,0


In [39]:
#averaging over every 10 min
realtime['time'] = pd.to_datetime(realtime['time'])
#this drops the phytoplankton population categorical column, need to recalulate
realtime_avg = (realtime
                .groupby('population')
                .resample('10min', on='time')
                .mean(numeric_only=False)
                .reset_index()
               )
realtime_avg
# making time a string object again
realtime_avg['time'] = realtime_avg['time'].astype(str)
realtime_avg['time'] = realtime_avg['time'].str.replace(' ', 'T')


  .mean(numeric_only=False)
  .mean(numeric_only=False)
  .mean(numeric_only=False)


In [40]:
realtime_avg.dropna()

Unnamed: 0,population,time,lat,lon,abundance_cells_per_microliter,diameter_micrometer,depth
0,picoeuk,2023-02-25T05:40:00,21.30670,-157.03660,14.303231,1.557860,0.0
1,picoeuk,2023-02-25T05:50:00,21.31420,-157.03210,20.716981,1.576000,0.0
2,picoeuk,2023-02-25T06:00:00,21.31230,-157.01680,22.590023,1.606258,0.0
3,picoeuk,2023-02-25T06:10:00,21.31170,-156.99750,20.915637,1.544307,0.0
4,picoeuk,2023-02-25T06:20:00,21.31105,-156.98050,22.320419,1.549379,0.0
...,...,...,...,...,...,...,...
5884,synecho,2023-03-11T07:30:00,-19.42570,-181.98925,0.482450,1.016515,0.0
5885,synecho,2023-03-11T07:40:00,-19.42510,-181.95800,0.368933,1.136124,0.0
5887,synecho,2023-03-11T08:00:00,-19.42280,-181.87520,0.283794,1.008841,0.0
5889,synecho,2023-03-11T08:20:00,-19.42080,-181.81290,0.283794,1.296998,0.0


# Using SimonCMAP to gather additional features

#### First installing and importing pycmap 

In [15]:
# !pip install pycmap
import pycmap

### Prepping realtime data for colocalization using Simon's CMAP

#### Setting API

In [13]:
api = pycmap.API(token='<6e1eb1d3-d364-4dfb-9121-8c23369dbbbe>')

In [43]:
api.get_dataset_metadata('tblPisces_Forecast_cl1')

Unnamed: 0,Variable,Table_Name,Long_Name,Unit,Make,Sensor,Process_Level,Study_Domain,Temporal_Resolution,Spatial_Resolution,...,Dataset_Short_Name,Data_Source,Distributor,Dataset_Description,Acknowledgement,Dataset_ID,ID,Visualize,Keywords,Refrences
0,chl,tblPisces_Forecast_cl1,Mass Concentration of Chlorophyll a in Sea Water,mg/m^3,Model,Blend,Reprocessed,Biogeochemistry,Daily,1/4Â° X 1/4Â°,...,Mercator_Pisces_Biogeochemistry_Daily_Forecast...,CMEMS - Global Monitoring and Forecasting Centre,http://marine.copernicus.eu,"""The Operational Mercator Ocean biogeochemical...",Data provided by: E.U. Copernicus Marine Servi...,523,10699,1,"0.25, 1/4 degree, 3D, biogeochemistry, blend, ...",
1,dissic,tblPisces_Forecast_cl1,Mole Concentration of Dissolved Inorganic Carb...,mol/m^3,Model,Blend,Reprocessed,Biogeochemistry,Daily,1/4Â° X 1/4Â°,...,Mercator_Pisces_Biogeochemistry_Daily_Forecast...,CMEMS - Global Monitoring and Forecasting Centre,http://marine.copernicus.eu,"""The Operational Mercator Ocean biogeochemical...",Data provided by: E.U. Copernicus Marine Servi...,523,10710,1,"0.25, 1/4 degree, 3D, biogeochemistry, blend, ...",
2,fe,tblPisces_Forecast_cl1,Mole Concentration of Dissolved Iron in Sea Water,mmol/m^3,Model,Blend,Reprocessed,Biogeochemistry,Daily,1/4Â° X 1/4Â°,...,Mercator_Pisces_Biogeochemistry_Daily_Forecast...,CMEMS - Global Monitoring and Forecasting Centre,http://marine.copernicus.eu,"""The Operational Mercator Ocean biogeochemical...",Data provided by: E.U. Copernicus Marine Servi...,523,10700,1,"0.25, 1/4 degree, 3D, biogeochemistry, blend, ...",
3,no3,tblPisces_Forecast_cl1,Mole Concentration of Nitrate in Sea Water,mmol/m^3,Model,Blend,Reprocessed,Biogeochemistry,Daily,1/4Â° X 1/4Â°,...,Mercator_Pisces_Biogeochemistry_Daily_Forecast...,CMEMS - Global Monitoring and Forecasting Centre,http://marine.copernicus.eu,"""The Operational Mercator Ocean biogeochemical...",Data provided by: E.U. Copernicus Marine Servi...,523,10702,1,"0.25, 1/4 degree, 3D, biogeochemistry, blend, ...",
4,nppv,tblPisces_Forecast_cl1,Net Primary Production of Biomass Expressed as...,g/m^3/day,Model,Blend,Reprocessed,Biogeochemistry,Daily,1/4Â° X 1/4Â°,...,Mercator_Pisces_Biogeochemistry_Daily_Forecast...,CMEMS - Global Monitoring and Forecasting Centre,http://marine.copernicus.eu,"""The Operational Mercator Ocean biogeochemical...",Data provided by: E.U. Copernicus Marine Servi...,523,10706,1,"0.25, 1/4 degree, 3D, biogeochemistry, blend, ...",
5,o2,tblPisces_Forecast_cl1,Mole Concentration of Dissolved Oxygen in Sea ...,mmol/m^3,Model,Blend,Reprocessed,Biogeochemistry,Daily,1/4Â° X 1/4Â°,...,Mercator_Pisces_Biogeochemistry_Daily_Forecast...,CMEMS - Global Monitoring and Forecasting Centre,http://marine.copernicus.eu,"""The Operational Mercator Ocean biogeochemical...",Data provided by: E.U. Copernicus Marine Servi...,523,10701,1,"0.25, 1/4 degree, 3D, biogeochemistry, blend, ...",
6,ph,tblPisces_Forecast_cl1,Sea Water pH Reported on Total Scale,,Model,Blend,Reprocessed,Biogeochemistry,Daily,1/4Â° X 1/4Â°,...,Mercator_Pisces_Biogeochemistry_Daily_Forecast...,CMEMS - Global Monitoring and Forecasting Centre,http://marine.copernicus.eu,"""The Operational Mercator Ocean biogeochemical...",Data provided by: E.U. Copernicus Marine Servi...,523,10707,1,"0.25, 1/4 degree, 3D, biogeochemistry, blend, ...",
7,phyc,tblPisces_Forecast_cl1,Mole Concentration of Phytoplankton Expressed ...,mmol/m^3,Model,Blend,Reprocessed,Biogeochemistry,Daily,1/4Â° X 1/4Â°,...,Mercator_Pisces_Biogeochemistry_Daily_Forecast...,CMEMS - Global Monitoring and Forecasting Centre,http://marine.copernicus.eu,"""The Operational Mercator Ocean biogeochemical...",Data provided by: E.U. Copernicus Marine Servi...,523,10704,1,"0.25, 1/4 degree, 3D, biogeochemistry, blend, ...",
8,po4,tblPisces_Forecast_cl1,Mole Concentration of Phosphate in Sea Water,mmol/m^3,Model,Blend,Reprocessed,Biogeochemistry,Daily,1/4Â° X 1/4Â°,...,Mercator_Pisces_Biogeochemistry_Daily_Forecast...,CMEMS - Global Monitoring and Forecasting Centre,http://marine.copernicus.eu,"""The Operational Mercator Ocean biogeochemical...",Data provided by: E.U. Copernicus Marine Servi...,523,10703,1,"0.25, 1/4 degree, 3D, biogeochemistry, blend, ...",
9,si,tblPisces_Forecast_cl1,Mole Concentration of Silicate in Sea Water,mmol/m^3,Model,Blend,Reprocessed,Biogeochemistry,Daily,1/4Â° X 1/4Â°,...,Mercator_Pisces_Biogeochemistry_Daily_Forecast...,CMEMS - Global Monitoring and Forecasting Centre,http://marine.copernicus.eu,"""The Operational Mercator Ocean biogeochemical...",Data provided by: E.U. Copernicus Marine Servi...,523,10705,1,"0.25, 1/4 degree, 3D, biogeochemistry, blend, ...",


In [None]:
targets = {
        
        # BioGeoChemical Numerical Near-Real-Time Model
        "tblPisces_Forecast_cl1": {
                          "variables": ["NO3", "PO4", "Fe", "Si", "chl", "nppv"],
                          "tolerances": [4, 0.5, 0.5, 5]
                         }
        }


source = realtime_avg

realtime_cmap = pycmap.Sample(
              source=source, 
              targets=targets, 
              replaceWithMonthlyClimatolog=False
             )


In [None]:
realtime_cmap

In [None]:
realtime_cmap.to_csv('/Users/cristianswift/Desktop/Spring-Quarter-2022-2023/SeniorThesis/data/realtime_cmap.csv', index=False)

## Doing the same to the covariate dataset

In [3]:
covari_path = '/Users/cristianswift/Desktop/Spring-Quarter-2022-2023/SeniorThesis/data/covari_cmap_ready.csv'
covari = pd.read_csv(covari_path, usecols=lambda column: column != 'Unnamed: 0')

covari = covari[['date', 'PopulationName', 'lat', 'lon', 'biomass', 'salin', 'temp', 'cruisename']]
# # making time a string object again
covari['date'] = covari['date'].astype(str)
covari['date'] = covari['date'].str.replace('Z', '')
covari = covari.rename(columns={'date': 'time'})
covari

Unnamed: 0,time,PopulationName,lat,lon,biomass,salin,temp,cruisename
0,2016-04-20T00:00:00,Prochlorococcus,,,9.232478,34.679128,25.740547,KOK1606
1,2016-04-20T00:00:00,Synechococcus,,,0.279172,34.679128,25.740547,KOK1606
2,2016-04-20T00:00:00,non_pro_syn,,,3.079118,34.679128,25.740547,KOK1606
3,2016-04-20T00:00:00,non_pro_syn,,,0.606572,34.679128,25.740547,KOK1606
4,2016-04-20T01:00:00,Prochlorococcus,,,10.175402,34.704286,25.624466,KOK1606
...,...,...,...,...,...,...,...,...
12182,2021-12-30T00:00:00,non_pro_syn,32.673493,-117.545342,3.774488,33.468151,15.189021,TN398
12183,2021-12-30T01:00:00,Prochlorococcus,32.682100,-117.660321,0.874599,33.478846,15.327302,TN398
12184,2021-12-30T01:00:00,Synechococcus,32.682100,-117.660321,9.707579,33.478846,15.327302,TN398
12185,2021-12-30T01:00:00,non_pro_syn,32.682100,-117.660321,2.428084,33.478846,15.327302,TN398


In [16]:


targets = {
        
        # BioGeoChemical Numerical Near-Real-Time Model
        "tblPisces_NRT": {
                          "variables": ["NO3", "PO4", "Fe", "Si", "chl"],
                          "tolerances": [4, 0.5, 0.5, 5]
                         },
    
    "tblPisces_Forecast": {
                          "variables": ["NO3", "PO4", "Fe", "Si", "chl"],
                          "tolerances": [4, 0.5, 0.5, 5]
        }
}

source = covari
covari_cmap = pycmap.Sample(
              source=source, 
              targets=targets, 
              replaceWithMonthlyClimatolog=False
             )


Gathering metadata .... 
Sampling starts
Sampling finished                                                                                                    

In [None]:
covari_cmap

In [46]:
covari_cmap

Unnamed: 0,time,PopulationName,lat,lon,biomass,salin,temp,cruisename,CMAP_NO3_tblPisces_NRT,CMAP_PO4_tblPisces_NRT,CMAP_Fe_tblPisces_NRT,CMAP_Si_tblPisces_NRT,CMAP_chl_tblPisces_NRT
0,2016-04-20T00:00:00,Prochlorococcus,,,9.232478,34.679128,25.740547,KOK1606,,,,,
1,2016-04-20T00:00:00,Synechococcus,,,0.279172,34.679128,25.740547,KOK1606,,,,,
2,2016-04-20T00:00:00,non_pro_syn,,,3.079118,34.679128,25.740547,KOK1606,,,,,
3,2016-04-20T00:00:00,non_pro_syn,,,0.606572,34.679128,25.740547,KOK1606,,,,,
4,2016-04-20T01:00:00,Prochlorococcus,,,10.175402,34.704286,25.624466,KOK1606,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
12182,2021-12-30T00:00:00,non_pro_syn,32.673493,-117.545342,3.774488,33.468151,15.189021,TN398,,,,,
12183,2021-12-30T01:00:00,Prochlorococcus,32.682100,-117.660321,0.874599,33.478846,15.327302,TN398,,,,,
12184,2021-12-30T01:00:00,Synechococcus,32.682100,-117.660321,9.707579,33.478846,15.327302,TN398,,,,,
12185,2021-12-30T01:00:00,non_pro_syn,32.682100,-117.660321,2.428084,33.478846,15.327302,TN398,,,,,


In [30]:
covari_cmap.to_csv('/Users/cristianswift/Desktop/Spring-Quarter-2022-2023/SeniorThesis/data/covari_cmap.csv', index=False)


In [None]:

#changing column naes so that the date is just called data
covari_cols = ['time',
 'lat',
 'lon',
 'population',
 'abundance_cells_per_microliter',
 'diameter_micrometer']

#reading in the csv to a pandas df
realtime = pd.read_csv(realtime_path, names=realtime_cols)
# getting rid of the first row since it has the column names repeated
realtime = realtime.tail(-1)
realtime