


![Simons CMAP logo](https://cmap.readthedocs.io/en/latest/_static/CMAP_logos/CMAP_logo_High_Res.png) 
# In this notebook we will download environmental data using [Simons CMAP](https://simonscmap.com).

## We will create a dataframe that has all the latitude and longitude values that we want to sample, and then use these as targets to sample CMAP

In [1]:
# Set a working directory
#!pip install GitPython
import git
import os

# Find the enclosing Git repository (searching upward from the current
# directory) and make its working-tree root the working directory, so the
# relative `data_ingest/...` paths used later in the notebook resolve from there.
repo = git.Repo('.', search_parent_directories=True)
repo_root = repo.working_tree_dir
os.chdir(repo_root)

In [2]:
#!pip install global_land_mask
import pandas as pd
import numpy as np
from global_land_mask import globe

## Creating a df that has every lat/lon point we want to sample.


In [3]:
# Candidate grid: every whole-degree latitude in [-80, 80] and longitude in [-180, 180].
# NOTE(review): -180 and 180 denote the same meridian, so points on it appear
# twice in the grid — confirm this is intended.
lat_list = list(range(-80, 81))
lon_list = list(range(-180, 181))

# Keep only the grid points that the land mask does not classify as land,
# in latitude-major order (all longitudes for one latitude, then the next).
ocean_points = [
    (grid_lat, grid_lon)
    for grid_lat in lat_list
    for grid_lon in lon_list
    if not globe.is_land(grid_lat, grid_lon)
]

# Split the (lat, lon) pairs into two parallel lists.
lat = [point[0] for point in ocean_points]
lon = [point[1] for point in ocean_points]

In [4]:
# Assemble the sampling targets: one row per retained grid point.
predictors = pd.DataFrame({'lat': lat, 'lon': lon})
print(predictors)
print(predictors.dtypes)

# The coordinates start out as integers; cast both columns to float64 in one
# step and print the dtypes again to confirm the conversion.
predictors = predictors.astype({'lat': 'float64', 'lon': 'float64'})
print(predictors.dtypes)

       lat  lon
0      -80 -180
1      -80 -179
2      -80 -178
3      -80 -177
4      -80 -176
...    ...  ...
39989   80  176
39990   80  177
39991   80  178
39992   80  179
39993   80  180

[39994 rows x 2 columns]
lat    int64
lon    int64
dtype: object
lat    float64
lon    float64
dtype: object


#### Picking an arbitrary date and time to sample. This can be changed, but you will then have to re-sample CMAP.

In [5]:
# Stamp every grid point with the same (arbitrary) sampling date: broadcast the
# date string across the frame's index, then parse it into a datetime column.
sample_date = pd.Series('2023-04-10', index=predictors.index)
predictors['date'] = pd.to_datetime(sample_date)

In [6]:
import ephem

def find_sunrise(row):
    """Return the last sunrise before ``row['date']`` at the row's location.

    Parameters
    ----------
    row : mapping (e.g. one DataFrame row)
        Must provide ``'lat'`` and ``'lon'`` (degrees) and ``'date'``.

    Returns
    -------
    str
        String form of the value returned by ephem's
        ``Observer.previous_rising`` for the Sun.
    """
    observer = ephem.Observer()
    # Coordinates are handed over as strings: ephem parses angle strings as
    # degrees, whereas bare floats would be interpreted as radians.
    observer.lat = str(row['lat'])
    observer.long = str(row['lon'])
    observer.date = row['date']

    # NOTE(review): ephem is documented to raise AlwaysUpError / NeverUpError
    # when the Sun does not rise (polar day/night); that case is not handled
    # here — confirm before changing the sampling date.
    return str(observer.previous_rising(ephem.Sun()))


# Row-wise application: one sunrise string per grid point.
predictors['sunrise'] = predictors.apply(find_sunrise, axis=1)

In [7]:
# Sampling time is taken a fixed four hours after the local sunrise.
sunrise_offset = pd.Timedelta(hours=4)

# Parse the sunrise strings into datetimes, then shift them by the offset.
predictors['sunrise'] = pd.to_datetime(predictors['sunrise'])
predictors['time'] = predictors['sunrise'].add(sunrise_offset)

print(predictors)

        lat    lon       date             sunrise                time
0     -80.0 -180.0 2023-04-10 2023-04-09 20:52:39 2023-04-10 00:52:39
1     -80.0 -179.0 2023-04-10 2023-04-09 20:48:37 2023-04-10 00:48:37
2     -80.0 -178.0 2023-04-10 2023-04-09 20:44:35 2023-04-10 00:44:35
3     -80.0 -177.0 2023-04-10 2023-04-09 20:40:33 2023-04-10 00:40:33
4     -80.0 -176.0 2023-04-10 2023-04-09 20:36:32 2023-04-10 00:36:32
...     ...    ...        ...                 ...                 ...
39989  80.0  176.0 2023-04-10 2023-04-09 14:25:26 2023-04-09 18:25:26
39990  80.0  177.0 2023-04-10 2023-04-09 14:21:29 2023-04-09 18:21:29
39991  80.0  178.0 2023-04-10 2023-04-09 14:17:31 2023-04-09 18:17:31
39992  80.0  179.0 2023-04-10 2023-04-09 14:13:34 2023-04-09 18:13:34
39993  80.0  180.0 2023-04-10 2023-04-09 14:09:37 2023-04-09 18:09:37

[39994 rows x 5 columns]


In [8]:
# The standard depth for SeaFlow measurements, applied to every grid point.
# NOTE(review): presumably metres, matching the vertical tolerance unit used
# when sampling CMAP below — confirm.
standard_depth = 5.0
predictors['depth'] = standard_depth

In [9]:
# Save the sampling targets as CSV (row index omitted). The path is relative
# to the working directory set at the top of the notebook.
predictors.to_csv('data_ingest/data/original/predictors.csv', index=False)

### Sampling CMAP

In [10]:
import pycmap
# Never hardcode the CMAP API key in the notebook: read it from the
# CMAP_API_KEY environment variable instead. When the variable is unset,
# token=None is passed.
# NOTE(review): pycmap is documented to fall back to a key it has stored
# locally when no token is given — confirm for the installed version.
# NOTE(review): the key that was previously committed in this cell should be
# revoked and replaced, since it remains visible in the notebook's history.
api = pycmap.API(token=os.environ.get('CMAP_API_KEY'))

# Format the sampling times as 'YYYY-MM-DD HH:MM:SS' strings. Parsing with
# pd.to_datetime first keeps this cell safe to re-run: on a second run the
# column already holds strings, on which `.dt` would otherwise raise.
predictors['time'] = pd.to_datetime(predictors['time']).dt.strftime('%Y-%m-%d %H:%M:%S')


In [14]:
# CMAP tables to colocate against the predictor points. Each entry maps a
# table name to the variables to pull from it and the matching tolerances.
# Tolerance variables/order: temporal [days], meridional [deg], zonal [deg], and vertical [m]
targets = {
    "tblDarwin_Nutrient_Climatology": {
        "variables": ["ALK_darwin_clim"],
        "tolerances": [1, 0.5, 0.5, 5],
    },
    # CMAP sea surface salinity
    "tblSSS_NRT_cl1": {
        "variables": ['sss_smap'],
        "tolerances": [1, 1, 1, 5],
    },
    # CMAP sea surface temperature
    "tblSST_AVHRR_OI_NRT": {
        "variables": ['sst'],
        "tolerances": [1, 0.5, 0.5, 5],
    },
    # CMAP SSH velocity
    "tblAltimetry_REP_Signal": {
        "variables": ['ugos', 'vgos'],
        "tolerances": [1, 0.5, 0.5, 5],
    },
    # Pisces 2020-24
    "tblPisces_Forecast_cl1": {
        "variables": ['fe', 'o2', 'no3', 'po4', 'si'],
        "tolerances": [1, 0.5, 0.5, 5],
    },
}

# The predictor frame (lat, lon, time, depth, ...) is the set of points to sample at.
source = predictors

# NOTE(review): the keyword is spelled `replaceWithMonthlyClimatolog` here;
# it is kept exactly as written — confirm against the installed pycmap.
predictors_cmap = pycmap.Sample(
    source=source,
    targets=targets,
    replaceWithMonthlyClimatolog=False,
)

Gathering metadata .... 
Sampling starts
Sampling tblPisces_Forecast_cl1 ... 6565 / 39994                                                          

ChunkedEncodingError: ("Connection broken: InvalidChunkLength(got length b'', 0 bytes read)", InvalidChunkLength(got length b'', 0 bytes read))

In [None]:
# Preview the first ten rows of the sampled frame.
predictors_cmap.iloc[:10]

Unnamed: 0,lat,lon,time,depth,CMAP_sss_smap_tblSSS_NRT_cl1,CMAP_sst_tblSST_AVHRR_OI_NRT,CMAP_ugos_tblAltimetry_REP_Signal,CMAP_vgos_tblAltimetry_REP_Signal,CMAP_fe_tblPisces_Forecast_cl1,CMAP_o2_tblPisces_Forecast_cl1,CMAP_no3_tblPisces_Forecast_cl1,CMAP_po4_tblPisces_Forecast_cl1,CMAP_si_tblPisces_Forecast_cl1,CMAP_talk_tblPisces_Forecast_cl1
0,-73.0,-143.0,2023-04-09 21:04:35,5.0,,-1.668134,-0.005808,-0.016058,0.00023,361.295364,26.627517,1.829983,52.720121,2.345027
1,-73.0,-142.0,2023-04-09 21:00:34,5.0,,-1.663964,-0.00141,-0.01459,0.000215,361.356059,26.601347,1.843454,52.879913,2.344948
2,-73.0,-141.0,2023-04-09 20:56:34,5.0,,-1.717504,0.00161,-0.002052,0.000185,360.978521,26.655291,1.868111,53.449751,2.345255
3,-73.0,-140.0,2023-04-09 20:52:33,5.0,,-1.720005,-0.000979,-0.021442,0.000197,360.647106,26.677789,1.879767,54.381974,2.344987
4,-73.0,-88.0,2023-04-09 17:23:49,5.0,37.974533,-1.331951,-0.0406,-0.031567,0.002671,356.679917,31.386101,1.812338,44.570525,2.298122
5,-73.0,-87.0,2023-04-09 17:19:48,5.0,37.209843,-1.398338,-0.031342,-0.0076,0.001419,357.710907,30.033035,1.872863,49.430683,2.32218
6,-73.0,-86.0,2023-04-09 17:15:47,5.0,37.508607,-1.406158,-0.033887,0.025767,0.001165,357.390835,29.585792,1.90587,52.383142,2.33042
7,-73.0,-85.0,2023-04-09 17:11:46,5.0,37.447979,-1.381464,-0.03266,0.029713,0.001097,357.027335,29.195063,1.921043,53.437034,2.333787
8,-73.0,-84.0,2023-04-09 17:07:45,5.0,36.873035,-1.395422,-0.0216,0.012412,0.000974,357.010935,28.872238,1.946533,56.603876,2.339559
9,-73.0,-83.0,2023-04-09 17:03:44,5.0,36.66698,-1.228131,-0.025633,-0.006383,0.000785,359.113269,28.623838,1.93263,55.186904,2.337351


In [None]:
# Number of missing values in each column of the sampled frame.
predictors_cmap.isnull().sum()

In [None]:
# Save the sampled frame as CSV; index=True writes the row index as the first
# column of the file.
predictors_cmap.to_csv('data_ingest/data/original/predictors_cmap.csv', index=True)