In [None]:
#Apply logs, remove negatives, etc.
#Add year and month
#Split train test (and other generalizations)
#Cut dataframe into splits

In [1]:
from pathlib import Path
import pickle
# Project root. ``.absolute()`` prefixes the current working directory, the
# first ``.parent`` drops the (never opened) file name, and the remaining two
# climb two directories above the working directory.
root = Path(r'0.4_dataset_preprocessing.ipnyb').absolute().parent.parent.parent
# Join path components with pathlib rather than concatenating a
# backslash-separated string, which only resolves on Windows.
landsat_extraction_path = root / 'data' / 'processed_data' / '0.2_LANDSAT_extraction.pickle'
# NOTE(review): pickle.load can execute arbitrary code stored in the file;
# only load pickles written by this project's own notebooks.
with open(landsat_extraction_path, 'rb') as handle:
    dataset = pickle.load(handle)


In [2]:
dataset["surface_net_solar_radiation_min"]

0       -0.25
1        0.00
2        0.00
3        0.00
4        0.00
         ... 
77065    0.00
77066   -0.50
77067   -0.50
77068   -0.50
77069   -0.50
Name: surface_net_solar_radiation_min, Length: 73625, dtype: float64

In [3]:
dataset['lake_elevation_m']

0         46.299999
1        347.899994
2        347.899994
3        347.899994
4        347.899994
            ...    
77065    510.500000
77066    510.500000
77067    510.500000
77068    510.500000
77069    510.500000
Name: lake_elevation_m, Length: 73625, dtype: float64

In [4]:
dataset['total_precipitation_sum'].isna().any()

False

In [5]:
dataset.isna().any().any()

False

In [6]:
dataset.lagoslakeid_x.unique().shape

(4138,)

In [7]:
import math

# Parameter names whose ``parameter_value`` is log-transformed
# (looked up through ``num_to_param`` by log_if_var_type).
ind_vars_to_log = [
    'secchi_m',
    'chla_ugl',
    'tn_ugl',
    'tp_ugl',
    'sio2_mgl',
    'salinity_ppm',
]

# Feature columns that log_modification passes through ``log``.
features_to_log = [
    'lake_elevation_m',
    'lake_totalarea_ha',
    'lake_perimeter_m',
    'lake_shorelinedevfactor',
    'lake_mix_layer_temperature',
    'lake_mix_layer_temperature_min',
    'lake_mix_layer_temperature_max',
    'lake_mix_layer_depth',
    'lake_mix_layer_depth_min',
    'lake_mix_layer_depth_max',
    'total_precipitation_sum',
    'total_precipitation_min',
    'total_precipitation_max',
]

# Band-name prefixes; log_modification rescales the " point", " mean" and
# " median" column of each.
SR_bands = ['SR_B%d' % band_number for band_number in range(1, 8)]

# Columns in which negative values are replaced by zero.
remove_negatives = [
    'lake_mix_layer_depth',
    'lake_mix_layer_depth_min',
    'lake_mix_layer_depth_max',
    'total_precipitation_sum',
    'total_precipitation_min',
    'total_precipitation_max',
    'surface_net_solar_radiation_sum',
    'surface_net_solar_radiation_min',
    'surface_net_solar_radiation_max',
    'parameter_value',
]

# Maps the numeric ``parameter_id_x`` code to its parameter name.
num_to_param = {
    30: 'secchi_m',
    9: 'chla_ugl',
    21: 'tn_ugl',
    23: 'do_mgl',
    25: 'ph',
    27: 'tp_ugl',
    31: 'sio2_mgl',
    48: 'salinity_ppm',
}

def log(x):
    """Signed log transform: ``sign(x) * ln(1 + |x|)``.

    Positive inputs map to ``ln(1 + x)``, negative inputs to ``-ln(1 - x)``
    and zero maps to zero, so the transform is odd and defined for every
    real number.

    Parameters
    ----------
    x : real number
        Value to transform.

    Returns
    -------
    float
        The transformed value. NaN is propagated as NaN instead of falling
        through both comparisons and coming back as ``None``.
    """
    # log1p keeps precision for magnitudes far below 1, where forming 1 + x
    # first would round the small part away; copysign restores the sign and
    # passes NaN through unchanged.
    return math.copysign(math.log1p(abs(x)), x)

    
def log_if_var_type(row, ind_vars_to_log):
    """Log-transform ``row["parameter_value"]`` when the row's parameter is listed.

    The row's ``parameter_id_x`` is translated to a parameter name through
    ``num_to_param`` (an id missing from that mapping raises ``KeyError``).
    If the name appears in ``ind_vars_to_log`` the value is replaced by
    ``log(value)``; otherwise the row is handed back untouched.

    Returns the same ``row`` object that was passed in.
    """
    parameter_name = num_to_param[row.parameter_id_x]
    if parameter_name not in ind_vars_to_log:
        return row
    row["parameter_value"] = log(row["parameter_value"])
    return row

def zero_if_negative(value):
    """Return 0 for a negative ``value``; return any other value unchanged."""
    return 0 if value < 0 else value

def solar_reduce(value):
    """Divide ``value`` by one million.

    NOTE(review): the earlier inline note described this as "J/m^2 to
    J/km^2", but a division by 1e6 corresponds to J -> MJ (MJ/m^2 if the
    input really is J/m^2); converting to a per-km^2 area would multiply
    instead. Confirm the intended unit.
    """
    divisor = 1000000
    return value / divisor


def log_modification(dataset, ind_vars_to_log, features_to_log):
    """Apply the notebook's value transforms and add ``year`` / ``month`` columns.

    Steps, in order:
      1. row by row, log-transform ``parameter_value`` for parameters named
         in ``ind_vars_to_log`` (via log_if_var_type);
      2. replace negatives by zero in every column of ``remove_negatives``;
      3. divide the three surface-net-solar-radiation columns by 1e6;
      4. subtract 273 from the three lake-mix-layer-temperature columns;
      5. pass every column in ``features_to_log`` through ``log``;
      6. rescale the point / mean / median column of each band in
         ``SR_bands`` as ``x*0.0000275-0.2``;
      7. derive ``year`` (last two characters of the first ``-`` separated
         field of ``sample_date``) and ``month`` (second field), both kept
         as strings.

    Reads the module-level ``remove_negatives`` and ``SR_bands`` lists.

    Returns the transformed frame produced by the row-wise ``apply``.
    """
    frame = dataset.apply(lambda row: log_if_var_type(row, ind_vars_to_log), axis = 1)

    for column_name in remove_negatives:
        frame[column_name] = frame[column_name].map(zero_if_negative)

    solar_columns = (
        'surface_net_solar_radiation_min',
        'surface_net_solar_radiation_max',
        'surface_net_solar_radiation_sum',
    )
    for column_name in solar_columns:
        frame[column_name] = frame[column_name].map(solar_reduce)

    temperature_columns = (
        'lake_mix_layer_temperature',
        'lake_mix_layer_temperature_min',
        'lake_mix_layer_temperature_max',
    )
    for column_name in temperature_columns:
        # convert to C
        # NOTE(review): the offset used is 273, not 273.15 - confirm intended.
        frame[column_name] = frame[column_name].map(lambda kelvin: kelvin-273)

    for column_name in features_to_log:
        frame[column_name] = frame[column_name].map(log)

    for band_prefix in SR_bands:
        for suffix in (" point", " mean", " median"):
            column_name = band_prefix + suffix
            frame[column_name] = frame[column_name].map(lambda raw: raw*0.0000275-0.2)

    # NOTE(review): year and month stay strings (e.g. '20', '06'), so arrays
    # built from them together with numeric columns end up with dtype=object.
    sample_dates = frame['sample_date']
    frame['year'] = sample_dates.map(lambda stamp: stamp.split('-')[0][-2:])
    frame['month'] = sample_dates.map(lambda stamp: stamp.split('-')[1])
    return frame
    
# Rebinds ``dataset`` to the transformed frame, so running this cell a second
# time feeds already-transformed values back through log_modification.
dataset = log_modification(dataset, ind_vars_to_log, features_to_log)

In [15]:
from pathlib import Path
import pickle
# Project root: two directories above the current working directory (the file
# name only exists so the first ``.parent`` has something to strip).
root = Path(r'0.4_dataset_preprocessing.ipnyb').absolute().parent.parent.parent
# pathlib join instead of a backslash-concatenated string, so the path also
# resolves on non-Windows systems.
unsplit_data_path = root / 'data' / 'processed_data' / '0.4_unsplit_data.pickle'
with open(unsplit_data_path, 'wb') as handle:
    pickle.dump(dataset,handle)

In [18]:
import numpy as np

# Temporal hold-out: every sample whose two-digit ``year`` string is '20' or '21'.
is_held_out_year = dataset.year.isin(['20', '21'])
time_generalizability_test = dataset[is_held_out_year]
# ``dataset`` itself is deliberately NOT rebound: the remaining rows get their
# own name so that re-running this cell starts from the same frame and draws
# the same lakes instead of sampling from an already-reduced frame.
pre_holdout_dataset = dataset[~is_held_out_year]

# Spatial hold-out: a seeded random fifth of the lakes left after the
# temporal hold-out.
np.random.seed(seed=0)
candidate_lakes = pre_holdout_dataset.lagoslakeid_x.unique()
lake_20_percent = np.random.choice(candidate_lakes, int(len(candidate_lakes)/5), replace=False)
# One vectorized membership test replaces the per-row np.isin call that was
# evaluated twice.
is_held_out_lake = pre_holdout_dataset.lagoslakeid_x.isin(lake_20_percent)
spatial_generalizability_test = pre_holdout_dataset[is_held_out_lake]
train_dataset = pre_holdout_dataset[~is_held_out_lake]

#Prob want to shift around ratios
test_train_split_dataset = {'train':train_dataset,'time_test':time_generalizability_test,'space_test':spatial_generalizability_test}


In [48]:
time_generalizability_test

Unnamed: 0,sample_date,source_samplesite_lat_dd,source_samplesite_lon_dd,lake_mix_layer_temperature,lake_mix_layer_temperature_min,lake_mix_layer_temperature_max,lake_mix_layer_depth,lake_mix_layer_depth_min,lake_mix_layer_depth_max,u_component_of_wind_10m,...,lake_namegnis,lake_namelagos,lake_lat_decdeg,lake_lon_decdeg,lake_elevation_m,lake_totalarea_ha,lake_perimeter_m,lake_shorelinedevfactor,year,month
43,2020-06-21,45.512550,-95.667220,3.049667,3.030627,3.064470,1.944211,1.816357,2.005221,1.093189,...,Lake Emily,Lake Emily,45.514465,-95.664404,5.798790,6.819979,9.845490,1.015229,20,06
44,2020-06-21,45.512550,-95.667220,3.049667,3.030627,3.064470,1.944211,1.816357,2.005221,1.093189,...,Lake Emily,Lake Emily,45.514465,-95.664404,5.798790,6.819979,9.845490,1.015229,20,06
45,2020-06-21,45.512550,-95.667220,3.049667,3.030627,3.064470,1.944211,1.816357,2.005221,1.093189,...,Lake Emily,Lake Emily,45.514465,-95.664404,5.798790,6.819979,9.845490,1.015229,20,06
46,2020-08-16,45.512550,-95.667220,3.207315,3.148330,3.304561,1.778591,1.306007,1.956319,1.640657,...,Lake Emily,Lake Emily,45.514465,-95.664404,5.798790,6.819979,9.845490,1.015229,20,08
47,2020-08-16,45.512550,-95.667220,3.207315,3.148330,3.304561,1.778591,1.306007,1.956319,1.640657,...,Lake Emily,Lake Emily,45.514465,-95.664404,5.798790,6.819979,9.845490,1.015229,20,08
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
77065,2020-07-06,46.209675,-89.473688,3.155311,3.078637,3.189795,1.723191,1.604936,1.947026,1.023577,...,Crampton Lake,Crampton Lake,46.209658,-89.473671,6.237348,3.330146,8.119472,1.039408,20,07
77066,2021-05-04,46.209675,-89.473688,1.904448,1.894272,1.915924,2.459105,2.459105,2.459105,1.956756,...,Crampton Lake,Crampton Lake,46.209658,-89.473671,6.237348,3.330146,8.119472,1.039408,21,05
77067,2021-05-04,46.209675,-89.473688,1.904448,1.894272,1.915924,2.459105,2.459105,2.459105,1.956756,...,Crampton Lake,Crampton Lake,46.209658,-89.473671,6.237348,3.330146,8.119472,1.039408,21,05
77068,2021-05-04,46.209675,-89.473688,1.904448,1.894272,1.915924,2.459105,2.459105,2.459105,1.956756,...,Crampton Lake,Crampton Lake,46.209658,-89.473671,6.237348,3.330146,8.119472,1.039408,21,05


In [49]:
test_train_split_dataset['train']['lake_elevation_m']

0        3.856510
11       6.154646
12       6.154646
13       6.154646
14       6.154646
           ...   
76931    7.700793
76932    7.700793
76933    7.700793
76934    7.700793
76935    7.700793
Name: lake_elevation_m, Length: 53381, dtype: float64

In [50]:
test_train_split_dataset.keys()

dict_keys(['train', 'time_test', 'space_test'])

In [51]:
test_train_split_dataset['train']['lake_elevation_m']

0        3.856510
11       6.154646
12       6.154646
13       6.154646
14       6.154646
           ...   
76931    7.700793
76932    7.700793
76933    7.700793
76934    7.700793
76935    7.700793
Name: lake_elevation_m, Length: 53381, dtype: float64

In [52]:
# Column-name suffixes; each one selects which per-band column is used.
reduction_methods = [' point',' mean',' median']

# Columns copied unchanged into every per-reduction frame.
common_vars = [
    'lagoslakeid_x', 'parameter_id_x', 'parameter_value',
    'source_samplesite_lat_dd', 'source_samplesite_lon_dd',
    'lake_elevation_m', 'lake_totalarea_ha', 'lake_perimeter_m',
    'lake_shorelinedevfactor', 'month', 'year',
    'lake_mix_layer_temperature',
    'lake_mix_layer_temperature_min',
    'lake_mix_layer_temperature_max',
    'lake_mix_layer_depth',
    'lake_mix_layer_depth_min',
    'lake_mix_layer_depth_max',
    'u_component_of_wind_10m',
    'u_component_of_wind_10m_min',
    'u_component_of_wind_10m_max',
    'v_component_of_wind_10m',
    'v_component_of_wind_10m_min',
    'v_component_of_wind_10m_max',
    'total_precipitation_sum',
    'total_precipitation_min',
    'total_precipitation_max',
    'surface_net_solar_radiation_sum',
    'surface_net_solar_radiation_min',
    'surface_net_solar_radiation_max',
]

# Band columns that exist once per reduction method ("<band><suffix>").
band_columns = ['SR_B1','SR_B2','SR_B3','SR_B4','SR_B5','SR_B6','SR_B7','SR_QA_AEROSOL','QA_PIXEL','QA_RADSAT']

# Result layout: split_dataframes[suffix][split name] -> frame holding
# common_vars followed by the band columns of that suffix, renamed to the
# bare band name.
split_dataframes = {}
for reduction_method in reduction_methods:
    suffixed_to_plain = {band + reduction_method: band for band in band_columns}
    frames_for_method = {}
    for split_name, split_frame in test_train_split_dataset.items():
        selected = split_frame[common_vars + list(suffixed_to_plain)]
        frames_for_method[split_name] = selected.rename(columns=suffixed_to_plain)
    split_dataframes[reduction_method] = frames_for_method


In [53]:
len(common_vars)

29

In [54]:
len(list(dict.fromkeys(common_vars)))

29

In [55]:
#Split into individual parameters, set up test_train split

# Feature columns, in the order they appear in the exported arrays.
independent_vars = [
    'source_samplesite_lat_dd', 'source_samplesite_lon_dd',
    'lake_elevation_m', 'lake_totalarea_ha', 'lake_perimeter_m',
    'lake_shorelinedevfactor', 'month', 'year',
    'lake_mix_layer_temperature',
    'lake_mix_layer_temperature_min',
    'lake_mix_layer_temperature_max',
    'lake_mix_layer_depth',
    'lake_mix_layer_depth_min',
    'lake_mix_layer_depth_max',
    'u_component_of_wind_10m',
    'u_component_of_wind_10m_min',
    'u_component_of_wind_10m_max',
    'v_component_of_wind_10m',
    'v_component_of_wind_10m_min',
    'v_component_of_wind_10m_max',
    'total_precipitation_sum',
    'total_precipitation_min',
    'total_precipitation_max',
    'surface_net_solar_radiation_sum',
    'surface_net_solar_radiation_min',
    'surface_net_solar_radiation_max',
    'SR_B1', 'SR_B2', 'SR_B3', 'SR_B4', 'SR_B5', 'SR_B6', 'SR_B7',
]
# Target column.
dependent_var = 'parameter_value'

# Replace every split's frame with {parameter id: rows for that parameter}.
# The frames are overwritten in place inside ``split_dataframes``, so this
# cell cannot be run twice without rebuilding ``split_dataframes`` first.
for reduction_method in reduction_methods:
    frames_by_split = split_dataframes[reduction_method]
    for key in list(frames_by_split):
        val = frames_by_split[key]
        frames_by_split[key] = {
            parameter: val[val.parameter_id_x == parameter]
            for parameter in val.parameter_id_x.unique()
        }

In [56]:
len(independent_vars)

33

In [57]:
len(independent_vars) == len(set(independent_vars))

True

In [58]:
from sklearn.model_selection import train_test_split

# Same id -> name mapping as in the transform cell, repeated here.
num_to_param = {
    30: 'secchi_m',
    9: 'chla_ugl',
    21: 'tn_ugl',
    23: 'do_mgl',
    25: 'ph',
    27: 'tp_ugl',
    31: 'sio2_mgl',
    48: 'salinity_ppm',
}

# For each reduction method, swap every per-parameter training frame for a
# dict of numpy arrays produced by a 70/30 split with a fixed random_state.
for reduction_method, test_train_split in split_dataframes.items():
    train = test_train_split['train']
    for param in list(train):
        val = train[param]
        features = val[independent_vars].to_numpy()
        targets = val[dependent_var].to_numpy()
        X_train, X_test, y_train, y_test = train_test_split(features, targets, test_size=0.30, random_state=42)
        train[param] = dict(X_train=X_train, X_test=X_test, y_train=y_train, y_test=y_test)


        

In [59]:
split_dataframes[' point']['train'][30]['X_train'][24]

array([45.025136, -87.155641, 5.210578435580641, 6.167636097299151,
       9.655371961190179, 1.1103024046889494, '07', '19',
       3.0316104267802135, 3.017670047038899, 3.04541721957838,
       2.234655411700404, 2.0996222760997023, 2.2847512295348844,
       -1.6881533861160278, -4.5746660232543945, 0.1396942138671875,
       2.186424652735392, 0.7622966766357422, 4.040635108947754,
       8.583065114466866e-07, -0.0, 8.583065114466866e-07, 26.295592, 0.0,
       2.970906, 0.005039999999999989, 0.016094999999999998,
       0.041917499999999996, 0.01337250000000001, 0.008504999999999985,
       0.008532499999999998, 0.007487500000000008], dtype=object)

In [60]:
#Extract space and time datasets
# Each per-parameter frame of the two hold-out splits is replaced by a dict
# with the feature matrix ('independent') and target vector ('dependent').
for reduction_method, dataset_block in split_dataframes.items():
    for test in ('space_test', 'time_test'):
        parameter_blocks = dataset_block[test]
        for par in list(parameter_blocks):
            parameter_block = parameter_blocks[par]
            parameter_blocks[par] = {
                'independent': parameter_block[independent_vars].to_numpy(),
                'dependent': parameter_block[dependent_var].to_numpy(),
            }

In [90]:
# pathlib join instead of a backslash-concatenated string, so the path also
# resolves on non-Windows systems. Path(root) accepts either a str or a Path.
split_data_path = Path(root) / "data" / "processed_data" / "0.4_dataset_preprocessing.pickle"
with open(split_data_path, "wb") as handle:
    pickle.dump(split_dataframes, handle)

In [103]:
from pathlib import Path
import pickle
# Project root: two directories above the current working directory.
root = Path(r'0.4_dataset_preprocessing.ipnyb').absolute().parent.parent.parent
# pathlib join instead of a backslash-concatenated string (Windows-only).
split_data_path = root / "data" / "processed_data" / "0.4_dataset_preprocessing.pickle"
# NOTE(review): pickle.load can execute arbitrary code stored in the file;
# only load pickles written by this project's own notebooks.
with open(split_data_path, "rb") as handle:
    dat = pickle.load(handle)

In [104]:
#Extract spatial testing example

In [19]:
lake_20_percent

array([   767,  69323,   4606,   7444,   2500, 272820,   3449,   8295,
        84678, 292017,   5067, 152360, 116597,   1973,   5743,  21711,
         6314,  12398, 157614,   4696,  83671,  64605,   4905, 457037,
        61409, 361292, 119801,   6521, 193439,   3607,  67996,   1819,
         2251, 196849,   5516,  19016,  42197,   3071,   4397,   6488,
         7882,  81842,   5677,   2066, 294401,   1417, 234830,  38846,
         3886, 197273,  63172,  57856,    209,  80721, 208700, 137219,
         4576, 316654,   4598,   2095,   1666,   2471,   2347,   5268,
       349624, 211230, 317385,   6425, 147889,   4916,   3995,  35744,
        67057,   1864,   4171,  70058,   6137,    620,   4240, 111491,
         3993,   5057,   5993, 189320,   3288,    467,  99424,   5081,
         4611,   2694,   3116,    292,   5847,   5690, 126047, 464739,
         3293,   4957,   1917,   3411,  85766,   4586, 137783, 433888,
       213492,  86742, 189281,   2319,   2046,   6076,    882,  48136,
      

In [188]:
lake_20_percent

array([  3487, 453580,   3196,   6254,   1892,  63186,   5649, 196852,
       170850, 144208,  60860, 123950, 189910,   4581,   4784,   5288,
          305, 208912, 387035,   5911,   2754,  50758,   3383,   2939,
        29033,   5369, 189772,   3474, 222430,   5763,   4528,  88890,
       450081,    564,   2836,   5028, 191335, 101152,  36735,  90920,
       236201,   2589,   4818, 156501,   4923, 376419,   5549,   1515,
       191355,   5110,   5501,   6042,   1435,   1699,  51822,  18995,
       357823,  91905, 464743,    637, 452632, 452797, 393135,   6548,
          635,  90961,   5465,    191, 455062, 447616,   6812,  80769,
       194821, 170065,   6471,   6479,   6301, 375816,   2815,   4585,
       339036,   5488, 205251, 287040,   1601, 273908, 189344,   3900,
         4876, 464462,   2767, 102166,   5217,   3501,   4522, 466135,
         5521,   2955,  60737,   6490, 132654, 479260,  66241,   5380,
       450402,   3208,  21411,   4887,   5104, 187326,   2529, 450860,
      

In [107]:
from pathlib import Path
# Project root: two directories above the current working directory.
root = Path(r'0.2_LANDSAT_extraction.ipnyb').absolute().parent.parent.parent
import pickle
# pathlib join instead of a backslash-concatenated string (Windows-only).
landsat_extraction_path = root / 'data' / 'processed_data' / '0.2_LANDSAT_extraction.pickle'
# ``dat`` is rebound here: from this cell on it holds the 0.2 LANDSAT
# extraction frame, not the split dictionary loaded into ``dat`` earlier.
# NOTE(review): pickle.load can execute arbitrary code stored in the file;
# only load pickles written by this project's own notebooks.
with open(landsat_extraction_path, 'rb') as handle:
    dat = pickle.load(handle)

import ee
import re
# Authenticate and bind the Earth Engine session to the project.
ee.Authenticate()
ee.Initialize(project='lake-images-ee')

# Band names of the first image in the Landsat collection, minus every
# name that starts with "ST".
LANDSAT = ee.ImageCollection("LANDSAT/LC08/C02/T1_L2")
L_Bands = LANDSAT.first().bandNames().getInfo()

p = re.compile('^ST.*')
L_Bands = list(filter(lambda band_name: p.match(band_name) is None, L_Bands))


# NOTE(review): the comment previously attached here linked the
# ECMWF_ERA5_LAND_MONTHLY_AGGR catalog page, while the collection loaded is
# the DAILY_AGGR one:
# https://developers.google.com/earth-engine/datasets/catalog/ECMWF_ERA5_LAND_DAILY_AGGR
ERA_5 = ee.ImageCollection("ECMWF/ERA5_LAND/DAILY_AGGR")
ERA_5_bands = [
    "lake_mix_layer_temperature",
    "lake_mix_layer_temperature_min",
    "lake_mix_layer_temperature_max",
    "lake_mix_layer_depth",
    "lake_mix_layer_depth_min",
    "lake_mix_layer_depth_max",
    "u_component_of_wind_10m",
    "u_component_of_wind_10m_min",
    "u_component_of_wind_10m_max",
    "v_component_of_wind_10m",
    "v_component_of_wind_10m_min",
    "v_component_of_wind_10m_max",
    "total_precipitation_sum",
    "total_precipitation_min",
    "total_precipitation_max",
    "surface_net_solar_radiation_sum",
    "surface_net_solar_radiation_min",
    "surface_net_solar_radiation_max",
]


In [112]:
# Loads the Landsat image referenced by the first row of ``dat`` for lake id ``item``.
# NOTE(review): ``item`` is not assigned in any cell above this one; the only
# assignment in view is the loop variable of the later lake-scanning cell, so
# this line relies on leftover kernel state and fails on a fresh top-to-bottom run.
img = ee.Image("LANDSAT/LC08/C02/T1_L2/"+dat[dat.lagoslakeid_x==item].image_id.iloc[0])

In [113]:
import pandas as pd
import geopandas as gpd
import csv
import os

# Pass the path components separately so pathlib inserts the platform's own
# separator; a single backslash-embedded string only resolves on Windows.
path_geo_inf = str(root.joinpath("data", "raw_data", "LAGOS_LOCUS", "lake_information.csv"))
geo_inf_dat = pd.read_csv(path_geo_inf)

In [115]:
geo_inf_dat[geo_inf_dat.lagoslakeid==item]

Unnamed: 0,lagoslakeid,lake_nhdid,lake_nhdfcode,lake_nhdftype,lake_reachcode,lake_namegnis,lake_namelagos,lake_onlandborder,lake_ismultipart,lake_missingws,...,hu8_zoneid,hu4_zoneid,county_zoneid,state_zoneid,epanutr_zoneid,omernik3_zoneid,wwf_zoneid,mlra_zoneid,bailey_zoneid,neon_zoneid
300845,767,80997263,39004,390,9030001000000.0,Boot Lake,Boot Lake; Haven Lake,N,Y,N,...,hu8_130,hu4_26,county_434,state_14,epanutr_7,omernik3_48,wwf_3,mlra_215,bailey_11,neon_5


In [133]:
dat[dat.lagoslakeid_x==7444]

Unnamed: 0,sample_date,source_samplesite_lat_dd,source_samplesite_lon_dd,lake_mix_layer_temperature,lake_mix_layer_temperature_min,lake_mix_layer_temperature_max,lake_mix_layer_depth,lake_mix_layer_depth_min,lake_mix_layer_depth_max,u_component_of_wind_10m,...,parameter_id_x,parameter_value,lake_namegnis,lake_namelagos,lake_lat_decdeg,lake_lon_decdeg,lake_elevation_m,lake_totalarea_ha,lake_perimeter_m,lake_shorelinedevfactor
39629,2016-10-11,37.49129,-88.91389,289.210775,288.908447,289.477783,5.0,5.0,5.0,-0.622111,...,9,26.7,Dutchman Lake,Dutchman Lake,37.491801,-88.916459,128.800003,41.131333,7288.796425,3.206007
39630,2016-10-11,37.49129,-88.91389,289.210775,288.908447,289.477783,5.0,5.0,5.0,-0.622111,...,23,6.01,Dutchman Lake,Dutchman Lake,37.491801,-88.916459,128.800003,41.131333,7288.796425,3.206007
39631,2016-10-11,37.49129,-88.91389,289.210775,288.908447,289.477783,5.0,5.0,5.0,-0.622111,...,25,7.45,Dutchman Lake,Dutchman Lake,37.491801,-88.916459,128.800003,41.131333,7288.796425,3.206007
39632,2016-10-11,37.49129,-88.91389,289.210775,288.908447,289.477783,5.0,5.0,5.0,-0.622111,...,27,35.0,Dutchman Lake,Dutchman Lake,37.491801,-88.916459,128.800003,41.131333,7288.796425,3.206007
39633,2016-10-11,37.49129,-88.91389,289.210775,288.908447,289.477783,5.0,5.0,5.0,-0.622111,...,30,0.6858,Dutchman Lake,Dutchman Lake,37.491801,-88.916459,128.800003,41.131333,7288.796425,3.206007


In [149]:
# Find the held-out lake with the most distinct sample dates in ``dat``.
# The running maximum gets its own name so the builtin ``max`` is no longer
# shadowed for the rest of the session. ``s`` ends up holding the winning
# lake id (0 if no lake has any rows); ties keep the first lake encountered.
max_sample_dates = 0
s = 0
for item in lake_20_percent:
    # Evaluate the per-lake filter once instead of twice.
    n_sample_dates = dat[dat.lagoslakeid_x==item].sample_date.drop_duplicates().shape[0]
    if n_sample_dates > max_sample_dates:
        max_sample_dates = n_sample_dates
        s = item
print(s)
print(max_sample_dates)

3422
111


In [153]:
list(dat[dat.lagoslakeid_x==3422].sample_date)

['2013-06-07',
 '2013-06-13',
 '2013-06-19',
 '2013-06-27',
 '2013-07-02',
 '2013-08-01',
 '2013-08-05',
 '2013-08-11',
 '2013-08-15',
 '2013-08-22',
 '2013-08-26',
 '2013-08-28',
 '2013-08-31',
 '2013-09-06',
 '2014-07-19',
 '2014-07-20',
 '2014-07-22',
 '2014-07-23',
 '2014-07-26',
 '2014-07-31',
 '2014-08-07',
 '2014-08-08',
 '2014-08-09',
 '2014-08-10',
 '2014-08-13',
 '2014-08-21',
 '2015-06-11',
 '2015-06-11',
 '2015-06-11',
 '2015-06-11',
 '2015-06-11',
 '2015-06-18',
 '2015-06-25',
 '2015-06-29',
 '2015-07-07',
 '2015-07-07',
 '2015-07-07',
 '2015-07-07',
 '2015-07-07',
 '2015-07-26',
 '2015-08-01',
 '2015-08-04',
 '2015-08-04',
 '2015-08-04',
 '2015-08-04',
 '2015-08-04',
 '2015-08-07',
 '2015-08-10',
 '2015-08-17',
 '2016-05-02',
 '2016-05-08',
 '2016-05-16',
 '2016-06-10',
 '2016-06-13',
 '2016-06-18',
 '2016-06-23',
 '2016-07-01',
 '2016-07-15',
 '2016-07-19',
 '2016-07-21',
 '2016-07-26',
 '2016-07-26',
 '2016-07-26',
 '2016-07-26',
 '2016-07-26',
 '2016-07-26',
 '2016-08-

In [155]:
dat[dat.lagoslakeid_x==3422]

Unnamed: 0,sample_date,source_samplesite_lat_dd,source_samplesite_lon_dd,lake_mix_layer_temperature,lake_mix_layer_temperature_min,lake_mix_layer_temperature_max,lake_mix_layer_depth,lake_mix_layer_depth_min,lake_mix_layer_depth_max,u_component_of_wind_10m,...,parameter_id_x,parameter_value,lake_namegnis,lake_namelagos,lake_lat_decdeg,lake_lon_decdeg,lake_elevation_m,lake_totalarea_ha,lake_perimeter_m,lake_shorelinedevfactor
22569,2013-06-07,47.423451,-93.551676,281.458659,281.322510,281.598877,11.771077,8.828125,13.961914,-0.712186,...,30,4.1000,Bluewater Lake,Bluewater Lake,47.421441,-93.552665,402.0,146.687561,7294.178963,1.698927
22570,2013-06-13,47.423451,-93.551676,283.355184,283.097900,283.911377,12.509725,10.675781,13.772461,-0.789537,...,30,5.2000,Bluewater Lake,Bluewater Lake,47.421441,-93.552665,402.0,146.687561,7294.178963,1.698927
22571,2013-06-19,47.423451,-93.551676,286.505615,286.392822,286.616455,8.612142,8.104492,9.566406,-0.220479,...,30,6.7000,Bluewater Lake,Bluewater Lake,47.421441,-93.552665,402.0,146.687561,7294.178963,1.698927
22572,2013-06-27,47.423451,-93.551676,290.930257,288.683838,291.968994,7.193726,5.459961,11.707031,3.229375,...,30,6.1000,Bluewater Lake,Bluewater Lake,47.421441,-93.552665,402.0,146.687561,7294.178963,1.698927
22573,2013-07-02,47.423451,-93.551676,295.006429,293.922119,296.783447,3.803141,2.648437,4.414062,-0.891966,...,30,6.1000,Bluewater Lake,Bluewater Lake,47.421441,-93.552665,402.0,146.687561,7294.178963,1.698927
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22706,2020-09-05,47.423451,-93.551676,288.641032,288.054932,290.016846,10.643514,7.905273,11.857422,2.178596,...,30,4.7244,Bluewater Lake,Bluewater Lake,47.421441,-93.552665,402.0,146.687561,7294.178963,1.698927
22707,2020-09-19,47.415313,-93.555207,285.712646,285.514893,285.998291,11.838338,11.294922,12.477539,0.799430,...,30,4.8768,Bluewater Lake,Bluewater Lake,47.421441,-93.552665,402.0,146.687561,7294.178963,1.698927
22708,2020-09-21,47.415313,-93.555207,285.556966,285.394775,285.919189,14.748698,13.860352,15.422852,1.670857,...,9,1.9100,Bluewater Lake,Bluewater Lake,47.421441,-93.552665,402.0,146.687561,7294.178963,1.698927
22709,2020-09-21,47.415313,-93.555207,285.556966,285.394775,285.919189,14.748698,13.860352,15.422852,1.670857,...,27,5.0000,Bluewater Lake,Bluewater Lake,47.421441,-93.552665,402.0,146.687561,7294.178963,1.698927


In [167]:
# Re-label the spatial-test feature matrix for parameter id 30 ('secchi_m' in
# num_to_param), ' point' reduction. The matrix was produced by selecting
# ``independent_vars``, so that same list supplies the column names instead
# of a second hand-typed copy that could drift out of order.
df = pd.DataFrame(
    split_dataframes[' point']['space_test'][30]['independent'],
    columns=independent_vars,
)

In [175]:
# NOTE(review): ``lake_elevation_m`` in this frame has already been passed
# through ``log`` by log_modification (it is listed in features_to_log), so
# comparing it with the raw elevation 402.0 selects no rows.
df[df.lake_elevation_m==402.0]

Unnamed: 0,source_samplesite_lat_dd,source_samplesite_lon_dd,lake_elevation_m,lake_totalarea_ha,lake_perimeter_m,lake_shorelinedevfactor,month,year,lake_mix_layer_temperature,lake_mix_layer_temperature_min,...,surface_net_solar_radiation_sum,surface_net_solar_radiation_min,surface_net_solar_radiation_max,SR_B1,SR_B2,SR_B3,SR_B4,SR_B5,SR_B6,SR_B7


In [185]:
df

Unnamed: 0,source_samplesite_lat_dd,source_samplesite_lon_dd,lake_elevation_m,lake_totalarea_ha,lake_perimeter_m,lake_shorelinedevfactor,month,year,lake_mix_layer_temperature,lake_mix_layer_temperature_min,...,surface_net_solar_radiation_sum,surface_net_solar_radiation_min,surface_net_solar_radiation_max,SR_B1,SR_B2,SR_B3,SR_B4,SR_B5,SR_B6,SR_B7
0,41.176146,-74.437597,5.854785,4.642187,9.028933,1.208804,11,14,0.379635,0.139465,...,3.02546,0.0,0.954684,0.002482,0.00097,0.004435,-0.003265,-0.001092,0.004187,0.00295
1,41.183565,-74.432169,5.854785,4.642187,9.028933,1.208804,06,17,2.793773,2.775225,...,4.10352,0.0,0.544058,0.00064,0.003417,0.037353,0.020687,0.087705,0.071727,0.063092
2,44.084473,-93.485967,5.835688,5.547375,8.849469,0.801943,03,16,0.82242,0.722142,...,6.925385,0.0,1.245709,0.000915,0.009385,0.02792,0.019202,0.012465,0.015435,0.014857
3,44.084473,-93.485967,5.835688,5.547375,8.849469,0.801943,06,19,3.081684,3.046067,...,11.26536,0.0,1.402378,0.002537,0.009935,0.022365,0.013813,0.012272,0.014637,0.01384
4,44.084473,-93.485967,5.835688,5.547375,8.849469,0.801943,06,19,2.970711,2.959855,...,14.864718,0.0,2.47086,0.002537,0.009935,0.022365,0.013813,0.012272,0.014637,0.01384
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6111,44.42,-111.443611,7.561694,8.028994,11.744176,2.005682,06,13,2.135864,2.103235,...,21.285266,0.0,2.614226,0.01098,0.015352,0.04618,0.026902,0.123482,0.084543,0.050305
6112,44.419722,-111.397222,7.561694,8.028994,11.744176,2.005682,07,19,2.812886,2.758714,...,22.814094,0.0,2.79169,0.001052,0.005205,0.014225,0.002125,0.00559,0.00592,0.005452
6113,43.585833,-118.208611,6.947264,7.424645,10.847971,1.514007,08,19,3.19239,3.100244,...,15.825738,0.0,2.244694,0.00867,0.022007,0.053248,0.035565,-0.00486,0.00306,0.00394
6114,44.394444,-116.896111,6.780831,4.545326,8.781375,1.065601,05,17,2.872697,2.768077,...,21.865064,0.0,2.470218,0.016673,0.024785,0.044695,0.036555,0.137315,0.08446,0.05388
