In [1]:
import pickle
import pandas as pd
import pickle
import geopandas as gpd

In [2]:
# Load one pickled dataframe per environmental parameter and wrap each in a GeoDataFrame.
# Keys are the short names used in the pickle file names, values are the readable column names.
env_params = {
    "NO3": "nitrate",
    "NH4": "ammonium",
    "PAR_avg": "illumination",
    "PO4": "phosphate",
    "SALT": "salinity",
    "TEMP": "temperature",
}
dict_env_dfs = {}
for science_name, readable_name in env_params.items():
    # Flatten the stored index into ordinary columns, then give all columns readable names
    env_df = pd.read_pickle(f"nw_{science_name}_3_months_pickle.pkl").reset_index()
    env_df.columns = ["time", "TLONG", "TLAT", readable_name, "geometry"]
    # Every parameter except temperature is clamped so that no value lies below zero.
    # Per the original note, the way the climate model calculates things can produce
    # such values (presumably negative ones) even though they make no sense.
    if science_name != "TEMP":
        env_df[readable_name] = env_df[readable_name].clip(lower=0)
    dict_env_dfs[science_name] = gpd.GeoDataFrame(env_df)

In [3]:
# Show the dataframe of the first environmental parameter as a sanity check
dict_env_dfs[next(iter(dict_env_dfs))]

Unnamed: 0,time,TLONG,TLAT,nitrate,geometry
0,0005-02-01 00:00:00,320.562509,-79.220523,,POINT (320.563 -79.221)
1,0005-02-01 00:00:00,321.687509,-79.220523,,POINT (321.688 -79.221)
2,0005-02-01 00:00:00,322.812509,-79.220523,,POINT (322.813 -79.221)
3,0005-02-01 00:00:00,323.937509,-79.220523,,POINT (323.938 -79.221)
4,0005-02-01 00:00:00,325.062509,-79.220523,,POINT (325.063 -79.221)
...,...,...,...,...,...
368635,0005-04-01 00:00:00,318.056707,72.219448,,POINT (318.057 72.219)
368636,0005-04-01 00:00:00,318.486942,72.206084,,POINT (318.487 72.206)
368637,0005-04-01 00:00:00,318.918383,72.196039,,POINT (318.918 72.196)
368638,0005-04-01 00:00:00,319.350688,72.189332,,POINT (319.351 72.189)


In [4]:
# Check that all environmental parameters share the same geometry column.
# This is needed so we can use the geometry of all dfs interchangeably.
param_names = list(env_params.keys())
list_env_dfs_geometry = [dict_env_dfs[env_param]["geometry"] for env_param in param_names]
# Walk over neighbouring pairs; checking each adjacent pair covers the whole list.
# The message names the offending pair so a failure is immediately actionable.
for (left_name, left_geometry), (right_name, right_geometry) in zip(
    zip(param_names, list_env_dfs_geometry),
    zip(param_names[1:], list_env_dfs_geometry[1:]),
):
    assert left_geometry.equals(right_geometry), (
        f"Geometry of {left_name} differs from geometry of {right_name}"
    )

In [5]:
# Build one groupby object per environmental parameter, keyed by the parameter's
# short name and grouped on the (TLAT, TLONG) grid position
dict_env_dfs_grouped = {
    name: dict_env_dfs[name].groupby(["TLAT", "TLONG"])
    for name in env_params
}

In [6]:
# Build, for every (TLAT, TLONG) grid position, one GeoDataFrame indexed by time
# that holds the columns of all environmental parameters side by side.
data_dict = {}
# Iterate over all the lat_lon combos, those are the same for all environmental parameters
for lat_lon in dict_env_dfs_grouped["NO3"].groups.keys():
    list_env_param_latlon_df = []
    for env_param in env_params.keys():
        # set_index returns a new frame here; the object handed out by get_group
        # is derived from the grouped data and should not be modified in place
        env_param_latlon_df = dict_env_dfs_grouped[env_param].get_group(lat_lon).set_index("time")
        list_env_param_latlon_df.append(pd.DataFrame(env_param_latlon_df))
    # Put the per-parameter frames next to each other, aligned on the time index
    concat_latlon_dfs = pd.concat(list_env_param_latlon_df, axis=1)
    # Every frame carries TLONG, TLAT and geometry; keep only the first occurrence of each
    concat_latlon_dfs = concat_latlon_dfs.loc[:, ~concat_latlon_dfs.columns.duplicated()].copy()
    # Convert back to geodataframe before saving
    data_dict[lat_lon] = gpd.GeoDataFrame(concat_latlon_dfs)

In [7]:
# Persist the merged per-position frames as a pickle, so we don't have to run this every time
with open("data_gridded_all_parameters.pkl", mode="wb") as pickle_file:
    pickle.dump(data_dict, pickle_file, protocol=pickle.HIGHEST_PROTOCOL)

In [8]:
# Show the merged frame of the first grid position as a sanity check
data_dict[next(iter(data_dict))]

Unnamed: 0_level_0,TLONG,TLAT,nitrate,geometry,ammonium,illumination,phosphate,salinity,temperature
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0005-02-01 00:00:00,1.0625,-79.220523,,POINT (1.06250 -79.22052),,,,,
0005-03-01 00:00:00,1.0625,-79.220523,,POINT (1.06250 -79.22052),,,,,
0005-04-01 00:00:00,1.0625,-79.220523,,POINT (1.06250 -79.22052),,,,,
