In [9]:
%load_ext autoreload
%autoreload 2

In [10]:
from datetime import datetime

import matplotlib
import numpy as np
import pandas as pd
import seaborn as sns
import xarray as xr
from matplotlib import cm
from matplotlib import pyplot as plt
from Turbulence_processing.Scaling import *

## open data

In [29]:
# Root folder of the MetCrax files, relative to this notebook.
path = "../../../../Dataset/MetCrax/"

# Open the two 30-min files in one pass: `ds` is the "full" file and
# `ds_low` the "lowfreq" one (names taken from the file names).
ds, ds_low = (
    xr.open_dataset(path + file_name)
    for file_name in ("MetCrax_full_30min.nc", "MetCrax_lowfreq_30min.nc")
)

In [30]:
# BLH: keep only the `blh` variable of the ERA5-based analysis file
# (presumably the boundary-layer height — confirm against the file metadata).
BLH = xr.open_dataset(path + "Analysis/MetCrax_BLH_ERA5.nc").blh

## select daytime

In [34]:
def select_times(ds):
    """Build boolean night/day masks from the hour of ``ds.time``.

    Hours are interpreted in UTC. Day covers hours 15-23 and night covers
    hours 1-14 (both ends inclusive); hour 0 belongs to neither mask.

    Parameters
    ----------
    ds : object exposing ``time`` with a ``.dt.hour`` accessor
        For example an xarray Dataset/DataArray with a ``time`` coordinate.

    Returns
    -------
    tuple
        ``(night, day)`` boolean masks, in that order.
    """
    utc_hour = ds.time.dt.hour
    is_night = (utc_hour >= 1) & (utc_hour <= 14)
    is_day = (utc_hour >= 15) & (utc_hour <= 23)
    return is_night, is_day

In [35]:
# select daytime
# select_times returns (night, day); index 1 is the daytime mask.
# drop=True removes the masked-out time steps instead of keeping them as NaN.
ds, ds_low, BLH = (
    data.where(select_times(data)[1], drop=True) for data in (ds, ds_low, BLH)
)

## add scaled variables

In [38]:
# `Gradients_Anisotropy` is not imported by name; presumably it is provided by
# the star import from Turbulence_processing.Scaling — confirm.
# NOTE(review): presumably adds the anisotropy variables to `ds` (e.g. `yb`,
# which is used as the target further down) — confirm against the helper.
ds = Gradients_Anisotropy.Anisotropy_calculation(ds, RGB=False)

# NOTE(review): presumably adds the vertical gradients used below
# (`gradU`, `gradT`) from `ds` and `ds_low` — confirm against the helper.
# roughness_length has the same value as `z0` defined in the next cell.
ds = Gradients_Anisotropy.Gradients_calculation_splines_1ds(
    ds, ds_low, roughness_length=0.02
)

In [39]:
# add variables
# Physical constants, named once instead of being repeated as bare literals.
VON_KARMAN = 0.4  # von Karman constant
GRAVITY = 9.81  # gravitational acceleration
# The two factors in the Rayleigh-number denominator; presumably the kinematic
# viscosity and the thermal diffusivity of air — TODO confirm.
NU_AIR = 1.5e-5
KAPPA_AIR = 1.9e-5

# roughness length (same value as passed to the gradient calculation above)
z0 = 0.02


def _dz_over_eps(data, name):
    """Return the derivative of ``data[name]`` along ``heights`` divided by ``data.epsU``."""
    return data[name].differentiate(coord="heights") / data.epsU


# buoyancy term g / T * wT
tkeprod_buoy = GRAVITY / ds.meanT * ds.wT
# NOTE(review): shear production is conventionally written -uw * dU/dz; this
# product carries no minus sign — confirm the sign is intended.
tkeprod_shear = ds.uw * ds.gradU
# Obukhov length
L = -ds.ustar**3 * ds.meanT / (VON_KARMAN * GRAVITY * ds.wT)
# free convection velocity: (g / T * wT * BLH) ** (1 / 3)
wfc = (tkeprod_buoy * BLH) ** (1 / 3)

# The keyword order below fixes the order of the new data variables.
ds = ds.assign(
    # height / BLH
    z_zi=ds.heights / BLH,
    # zeta stability parameter, defined here as -heights / L
    zeta=-1 / L * ds.heights,
    # Salesky roll to cell
    zi_L=BLH / L,
    # zeta with blh (Heisel & Chamecki 2023): heights / sqrt(-L * BLH)
    zeta_blh=1 / np.sqrt(-L * BLH) * ds.heights,
    # z / z0
    z_z0=ds.heights / z0,
    # integral time / memory, with three different velocity scales
    tw_te=ds.intlenW / ds.meanU / ds.tke * ds.epsU,
    tw_te_u_wfc=ds.intlenW / wfc / ds.tke * ds.epsU,
    tw_te_u_ust=ds.intlenW / ds.ustar / ds.tke * ds.epsU,
    # rapid distortion
    rapid_dist=np.abs(ds.gradU) * ds.tke / ds.epsU,
    rapid_dist_neut=ds.ustar / VON_KARMAN / ds.heights * ds.tke / ds.epsU,
    # Rayleigh number, using the meanT difference between the first and last height
    Ra=GRAVITY
    / ds.meanT
    * (ds.meanT.isel(heights=0) - ds.meanT.isel(heights=-1))
    * BLH**3
    / (NU_AIR * KAPPA_AIR),
    # free convection velocity
    U_wfc=ds.meanU / wfc,
    # dynamical stuff
    U_gradU_z=ds.meanU / ds.gradU / ds.heights,
    U_gradU_L=ds.meanU / ds.gradU / L,
    U_ust=ds.meanU / ds.ustar,
    # tke and stress budget
    # transport
    dz_wtke_eps=_dz_over_eps(ds, "wtke"),
    # production
    tkeprod_shear_eps=tkeprod_shear / ds.epsU,
    tkeprod_buoy_eps=tkeprod_buoy / ds.epsU,
    # lengthscales from Ghannam et al 2018
    ust3_prod_z=ds.ustar**3 / tkeprod_shear / ds.heights,
    ust3_eps_z=ds.ustar**3 / ds.epsU / ds.heights,
    # turbulent diffusion
    dz_uuw_eps=_dz_over_eps(ds, "uuw"),
    dz_uvw_eps=_dz_over_eps(ds, "uvw"),
    dz_uww_eps=_dz_over_eps(ds, "uww"),
    dz_vvw_eps=_dz_over_eps(ds, "vvw"),
    dz_vww_eps=_dz_over_eps(ds, "vww"),
    dz_www_eps=_dz_over_eps(ds, "www"),
    # moments
    kurt_u=ds.uuuu / ds.uu**2,
    kurt_v=ds.vvvv / ds.vv**2,
    kurt_w=ds.wwww / ds.ww**2,
    skew_u=ds.uuu / ds.uu**1.5,
    skew_v=ds.vvv / ds.vv**1.5,
    skew_w=ds.www / ds.ww**1.5,
    uv_tke=ds.uv / ds.tke,
    vw_tke=ds.vw / ds.tke,
    uw_tke=ds.uw / ds.tke,
    uT_wT=ds.uT / ds.wT,
    vT_wT=ds.vT / ds.wT,
    vw_uw=ds.vw / ds.uw,
    uv_uw=ds.uv / ds.uw,
    skew_T=ds.TTT / ds.TT**1.5,
    kurt_T=ds.TTTT / ds.TT**2,
)

In [40]:
# Remove the raw statistics so that only the target and the scaled
# (dimensionless) variables remain. `Dataset.drop` is deprecated in xarray;
# `drop_vars` is its replacement for dropping variables by name.
ds = ds.drop_vars(
    [
        # Stat* and cutoff variables
        "StatU", "StatV", "StatW", "StatT",
        "StatUW", "StatVW", "StatUT", "StatVT", "StatWT",
        "cutoff", "cutoffW",
        # means and gradients
        "gradU", "gradT", "meanU", "meanT",
        # second-order moments
        "uu", "vv", "ww", "uv", "uw", "vw", "TT", "uT", "vT", "wT",
        "tke", "xb", "ustar", "LLJ",
        # third- and fourth-order moments of single variables
        "uuu", "vvv", "www", "TTT",
        "uuuu", "vvvv", "wwww", "TTTT",
        # eps* variables
        "epsU", "epsV", "epsW", "epsT",
        "epsUsf", "epsVsf", "epsWsf",
        # slope* variables
        "slopeHU", "slopeHV", "slopeHW", "slopeHT",
        "slopeLU", "slopeLV", "slopeLW", "slopeLT",
        # mixed third-order moments
        "uuv", "uuw", "uvw", "uvv", "uww", "vvw", "vww", "vvu",
        "utke", "vtke", "wtke",
        "wwT", "uwT", "vwT", "uTT", "vTT", "wTT",
        # direction and intlen* variables
        "dir", "intlenW", "intlenU", "intlenV",
        "slopeHUsf", "slopeHVsf", "slopeHWsf",
    ]
)

In [41]:
# Show the names of the data variables that remain after the drop above.
list(ds.data_vars)

['yb',
 'Ri',
 'Rif',
 'z_zi',
 'zeta',
 'zi_L',
 'zeta_blh',
 'z_z0',
 'tw_te',
 'tw_te_u_wfc',
 'tw_te_u_ust',
 'rapid_dist',
 'rapid_dist_neut',
 'Ra',
 'U_wfc',
 'U_gradU_z',
 'U_gradU_L',
 'U_ust',
 'dz_wtke_eps',
 'tkeprod_shear_eps',
 'tkeprod_buoy_eps',
 'ust3_prod_z',
 'ust3_eps_z',
 'dz_uuw_eps',
 'dz_uvw_eps',
 'dz_uww_eps',
 'dz_vvw_eps',
 'dz_vww_eps',
 'dz_www_eps',
 'kurt_u',
 'kurt_v',
 'kurt_w',
 'skew_u',
 'skew_v',
 'skew_w',
 'uv_tke',
 'vw_tke',
 'uw_tke',
 'uT_wT',
 'vT_wT',
 'vw_uw',
 'uv_uw',
 'skew_T',
 'kurt_T']

### merge and save

In [43]:
# NOTE(review): merging a single-element list looks like a leftover from a
# version that combined several datasets — confirm whether it is still needed.
ds = xr.merge([ds])

In [44]:
# Write the feature dataset to a netCDF file in the current working directory.
ds.to_netcdf("Metcrax_forest_data.nc")

## pass to pandas and select groups
Days selected for the test set: 8 May (B1, middle clouds), 23 May (S3, clear sky) and 11 June (W3, clear sky).

In [46]:
# Reload the dataset from the file written above, so the steps below can be
# run from the saved file.
ds = xr.open_dataset("Metcrax_forest_data.nc")

In [47]:
# stack ds
# 1) collapse (time, heights) into a single "index" dimension
ds = ds.stack(index=("time", "heights"))
# 2) reset the stacked index
ds = ds.reset_index("index")
# 3) discard every sample in which any variable is NaN
ds = ds.dropna(dim="index", how="any")

In [49]:
# hour that starts the day
day_start = 14

# Define group labels: whole days elapsed since `day_start` o'clock on the
# date of the first sample.
dates = pd.to_datetime(ds.time)
first_day_start = datetime(dates[0].year, dates[0].month, dates[0].day, day_start)
groups = (dates - pd.to_datetime(first_day_start)).days

# The labels stored in the dataset are 1-based (groups + 1) ...
ds = ds.assign(groups=(["index"], groups + 1))

# test group
# ... whereas these are matched against the 0-based `groups` array.
test_groups = [7, 22]

# split
is_test = np.isin(groups, test_groups)
train_bool = ~is_test
groups_train = groups[train_bool]
# where() masks the unselected samples with NaN and dropna() then removes them
ds_train = ds.where(train_bool).dropna(dim="index")
ds_test = ds.where(is_test).dropna(dim="index")

In [50]:
# Pandas
# Feature columns: every data variable except the target "yb".
# NOTE(review): `vars` shadows the Python built-in of the same name; the name
# is kept here because other cells may rely on it.
vars = list(ds.data_vars)
vars.remove("yb")

# Convert each split to a DataFrame once and take features and target from it
# (previously every split was converted twice).
df_train = ds_train.to_dataframe()
df_test = ds_test.to_dataframe()

X_train, y_train = df_train[vars], df_train["yb"]
X_test, y_test = df_test[vars], df_test["yb"]

# drop groups where not necessary
# (X_train keeps its "groups" column; it is split off after the CSV round trip)
X_test = X_test.drop(columns="groups")

In [51]:
# save
# Write each split to its own CSV file in the current working directory.
for frame, csv_name in (
    (X_train, "xtrain.csv"),
    (y_train, "ytrain.csv"),
    (X_test, "xtest.csv"),
    (y_test, "ytest.csv"),
):
    frame.to_csv(csv_name)

In [13]:
# Read the four splits back; the first CSV column is used as the index.
X_train, y_train, X_test, y_test = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in ("./xtrain.csv", "./ytrain.csv", "./xtest.csv", "./ytest.csv")
)
# Split the group labels off the training features.
groups = X_train.pop("groups")

In [15]:
# Display the training feature table.
X_train

Unnamed: 0_level_0,Ri,Rif,z_zi,zeta,zi_L,zeta_blh,z_z0,tw_te,tw_te_u_wfc,tw_te_u_ust,...,uv_tke,vw_tke,uw_tke,uT_wT,vT_wT,vw_uw,uv_uw,skew_T,kurt_T,blh
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,0.124101,-0.076018,0.032868,0.118364,-3.601201,0.062373,150.0,0.024420,0.090432,0.188127,...,0.098967,-0.026994,-0.228636,-2.667458,-0.508222,0.118065,-0.432856,0.571268,2.882793,91.274746
1,-28.531426,1.879038,0.109559,0.253296,-2.311955,0.166586,500.0,0.050137,0.236283,0.424038,...,0.226621,-0.101726,-0.283058,-1.827216,-0.742294,0.359382,-0.800615,0.644892,3.169677,91.274746
2,-62.166568,3.326271,0.164339,0.330070,-2.008471,0.232902,750.0,0.098559,0.468615,0.802450,...,0.197414,-0.076073,-0.319648,-1.571582,-0.530802,0.237989,-0.617599,0.523106,2.930775,91.274746
3,-12.347433,-2.526553,0.219119,0.477391,-2.178687,0.323427,1000.0,0.050461,0.256258,0.450874,...,0.210024,-0.056744,-0.267865,-1.560577,-0.647825,0.211840,-0.784067,0.758358,4.090787,91.274746
4,0.764718,-0.764432,0.273898,0.513835,-1.876005,0.375151,1250.0,0.040755,0.251441,0.420883,...,0.247791,-0.054841,-0.218375,-1.771368,-0.904664,0.251132,-1.134700,0.716781,4.470969,91.274746
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4762,-2.335953,-0.903817,0.014673,0.398689,-27.170863,0.076486,1500.0,0.025091,0.090970,0.371174,...,0.169890,0.053370,-0.043502,3.678302,2.610073,-1.226854,-3.905381,0.046982,2.263290,2044.517817
4763,-2.907087,-0.883044,0.017119,0.390901,-22.834407,0.081804,1750.0,0.025070,0.097090,0.373839,...,0.175551,0.051683,-0.047120,4.242880,2.762202,-1.096838,-3.725646,-0.018495,2.077199,2044.517817
4764,-3.481836,-1.415687,0.019565,0.545796,-27.897262,0.103336,2000.0,0.033199,0.128176,0.527603,...,0.210707,0.049964,-0.034521,4.135857,2.749553,-1.447330,-6.103694,0.020112,2.207485,2044.517817
4765,-4.051065,-0.996068,0.022010,0.497564,-22.606199,0.104649,2250.0,0.047300,0.195410,0.749899,...,0.250022,0.037538,-0.044449,4.807947,3.319800,-0.844507,-5.624879,0.135008,2.665295,2044.517817
