In [432]:
# import os
import SunStatistics as SS
from scipy.stats import pearsonr
%matplotlib notebook
# import scipy.stats as stats
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import timedelta,datetime
import pytz

from sklearn.model_selection import KFold
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn import metrics

G_Drive = 'G:\\My Drive\\'
G_Drive = 'C:\\Users\\User\\Google Drive\\'
Output_Path = G_Drive+'FishIsland_Outputs/'

# Model Functions

In [433]:
def Model_Test_RF(X,y,AllData,K=10,Task = 'Predict',min_samples_split=None,Verbose=True,random_state=1):
    Data = AllData[X+y].dropna()
    kf = KFold(n_splits=K,shuffle=True, random_state = random_state)
    R2 = []
    MSE = []
    Imp = []    
    Pred = []
    rnd=1
    for train_index, test_index in kf.split(Data):
        df_test = Data.iloc[test_index]
        df_train = Data.iloc[train_index]
        X_train = np.array(df_train[X])#.reshape(-1, 1)     
        y_train = np.array(df_train[y]).ravel()#.reshape(-1, 1)
        X_test = np.array(df_test[X])#.reshape(-1, 1)  
        y_test = np.array(df_test[y]).ravel()#.reshape(-1, 1)
        Mod = RandomForestRegressor(min_samples_split=min_samples_split, random_state=random_state+rnd)#,bootstrap=False)
        rnd+=1
        Mod.fit(X_train, y_train)
        Test = Mod.predict(X_test)
        Pred.append(Mod.predict(AllData[X]))
        R2.append(metrics.r2_score(y_test,Test))
        MSE.append(metrics.mean_squared_error(y_test,Test))
        Imp.append(Mod.feature_importances_)
    Imp = np.mean(np.array(Imp),axis=0)
    Pred = np.mean(np.array(Pred),axis=0)
    if Verbose == True:
        print()
        print(y[0])
        print('RMSE', np.mean(np.array(MSE)**.5))
        print('std RMSE', np.std(np.array(MSE)**.5))
        print('r2', np.median(np.array(R2)))
        print('Imp',np.round(Imp,3)*100)
    if Task == 'Predict':
        return(Pred)
    if Task == 'Predict_Return':
        return(Pred,Imp,MSE,R2)
    elif Task == 'Optimize':
        return(MSE)

def Model_Test_OLS(X,y,AllData,K=10,Task = 'Predict',fit_intercept=None,Scale=False,Verbose=True,random_state=1):
    if Scale==True:
        Mod = make_pipeline(StandardScaler(), LinearRegression(fit_intercept=fit_intercept))
    else:
        Mod = make_pipeline(LinearRegression(fit_intercept=fit_intercept))
    Data = AllData[X+y].dropna()
    kf = KFold(n_splits=K,shuffle=True, random_state = random_state)
    R2 = []
    MSE = []
    Int = []
    Coef = []
    Pred = []
    for train_index, test_index in kf.split(Data):
        df_test = Data.iloc[test_index]
        df_train = Data.iloc[train_index]
        X_train = np.array(df_train[X])#.reshape(-1, 1)     
        y_train = np.array(df_train[y])#.reshape(-1, 1)
        X_test = np.array(df_test[X])#.reshape(-1, 1)  
        y_test = np.array(df_test[y])#.reshape(-1, 1)
        Mod.fit(X_train, y_train)
        Test = Mod.predict(X_test)
        if y == ['VPD'] or y == ['PPFD_Avg']:
            Test[Test<0]=0
        
        R2.append(metrics.r2_score(y_test,Test))
        MSE.append(metrics.mean_squared_error(y_test,Test))
        Pred.append(Mod.predict(AllData[X]))
        Int.append(Mod[-1].intercept_)
        Coef.append(Mod[-1].coef_)
    Coef = np.mean(np.array(Coef),axis=0).mean(axis=0)
    Pred = np.mean(np.array(Pred),axis=0)#.mean(axis=0)
    Int = np.mean(np.array(Int))
    if Verbose == True:
        print()
        print(y[0])
        print('RMSE', np.mean(np.array(MSE)**.5))
        print('std RMSE', np.std(np.array(MSE)**.5))
        print('r2', np.median(np.array(R2)))
        print('coef', Coef)
        print('int',Int)
    if Task == 'Predict':
        return(Pred)#.values)
    if Task == 'Predict_Return':
        return(Pred,Coef,Int,MSE,R2)
    elif Task == 'Optimize':
        return(MSE)
    
def Model_Test(X,y,AllData,K=10,Task = 'Predict',Test = 'OLS',min_samples_split=None,fit_intercept=True,Scale=False,Verbose=True,random_state=1):
    if Test == 'OLS':
        return(Model_Test_OLS(X,y,AllData,K=10,Task = Task,fit_intercept=fit_intercept,Scale=Scale,Verbose=Verbose,random_state=random_state))
    elif Test == 'RF':
        return(Model_Test_RF(X,y,AllData,K=10,Task = Task,min_samples_split=min_samples_split,Verbose=Verbose,random_state=random_state))
    

def calculate_pvalues(df):
    df = df.dropna()._get_numeric_data()
    dfcols = pd.DataFrame(columns=df.columns)
    pvalues = dfcols.transpose().join(dfcols, how='outer')
    for r in df.columns:
        for c in df.columns:
            pvalues[r][c] = round(pearsonr(df[r], df[c])[1], 4)
    return pvalues


In [434]:

def get_uv(Dir,U_bar,dec=0):
    M_dir = 270-(Dir+dec)
    U = np.cos(M_dir/180*np.pi)*U_bar
    V = np.sin(M_dir/180*np.pi)*U_bar
    return(U,V)
def get_wDir(u,v,m_dec=0):
    W_dir = np.arctan2(v,u)*180/np.pi
    W_dir = 270 - W_dir    
    W_dir[W_dir>360]-=360
    return(W_dir)

# Data

In [435]:
Taglu_Data = Output_Path+'Taglu_Data/Hourly.csv'
AWS = pd.read_csv(Taglu_Data,
parse_dates={'datetime':['Year','Month','Day','Time']}, 
).set_index('datetime')

# Gap Fill
C=['Air Temp','Net Radiation','Net SW Radiation','Net LW Radiation ','Wind Speed']
s = AWS.index.to_series()
print('Missing')
for c in C:
    print(c,AWS.loc[((AWS.index.month>=4)&(AWS.index.month<=10)),c].isnull().sum())
    miss = AWS[c].isna()
    #create consecutive groups
    g = miss.ne(miss.shift()).cumsum()
    #aggregate minimal 
    m1 = s.groupby(g).min()
    #get minimal of next groups, last value is replaced last value of index
    m2 = m1.shift(-1).fillna(AWS.index[-1])
    #get difference, convert to minutes
    out = m2.sub(m1).dt.total_seconds().div(3600).astype(int)
    AWS['Temp_Gap'] = g.map(out)
    AWS['Temp']=AWS[c].interpolate()
    AWS.loc[AWS['Temp_Gap']<=4,c]=AWS.loc[AWS['Temp_Gap']<=4,'Temp']
    AWS['Temp1']=AWS[c].shift(24)
    AWS['Temp2']=AWS[c].shift(-24)
    AWS.loc[AWS['Temp_Gap']>=4,c].fillna(AWS.loc[AWS['Temp_Gap']>=4,['Temp1','Temp2']].mean(axis=1))
AWS = AWS.drop(columns=['Temp_Gap','Temp','Temp1','Temp2'])
AWS_Rename = {'Air Temp':'Air_Temp',
              'Rainfall':'Rainfall',
              'Wind Speed':'Wind_Speed',
              'Wind direction':'Wind_Direction',
              'Net SW Radiation':'Net_SW',
              'Net LW Radiation ':'Net_LW',
              'Net Radiation':'Net_RN'}
AWS = AWS.rename(columns=AWS_Rename)
AWS_cols = list(AWS_Rename.values())

LAT = 69.371182
LON = -134.880935
TZ = -6

Temp = AWS[['Net_SW']].resample('5T').asfreq()
D = Temp.index.floor('D').to_julian_date()
T = Temp.index.hour/24+Temp.index.minute/60/24
A = np.ones(D.shape[0])

Zenith,Angle,Angle_Corr,Azimuth,Sunrise,Sunset=SS.SunStats(LAT*A,LON*A,D.values,T.values,TZ*A)
Temp['Sun_Angle'] = Angle_Corr
Temp = Temp.resample('30T').mean()

Temp['Daytime_Mask']=0.0
Temp.loc[Temp['Sun_Angle']>-0.5,'Daytime_Mask']=1.0

print(Temp.groupby('Daytime_Mask').count()['Sun_Angle'])
Temp = Temp.resample('H').mean()
print(Temp.groupby('Daytime_Mask').count()['Sun_Angle'])

try:
    AWS = AWS.drop(columns=['Sun_Angle','Daytime_Mask'])
except:
    pass
AWS = AWS.join(Temp[['Sun_Angle','Daytime_Mask']])

AWS['Daytime']=0
AWS.loc[AWS['Sun_Angle']>-0.5,'Daytime']=1


Mt = pytz.timezone('Canada/Mountain')
UTC = AWS.index+timedelta(hours=6)
AWS = AWS.set_index(UTC)
AWS.index = AWS.index.tz_localize(pytz.utc).tz_convert(Mt)

U,V = get_uv(AWS['Wind_Direction'],AWS['Wind_Speed'])
AWS['U_bar_AWS'] = U
AWS['V_bar_AWS'] = V
## 
print('Bad SW Fix',AWS.loc[AWS['Net_SW']<-5,'Net_SW'].count())
AWS.loc[AWS['Net_SW']<-5,'Net_SW']=-5
AWS.loc[AWS['Net_SW']<-5,'Net_RN']=AWS.loc[AWS['Net_SW']<-5,['Net_SW','Net_LW']].sum(axis=1)

AWS.groupby('Daytime').count()['Air_Temp']

Missing
Air Temp 40
Net Radiation 40
Net SW Radiation 40
Net LW Radiation  40
Wind Speed 40


  W2 = np.degrees(np.arccos(np.cos(np.radians(90.833))/(np.cos(np.radians(LAT))*np.cos(np.radians(T2)))-np.tan(np.radians(LAT))*np.tan(np.radians(T2)))) #HA Sunrise (deg)


Daytime_Mask
0.0     99673
1.0    109924
Name: Sun_Angle, dtype: int64
Daytime_Mask
0.0    48317
0.5     3039
1.0    53443
Name: Sun_Angle, dtype: int64
Bad SW Fix 913


Daytime
0    49891
1    54870
Name: Air_Temp, dtype: int64

# EC Data

In [436]:
EC_Data = pd.read_csv(Output_Path+'FI_Footprints_2022-02-23_2000mx2m.csv',delimiter = ',',header = 0,na_values = -9999,
                   index_col=['datetime'],
                  ).drop('Unnamed: 0',axis=1)
EC_Data = EC_Data.set_index(pd.to_datetime(EC_Data.index, utc=True)).tz_convert(Mt)
EC_Data = EC_Data.rename(columns={'Daytime':'Daytime_Raw',
                                 'Sun_Angle':'Sun_Angle_Raw'})

U,V=get_uv(EC_Data['wind_dir'],EC_Data['wind_speed'],dec=-22)
EC_Data['U_bar_EC']=U
EC_Data['V_bar_EC']=V

FSO = pd.read_csv(Output_Path+'Skeeter_et_al_2022_Day_Fix.csv',index_col=['datetime']).drop('Unnamed: 0',axis=1)#.drop('datetime.1',axis=1)

FSO = FSO.set_index(pd.to_datetime(FSO.index, utc=True)).tz_convert(Mt)
FSO = FSO.rename(columns={'NEE_est':'NEE_FSO',
                            'NME_est':'NME_FSO'})
EC_Data = EC_Data.join(FSO[['NEE_FSO',
'NME_FSO']])

kwargs = {'closed':'right','label':'left'}
EC_Data_r=EC_Data.resample('h',**kwargs).mean()
EC_Data_r['Rain_mm_Tot']=EC_Data.resample('h',**kwargs).sum()['Rain_mm_Tot']

EC_Data_r = EC_Data_r.drop(['Wind_Direction', 'Wind_Speed'],axis=1)

EC_Data_r_mask = EC_Data.resample('h',**kwargs).count()[['fco2','fch4']]

EC_Data_r_mask.loc[EC_Data_r_mask['fco2']<2,'fco2']=np.nan
EC_Data_r_mask.loc[EC_Data_r_mask['fch4']<2,'fch4']=np.nan

EC_Data_r_mask/=2
EC_Data_r['fco2']*=EC_Data_r_mask['fco2']
EC_Data_r['fch4']*=EC_Data_r_mask['fch4']

print(EC_Data_r[['fco2','co2_flux','fch4','ch4_flux']].count())

Data_M1 = EC_Data_r[
                    ['Temp_15_1','Temp_15_2','Temp_5_1','Temp_5_2','Table_1','VWC_1','Tdew','VPD','H',
                    'AirTC_Avg','PPFD_Avg','NR_Wm2_Avg','Rain_mm_Tot','Active_Layer_1',
                    'wind_speed','u*','U_bar_EC','V_bar_EC','Polygon','Rim',
                    'fch4','fco2','NEE_FSO','NME_FSO','ch4_flux','co2_flux']
                    ].join(AWS,how='outer')
Data_M1.head()

print(EC_Data_r_mask)

# print((EC_Data[['Temp_15_1','Temp_15_2','Temp_5_1','Temp_5_2','Table_1','VWC_1','Tdew','VPD','H',
#                     'AirTC_Avg','PPFD_Avg','NR_Wm2_Avg','Rain_mm_Tot','Active_Layer_1',
#                     'wind_speed','u*','U_bar_EC','V_bar_EC','Polygon','Rim',
#                     'fch4','fco2','NEE_FSO','NME_FSO','ch4_flux','co2_flux']].count()/EC_Data.shape[0]).round(2))


print((EC_Data_r[['fco2','co2_flux','fch4','ch4_flux']].count()/EC_Data_r.shape[0]).round(2))

print(Data_M1['fco2'].groupby(Data_M1['Daytime_Mask']).count())

# print(Data_M1['Daytime','Daytime_Mask'])

fco2        1164
co2_flux    1337
fch4         653
ch4_flux     788
dtype: int64
                           fco2  fch4
datetime                             
2017-06-22 23:00:00-06:00   NaN   NaN
2017-06-23 00:00:00-06:00   1.0   1.0
2017-06-23 01:00:00-06:00   1.0   1.0
2017-06-23 02:00:00-06:00   1.0   NaN
2017-06-23 03:00:00-06:00   NaN   NaN
...                         ...   ...
2017-09-13 08:00:00-06:00   1.0   1.0
2017-09-13 09:00:00-06:00   1.0   1.0
2017-09-13 10:00:00-06:00   1.0   1.0
2017-09-13 11:00:00-06:00   1.0   1.0
2017-09-13 12:00:00-06:00   NaN   NaN

[1982 rows x 2 columns]
fco2        0.59
co2_flux    0.67
fch4        0.33
ch4_flux    0.40
dtype: float64
Daytime_Mask
0.0     103
0.5      17
1.0    1044
Name: fco2, dtype: int64


# Merge

- Eddypro outputs have left labels (half hour timestamp corresponds to end of interval)
    * https://www.licor.com/documents/1ium2zmwm6hl36yz9bu4
- Need to close the right interval to match with AWS

In [437]:
for v1,v2 in zip(['Rain_mm_Tot','AirTC_Avg','wind_speed','NR_Wm2_Avg'],
                 ['Rainfall','Air_Temp','Wind_Speed','Net_RN']):
    DNA=Data_M1.loc[Data_M1['Active_Layer_1'].isnull()==False,[v1,v2]].dropna()
    print(v1,' ',v2)
    print('r2: ',(DNA[[v1,v2]].corr()**2)[v1].round(3)[1])
    print('RMSE: ',(metrics.mean_squared_error(DNA[v1],DNA[v2])**.5).round(3))
    print('MBE: ',np.round((DNA[v1]-DNA[v2]).mean(),2))
    print()

Rain_mm_Tot   Rainfall
r2:  0.988
RMSE:  0.05
MBE:  -0.01

AirTC_Avg   Air_Temp
r2:  0.988
RMSE:  0.547
MBE:  -0.09

wind_speed   Wind_Speed
r2:  0.945
RMSE:  0.43
MBE:  0.0

NR_Wm2_Avg   Net_RN
r2:  0.978
RMSE:  43.069
MBE:  9.45



# ECMWF

- ECMWF data does not account for DST
    - Must offset by one hour so summer times match

In [438]:
ECMWF = pd.read_csv(G_Drive+'earthengine//Climate_Weighted_Mean_long.csv',
                    parse_dates=['system:index'],na_values=-9999,index_col=['system:index'])

UTC = ECMWF.index+timedelta(hours=-1)
ECMWF = ECMWF.set_index(UTC)
ECMWF.index = ECMWF.index.tz_localize(pytz.utc).tz_convert(Mt)
ECMWF = ECMWF.drop(columns=['.geo'])

ECMWF['Wind_10m']  =(ECMWF['u_component_of_wind_10m']**2+ECMWF['v_component_of_wind_10m']**2)**.5
ECMWF['HH']=ECMWF['surface_sensible_heat_flux_hourly']/3600*-1
for v in ['temperature_2m' ,'soil_temperature_level_1','soil_temperature_level_2','soil_temperature_level_3']:
    ECMWF[v]-=273.15
ECMWF['total_precipitation_hourly']=ECMWF['total_precipitation_hourly']*1000
ECMWF[['SW','LW']]=ECMWF[['surface_net_solar_radiation_hourly',
    'surface_net_thermal_radiation_hourly']]/3600

ECMWF['RN']=ECMWF[['SW','LW']].sum(axis=1)

Data_M2 = Data_M1.join(ECMWF.loc[ECMWF.index.year>=2008],how = 'outer')

Data_M2['W_Dir_ERA5'] = get_wDir(Data_M2['u_component_of_wind_10m'].values,Data_M2['v_component_of_wind_10m'].values)
Data_M2['W_Dir_EC'] = get_wDir(Data_M2['U_bar_EC'].values,Data_M2['V_bar_EC'].values)

Data_M2['Wind_Direction']=Data_M2['Wind_Direction'].fillna(Data_M2['W_Dir_ERA5'])

In [439]:
InSitu = ['AirTC_Avg','Air_Temp','Tdew',]
#           'wind_speed','Wind_Speed','H','NR_Wm2_Avg','Net_RN','Net_SW','Net_LW']
ERA5 = ['temperature_2m','temperature_2m','dewpoint_temperature_2m',
        'Wind_10m','Wind_10m','HH','RN','RN','SW','LW']

for v1,v2 in zip(InSitu,ERA5):#,'Wind_Speed','Net_RN']):
    DNA=Data_M2.loc[((Data_M2.index.month>=5)&(Data_M2.index.month<=10)),[v1,v2]].dropna()
    print(v1,v2)
#     if v1 == 'H':
#         DNA[v2]/=-3600
    print('r2: ',(DNA[[v1,v2]].corr()**2)[v1].round(2)[1])
    print('RMSE: ',(metrics.mean_squared_error(DNA[v1],DNA[v2])**.5).round(3))
    print('MBE: ',np.round((DNA[v1]-DNA[v2]).mean(),2))
    print()

AirTC_Avg temperature_2m
r2:  0.91
RMSE:  1.52
MBE:  0.53

Air_Temp temperature_2m
r2:  0.93
RMSE:  2.068
MBE:  -0.14

Tdew dewpoint_temperature_2m
r2:  0.85
RMSE:  1.682
MBE:  1.06



In [440]:
InSitu = ['Temp_5_1','Temp_5_2','Temp_5_1','Temp_5_2',
#           'Temp_15_1','Temp_15_2',
          'Temp_15_1','Temp_15_2',
#           'VWC_1','VWC_1',
          'Table_1','Table_1']
ERA5 = ['soil_temperature_level_1','soil_temperature_level_1','soil_temperature_level_2','soil_temperature_level_2',
#         'soil_temperature_level_2','soil_temperature_level_2',
        'soil_temperature_level_3','soil_temperature_level_3',
#         'volumetric_soil_water_layer_1','volumetric_soil_water_layer_2',
        'volumetric_soil_water_layer_1','volumetric_soil_water_layer_2']

for v1,v2 in zip(InSitu,ERA5):#,'Wind_Speed','Net_RN']):
    DNA=Data_M2.loc[((Data_M2.index.month>=5)&(Data_M2.index.month<=10)),[v1,v2]].dropna()
    print(v1,v2)
#     if v1 == 'H':
#         DNA[v2]/=-3600
    print('r2: ',(DNA[[v1,v2]].corr()**2)[v1].round(2)[1])
    print('RMSE: ',(metrics.mean_squared_error(DNA[v1],DNA[v2])**.5).round(3))
    print('MBE: ',np.round((DNA[v1]-DNA[v2]).mean(),3))
    print()

Temp_5_1 soil_temperature_level_1
r2:  0.43
RMSE:  7.169
MBE:  -5.912

Temp_5_2 soil_temperature_level_1
r2:  0.75
RMSE:  4.625
MBE:  -3.645

Temp_5_1 soil_temperature_level_2
r2:  0.67
RMSE:  4.928
MBE:  -4.58

Temp_5_2 soil_temperature_level_2
r2:  0.57
RMSE:  3.254
MBE:  -2.324

Temp_15_1 soil_temperature_level_3
r2:  0.32
RMSE:  4.325
MBE:  -4.059

Temp_15_2 soil_temperature_level_3
r2:  0.36
RMSE:  4.175
MBE:  -3.92

Table_1 volumetric_soil_water_layer_1
r2:  0.81
RMSE:  0.376
MBE:  -0.37

Table_1 volumetric_soil_water_layer_2
r2:  0.85
RMSE:  0.41
MBE:  -0.41



# Satelite & Stream

In [441]:
NDWI = pd.read_csv(G_Drive+'earthengine/NDWI_NBAR_LCP.csv',index_col=['date'],
                   parse_dates=['date'],na_values=-9999)
NDWI=NDWI.drop(['.geo','system:index'],axis=1)

NDVI = pd.read_csv(G_Drive+'earthengine/NDVI_NBAR_LCP.csv',index_col=['date'],
                   parse_dates=['date'],na_values=-9999)
NDVI=NDVI.drop(['.geo','system:index'],axis=1)

NDSI = pd.read_csv(G_Drive+'earthengine/NDSI_NBAR_LCP.csv',index_col=['date'],
                   parse_dates=['date'],na_values=-9999)
NDSI=NDSI.drop(['.geo','system:index'],axis=1)

NBAR = NDVI.join(NDWI).join(NDSI)

print(NBAR.loc[NBAR.index.month==11].count())

print('Completeness')
Temp = NBAR.loc[((NBAR.index.year>=2008)&(NBAR.index.year<=2020)&
                    (NBAR.index.month>=4)&(NBAR.index.month<=11))].copy()
Temp['Amt'] = 1

print(((Temp.groupby(Temp.index.month).count().T[:3])/Temp.groupby(Temp.index.month).count()['Amt'].values).round(2))
print((Temp.count()/Temp.shape[0]).round(3))

GPP = pd.read_csv(G_Drive+'earthengine/GPP_FI_LCP.csv',parse_dates=['date'],na_values=-9999,index_col=['date']).dropna()
GPP.index = GPP.index.tz_localize(pytz.utc).tz_convert(Mt)
GPP=GPP.drop(['.geo','system:index'],axis=1)

GPP_fill = GPP.resample('D').mean()#.asfreq()
GPP_fill.rename(columns={'GPP_FI_LCP':'GPP_est'})
GPP_fill = GPP_fill.rolling(8*4,center=True,min_periods=4,win_type='gaussian').mean(std=2)/8
GPP_fill.describe()


NBAR.index = NBAR.index.tz_localize(pytz.utc).tz_convert(Mt)
# NBAR


NBAR = NBAR.join(GPP_fill)
NBAR

NDVI_FI_LCP    0
NDWI_FI_LCP    0
NDSI_FI_LCP    0
dtype: int64
Completeness
date          4    5    6    7    8     9     10   11
NDVI_FI_LCP  1.0  1.0  1.0  1.0  1.0  0.96  0.75  0.0
NDWI_FI_LCP  1.0  1.0  1.0  1.0  1.0  0.96  0.75  0.0
NDSI_FI_LCP  1.0  1.0  1.0  1.0  1.0  0.96  0.75  0.0
NDVI_FI_LCP    0.849
NDWI_FI_LCP    0.849
NDSI_FI_LCP    0.848
Amt            1.000
dtype: float64


Unnamed: 0_level_0,NDVI_FI_LCP,NDWI_FI_LCP,NDSI_FI_LCP,GPP_FI_LCP
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2008-03-31 18:00:00-06:00,-0.036834,0.048231,0.824971,
2008-04-01 18:00:00-06:00,-0.037209,0.047948,0.841071,
2008-04-02 18:00:00-06:00,-0.037368,0.048138,0.841380,
2008-04-03 18:00:00-06:00,-0.037529,0.048336,0.841681,
2008-04-04 18:00:00-06:00,-0.037752,0.048517,0.842168,
...,...,...,...,...
2020-10-25 18:00:00-06:00,0.008592,0.001801,0.817429,
2020-10-26 18:00:00-06:00,,,,
2020-10-27 18:00:00-06:00,,,,
2020-10-28 18:00:00-06:00,,,,


In [442]:
Gauge = pd.read_csv(Output_Path+'Gauge_Data.csv',
                   parse_dates=['Date'],na_values=['-9999'])
Gauge = Gauge.set_index(pd.DatetimeIndex(Gauge['Date']))
Gauge=Gauge.drop('Date',axis=1)
Gauge.loc[Gauge['EAST CHANNEL']>15,'EAST CHANNEL']=np.nan
Gauge.loc[((Gauge.index.year==2015)&(Gauge.index.dayofyear>=116)&
           (Gauge.index.dayofyear<=170)),'BIG LAKE']=np.nan
Gauge.index = Gauge.index.tz_localize(pytz.utc).tz_convert(Mt)
Data = Data_M2.join(NBAR.join(Gauge))
Data['DOY']=Data.index.dayofyear
Data['Year']=Data.index.year
Data['Month']=Data.index.month
Data['Date'] = Data.index.date
Data.head()
# Data[['NDSI_FI_LCP','NDVI_FI_LCP']]=Data[['NDSI_FI_LCP','NDVI_FI_LCP']].ffill()

Unnamed: 0,Temp_15_1,Temp_15_2,Temp_5_1,Temp_5_2,Table_1,VWC_1,Tdew,VPD,H,AirTC_Avg,...,NDSI_FI_LCP,GPP_FI_LCP,KULUARPAK,BIG LAKE,RED RIVER,EAST CHANNEL,DOY,Year,Month,Date
2008-01-01 00:00:00-07:00,,,,,,,,,,,...,,,,,,,1,2008,1,2008-01-01
2008-01-01 01:00:00-07:00,,,,,,,,,,,...,,,,,,,1,2008,1,2008-01-01
2008-01-01 02:00:00-07:00,,,,,,,,,,,...,,,,,,,1,2008,1,2008-01-01
2008-01-01 03:00:00-07:00,,,,,,,,,,,...,,,,,,,1,2008,1,2008-01-01
2008-01-01 04:00:00-07:00,,,,,,,,,,,...,,,,,,,1,2008,1,2008-01-01


# Daily

* Temperature Fill
* NDIS Fill

In [443]:
Y = 'Temp'

Daily = Data.resample('D').mean()
Daily['Date'] = Daily.index.date
Daily[['Rainfall','total_precipitation_hourly']] = Data.resample('D').sum()[['Rainfall','total_precipitation_hourly']]
# Daily = Daily.loc[((Daily.index.month>=4)&(Daily.index.month<=11))]
Daily = Daily.loc[((Daily.index.year>=2008)&(Daily.index.year<=2020))]

X=['temperature_2m']
y = ['Air_Temp']
K=30
Val = Model_Test(X,y,Daily.loc[~Daily[X].T.isnull().any()],
                            K=K,Test = 'OLS',Scale=True,Task='Predict',Verbose=True,min_samples_split=2)
Daily[Y]=np.nan
Daily.loc[~Daily[X].T.isnull().any(),Y]=Val.flatten()
Daily['Air_Temp']=Daily['Air_Temp'].fillna(Daily[Y])
Daily['DOY']=Daily.index.dayofyear
Daily['Year']=Daily.index.year

X = ['snow_cover']
y=['NDSI_FI_LCP']

Val = Model_Test(X,y,Daily.loc[~Daily[X].T.isnull().any()],
                            K=K,Test = 'OLS',Scale=True,Task='Predict',Verbose=True,min_samples_split=2)

Daily['NDSI_fill'] = Daily['NDSI_FI_LCP'].copy()
# Y = 'NDSI_fill'
Daily[Y]=np.nan
Daily.loc[~Daily[X].T.isnull().any(),Y]=Val.flatten()
Daily['NDSI_fill']=Daily['NDSI_fill'].fillna(Daily[Y])

Daily['NDVI_fill']=Daily['NDVI_FI_LCP'].interpolate()



Air_Temp
RMSE 2.1962029349925243
std RMSE 0.11392815106803611
r2 0.9778904346791844
coef [14.49793744]
int -8.464076225606993

NDSI_FI_LCP
RMSE 0.26032877963832524
std RMSE 0.01762332651638947
r2 0.8342819353180768
coef [0.56762451]
int 0.14291782144688805


# Snowmelt

In [444]:
Daily['Snow_Season'] = 'Winter'

# Thresh = Daily.loc[((Daily.index.month==4)),
#                   'NDSI_fill'].quantile(0.5)
# print(Thresh)
for y in range(2008,2021):
    Thresh = Daily.loc[((Daily.index.year==y)&(Daily.index.month==4)),
                      'NDSI_fill'].quantile(0)
    Thresh2 = Daily.loc[((Daily['NDSI_fill']>=Thresh)&(Daily.index.year==y)&
                         (Daily.index.month>=5)&(Daily.index.month<=6)),
                      'DOY'][-1]
    Daily.loc[((Daily['NDSI_fill']<Thresh)&(Daily.index.year==y)&
               (Daily.index.dayofyear>Thresh2)&(Daily.index.month<=6)),
                      'Snow_Season'] = 'Snow Melt'
    Daily.loc[((Daily['NDSI_fill']<0)&(Daily.index.year==y)),
                      'Snow_Season'] = 'Snow Free'
#     print(Daily.loc[((Daily['NDSI_fill']<0)&(Daily.index.year==y)),
#                       'Snow_Season'])
Daily['Growing_Season']=Daily['Snow_Season'].copy()


GS_Thresh = 0.25
for y in range(2008,2021):
    Thresh = Daily.loc[((Daily.index.year==y)),#&(Daily.index.month==4)),
                      'NDVI_fill'].max()
    Thresh2 = Daily.loc[((Daily.index.year==y)&(Daily.NDVI_fill==Thresh))].index[0]
    
    Daily.loc[((Daily.index.year==y)&(Daily.index>Thresh2)&(Daily['NDVI_FI_LCP']>GS_Thresh)&
               (Daily['Snow_Season']!='Winter')),
                      'Growing_Season'] = 'Sensecence'
    Daily.loc[((Daily.index.year==y)&(Daily.index<Thresh2)&(Daily['NDVI_FI_LCP']>GS_Thresh)&
               (Daily['Snow_Season']!='Winter')),
                      'Growing_Season'] = 'Early Season'
    Daily.loc[((Daily.index.year==y)&(Daily['NDVI_FI_LCP']/Thresh>0.9)),
                      'Growing_Season'] = 'Peak Season'

Thresh = 0
Daily['Warm'] = 0
for y in range(2008,2021):
    Ix = Daily.loc[((Daily.index.year==y)&(Daily.Air_Temp>Thresh))].index
    Daily.loc[((Daily.index.year==y)&(Daily.index>Ix[0])&(Daily.index<Ix[-1])),'Warm']=1
     
Daily['Year']=Daily.index.year
Daily['Growing_Season']=Daily['Growing_Season'].replace({'Snow Free':'Green Up'})

Daily['Season']=Daily['Warm']*1
Daily.loc[Daily['Snow_Season']!='Snow Free','Season']=0

print()

FDF = pd.DataFrame(data={})
# FDF
# print(Daily.groupby(['Year','Snow_Season']).sum()['Season'].unstack())
FDF['Snow Melt'] = Daily.loc[Daily['Season']==1].resample('Y').first()['Date']
FDF['Start'] = Daily.loc[Daily['Season']==1].resample('Y').first()['Date']
FDF['End'] = Daily.loc[Daily['Season']==1].resample('Y').last()['Date']
# FDF['Span'] = Daily.loc[Daily['Season']==1].resample('Y').sum()['Season']


FDF.index=FDF.index.year
FDF['Duration'] = FDF['End']-FDF['Start']



Rank = FDF.loc[((FDF.index>2008)&(FDF.index<2020)),['Duration']].sort_values(by='Duration',ascending=False).reset_index().reset_index().set_index('index').rename(columns={'level_0':'Rank'})


FDF[['Start','End','Duration']].join(Rank['Rank']+1).dropna()
# FDF

# print(Rank)




Unnamed: 0,Start,End,Duration,Rank
2009,2009-06-10,2009-09-23,105 days,10.0
2010,2010-06-02,2010-09-24,114 days,9.0
2011,2011-06-02,2011-10-03,123 days,6.0
2012,2012-06-02,2012-10-06,126 days,5.0
2013,2013-06-10,2013-10-21,133 days,2.0
2014,2014-06-05,2014-10-03,120 days,7.0
2015,2015-05-31,2015-09-28,120 days,8.0
2016,2016-05-27,2016-10-06,132 days,3.0
2017,2017-05-31,2017-10-10,132 days,4.0
2018,2018-06-10,2018-09-21,103 days,11.0


# Flood

In [445]:
Flood = Daily.loc[((Daily.index.month>=5)&(Daily.index.month<=6))].copy()
for y in range(2008,2019):
    Fy=Flood.loc[Flood.index.year==y].copy()
#     Sf=Daily.loc[((Daily.index.year==y)&(Daily.index.month>=7))].copy()
    for s in ['KULUARPAK','BIG LAKE']:
        if Fy[s].count()/(1464/24)>.3:
            v=Fy[s].max()
            d = Fy.loc[((Fy[s]==v))].index.date
            FDF.loc[FDF.index==y,s+'_Peak']=v
            FDF.loc[FDF.index==y,s+'_Peak_Date']=d


for y,row in FDF.iterrows():
    Year = Daily.loc[Daily.index.year == y]
    S = (Daily.loc[((Daily.index==row['BIG LAKE_Peak_Date'])|(Daily.index==row['KULUARPAK_Peak_Date'])),
                   ['NDVI_FI_LCP']].max())
    FDF.loc[FDF.index==y,'NDVI']=S.values
    S = (Daily.loc[((Daily.index==row['BIG LAKE_Peak_Date'])|(Daily.index==row['KULUARPAK_Peak_Date'])),
                   ['NDSI_FI_LCP']].min())
    FDF.loc[FDF.index==y,'NDSI']=S.values
#     print(S)
# FDF['NDSI'].describe()
Temp = FDF.dropna()
(FDF['Start']-FDF['BIG LAKE_Peak_Date']).describe()
# (FDF['Start']-FDF['KULUARPAK_Peak_Date']).describe()
# Temp

# FDF[['Start','End','BIG LAKE_Peak_Date']]

count                            8
mean               1 days 00:00:00
std      2 days 17:24:46.705721211
min              -3 days +00:00:00
25%              -1 days +12:00:00
50%                1 days 00:00:00
75%                2 days 12:00:00
max                5 days 00:00:00
dtype: object

In [446]:
for y in range(2008,2021):
    Y = Daily.loc[Daily.index.year==y].copy()
    Y['T']=(Y['Air_Temp']*Y['Season']).cumsum()
    Daily.loc[Daily.index.year==y,'T_Sum'] = Y['T']#(Y['T']**.5).ffill()
#     Y['T']=(Y['Season']*Y['Air_Temp']**5).cumsum()
    Daily.loc[Daily.index.year==y,'T_Sum_Root'] = (Y['T']**.5).ffill()
    
# plt.figure()
# plt.plot(Daily['T_Sum'])

Exp = 0.5

for y in range(2009,2021):
    Year = Daily.loc[Daily.index.year==y].copy()
    Year['Season_Sum'] = Year['Season'].cumsum()
    Daily.loc[Daily.index.year==y,'Season_Sum']=Year['Season_Sum']*1
    Daily.loc[Daily.index.year==y,'Season_Root']=Year['Season_Sum']**Exp
    FDF.loc[FDF.index==y,'Season_Sum']=Year['Season_Sum'].max()
    FDF.loc[FDF.index==y,'T_Sum']=Year['T_Sum'].max()
    FDF.loc[FDF.index==y,'T_Sum_Root']=Year['T_Sum_Root'].max()
    FDF.loc[FDF.index==y,'Season_Root']=Year['Season_Sum'].max()**Exp
    Daily.loc[Daily.index.year==y,'Season']=Year['Season']

FDF['Season_Root']=FDF['Season_Sum']**Exp

AL_obs = pd.read_csv(Output_Path+'AL_obs.csv',index_col=['Date'],parse_dates=['Date'])

AL_s = ['Reindeer Depot  (Thaw tube)','Lousy Point (Thaw tube)','Taglu (grid)']


ALH=pd.read_csv(Output_Path+'AL_Depth.csv',header=[1])
AL=pd.read_csv(Output_Path+'AL_Depth.csv',skiprows=4)
AL.columns=ALH.columns
AL=AL.rename(columns={'Name':'Year'})
AL = AL.set_index('Year')
AL[AL_s]/=100


# AL = AL[AL_s].dropna()
AL = AL.loc[AL.index>=2009,AL_s]

T = Daily.loc[Daily.index.year>=2008].resample('Y').mean()[['Air_Temp']]
T['Year']=T.index.year
T = T.set_index('Year')
AL = AL.join(T).join(FDF)
C = ((AL.corr()).round(2)[AL_s][2:])
S = np.sign(C)#*10
C = S.multiply(C**2).round(3)
C[C.abs()<.3]=np.nan
print('Correlations')
print(C[C.isnull().sum(axis=1)<2])
# AL.corr().round(2)[AL_s]

Correlations
             Reindeer Depot  (Thaw tube)  Lousy Point (Thaw tube)  \
Air_Temp                             NaN                    0.476   
Season_Sum                         0.689                    0.410   
T_Sum                              0.436                    0.348   
T_Sum_Root                         0.462                    0.372   
Season_Root                        0.689                    0.410   

             Taglu (grid)  
Air_Temp            0.656  
Season_Sum            NaN  
T_Sum                 NaN  
T_Sum_Root            NaN  
Season_Root           NaN  


In [447]:
v = 'Season_Root'
v = 'Season_Sum'
# v = 'T_Sum'
param = v


X = ['Season_Sum']
X = ['Season_Root']
# X = ['T_Sum']
# X = ['T_Sum_Root']

# X = ['T_Sum','Season_Sum']
# X = ['T_Sum_Root','Season_Root']

print('AL Comparisson')
print(AL_s[0])
df = AL[[AL_s[0]]+X].dropna()
Mod = LinearRegression()
Mod.fit(df[X], df[AL_s[0]])
print(Mod.coef_)
Test = Mod.predict(df[X])
print(metrics.r2_score(df[AL_s[0]],Test))

print()
print(AL_s[1])
df = AL[[AL_s[1]]+X].dropna()
Mod = LinearRegression()
Mod.fit(df[X], df[AL_s[1]])
print(Mod.coef_)
Test = Mod.predict(df[X])
print(metrics.r2_score(df[AL_s[1]],Test))
AL = AL.loc[AL.index>=2008].copy()
# AL


Data.loc[((Data.index.month<=4)|(Data.index.month>=11)),['Season','Season_Sum','Season_Root','T_Sum']]=np.nan
Data.loc[Data['Season'].isnull(),['Season_Sum','Season_Root','T_Sum']]=np.nan

for v in ['DOY','Season_Sum','Season_Root','T_Sum','T_Sum_Root']:
    for d in AL_obs.index.unique():
        AL_obs.loc[AL_obs.index==d,v]=Daily.loc[Daily.index.date==d,v].values[0]

y=['Active_Layer_1']
Y = 'AL_data_est'
Lag = []
D = []
K = 10


Val,C,I,RMSE,R2 = Model_Test(X,y,AL_obs.copy(),K=K,Test = 'OLS',Task='Predict_Return')
AL_obs[Y]=Val
try:
    Daily['AL_data_est'] = (Daily[X[0]])*C[0]+(Daily[X[1]])*C[1]+I
#     Data['AL_data_est'] = (Data[X[0]])*C[0]+ (Data[X[1]])*C[1]+I
except:
    Daily['AL_data_est'] = (Daily[X[0]])*C[0]+I
#     Data['AL_data_est'] = (Data[X[0]])*C[0]+I
    pass
Data['AL_data_est']=Daily['AL_data_est'].resample('H').interpolate()
Daily.loc[Daily['AL_data_est']<=0,'AL_data_est']=np.nan

T = Daily.loc[Daily.index.year>=2008].resample('Y').max()[['AL_data_est']]
T['Year']=T.index.year
T = T.set_index('Year')
try:
    AL = AL.drop(columns='AL_data_est')
except:
    pass
AL = AL.join(T)
AL = AL.loc[AL['AL_data_est'].isnull()==False].copy()

# print(Daily.loc[Daily['Snow_Season']=='Snow Free','AL_data_est'].resample('Y').min())

print((AL.loc[AL.index>=2009,['AL_data_est']+AL_s].corr()**2)['AL_data_est'])
print(AL.loc[AL.index>=2009,AL_s].corr()**2)

AL Comparisson
Reindeer Depot  (Thaw tube)
[0.15550593]
0.6951784007998872

Lousy Point (Thaw tube)
[0.08431784]
0.4146654982614295

Active_Layer_1
RMSE 0.023551523762420534
std RMSE 0.006025373642306173
r2 0.9247953563437045
coef [0.05225029]
int -0.04109618387843158
AL_data_est                    1.000000
Reindeer Depot  (Thaw tube)    0.695178
Lousy Point (Thaw tube)        0.414665
Taglu (grid)                   0.001711
Name: AL_data_est, dtype: float64
                             Reindeer Depot  (Thaw tube)  \
Reindeer Depot  (Thaw tube)                     1.000000   
Lousy Point (Thaw tube)                         0.655218   
Taglu (grid)                                    0.004664   

                             Lousy Point (Thaw tube)  Taglu (grid)  
Reindeer Depot  (Thaw tube)                 0.655218      0.004664  
Lousy Point (Thaw tube)                     1.000000      0.007456  
Taglu (grid)                                0.007456      1.000000  


In [448]:

plt.figure()
plt.plot(Data.loc[Data.index.year==2017,'AL_data_est'])
print(Daily['AL_data_est'].min())

# Min = (Daily.resample('Y').min()[['AL_data_est']])
# Min.index=Min.index.year
print(Data.resample('Y').min()['AL_data_est'])
print(Data.resample('Y').max()['AL_data_est'])


# print(AL.join(Min))

<IPython.core.display.Javascript object>

0.011154101766418949
2008-12-31 00:00:00-07:00         NaN
2009-12-31 00:00:00-07:00   -0.041096
2010-12-31 00:00:00-07:00   -0.041096
2011-12-31 00:00:00-07:00   -0.041096
2012-12-31 00:00:00-07:00   -0.041096
2013-12-31 00:00:00-07:00   -0.041096
2014-12-31 00:00:00-07:00   -0.041096
2015-12-31 00:00:00-07:00   -0.041096
2016-12-31 00:00:00-07:00   -0.041096
2017-12-31 00:00:00-07:00   -0.041096
2018-12-31 00:00:00-07:00   -0.041096
2019-12-31 00:00:00-07:00   -0.041096
2020-12-31 00:00:00-07:00   -0.041096
2021-12-31 00:00:00-07:00         NaN
Freq: A-DEC, Name: AL_data_est, dtype: float64
2008-12-31 00:00:00-07:00         NaN
2009-12-31 00:00:00-07:00    0.496853
2010-12-31 00:00:00-07:00    0.519226
2011-12-31 00:00:00-07:00    0.540738
2012-12-31 00:00:00-07:00    0.547735
2013-12-31 00:00:00-07:00    0.563745
2014-12-31 00:00:00-07:00    0.533657
2015-12-31 00:00:00-07:00    0.533657
2016-12-31 00:00:00-07:00    0.561484
2017-12-31 00:00:00-07:00    0.561484
2018-12-31 00:00:00-

In [449]:
# print(Data.loc[((Data.index.year == 2017)&(Data.Season == 1))][0:1].index.dayofyear)

print(AL_obs[['DOY','Season_Root','Season_Sum','Active_Layer_1']].corr()['Active_Layer_1']**2)
y = ['Active_Layer_1']

X = ['T_Sum']
Val,C1,I1,MSE,R2 = Model_Test(X,y,AL_obs.copy(),K=K,Test = 'OLS',Task='Predict_Return')

Daily['DSSM'] = Daily.index.dayofyear-152
Sub = Daily.loc[((Daily.index.year==2017)&(Daily.Season>=0))]


print(C1*1+I1)

print()
X = ['Season_Root']
Val,C,I,MSE,R2 = Model_Test(X,y,AL_obs.copy(),K=K,Test = 'OLS',Task='Predict_Return')

print()
print()
print(C*1+I)

fig,ax=plt.subplots(figsize=(3.5,3.5)) 
plt.plot(Sub.index,Sub['T_Sum']*C1+I1,label='Linear: $r^2 = 0.94$')
plt.plot(Sub.index,(Sub['Season_Root'])*C+I,label='Exponential: $r^2 = 0.93$')

plt.scatter(AL_obs.index, AL_obs['Active_Layer_1'],color='grey',edgecolor='k')
ax.set_title('Thaw Depth Estimation')

ax.set_ylabel('Thaw Depth (m)')

plt.grid()
plt.ylim(0,0.65)
fig.autofmt_xdate(ha='center')
plt.tight_layout()
plt.savefig(G_Drive+'/PhD_Thesis/Chapter3_Figs/AL_fig.png',dpi=600)

DOY               0.962242
Season_Root       0.949469
Season_Sum        0.962242
Active_Layer_1    1.000000
Name: Active_Layer_1, dtype: float64

Active_Layer_1
RMSE 0.02333646792696542
std RMSE 0.00686062204609137
r2 0.9229825654564996
coef [0.00031112]
int 0.18301126239076956
[0.18332238]


Active_Layer_1
RMSE 0.023551523762420534
std RMSE 0.006025373642306173
r2 0.9247953563437045
coef [0.05225029]
int -0.04109618387843158


[0.0111541]


<IPython.core.display.Javascript object>

In [450]:
plt.figure()
Melt = Daily.loc[((Daily.index.year==2009)&((Daily.index.month>=4)&(Daily.index.month<=7)))].copy()
Melt.loc[Melt.index.month==7,'NDSI_FI_LCP']=np.nan
print(Melt.resample('M').min()['NDSI_FI_LCP'])
# # for y in M
plt.plot(Melt.index.dayofyear,Melt['NDSI_FI_LCP'])
plt.plot(Melt.index.dayofyear,Melt['NDSI_FI_LCP']*0+Melt.resample('M').min()['NDSI_FI_LCP'][0])
# plt.plot(Melt.index.dayofyear,Melt['NDVI_FI_LCP'])


print(Data.loc[Data.index.month==4].resample('M').quantile(0.25)['NDSI_FI_LCP'].dropna())

<IPython.core.display.Javascript object>

2009-04-30 00:00:00-06:00    0.774142
2009-05-31 00:00:00-06:00    0.806176
2009-06-30 00:00:00-06:00   -0.409840
2009-07-31 00:00:00-06:00         NaN
Freq: M, Name: NDSI_FI_LCP, dtype: float64
2008-04-30 00:00:00-06:00    0.779749
2009-04-30 00:00:00-06:00    0.798749
2010-04-30 00:00:00-06:00    0.836788
2011-04-30 00:00:00-06:00    0.801237
2012-04-30 00:00:00-06:00    0.782492
2013-04-30 00:00:00-06:00    0.790589
2014-04-30 00:00:00-06:00    0.806752
2015-04-30 00:00:00-06:00    0.796722
2016-04-30 00:00:00-06:00    0.790469
2017-04-30 00:00:00-06:00    0.785157
2018-04-30 00:00:00-06:00    0.804892
2019-04-30 00:00:00-06:00    0.811239
2020-04-30 00:00:00-06:00    0.790558
Name: NDSI_FI_LCP, dtype: float64


  return f(*args, **kwargs)


# Snow Analysis

# Fill Params

# Sub-Surface Fill

In [451]:
# Data['Rolling_RainfallD14']=Data['Rainfall'].rolling(str(14)+'D').sum()
Data['Rolling_RainfallD6']=Data['Rainfall'].rolling(str(6)+'D').sum()
Data['Rolling_RainfallD3']=Data['Rainfall'].rolling(str(3)+'D').sum()
Data['Rolling_RainfallD']=Data['Rainfall'].rolling(str(1)+'D').sum()

# Data['Rolling_Air_Temp_D14']=Data['Air_Temp'].rolling(str(14)+'D').mean()
# Data['Rolling_Air_Temp_D6']=Data['Air_Temp'].rolling(str(6)+'D').mean()
Data['Rolling_Air_TempD3']=Data['Air_Temp'].rolling(str(3)+'D').mean()
Data['Rolling_Air_TempD']=Data['Air_Temp'].rolling(str(1)+'D').mean()

Data['Rolling_Air_TempH1']=Data['Air_Temp'].rolling(str(6)+'H').mean()
Data['Rolling_Air_TempH2']=Data['Air_Temp'].rolling(str(12)+'H').mean()
Data['Rolling_Air_TempH3']=Data['Air_Temp'].rolling(str(18)+'H').mean()

Y = 'Test'

SE_Rule = 2

Met = [
    'Net_SW',
    'Net_LW',
#     'Net_RN'
    'Air_Temp',
#     'Rainfall'
    ]
Lag_Rain = [
#     'Rolling_RainfallD6',
    'Rolling_RainfallD3',
#     'Rolling_RainfallD',
    ]
Lag_Air = [
    'Rolling_Air_TempH1',
    'Rolling_Air_TempH2',
    'Rolling_Air_TempH3',
    'Rolling_Air_TempD'
    ]
ERA5_Temp = [
    'soil_temperature_level_1',
    'soil_temperature_level_2',
    'soil_temperature_level_3'
    ]

ERA5_VWC = [
    'volumetric_soil_water_layer_1',
    'volumetric_soil_water_layer_2',
    ]

AllX = Met+Lag_Air+Lag_Rain+ERA5_Temp+ERA5_VWC
print(len(AllX))

Mods = {
}
for y in [['Temp_15_1'],['Temp_15_2'],['Temp_5_1'],['Temp_5_2'],['Table_1']]:
    X = AllX
    res = {
        'X':X,
        'Score':np.ones(len(X))/len(X)
    }
    Results = pd.DataFrame(data=res)
    K = 30
    Track = {'i':[],
            'Rem':[],
            'RMSE':[],
            'RMSES':[],
            'R2':[],
#             'R2S':[]
            }
    for i in range(len(X)):
        X = list(Results.X.values)
        Val,C,I,MSE,R2 = Model_Test(X,y,Data.loc[~Data[X].T.isnull().any()],K=K,
                             Test = 'OLS',Scale=True,Task='Predict_Return',Verbose=False)
        MSE = np.array(MSE)**.5
        Results['Score'] = C**2/(C**2).sum()
        Results = Results.sort_values('Score')
        Rem = Results.loc[Results['Score']==Results['Score'].min(),'X'].values[0]
        Results = Results.loc[Results['Score']>Results['Score'].min()]
        Track['i'].append(len(X))
        Track['R2'].append(np.mean(np.array(R2)))
#         Track['R2S'].append(np.std(np.array(R2)))
        Track['RMSE'].append(MSE.mean())
        Track['RMSES'].append(MSE.std())#/(MSE.shape[0]**.5))
        Track['Rem'].append(Rem)

    Track = pd.DataFrame(data=Track)
    Track = Track.set_index('i')
    Track['RMSESS'] = (Track['RMSE'] + Track['RMSES']*SE_Rule)
#     Track['R2SS'] = (Track['R2'] - Track['R2S']*SE_Rule)
    Mods[y[0]] = {}
    Sel = Track.loc[Track['RMSE']<Track['RMSESS'].min(),['RMSE','RMSES','R2']][-1:]
#     Sel = Track.loc[Track['R2']/Track['R2'].max()>0.95,['RMSE','RMSES','R2']][-1:]
    M = Track.loc[Track.index<=Sel.index.values[0],'Rem'].values
    Mods[y[0]]['Stats']=Sel
    Mods[y[0]]['X']=M
    print(y)
    print(M)
    print()
    print(Sel)
#     print(Track)
    print()
    print()

13
['Temp_15_1']
['soil_temperature_level_2' 'volumetric_soil_water_layer_2'
 'soil_temperature_level_3' 'Air_Temp' 'Rolling_Air_TempH1' 'Net_SW'
 'Rolling_Air_TempH2' 'Rolling_Air_TempD' 'Rolling_Air_TempH3']

       RMSE     RMSES        R2
i                              
9  0.421864  0.017373  0.913586


['Temp_15_2']
['volumetric_soil_water_layer_2' 'Rolling_Air_TempH2' 'Rolling_Air_TempH3'
 'Rolling_Air_TempD' 'soil_temperature_level_3' 'Air_Temp'
 'Rolling_Air_TempH1' 'Net_SW']

       RMSE     RMSES        R2
i                              
8  0.472185  0.028897  0.891154


['Temp_5_1']
['Net_SW' 'soil_temperature_level_2' 'Rolling_Air_TempD'
 'soil_temperature_level_3' 'Rolling_Air_TempH3' 'Rolling_Air_TempH2'
 'Air_Temp' 'Rolling_Air_TempH1']

       RMSE     RMSES        R2
i                              
8  0.722865  0.027237  0.906771


['Temp_5_2']
['Net_LW' 'soil_temperature_level_1' 'Air_Temp' 'Rolling_Air_TempH1'
 'Rolling_Air_TempD' 'Rolling_Air_TempH3' 'Rolling_Air_Te

In [452]:
# print(Track['R2']/Track['R2'].max()>0.95)

# print(Data['Season'])

In [453]:
fig,ax=plt.subplots(len(Mods.keys()),sharex=True,figsize=(6,len(Mods.keys())*3))
j = 0
for Y in Mods.keys():
#     print(Mods[y]['X'])
    X = list(Mods[Y]['X'])
    y = [Y]
    print(Mods[Y]['Stats'])
    Val = Model_Test(X,y,Data.loc[~Data[X].T.isnull().any()],
                                K=K,Test = 'OLS',Scale=True,Task='Predict',Verbose=False)
    Data[Y+'_OLS']=np.nan
    Data.loc[~Data[X].T.isnull().any(),Y+'_OLS']=Val
                     
    DNA = Data[y+[Y+'_OLS']].dropna()
    print()
    print('r2: ',metrics.r2_score(DNA[y],DNA[Y+'_OLS']))
    print('RMSE: ',metrics.mean_squared_error(DNA[y],DNA[Y+'_OLS'])**.5)


    for y in range(2009,2020):
        Yr = Data.loc[Data.index.year==y].resample('D').mean()
        if y == 2017:
            ax[j].plot(Yr['DOY'],Yr[Y+'_OLS'],color='r',linewidth=5)
        else:
            ax[j].plot(Yr['DOY'],Yr[Y+'_OLS'],color='b',linewidth=1)
    ax[j].plot(Data['DOY'],Data[Y],color='k')
    ax[j].set_title(Y+' '+ str(Mods[Y]['Stats']['R2'].round(2).values[0]))


    ax[j].axvspan(147,174,facecolor=[.97,.97,.97,.65],edgecolor='k')
    ax[j].axvspan(256,295,facecolor=[.97,.97,.97,.65],edgecolor='k')
    ax[j].set_xlim(130,310)
    j +=1

<IPython.core.display.Javascript object>

       RMSE     RMSES        R2
i                              
9  0.421864  0.017373  0.913586

r2:  0.9154535680166923
RMSE:  0.4201197432847053
       RMSE     RMSES        R2
i                              
8  0.472185  0.028897  0.891154

r2:  0.8930152939231452
RMSE:  0.4707782242653458
       RMSE     RMSES        R2
i                              
8  0.722865  0.027237  0.906771

r2:  0.908636185126631
RMSE:  0.7197915063359053
       RMSE     RMSES        R2
i                              
7  1.169391  0.051049  0.874523

r2:  0.8776341926932772
RMSE:  1.163050880262077
       RMSE     RMSES        R2
i                              
4  0.007089  0.000468  0.952754

r2:  0.9537124568529606
RMSE:  0.007082225579817953


In [454]:
Y = 'Test'
# SE_Rule = 2
Met = [
    'Net_LW',
    'Net_SW',
    'Wind_Speed',
    'Air_Temp',
#     'Rainfall'
    ]
ERA5_Met = [
    'temperature_2m',
    'dewpoint_temperature_2m',
    'Wind_10m',
    'HH',
    ]
AllX = Met+ERA5_Met
print(len(AllX))
Mods = {}
for y in [['u*'],['PPFD_Avg'],['VPD']]:
    X = AllX
    res = {
        'X':X,
        'Score':np.ones(len(X))/len(X)
    }
    Results = pd.DataFrame(data=res)
    K = 30
    Track = {'i':[],
            'Rem':[],
            'RMSE':[],
            'RMSES':[],
             'Int':[],
             'Coef':[],
            'R2':[],
            'R2S':[]}
    for i in range(len(X)):
        X = list(Results.X.values)
        Val,C,I,MSE,R2 = Model_Test(X,y,Data.loc[~Data[X].T.isnull().any()],K=K,
                             Test = 'OLS',Scale=True,Task='Predict_Return',Verbose=False)
        MSE = np.array(MSE)**.5
        Results['Score'] = C**2/(C**2).sum()
        Results = Results.sort_values('Score')
        Rem = Results.loc[Results['Score']==Results['Score'].min(),'X'].values[0]
        Results = Results.loc[Results['Score']>Results['Score'].min()]
        Track['i'].append(len(X))
        Track['Int'].append(np.mean(np.array(I)))
        Track['Coef'].append(np.mean(np.array(C)))
        Track['R2'].append(np.mean(np.array(R2)))
        Track['R2S'].append(np.std(np.array(R2)))
        Track['RMSE'].append(MSE.mean())
        Track['RMSES'].append(MSE.std())#/(MSE.shape[0]**.5))
        Track['Rem'].append(Rem)

    Track = pd.DataFrame(data=Track)
    Track = Track.set_index('i')
    Track['RMSESS'] = (Track['RMSE'] + Track['RMSES']*SE_Rule)
    Track['R2SS'] = (Track['R2'] - Track['R2S']*SE_Rule)
    Mods[y[0]] = {}
    Sel = Track.loc[Track['RMSE']<Track['RMSESS'].min(),['RMSE','RMSES','R2','R2S','Coef','Int']][-1:]
#     Sel = Track.loc[Track['R2']/Track['R2'].max()>0.95,['RMSE','RMSES','R2']][-1:]
    M = Track.loc[Track.index<=Sel.index.values[0],'Rem'].values
    Mods[y[0]]['Stats']=Sel
    Mods[y[0]]['X']=M
    print(y)
    print(M)
    print(Sel)
#     print(Track)
    print()

8
['u*']
['Wind_Speed']
       RMSE     RMSES       R2       R2S      Coef       Int
i                                                           
1  0.045001  0.003854  0.89794  0.017879  0.135016  0.304448

['PPFD_Avg']
['Net_SW']
        RMSE     RMSES       R2       R2S        Coef         Int
i                                                                
1  38.631624  2.556721  0.99004  0.001961  389.665438  333.846646

['VPD']
['dewpoint_temperature_2m' 'Air_Temp']
         RMSE    RMSES        R2       R2S        Coef         Int
i                                                                 
2  143.640976  6.59898  0.837683  0.025646  119.975823  287.774113



In [455]:
X = ['Wind_Speed']
y = ['u*']
Val,C,I,MSE,R2 = Model_Test(X,y,Data.loc[~Data[X].T.isnull().any()],K=K,
                     Test = 'OLS',Scale=False,Task='Predict_Return',Verbose=True)

X = ['Net_SW']
y = ['PPFD_Avg']
Data['Net_SW_T'] = Data['Net_SW'].copy()
Data.loc[Data['Net_SW_T']<0,'Net_SW_T']=0
Val,C,I,MSE,R2 = Model_Test(X,y,Data.loc[~Data[X].T.isnull().any()],K=K,
                     Test = 'OLS',Scale=False,Task='Predict_Return',Verbose=True)


u*
RMSE 0.04500149440521988
std RMSE 0.0038542468485703867
r2 0.8948652491619089
coef [0.07370356]
int 0.021367641498866184

PPFD_Avg
RMSE 38.63162448677514
std RMSE 2.5567209462702647
r2 0.9904007390827857
coef [2.38028641]
int 3.848866329466529


In [None]:
fig,ax=plt.subplots(len(Mods.keys()),sharex=True,figsize=(6,len(Mods.keys())*3))
j = 0
for Y in Mods.keys():
#     print(Mods[y]['X'])
    X = list(Mods[Y]['X'])
    y = [Y]
    print(Mods[Y]['Stats'])
    Val = Model_Test(X,y,Data.loc[~Data[X].T.isnull().any()],
                                K=K,Test = 'OLS',Scale=True,Task='Predict',Verbose=True)
    Data[Y+'_OLS']=np.nan
    Data.loc[~Data[X].T.isnull().any(),Y+'_OLS']=Val
                     
    DNA = Data[y+[Y+'_OLS']].dropna()
    print()
    print('r2: ',metrics.r2_score(DNA[y],DNA[Y+'_OLS']))
    print('RMSE: ',metrics.mean_squared_error(DNA[y],DNA[Y+'_OLS'])**.5)


    for y in range(2009,2020):
        Yr = Data.loc[Data.index.year==y].resample('D').mean()
        if y == 2017:
            ax[j].plot(Yr['DOY'],Yr[Y+'_OLS'],color='r',linewidth=5)
        else:
            ax[j].plot(Yr['DOY'],Yr[Y+'_OLS'],color='b',linewidth=1)
    ax[j].plot(Data['DOY'],Data[Y],color='k')
    ax[j].set_title(Y+' '+ str(Mods[Y]['Stats']['R2'].round(2).values[0]))
    
    ax[j].axvspan(147,174,facecolor=[.97,.97,.97,.65],edgecolor='k')
    ax[j].axvspan(256,295,facecolor=[.97,.97,.97,.65],edgecolor='k')
    j +=1

<IPython.core.display.Javascript object>

       RMSE     RMSES       R2       R2S      Coef       Int
i                                                           
1  0.045001  0.003854  0.89794  0.017879  0.135016  0.304448

u*
RMSE 0.04500149440521988
std RMSE 0.0038542468485703893
r2 0.8948652491619089
coef [0.13501616]
int 0.3044481420598676

r2:  0.8998657373147907
RMSE:  0.045041291088142706
        RMSE     RMSES       R2       R2S        Coef         Int
i                                                                
1  38.631624  2.556721  0.99004  0.001961  389.665438  333.846646

PPFD_Avg
RMSE 38.63162448677515
std RMSE 2.556720946270257
r2 0.9904007390827857
coef [389.66543789]
int 333.84664621212124

r2:  0.990258864514813
RMSE:  38.64921631423895
         RMSE    RMSES        R2       R2S        Coef         Int
i                                                                 
2  143.640976  6.59898  0.837683  0.025646  119.975823  287.774113


In [None]:
plt.figure()
plt.scatter(Data['PPFD_Avg_OLS'],Data['Net_SW'])

In [None]:
Data['Rolling_Rainfall_D14']=Data['Rainfall'].rolling(str(14)+'D').sum()
Data['Rolling_Rainfall_D6']=Data['Rainfall'].rolling(str(6)+'D').sum()
Data['Rolling_Rainfall_D3']=Data['Rainfall'].rolling(str(3)+'D').sum()
Data['Rolling_Rainfall_D']=Data['Rainfall'].rolling(str(1)+'D').sum()

Data['Rolling_Air_Temp_D3']=Data['Air_Temp'].rolling(str(3)+'D').mean()
Data['Rolling_Air_Temp_D']=Data['Air_Temp'].rolling(str(1)+'D').mean()

Data['Rolling_Air_TempH1']=Data['Air_Temp'].rolling(str(6)+'H').mean()
Data['Rolling_Air_TempH2']=Data['Air_Temp'].rolling(str(12)+'H').mean()
Data['Rolling_Air_TempH3']=Data['Air_Temp'].rolling(str(18)+'H').mean()

Y = 'Test'

# SE_Rule = 2

Met = [
    
    'Air_Temp',
#     'Rainfall'
    ]
ERA5_Met = [
    'temperature_2m',
    'dewpoint_temperature_2m',
    'W_Dir_ERA5',
#     'Wind_10m',
#     'HH',
    ]


AllX = Met+ERA5_Met
print(len(AllX))

Mods = {
}
for y in [['VPD']]:
    X = AllX
    res = {
        'X':X,
        'Score':np.ones(len(X))/len(X)
    }
    Results = pd.DataFrame(data=res)
    K = 10
    Track = {'i':[],
            'Rem':[],
            'RMSE':[],
            'RMSES':[],
            'R2':[],
            'R2S':[]}
    for i in range(len(X)):
        X = list(Results.X.values)
        Val,C,MSE,R2 = Model_Test(X,y,Data.loc[~Data[X].T.isnull().any()],K=K,
                             Test = 'RF',Task='Predict_Return',Verbose=False,min_samples_split=2)
        MSE = np.array(MSE)**.5
        Results['Score'] = C#/C.sum()
        Results = Results.sort_values('Score')
        Rem = Results.loc[Results['Score']==Results['Score'].min(),'X'].values[0]
        Results = Results.loc[Results['Score']>Results['Score'].min()]
        Track['i'].append(len(X))
        Track['R2'].append(np.mean(np.array(R2)))
        Track['R2S'].append(np.std(np.array(R2)))
        Track['RMSE'].append(MSE.mean())
        Track['RMSES'].append(MSE.std())#/(MSE.shape[0]**.5))
        Track['Rem'].append(Rem)

    Track = pd.DataFrame(data=Track)
    Track = Track.set_index('i')
    Track['RMSESS'] = (Track['RMSE'] + Track['RMSES']*SE_Rule)
    Track['R2SS'] = (Track['R2'] - Track['R2S']*SE_Rule)
    Mods[y[0]] = {}
    Sel = Track.loc[Track['RMSE']<Track['RMSESS'].min(),['RMSE','RMSES','R2','R2S']][-1:]
#     Sel = Track.loc[Track['R2']/Track['R2'].max()>0.95,['RMSE','RMSES','R2']][-1:]
    M = Track.loc[Track.index<=Sel.index.values[0],'Rem'].values
    Mods[y[0]]['Stats']=Sel
    Mods[y[0]]['X']=M
    print(y)
    print(M)
    print(Sel)
    print(Track)
    
    print()

In [None]:
fig,ax=plt.subplots(len(Mods.keys()),sharex=True,figsize=(6,len(Mods.keys())*3))
if len(Mods.keys())==1:
    ax = [ax]
j = 0
for Y in Mods.keys():
#     print(Mods[y]['X'])
    X = list(Mods[Y]['X'])
    y = [Y]
    print(Mods[Y]['Stats'])
    Val = Model_Test(X,y,Data.loc[~Data[X].T.isnull().any()],
                                K=K,Test = 'RF',Scale=True,Task='Predict',Verbose=False,min_samples_split=2)
    Data[Y+'_RF']=np.nan
    Data.loc[~Data[X].T.isnull().any(),Y+'_RF']=Val
                     
    DNA = Data[y+[Y+'_RF']].dropna()
    print()
    print('r2: ',metrics.r2_score(DNA[y],DNA[Y+'_RF']))
    print('RMSE: ',metrics.mean_squared_error(DNA[y],DNA[Y+'_RF'])**.5)


    for y in range(2009,2020):
        Yr = Data.loc[Data.index.year==y].resample('D').mean()
        if y == 2017:
            ax[j].plot(Yr['DOY'],Yr[Y+'_RF'],color='r',linewidth=5)
        else:
            ax[j].plot(Yr['DOY'],Yr[Y+'_RF'],color='b',linewidth=1)
    ax[j].plot(Data['DOY'],Data[Y],color='k')
    ax[j].set_title(Y+' '+ str(Mods[Y]['Stats']['R2'].round(2).values[0]))


    ax[j].axvspan(147,174,facecolor=[.97,.97,.97,.65],edgecolor='k')
    ax[j].axvspan(256,295,facecolor=[.97,.97,.97,.65],edgecolor='k')
    j +=1

# WX Summary Stats


In [None]:
# print(AL.columns)
# print(AL[['Peak_Day','Peak_Height','Peak_NDVI']].corr()**2)
# print(AL[['Peak_Day_BL','Peak_Height_BL']].corr()**2)
# print(AL[['Peak_Height_BL','Peak_Height']].corr()**2)
# print()
# print(AL['Peak_NDVI'].median())
# # print(np.round(AL['Peak_Height'].max()-AL['Peak_Height'].min(),2))
# # print(np.round(AL['Peak_Height'].max()-AL['Peak_Height'].min(),2))

# print(AL['Start_Day'])
# AL['Peak_Date'] = pd.to_datetime(AL.index * 1000 + AL['Peak_Day'], format='%Y%j')
# AL['Peak_Date_BL'] = pd.to_datetime(AL.index * 1000 + AL['Peak_Day_BL'], format='%Y%j')
# AL['Start_Date'] = pd.to_datetime(AL.index * 1000 + AL['Start_Day'], format='%Y%j')
# AL['End_Date'] = pd.to_datetime(AL.index * 1000 + AL['End_Day'], format='%Y%j')
# Yrly = AL[['Peak_Height','Peak_Date','Peak_Date_BL','Peak_Height',]]#.sort_values(by='Season_Sum')
# print(Yrly)
# Yrly = AL[['Start_Date','End_Date','Season_Sum']].reset_index(drop=True)#.sort_values(by='Season_Sum').reset_
# print(Yrly)
# 

In [None]:
# print(Daily)
Daily = Data.resample('D').mean()
Daily = Daily[Daily['Air_Temp'].isnull()==False]
# Daily.loc[Daily.index.year == 2009]

In [None]:
# # fig,ax = plt.subplots(figsize=(4,3.25))
# fig,((ax,ax2),(ax3,ax4)) = plt.subplots(2,2,figsize=(8,8))

# C = [
# '#b2182b',
# '#d6604d',
# '#f4a582',
# '#fddbc7',
# '#383838',
# '#d1e5f0',
# '#92c5de',
# '#4393c3',
# '#2166ac',
# ][::-1]
# I = 0
# # %matplotlib notebook

# # Daily = Data.resample('Y').mean()
# # Daily

# Daily['Warm']=np.nan
# Daily.loc[Daily['Air_Temp']>0,'Warm']=1

# Study_Years = {'Year':[],
#               'Warm_Days':[],
#               'Warm_First':[],
#               'Warm_Last':[]}

# for y in Yrly.index[::-1]:
#     Fy=Flood.loc[Flood.index.year==y].copy()
#     YR = Daily.loc[Daily.index.year==y].copy()
# #     for s in ['KULUARPAK']:
#     if Fy['KULUARPAK'].count()/(1464/24)>.8:
#         ax.plot(Fy.index.dayofyear,Fy['KULUARPAK'],color=C[I],label=str(int(y)))
#         ax2.plot(Fy.index.dayofyear,Fy['NDVI_interp'].diff().abs(),color=C[I],label=str(int(y)))
        
#         I += 1

#     if y != 2008:
#         Study_Years['Year'].append(y)
#         Study_Years['Warm_Last'].append((YR['Warm']*YR['DOY']).max())
#         Study_Years['Warm_First'].append((YR['Warm']*YR['DOY']).min())
#         YR['Warm']=YR['Warm'].fillna(0)
#         YR['Warm_Days']=YR['Warm'].cumsum()
#         Study_Years['Warm_Days'].append(YR['Warm_Days'].max())
#         ax3.plot(YR.index.dayofyear,YR['Warm_Days'])
#     ax4.plot(YR.index.dayofyear,YR['NDSI_interp'])

        
# Study_Stats = pd.DataFrame(data=Study_Years).sort_values(by='Year')
# print(Study_Stats)
# # ax3.bar(Study_Stats['Year'],Study_Stats['Warm_Last']-Study_Stats['Warm_First'],bottom=Study_Stats['Warm_First'])
# # ax3.bar(Study_Stats['Year'],Study_Stats['Warm_Days'])
# Save_Plots='C:\\Users\\User\\Google Drive\\PhD_Thesis\\Chapter3_Figs/'
# ax.legend(ncol=2,handlelength=1.5,columnspacing=1,fontsize=8)
# ax.set_ylim([8.5,10.4])
# ax.set_xlim(121,179)
# ax.grid()
# ax.set_ylabel('Gauge Height')
# ax.set_xlabel('Day of Year')
# ax.set_title('Kuluarpak Station Gauge Heights')
# # ax.set_xticks()
# plt.tight_layout()
# # plt.savefig(Save_Plots+'Gauge.png',dpi=600,bbox_inches='tight', pad_inches=0.05)

# Outputs

In [None]:
cols =['fch4','fco2','ch4_flux','co2_flux','Air_Temp','Rainfall','Wind_Speed','Wind_Direction',
        'Net_SW','Net_LW','Net_RN','Daytime','temperature_2m','dewpoint_temperature_2m',
       'volumetric_soil_water_layer_1','volumetric_soil_water_layer_2',
#        'volumetric_soil_water_layer_3',
#        'volumetric_soil_water_layer_4',
       'soil_temperature_level_1','soil_temperature_level_2',
       'soil_temperature_level_3',#'soil_temperature_level_4',
       'DOY',
       'NDSI_FI_LCP','NDVI_FI_LCP','GPP_FI_LCP',
       'snow_cover',
#        'NDSI_interp','GPP_interp','NDVI_interp',
       'Polygon','Rim','Polygon_Obs',
       'Rim_Obs',
       'VPD_RF',
       'Wind_Speed',
      'Net_RN',
       'Temp_5_1_OLS',
       'Temp_15_2_OLS',
       'Temp_15_1_OLS',
       'Temp_5_2_OLS',
       'VPD_RF',
       'PPFD_Avg_OLS',
       'u*_OLS',
       'Table_1_OLS',
       'AL_data_est',
       'Freezing','Green','Snow',
#        'Ts_15_1_OLS','Ts_5_1_OLS,','Ts_15_2_OLS','Ts_5_2_OLS',#'VPD_OLS',
#        'PPFD_AWS_RF','u*_est_RF','WTD_RF','AL_data_est_RF',
       'NEE_FSO','NME_FSO','Season_Sum','Season_Root',#'Season_Sum2',
       'Season',#,'Season2','Season3',
      'Daytime',
      ]

RN = {'Wind_Speed':'wind_speed',
      'Net_RN':'NR_Wm2_Avg',
       'Temp_5_1_OLS':'Temp_5_1',
       'Temp_15_2_OLS':'Temp_15_2',
       'Temp_15_1_OLS':'Temp_15_1',
       'Temp_5_2_OLS':'Temp_5_2',
       'VPD_RF':'VPD',
       'PPFD_Avg_OLS':'PPFD_Avg',
       'u*_OLS':'u*',
       'Table_1_OLS':'Table_1',
       'AL_data_est':'Active_Layer_1',
     }


# 58.8%	25.6%

Data['Polygon_Obs'] = Data['Polygon'].copy()
# Data['Polygon']=Data['Polygon'].median()
Data['Polygon'] = 0.588 # ALF derrived values
Data['Rim_Obs'] = Data['Rim'].copy()
# Data['Rim']=Data['Rim'].median()
Data['Polygon'] = 0.256 # ALF derrived values


for col in RN:
#     print(columns)
    Data[RN[col]+'_FO'] = Data[RN[col]].copy()
    Data[RN[col]+'_Diff'] = Data[RN[col]+'_FO']-Data[col]
    cols.append(RN[col]+'_FO')
    cols.append(RN[col]+'_Diff')

# Path = 'G:/My Drive/FishIsland_Outputs/Network_Outputs/Arctic_Sci_8Vars/'
    
Export = Data.copy()
Export.index.name = 'datetime'
Export.index = Export.index.tz_localize(None)
Export = Export.loc[((Export.index.month>=5)&(Export.index.month<=10))]
# Export[cols].rename(columns=RN).to_csv(Output_Path+'Network_Outputs/Arctic_Sci_8Vars/AWS_FI_Test.csv')
Export[cols].rename(columns=RN).to_csv(Output_Path+'Network_Outputs/Arctic_Sci_8Vars/AWS_FI_Final.csv')
# Export[cols].rename(columns=RN).to_csv('C:\\Users\\wesle\\NetworkAnalysis\\FishIsland/AWS_FI_Test.csv')
print('Done')

In [None]:
print(Data[['Polygon','Rim']].mean())
plt.figure()
plt.plot(Data.loc[Data.index.year==2014,'Season'])