In [1]:
# import os
import SunStatistics as SS
from scipy.stats import pearsonr
%matplotlib notebook
# import scipy.stats as stats
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import timedelta,datetime
import pytz

from sklearn.model_selection import KFold
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn import metrics

G_Drive = 'G:\\My Drive\\'
G_Drive = 'C:\\Users\\User\\Google Drive\\'
Output_Path = G_Drive+'FishIsland_Outputs/'



# Model Functions

In [2]:
def Model_Test_RF(X,y,AllData,K=10,Task = 'Predict',min_samples_split=None,Verbose=True,random_state=1):
    Data = AllData[X+y].dropna()
    kf = KFold(n_splits=K,shuffle=True, random_state = random_state)
    R2 = []
    MSE = []
    Imp = []    
    Pred = []
    rnd=1
    for train_index, test_index in kf.split(Data):
        df_test = Data.iloc[test_index]
        df_train = Data.iloc[train_index]
        X_train = np.array(df_train[X])#.reshape(-1, 1)     
        y_train = np.array(df_train[y]).ravel()#.reshape(-1, 1)
        X_test = np.array(df_test[X])#.reshape(-1, 1)  
        y_test = np.array(df_test[y]).ravel()#.reshape(-1, 1)
        Mod = RandomForestRegressor(min_samples_split=min_samples_split, random_state=random_state+rnd)#,bootstrap=False)
        rnd+=1
        Mod.fit(X_train, y_train)
        Test = Mod.predict(X_test)
        Pred.append(Mod.predict(AllData[X]))
        R2.append(metrics.r2_score(y_test,Test))
        MSE.append(metrics.mean_squared_error(y_test,Test))
        Imp.append(Mod.feature_importances_)
    Imp = np.mean(np.array(Imp),axis=0)
    Pred = np.mean(np.array(Pred),axis=0)
    if Verbose == True:
        print()
        print(y[0])
        print('RMSE', np.mean(np.array(MSE)**.5))
        print('std RMSE', np.std(np.array(MSE)**.5))
        print('r2', np.median(np.array(R2)))
        print('Imp',np.round(Imp,3)*100)
    if Task == 'Predict':
        return(Pred)
    if Task == 'Predict_Return':
        return(Pred,Imp,MSE,R2)
    elif Task == 'Optimize':
        return(MSE)

def Model_Test_OLS(X,y,AllData,K=10,Task = 'Predict',fit_intercept=None,Scale=False,Verbose=True,random_state=1):
    if Scale==True:
        Mod = make_pipeline(StandardScaler(), LinearRegression(fit_intercept=fit_intercept))
    else:
        Mod = make_pipeline(LinearRegression(fit_intercept=fit_intercept))
    Data = AllData[X+y].dropna()
    kf = KFold(n_splits=K,shuffle=True, random_state = random_state)
    R2 = []
    MSE = []
    Int = []
    Coef = []
    Pred = []
    for train_index, test_index in kf.split(Data):
        df_test = Data.iloc[test_index]
        df_train = Data.iloc[train_index]
        X_train = np.array(df_train[X])#.reshape(-1, 1)     
        y_train = np.array(df_train[y])#.reshape(-1, 1)
        X_test = np.array(df_test[X])#.reshape(-1, 1)  
        y_test = np.array(df_test[y])#.reshape(-1, 1)
        Mod.fit(X_train, y_train)
        Test = Mod.predict(X_test)
        if y == ['VPD'] or y == ['PPFD_Avg']:
            Test[Test<0]=0
        
        R2.append(metrics.r2_score(y_test,Test))
        MSE.append(metrics.mean_squared_error(y_test,Test))
        Pred.append(Mod.predict(AllData[X]))
        Int.append(Mod[-1].intercept_)
        Coef.append(Mod[-1].coef_)
    Coef = np.mean(np.array(Coef),axis=0).mean(axis=0)
    Pred = np.mean(np.array(Pred),axis=0)#.mean(axis=0)
    Int = np.mean(np.array(Int))
    if Verbose == True:
        print()
        print(y[0])
        print('RMSE', np.mean(np.array(MSE)**.5))
        print('std RMSE', np.std(np.array(MSE)**.5))
        print('r2', np.median(np.array(R2)))
        print('coef', Coef)
        print('int',Int)
    if Task == 'Predict':
        return(Pred)#.values)
    if Task == 'Predict_Return':
        return(Pred,Coef,Int,MSE,R2)
    elif Task == 'Optimize':
        return(MSE)
    
def Model_Test(X,y,AllData,K=10,Task = 'Predict',Test = 'OLS',min_samples_split=None,fit_intercept=True,Scale=False,Verbose=True,random_state=1):
    if Test == 'OLS':
        return(Model_Test_OLS(X,y,AllData,K=10,Task = Task,fit_intercept=fit_intercept,Scale=Scale,Verbose=Verbose,random_state=random_state))
    elif Test == 'RF':
        return(Model_Test_RF(X,y,AllData,K=10,Task = Task,min_samples_split=min_samples_split,Verbose=Verbose,random_state=random_state))
    

def calculate_pvalues(df):
    df = df.dropna()._get_numeric_data()
    dfcols = pd.DataFrame(columns=df.columns)
    pvalues = dfcols.transpose().join(dfcols, how='outer')
    for r in df.columns:
        for c in df.columns:
            pvalues[r][c] = round(pearsonr(df[r], df[c])[1], 4)
    return pvalues


In [3]:

def get_uv(Dir,U_bar,dec=0):
    M_dir = 270-(Dir+dec)
    U = np.cos(M_dir/180*np.pi)*U_bar
    V = np.sin(M_dir/180*np.pi)*U_bar
    return(U,V)
def get_wDir(u,v,m_dec=0):
    W_dir = np.arctan2(v,u)*180/np.pi
    W_dir = 270 - W_dir    
    W_dir[W_dir>360]-=360
    return(W_dir)

# Data

In [4]:
Taglu_Data = Output_Path+'Taglu_Data/Hourly.csv'
AWS = pd.read_csv(Taglu_Data,
parse_dates={'datetime':['Year','Month','Day','Time']}, 
).set_index('datetime')

# Gap Fill
C=['Air Temp','Net Radiation','Net SW Radiation','Net LW Radiation ','Wind Speed']
s = AWS.index.to_series()
print('Missing')
for c in C:
    print(c,AWS.loc[((AWS.index.month>=4)&(AWS.index.month<=10)),c].isnull().sum())
    miss = AWS[c].isna()
    #create consecutive groups
    g = miss.ne(miss.shift()).cumsum()
    #aggregate minimal 
    m1 = s.groupby(g).min()
    #get minimal of next groups, last value is replaced last value of index
    m2 = m1.shift(-1).fillna(AWS.index[-1])
    #get difference, convert to minutes
    out = m2.sub(m1).dt.total_seconds().div(3600).astype(int)
    AWS['Temp_Gap'] = g.map(out)
    AWS['Temp']=AWS[c].interpolate()
    AWS.loc[AWS['Temp_Gap']<=4,c]=AWS.loc[AWS['Temp_Gap']<=4,'Temp']
    AWS['Temp1']=AWS[c].shift(24)
    AWS['Temp2']=AWS[c].shift(-24)
    AWS.loc[AWS['Temp_Gap']>=4,c].fillna(AWS.loc[AWS['Temp_Gap']>=4,['Temp1','Temp2']].mean(axis=1))
AWS = AWS.drop(columns=['Temp_Gap','Temp','Temp1','Temp2'])
AWS_Rename = {'Air Temp':'Air_Temp',
              'Rainfall':'Rainfall',
              'Wind Speed':'Wind_Speed',
              'Wind direction':'Wind_Direction',
              'Net SW Radiation':'Net_SW',
              'Net LW Radiation ':'Net_LW',
              'Net Radiation':'Net_RN'}
AWS = AWS.rename(columns=AWS_Rename)
AWS_cols = list(AWS_Rename.values())

LAT = 69.371182
LON = -134.880935
TZ = -6

Temp = AWS[['Net_SW']].resample('5T').asfreq()
D = Temp.index.floor('D').to_julian_date()
T = Temp.index.hour/24+Temp.index.minute/60/24
A = np.ones(D.shape[0])

Zenith,Angle,Angle_Corr,Azimuth,Sunrise,Sunset=SS.SunStats(LAT*A,LON*A,D.values,T.values,TZ*A)
Temp['Sun_Angle'] = Angle_Corr
Temp = Temp.resample('30T').mean()

Temp['Daytime_Mask']=0.0
Temp.loc[Temp['Sun_Angle']>-0.5,'Daytime_Mask']=1.0

print(Temp.groupby('Daytime_Mask').count()['Sun_Angle'])
Temp = Temp.resample('H').mean()
print(Temp.groupby('Daytime_Mask').count()['Sun_Angle'])

try:
    AWS = AWS.drop(columns=['Sun_Angle','Daytime_Mask'])
except:
    pass
AWS = AWS.join(Temp[['Sun_Angle','Daytime_Mask']])

AWS['Daytime']=0
AWS.loc[AWS['Sun_Angle']>-0.5,'Daytime']=1


Mt = pytz.timezone('Canada/Mountain')
UTC = AWS.index+timedelta(hours=6)
AWS = AWS.set_index(UTC)
AWS.index = AWS.index.tz_localize(pytz.utc).tz_convert(Mt)

U,V = get_uv(AWS['Wind_Direction'],AWS['Wind_Speed'])
AWS['U_bar_AWS'] = U
AWS['V_bar_AWS'] = V
## 
print('Bad SW Fix',AWS.loc[AWS['Net_SW']<-5,'Net_SW'].count())
AWS.loc[AWS['Net_SW']<-5,'Net_SW']=-5
AWS.loc[AWS['Net_SW']<=-5,'Net_RN']=AWS.loc[AWS['Net_SW']<=-5,['Net_SW','Net_LW']].sum(axis=1)

AWS.groupby('Daytime').count()['Air_Temp']

Missing
Air Temp 40
Net Radiation 40
Net SW Radiation 40
Net LW Radiation  40
Wind Speed 40


  W2 = np.degrees(np.arccos(np.cos(np.radians(90.833))/(np.cos(np.radians(LAT))*np.cos(np.radians(T2)))-np.tan(np.radians(LAT))*np.tan(np.radians(T2)))) #HA Sunrise (deg)


Daytime_Mask
0.0     99673
1.0    109924
Name: Sun_Angle, dtype: int64
Daytime_Mask
0.0    48317
0.5     3039
1.0    53443
Name: Sun_Angle, dtype: int64
Bad SW Fix 913


Daytime
0    49891
1    54870
Name: Air_Temp, dtype: int64

# EC Data

In [208]:
EC_Data = pd.read_csv(Output_Path+'FI_Footprints_2022-02-23_2000mx2m.csv',delimiter = ',',header = 0,na_values = -9999,
                   index_col=['datetime'],
                  ).drop('Unnamed: 0',axis=1)
EC_Data = EC_Data.set_index(pd.to_datetime(EC_Data.index, utc=True)).tz_convert(Mt)
EC_Data = EC_Data.rename(columns={'Daytime':'Daytime_Raw',
                                 'Sun_Angle':'Sun_Angle_Raw'})

EC_Data.loc[EC_Data['flowrate_mean']<0.0001,['VPD','Tdew']] = np.nan

U,V=get_uv(EC_Data['wind_dir'],EC_Data['wind_speed'],dec=-22)
EC_Data['U_bar_EC']=U
EC_Data['V_bar_EC']=V

FSO = pd.read_csv(Output_Path+'Skeeter_et_al_2022_Day_Fix.csv',
                  index_col=['datetime']).drop('Unnamed: 0',axis=1)#.drop('datetime.1',axis=1)

FSO = FSO.set_index(pd.to_datetime(FSO.index, utc=True)).tz_convert(Mt)
FSO = FSO.rename(columns={'NEE_est':'NEE_FSO',
                            'NME_est':'NME_FSO'})
EC_Data = EC_Data.join(FSO[['NEE_FSO',
'NME_FSO']])

EC_Data['dmx']= EC_Data.index.dayofyear*1000+EC_Data.index.hour+0

EC_Data['DMX'] = EC_Data['dmx'].shift(1)

kwargs = {'closed':'right','label':'left'}
EC_Data_r=EC_Data.resample('h',**kwargs).mean()
EC_Data_r['Rain_mm_Tot']=EC_Data.resample('h',**kwargs).sum()['Rain_mm_Tot']

EC_Data_r = EC_Data_r.drop(['Wind_Direction', 'Wind_Speed'],axis=1)



EC_Data_r_mask = EC_Data.resample('h',**kwargs).count()[['fco2','fch4']]
EC_Data_r_mask['DMX'] = EC_Data.resample('h',**kwargs).min()['DMX']



EC_Data_r_mask.loc[EC_Data_r_mask['fco2']<2,'fco2']=np.nan
EC_Data_r_mask.loc[EC_Data_r_mask['fch4']<2,'fch4']=np.nan

EC_Data_r_mask[['fco2','fch4']]/=2
EC_Data_r['fco2']*=EC_Data_r_mask['fco2']
EC_Data_r['fch4']*=EC_Data_r_mask['fch4']

print(EC_Data_r[['fco2','co2_flux','fch4','ch4_flux']].count())

Data_M1 = EC_Data_r[
                    ['Temp_15_1','Temp_15_2','Temp_5_1','Temp_5_2','Table_1','VWC_1','Tdew','VPD','H',
                    'AirTC_Avg','PPFD_Avg','NR_Wm2_Avg','Rain_mm_Tot','Active_Layer_1',
                    'wind_speed','u*','U_bar_EC','V_bar_EC','Polygon','Rim','air_temperature',
                    'fch4','fco2','NEE_FSO','NME_FSO','ch4_flux','co2_flux']
                    ].join(AWS,how='outer')
Data_M1.head()




# for v in ['Temp_15_1','Temp_15_2','Temp_5_1','Temp_5_2','Table_1','VPD','Tdew',
#                     'AirTC_Avg','PPFD_Avg','NR_Wm2_Avg','Rain_mm_Tot','Active_Layer_1',
#                     'wind_speed','u*','ch4_flux','co2_flux','LE','flowrate_mean']:
#     print(v)
#     print((EC_Data[v].count()/EC_Data.shape[0]).round(2))
#     print()

print((EC_Data_r[['fco2','co2_flux','fch4','ch4_flux']].count()/EC_Data_r.shape[0]).round(2))

print(Data_M1['fco2'].groupby(Data_M1['Daytime_Mask']).count())

# EC_Data['foc2_fshift'] = EC_Data['fco2'].shift(1)
# EC_Data['foc2_bshift'] = EC_Data['fco2'].shift(1)
Kx = EC_Data.index.dayofyear*10000+EC_Data.index.hour*100+EC_Data.index.minute
EC_Data['Kx'] = Kx
Kx2 = EC_Data.index.dayofyear*10000+EC_Data.index.hour*100+EC_Data.shift(1).index.minute
EC_Data['Kx2'] = EC_Data['Kx'].shift(1)
# EC_Data.loc[EC_Data.index.minute+0==30,Kx]# = EC_Data.loc[EC_Data.index.minute==30,Kx2]

EC_Data.loc[EC_Data.index.minute==30,'Kx']=np.nan
EC_Data.loc[EC_Data.index.minute==0,'Kx2']=np.nan
EC_Data['Kx']=EC_Data['Kx'].ffill()
EC_Data['Kx2']=EC_Data['Kx2'].bfill()
# EC_Data[['Kx','Kx2']].shape

# # EC_Data.loc[EC_Data['Kx']==EC_Data['Kx2']]

# print(EC_Data[['fco2','NEE_FSO','fch4','NME_FSO']].describe())
# print(EC_Data_r[['fco2','NEE_FSO','fch4','NME_FSO']].describe())


fco2        1164
co2_flux    1337
fch4         653
ch4_flux     788
dtype: int64
fco2        0.59
co2_flux    0.67
fch4        0.33
ch4_flux    0.40
dtype: float64
Daytime_Mask
0.0     103
0.5      17
1.0    1044
Name: fco2, dtype: int64


In [224]:
# EC_Data['v1'] = EC_Data['fco2'].isnull()
# EC_Data['v1'] = EC_Data['fco2'].shift(-1)#.isnull()
# EC_Data['v2'] = EC_Data['fco2'].shift(1)#.isnull()

# # EC_Data['S1']
# print(EC_Data_r_mask['fco2'].sum())

# Temp = EC_Data_r_mask.resample('30T').bfill()
# print()

# print(EC_Data['DMX'].head())


for i, row in EC_Data_r_mask[['fco2','DMX']].dropna().iterrows():
#     print(i,row['DMX'])
    EC_Data.loc[EC_Data['DMX'] == row['DMX'],'fco2_score']=1
for i, row in EC_Data_r_mask[['fch4','DMX']].dropna().iterrows():
#     print(i,row['DMX'])
    EC_Data.loc[EC_Data['DMX'] == row['DMX'],'fch4_score']=1
    
print(EC_Data.loc[EC_Data['fco2_score']==1,'fco2'].describe())
print(EC_Data_r['fco2'].describe())

print()

print(EC_Data.loc[EC_Data['fch4_score']==1,'fch4'].describe())
print(EC_Data_r['fch4'].describe())
# # EC_Data.loc[(((EC_Data.index.minute==0)&(EC_Data[['fco2','v1']].isnull().sum(axis=1)==2))|
# #             ((EC_Data.index.minute==30)&(EC_Data[['fco2','v2']].isnull().sum(axis=1)==2)))].shape



plt.figure()
plt.bar([1,2],
        [EC_Data.loc[EC_Data['fch4_score']==1,'fch4'].mean(),EC_Data_r['fch4'].mean()],
        yerr=[EC_Data.loc[EC_Data['fch4_score']==1,'fch4'].std(),EC_Data_r['fch4'].std()])


plt.bar([1,2],
        [EC_Data.loc[EC_Data['fco2_score']==1,'fco2'].mean(),EC_Data_r['fco2'].mean()],
        yerr=[EC_Data.loc[EC_Data['fco2_score']==1,'fco2'].std(),EC_Data_r['fco2'].std()])

count    2328.000000
mean       -0.892303
std         1.494217
min        -5.481734
25%        -2.040237
50%        -0.905254
75%         0.493159
max         2.350742
Name: fco2, dtype: float64
count    1164.000000
mean       -0.892303
std         1.480637
min        -4.939822
25%        -2.083515
50%        -0.923200
75%         0.500982
max         2.236564
Name: fco2, dtype: float64

count    1306.000000
mean        0.033751
std         0.012586
min        -0.005279
25%         0.024510
50%         0.033631
75%         0.042996
max         0.070513
Name: fch4, dtype: float64
count    653.000000
mean       0.033751
std        0.011991
min        0.003733
25%        0.024934
50%        0.033316
75%        0.042685
max        0.063004
Name: fch4, dtype: float64


<IPython.core.display.Javascript object>

<BarContainer object of 2 artists>

In [68]:
X = 'fco2'
y = 'NEE_est'

FSO = pd.read_csv(Output_Path+'Skeeter_et_al_2022_Day_Fix.csv',index_col=['datetime']).drop('Unnamed: 0',axis=1)#.drop('datetime.1',axis=1)

print(FSO[['NEE_est','NEE_est_CI','NME_est','NME_est_CI']].mean().round(2))
# fig,ax=plt.subplots(1,2,figsize=(6,3))

DNA=(FSO[[X,y]].dropna())
RMSE = metrics.mean_squared_error(DNA[X],DNA[y])**.5
FSO[[X,y]].mean()
print('r2',(metrics.r2_score(DNA[X],DNA[y])*100).round(2))
print('RMSE',(RMSE).round(2))
print('MBE',((DNA[X]-DNA[y])).mean().round(2))

# X2 = sm.add_constant(DNA[X])
# est = sm.OLS(DNA[y], X2)
# est2 = est.fit()
# print(est2.summary())

X = 'fch4'
y = 'NME_est'

DNA=(FSO[[X,y]].dropna())

RMSE = metrics.mean_squared_error(DNA[X],DNA[y])**.5

# FSO[[X,y]].mean()
# print('r2',(metrics.r2_score(DNA[X],DNA[y])*100).round(2))
# print('RMSE',RMSE.round(2))
# print('MBE',((DNA[X]-DNA[y])).mean().round(2))

X2 = sm.add_constant(DNA[X])
est = sm.OLS(DNA[y], X2)
est2 = est.fit()
print(est2.summary())


NEE_est       -0.59
NEE_est_CI     0.28
NME_est       27.71
NME_est_CI     4.19
dtype: float64
r2 94.84
RMSE 0.34
MBE 0.0
                            OLS Regression Results                            
Dep. Variable:                NME_est   R-squared:                       0.785
Model:                            OLS   Adj. R-squared:                  0.785
Method:                 Least Squares   F-statistic:                     5269.
Date:                Thu, 17 Mar 2022   Prob (F-statistic):               0.00
Time:                        14:17:43   Log-Likelihood:                -4398.0
No. Observations:                1441   AIC:                             8800.
Df Residuals:                    1439   BIC:                             8810.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
---------

In [6]:
# plt.figure()
# plt.hist(EC_Data['flowrate_mean'])

# Merge

- Eddypro outputs have left labels (half hour timestamp corresponds to end of interval)
    * https://www.licor.com/documents/1ium2zmwm6hl36yz9bu4
- Need to close the right interval to match with AWS

In [7]:
for v1,v2 in zip(['Rain_mm_Tot','AirTC_Avg','wind_speed','NR_Wm2_Avg'],
                 ['Rainfall','Air_Temp','Wind_Speed','Net_RN']):
    DNA=Data_M1.loc[Data_M1['Active_Layer_1'].isnull()==False,[v1,v2]].dropna()
    print(v1,' ',v2)
    print('r2: ',(DNA[[v1,v2]].corr()**2)[v1].round(3)[1])
    print('RMSE: ',(metrics.mean_squared_error(DNA[v1],DNA[v2])**.5).round(3))
    print('MBE: ',np.round((DNA[v1]-DNA[v2]).mean(),2))
    print()

Rain_mm_Tot   Rainfall
r2:  0.988
RMSE:  0.05
MBE:  -0.01

AirTC_Avg   Air_Temp
r2:  0.988
RMSE:  0.547
MBE:  -0.09

wind_speed   Wind_Speed
r2:  0.945
RMSE:  0.43
MBE:  0.0

NR_Wm2_Avg   Net_RN
r2:  0.978
RMSE:  43.069
MBE:  9.45



# ECMWF

- ECMWF data does not account for DST
    - Must offset by one hour so summer times match

In [8]:
ECMWF = pd.read_csv(G_Drive+'earthengine//Climate_Weighted_Mean_long.csv',
                    parse_dates=['system:index'],na_values=-9999,index_col=['system:index'])

UTC = ECMWF.index+timedelta(hours=-1)
ECMWF = ECMWF.set_index(UTC)
ECMWF.index = ECMWF.index.tz_localize(pytz.utc).tz_convert(Mt)
ECMWF = ECMWF.drop(columns=['.geo'])

ECMWF['Wind_10m']  =(ECMWF['u_component_of_wind_10m']**2+ECMWF['v_component_of_wind_10m']**2)**.5
ECMWF['HH']=ECMWF['surface_sensible_heat_flux_hourly']/3600*-1
for v in ['temperature_2m' ,'soil_temperature_level_1','soil_temperature_level_2','soil_temperature_level_3']:
    ECMWF[v]-=273.15
ECMWF['total_precipitation_hourly']=ECMWF['total_precipitation_hourly']*1000
ECMWF[['SW','LW']]=ECMWF[['surface_net_solar_radiation_hourly',
    'surface_net_thermal_radiation_hourly']]/3600

ECMWF['RN']=ECMWF[['SW','LW']].sum(axis=1)

Data_M2 = Data_M1.join(ECMWF.loc[ECMWF.index.year>=2008],how = 'outer')

Data_M2['W_Dir_ERA5'] = get_wDir(Data_M2['u_component_of_wind_10m'].values,Data_M2['v_component_of_wind_10m'].values)
Data_M2['W_Dir_EC'] = get_wDir(Data_M2['U_bar_EC'].values,Data_M2['V_bar_EC'].values)

Data_M2['Wind_Direction']=Data_M2['Wind_Direction'].fillna(Data_M2['W_Dir_ERA5'])

In [9]:
ECMWF.columns

Index(['date', 'dewpoint_temperature_2m', 'snow_cover',
       'soil_temperature_level_1', 'soil_temperature_level_2',
       'soil_temperature_level_3', 'surface_latent_heat_flux_hourly',
       'surface_net_solar_radiation_hourly',
       'surface_net_thermal_radiation_hourly', 'surface_pressure',
       'surface_sensible_heat_flux_hourly',
       'surface_solar_radiation_downwards_hourly',
       'surface_thermal_radiation_downwards_hourly', 'temperature_2m',
       'total_evaporation_hourly', 'total_precipitation_hourly',
       'u_component_of_wind_10m', 'v_component_of_wind_10m',
       'volumetric_soil_water_layer_1', 'volumetric_soil_water_layer_2',
       'Wind_10m', 'HH', 'SW', 'LW', 'RN'],
      dtype='object')

In [10]:
InSitu = ['AirTC_Avg','Air_Temp','Tdew','Rainfall']
#           'wind_speed','Wind_Speed','H','NR_Wm2_Avg','Net_RN','Net_SW','Net_LW']
ERA5 = ['temperature_2m','temperature_2m','dewpoint_temperature_2m','total_precipitation_hourly',
        'Wind_10m','Wind_10m','HH','RN','RN','SW','LW']

for v1,v2 in zip(InSitu,ERA5):#,'Wind_Speed','Net_RN']):
    DNA=Data_M2.loc[((Data_M2.index.month>=5)&(Data_M2.index.month<=10)),[v1,v2]].dropna()
    if v1 == 'Rainfall':
        DNA = DNA.rolling('3D').sum()
    print(v1,v2)
#     if v1 == 'H':
#         DNA[v2]/=-3600
    print('r2: ',(DNA[[v1,v2]].corr()**2)[v1].round(2)[1])
    print('RMSE: ',(metrics.mean_squared_error(DNA[v1],DNA[v2])**.5).round(3))
    print('MBE: ',np.round((DNA[v1]-DNA[v2]).mean(),2))
    print()

AirTC_Avg temperature_2m
r2:  0.91
RMSE:  1.52
MBE:  0.53

Air_Temp temperature_2m
r2:  0.93
RMSE:  2.068
MBE:  -0.14

Tdew dewpoint_temperature_2m
r2:  0.84
RMSE:  1.663
MBE:  1.12

Rainfall total_precipitation_hourly
r2:  0.59
RMSE:  3.408
MBE:  -1.29



In [11]:
InSitu = ['Temp_5_1','Temp_5_2','Temp_5_1','Temp_5_2',
#           'Temp_15_1','Temp_15_2',
          'Temp_15_1','Temp_15_2',
#           'VWC_1','VWC_1',
          'Table_1','Table_1']
ERA5 = ['soil_temperature_level_1','soil_temperature_level_1','soil_temperature_level_2','soil_temperature_level_2',
#         'soil_temperature_level_2','soil_temperature_level_2',
        'soil_temperature_level_3','soil_temperature_level_3',
#         'volumetric_soil_water_layer_1','volumetric_soil_water_layer_2',
        'volumetric_soil_water_layer_1','volumetric_soil_water_layer_2']

for v1,v2 in zip(InSitu,ERA5):#,'Wind_Speed','Net_RN']):
    DNA=Data_M2.loc[((Data_M2.index.month>=5)&(Data_M2.index.month<=10)),[v1,v2]].dropna()
    print(v1,v2)
#     if v1 == 'H':
#         DNA[v2]/=-3600
    print('r2: ',(DNA[[v1,v2]].corr()**2)[v1].round(2)[1])
    print('RMSE: ',(metrics.mean_squared_error(DNA[v1],DNA[v2])**.5).round(3))
    print('MBE: ',np.round((DNA[v1]-DNA[v2]).mean(),3))
    print()
    
# print(ECMWF.columns)

Temp_5_1 soil_temperature_level_1
r2:  0.43
RMSE:  7.169
MBE:  -5.912

Temp_5_2 soil_temperature_level_1
r2:  0.75
RMSE:  4.625
MBE:  -3.645

Temp_5_1 soil_temperature_level_2
r2:  0.67
RMSE:  4.928
MBE:  -4.58

Temp_5_2 soil_temperature_level_2
r2:  0.57
RMSE:  3.254
MBE:  -2.324

Temp_15_1 soil_temperature_level_3
r2:  0.32
RMSE:  4.325
MBE:  -4.059

Temp_15_2 soil_temperature_level_3
r2:  0.36
RMSE:  4.175
MBE:  -3.92

Table_1 volumetric_soil_water_layer_1
r2:  0.81
RMSE:  0.376
MBE:  -0.37

Table_1 volumetric_soil_water_layer_2
r2:  0.85
RMSE:  0.41
MBE:  -0.41



# Satelite & Stream

In [12]:
NDWI = pd.read_csv(G_Drive+'earthengine/NDWI_NBAR_LCP.csv',index_col=['date'],
                   parse_dates=['date'],na_values=-9999)
NDWI=NDWI.drop(['.geo','system:index'],axis=1)

NDVI = pd.read_csv(G_Drive+'earthengine/NDVI_NBAR_LCP.csv',index_col=['date'],
                   parse_dates=['date'],na_values=-9999)
NDVI=NDVI.drop(['.geo','system:index'],axis=1)

NDSI = pd.read_csv(G_Drive+'earthengine/NDSI_NBAR_LCP.csv',index_col=['date'],
                   parse_dates=['date'],na_values=-9999)
NDSI=NDSI.drop(['.geo','system:index'],axis=1)


NDSI2 = pd.read_csv(G_Drive+'earthengine/NDSI_LCP.csv',index_col=['date'],
                   parse_dates=['date'],na_values=-9999)
NDSI2=NDSI2.drop(['.geo','system:index'],axis=1)

NDSI2=NDSI2.rename(columns={'NDSI_FI_LCP':'NDSI_INDEX'})


NDSI2.loc[NDSI2.index.month<3, 'NDSI_INDEX']=60

NDSI2 = NDSI2.rolling(8*4,center=True,min_periods=4,win_type='gaussian').mean(std=2)


NBAR = NDVI.join(NDWI).join(NDSI).join(NDSI2)

print(NBAR.loc[NBAR.index.month==11].count())

print('Completeness')
Temp = NBAR.loc[((NBAR.index.year>=2008)&(NBAR.index.year<=2020)&
                    (NBAR.index.month>=4)&(NBAR.index.month<=11))].copy()
Temp['Amt'] = 1

print(((Temp.groupby(Temp.index.month).count().T[:3])/Temp.groupby(Temp.index.month).count()['Amt'].values).round(2))
print((Temp.count()/Temp.shape[0]).round(3))

GPP = pd.read_csv(G_Drive+'earthengine/GPP_FI_LCP.csv',parse_dates=['date'],na_values=-9999,index_col=['date']).dropna()
# GPP.index = GPP.index.tz_localize(pytz.utc).tz_convert(Mt)
GPP=GPP.drop(['.geo','system:index'],axis=1)
GPP['GPP_Ix'] = np.arange(0,GPP.shape[0])


GPP_fill = GPP.resample('D').asfreq()
GPP_fill['GPP_Ix'] = GPP_fill['GPP_Ix'].bfill()
GPP_fill.rename(columns={'GPP_FI_LCP':'GPP_est'})
GPP_fill = GPP_fill.rolling(8*4,center=True,min_periods=4,win_type='gaussian').mean(std=2)/8
GPP_fill.describe()

NBAR = NBAR.join(GPP_fill)

NBAR.index = NBAR.index.tz_localize(pytz.utc).tz_convert(Mt)

# NBAR.describe()
NBAR.describe()
# NDSI
# GPP

NDVI_FI_LCP      0
NDWI_FI_LCP      0
NDSI_FI_LCP      0
NDSI_INDEX     152
dtype: int64
Completeness
date          4    5    6    7    8     9     10   11
NDVI_FI_LCP  1.0  1.0  1.0  1.0  1.0  0.96  0.75  0.0
NDWI_FI_LCP  1.0  1.0  1.0  1.0  1.0  0.96  0.75  0.0
NDSI_FI_LCP  1.0  1.0  1.0  1.0  1.0  0.96  0.75  0.0
NDVI_FI_LCP    0.849
NDWI_FI_LCP    0.849
NDSI_FI_LCP    0.848
NDSI_INDEX     0.933
Amt            1.000
dtype: float64


Unnamed: 0,NDVI_FI_LCP,NDWI_FI_LCP,NDSI_FI_LCP,NDSI_INDEX,GPP_FI_LCP,GPP_Ix
count,3188.0,3187.0,3154.0,4147.0,3270.0,4596.0
mean,0.1998,-0.212407,0.142918,40.40167,6.174693,63.398329
std,0.249287,0.27329,0.624788,32.291629,9.168897,15.899442
min,-0.237516,-0.602743,-0.653383,0.0,0.0,35.60564
25%,-0.03915,-0.505814,-0.478626,0.360663,0.0,49.533571
50%,0.109791,-0.127998,-0.043249,60.0,1.557146,63.496312
75%,0.44905,0.053577,0.795341,67.372482,9.032001,77.443548
max,0.629275,0.253032,0.948897,95.01701,49.03716,91.374848


In [13]:
Gauge = pd.read_csv(Output_Path+'Gauge_Data.csv',
                   parse_dates=['Date'],na_values=['-9999'])
Gauge = Gauge.set_index(pd.DatetimeIndex(Gauge['Date']))
Gauge=Gauge.drop('Date',axis=1)
Gauge.loc[Gauge['EAST CHANNEL']>15,'EAST CHANNEL']=np.nan
Gauge.loc[((Gauge.index.year==2015)&(Gauge.index.dayofyear>=116)&
           (Gauge.index.dayofyear<=170)),'BIG LAKE']=np.nan
Gauge.index = Gauge.index.tz_localize(pytz.utc).tz_convert(Mt)
Data = Data_M2.join(NBAR.join(Gauge))
Data['DOY']=Data.index.dayofyear
Data['Year']=Data.index.year
Data['Month']=Data.index.month
Data['Date'] = Data.index.date
# Data.head()
Data['MM-DD'] = Data.index.strftime('%m-%d')
# Data[['NDSI_FI_LCP','NDVI_FI_LCP']]=Data[['NDSI_FI_LCP','NDVI_FI_LCP']].ffill()

# Daily

* Temperature Fill
* NDIS Fill

In [14]:
InSitu = ['Temp_5_1','Temp_5_2',
          'Temp_15_1','Temp_15_2',
          'Temp_5_1','Temp_5_2',
          'Temp_15_1','Temp_15_2',
          'Temp_5_1','Temp_5_2',
          'Temp_15_1','Temp_15_2',
          
         ]
ERA5 = ['soil_temperature_level_1','soil_temperature_level_1',
        'soil_temperature_level_1','soil_temperature_level_1',
        'soil_temperature_level_2','soil_temperature_level_2',
        'soil_temperature_level_2','soil_temperature_level_2',
        'soil_temperature_level_3','soil_temperature_level_3',
        'soil_temperature_level_3','soil_temperature_level_3',
        'volumetric_soil_water_layer_1','volumetric_soil_water_layer_1',
        'volumetric_soil_water_layer_1','volumetric_soil_water_layer_1',
        'volumetric_soil_water_layer_2','volumetric_soil_water_layer_2',
        'volumetric_soil_water_layer_2','volumetric_soil_water_layer_2'
       ]

Daily = Data.rolling('D').mean()
# print(Data)
Pairs = {'TS':[],
         'ERA5':[],
        'Hourly':[],
        'Daily':[],
#         'Weekly':[],
        }

for v1,v2 in zip(InSitu,ERA5):#,'Wind_Speed','Net_RN']):
    DNA2=Data.loc[((Data.index.month>=5)&(Data.index.month<=10)),[v1,v2]].dropna()
    DNA=Daily.loc[((Daily.index.month>=5)&(Daily.index.month<=10)),[v1,v2]].dropna()
#     DNA3=Weekly.loc[((Weekly.index.month>=5)&(Weekly.index.month<=10)),[v1,v2]].dropna()
#     Pair = (v1+' '+v2)
#     print((Daily[[v1,v2]].corr()**2)[v1].round(2))
    Pairs['TS'].append(v1)
    Pairs['ERA5'].append(v2)
    Pairs['Daily'].append((Daily[[v1,v2]].corr()**2)[v1].round(2)[1])
    Pairs['Hourly'].append((DNA2[[v1,v2]].corr()**2)[v1].round(2)[1])
#     Pairs['Weekly'].append((DNA2[[v1,v2]].corr()**2)[v1].round(2)[1])
Pairs = pd.DataFrame(data=Pairs)
Pairs['ERA5'] = Pairs['ERA5'].replace({'soil_temperature_level_1':'Level 1',# (0 - 7 cm)',
                                      'soil_temperature_level_2':'Level 2',# (7 - 28 cm)',
                                      'soil_temperature_level_3':'Level 3'})# (28 - 100 cm)'})
Pairs['TS'] = Pairs['TS'].replace({'Temp_5_1':'Polygon Center 5 cm',
                                   'Temp_5_2':'Polygon Rim 5 cm',
                                   'Temp_15_1':'Polygon Center 15 cm',
                                   'Temp_15_2':'Polygon Rim 15 cm',
                                  })
# Pairs.set_index(Pairs['ERA5'])
Pairs.groupby(['TS','ERA5']).mean()#.unstack()#.swaplevel(axis=1)
# Pairs

Unnamed: 0_level_0,Unnamed: 1_level_0,Hourly,Daily
TS,ERA5,Unnamed: 2_level_1,Unnamed: 3_level_1
Polygon Center 15 cm,Level 1,0.02,0.01
Polygon Center 15 cm,Level 2,0.09,0.11
Polygon Center 15 cm,Level 3,0.32,0.6
Polygon Center 5 cm,Level 1,0.43,0.67
Polygon Center 5 cm,Level 2,0.67,0.79
Polygon Center 5 cm,Level 3,0.32,0.48
Polygon Rim 15 cm,Level 1,0.01,0.01
Polygon Rim 15 cm,Level 2,0.09,0.1
Polygon Rim 15 cm,Level 3,0.36,0.57
Polygon Rim 5 cm,Level 1,0.75,0.81


In [15]:
Y = 'Temp'

Daily = Data.resample('D').mean()
Daily['MM-DD']=Data.resample('D').first()['MM-DD']
Daily['T_max'] = Data.resample('D').max()['Air_Temp']
Daily['T_min'] = Data.resample('D').min()['Air_Temp']
Daily['Date'] = Daily.index.date
Daily[['Rainfall','total_precipitation_hourly']] = Data.resample('D').sum()[['Rainfall','total_precipitation_hourly']]
# Daily = Daily.loc[((Daily.index.month>=4)&(Daily.index.month<=11))]
Daily = Daily.loc[((Daily.index.year>=2008)&(Daily.index.year<=2020))]

# X=['temperature_2m']
# y = ['Air_Temp']
# K=30
# Val = Model_Test(X,y,Daily.loc[~Daily[X].T.isnull().any()],
#                             K=K,Test = 'OLS',Scale=True,Task='Predict',Verbose=True,min_samples_split=2)
# Daily[Y]=np.nan
# Daily.loc[~Daily[X].T.isnull().any(),Y]=Val.flatten()
# Daily['Air_Temp']=Daily['Air_Temp'].fillna(Daily[Y])
# Daily['DOY']=Daily.index.dayofyear
# Daily['Year']=Daily.index.year

# X = ['snow_cover']
# y=['NDSI_FI_LCP']

# Val = Model_Test(X,y,Daily.loc[~Daily[X].T.isnull().any()],
#                             K=K,Test = 'OLS',Scale=True,Task='Predict',Verbose=True,min_samples_split=2)

# Daily['NDSI_fill'] = Daily['NDSI_FI_LCP'].copy()
# # Y = 'NDSI_fill'
# Daily[Y]=np.nan
# Daily.loc[~Daily[X].T.isnull().any(),Y]=Val.flatten()
# Daily['NDSI_fill']=Daily['NDSI_fill'].fillna(Daily[Y])

# Daily['NDVI_fill']=Daily['NDVI_FI_LCP'].interpolate()

# NDVI_thersh = Daily.loc[Daily['fco2'].isnull()==False,'NDVI_fill'].min()

# Snowmelt

In [16]:
print(Daily.groupby(Daily.index.month).min()['snow_cover'])

1     97.571267
2     97.571267
3     97.571267
4     97.571267
5      0.000000
6      0.000000
7      0.000000
8      0.000000
9      0.000000
10     0.007118
11    37.013013
12    97.571267
Name: snow_cover, dtype: float64


In [53]:
# Daily['Snow_Season1'] = 'Winter'
# Daily['Snow_Season2'] = 'Winter'


Classes = {'Snow Pack':100,
#           'Significant':97.5,#71267,
          'Significant':50,
          'Patchy':25,
          'Snow Free':5}

Values = {'Snow Pack':1/5,
#           'Significant':1/5,
          'Significant':3/5,
          'Patchy':4/5,
          'Snow Free':5/5}


Thresh = .5

Daily['Warm']=0
Data['Warm']=0
for c,v in Classes.items():
    Daily.loc[Daily['snow_cover']<=v,'Snow_Class']=c
    Daily.loc[((Daily['snow_cover']<=v)&(Daily['Air_Temp']>Thresh)),'Warm']=Values[c]/5
    Data.loc[Data['snow_cover']<=v,'Snow_Class']=c
    Data.loc[((Data['snow_cover']<=v)&(Data['Air_Temp']>Thresh)),'Warm']=Values[c]/5

# print(Daily.loc[Daily['Warm']!=Daily['Warm2'],['snow_cover','Air_Temp','Warm','Warm2']])
FDF = pd.DataFrame(data={})
FDF['Start'] = Daily.loc[Daily['Snow_Class']=='Snow Free'].resample('Y').first()['Date']
FDF['Start_NDSI'] = Daily.loc[Daily['NDSI_FI_LCP']<=0].resample('Y').first()['Date']
FDF['Start_doy'] = Daily.loc[Daily['Snow_Class']=='Snow Free'].resample('Y').first()['DOY']
FDF['Start_NDSI_doy'] = Daily.loc[Daily['NDSI_FI_LCP']<=0].resample('Y').first()['DOY']
FDF['End'] = Daily.loc[Daily['Snow_Class']=='Snow Free'].resample('Y').last()['Date']

FDF.index=FDF.index.year

Exp = 1/2
for y in range(2009,2021):
    Year = Daily.loc[Daily.index.year==y].copy()
    Year['Season_Sum'] = (Year['Warm']).cumsum()
    Daily.loc[Daily.index.year==y,'Season_Sum']=Year['Season_Sum']*1
    Daily.loc[Daily.index.year==y,'Season_Root']=Year['Season_Sum']**Exp
    FDF.loc[FDF.index==y,'Season_Sum']=Year['Season_Sum'].max()
    FDF.loc[FDF.index==y,'Season_Root']=Year['Season_Sum'].max()**Exp
FDF[['Start','End']]

Unnamed: 0,Start,End
2008,2008-05-26,2008-09-20
2009,2009-05-31,2009-09-19
2010,2010-05-22,2010-09-21
2011,2011-05-24,2011-09-24
2012,2012-05-24,2012-10-09
2013,2013-06-02,2013-09-27
2014,2014-05-28,2014-09-24
2015,2015-05-17,2015-09-19
2016,2016-05-12,2016-09-28
2017,2017-05-21,2017-10-08


In [54]:
FDF['Season_Root']=FDF['Season_Sum']**Exp

AL_obs = pd.read_csv(Output_Path+'AL_obs.csv',index_col=['Date'],parse_dates=['Date'])

AL_s = ['Reindeer Depot  (Thaw tube)','Lousy Point (Thaw tube)','Taglu (grid)']

ALH=pd.read_csv(Output_Path+'AL_Depth.csv',header=[1])
AL=pd.read_csv(Output_Path+'AL_Depth.csv',skiprows=4)
AL.columns=ALH.columns
AL=AL.rename(columns={'Name':'Year'})
AL = AL.set_index('Year')
AL[AL_s]/=100

AL = AL.loc[AL.index>=2009,AL_s]

T = Daily.loc[Daily.index.year>=2008].resample('Y').mean()[['Air_Temp']]
T['Year']=T.index.year
T = T.set_index('Year')
AL = AL.join(T).join(FDF)
Flood = Daily.loc[((Daily.index.month>=5)&(Daily.index.month<=6))].copy()

for y in range(2009,2018):
    Fy=Flood.loc[Flood.index.year==y].copy()
    for s in ['KULUARPAK','BIG LAKE']:
        if Fy[s].count()/(1464/24)>.3:
            if s == 'KULUARPAK':
                i = 0
            else:
                i = 1
            if s == 'KULUARPAK' and y == 2010:
                Fy.loc[((Fy.index.month==5)&(Fy.index.day<27)),s]=np.nan
#             else:
            v=Fy[s].max()
            d = Fy.loc[((Fy[s]==v))].index.date
            doy = Fy.loc[((Fy[s]==v))].index.dayofyear
            FDF.loc[FDF.index==y,s+'_Peak']=v
            FDF.loc[FDF.index==y,s+'_Peak_Date']=d
            FDF.loc[FDF.index==y,s+'_Peak_doy']=doy

Temp = FDF.dropna()

# FDF['BIG LAKE_Peak_doy'] = FDF['BIG LAKE_Peak_doy'].fillna(FDF['KULUARPAK_Peak_doy'] + 1)

print((FDF['Start']-FDF['KULUARPAK_Peak_Date']).describe())
print((FDF['Start']-FDF['BIG LAKE_Peak_Date']).describe())
# print((FDF['Start']-FDF['Start_NDSI']).describe())

print(FDF[['BIG LAKE_Peak_doy','KULUARPAK_Peak_doy','Start_NDSI_doy',
           'Start_doy']].corr()[['Start_NDSI_doy','Start_doy']]**2)

# FDF[['Start','BIG LAKE_Peak_Date']]



Daily['Study_Season']=0
for i,row in FDF.iterrows():
#     print(i.yea
    try:
        Daily.loc[((Daily.index.date>=row['Start'])&(Daily.index.date>row['KULUARPAK_Peak_Date'])&(Daily.index.date>row['BIG LAKE_Peak_Date'])
               &(Daily.index.date<=row['End'])),'Study_Season']=1
    except:
        try:
            Daily.loc[((Daily.index.date>=row['Start'])&(Daily.index.date>row['KULUARPAK_Peak_Date'])
               &(Daily.index.date<=row['End'])),'Study_Season']=1
        except:
#             print((FDF['BIG LAKE_Peak_Date']-FDF['Start']).median())
#             pass
            Daily.loc[((Daily.index.date>=row['Start']+(FDF['BIG LAKE_Peak_Date']-FDF['Start']).median())
               &(Daily.index.date<=row['End'])),'Study_Season']=1
#             pass
        pass
    
# print(Daily.resample('Y').sum()['Study_Season'])+timedelta(hours=6)
# FDF['BIg']

count                            9
mean             -7 days +00:00:00
std      2 days 17:43:36.144842231
min             -12 days +00:00:00
25%              -9 days +00:00:00
50%              -7 days +00:00:00
75%              -6 days +00:00:00
max              -3 days +00:00:00
dtype: object
count                              7
mean     -9 days +10:17:08.571428572
std        3 days 20:48:14.186890891
min               -14 days +00:00:00
25%               -12 days +12:00:00
50%                -7 days +00:00:00
75%                -6 days +00:00:00
max                -4 days +00:00:00
dtype: object
                    Start_NDSI_doy  Start_doy
BIG LAKE_Peak_doy         0.795349   0.821639
KULUARPAK_Peak_doy        0.744443   0.890978
Start_NDSI_doy            1.000000   0.682593
Start_doy                 0.682593   1.000000


In [55]:
fig,ax = plt.subplots(2,sharex=True,figsize=(5,4))

from matplotlib import cm
import matplotlib as mpl

norm = mpl.colors.Normalize(vmin=0, vmax=2018-2009)
Colors = []
for i in range(2009,2020):
#     rgba_color = cm.plasma(norm(i)) 
#     Colors.append(rgba_color)
    if i == 2017:
        Colors.append('#e31b1b')
    else:
        Colors.append('#7090c2')

for y in range(2009,2020):
    Fy=Daily.loc[((Daily.index.year==y)&(Daily.index.month<=10)&(Daily.index.month>=5))].copy()
    for s in ['KULUARPAK','BIG LAKE']:
        if Fy[s].count()/(1464/24)>.3:
            if s == 'KULUARPAK':
                i = 0
            else:
                i = 1
#             if s == 'KULUARPAK' and y == 2010:
#                 Fy.loc[((Fy.month==5)&(Fy.day<27)),s]=np.nan
#             else:
            v=Fy[s].max()
            d = Fy.loc[((Fy[s]==v))].index.date
            doy = Fy.loc[((Fy[s]==v))].index.dayofyear
#             FDF.loc[FDF.index==y,s+'_Peak']=v
#             FDF.loc[FDF.index==y,s+'_Peak_Date']=d
#             FDF.loc[FDF.index==y,s+'_Peak_doy']=doy
            ax[i].plot(Fy['MM-DD'],Fy[s],color=Colors[y-2009])

    
P = [0,14,31,45,61,75,92,106,123,137,153,167,183]
ax[1].set_xticks(P[1::2])
ax[1].set_xlim(0,183)

for label in ax[1].get_xticklabels():
    label.set_ha("center")
    label.set_rotation(45)

# ax[0,0].grid()
# ax[1,0]
ax[0].set_title('a. Kuluarpak Channel',loc='left')

ax[1].set_title('b. Big Lake',loc='left')
ax[0].set_ylabel('m')
ax[1].set_ylabel('m')
# ax[0,1].set_ylim(-.5,1)
# ax[0,0].set_ylim(-.5,1)
ax[1].set_ylim(8.25,10.5)
ax[0].set_ylim(8.25,10.5)
ax[0].grid()
ax[1].grid()
plt.tight_layout()

plt.savefig(G_Drive+'/PhD_Thesis/Chapter3_Figs/Addl.png',dpi=600)

<IPython.core.display.Javascript object>

# Flood

In [58]:
v = 'Season_Root'
param = v

X = [param]

print('AL Comparisson')
print(AL_s[0])
df = AL[[AL_s[0]]+X].dropna()
# print(df)
Mod = LinearRegression()
Mod.fit(df[X], df[AL_s[0]])
print(Mod.coef_)
Test = Mod.predict(df[X])
print(metrics.r2_score(df[AL_s[0]],Test))

print()
print(AL_s[1])
df = AL[[AL_s[1]]+X].dropna()
Mod = LinearRegression()
Mod.fit(df[X], df[AL_s[1]])
print(Mod.coef_)
Test = Mod.predict(df[X])
print(metrics.r2_score(df[AL_s[1]],Test))
AL = AL.loc[AL.index>=2008].copy()

Data.loc[((Data.index.month<=4)|(Data.index.month>=11)),['Season','Season_Sum','Season_Root',
#                                                          'Season_Root2',
                                                         'Study_Season']]=np.nan
Data.loc[Data['Study_Season'].isnull(),['Season_Sum','Season_Root',
#                                         'Season_Root2',
                                        'Study_Season']]=np.nan

for v in ['DOY','Season_Sum','Season_Root',
#           'Season_Root2',
        ]:
    for d in AL_obs.index.unique():
        AL_obs.loc[AL_obs.index==d,v]=Daily.loc[Daily.index.date==d,v].values[0]

y=['Active_Layer_1']
Y = 'AL_data_est'
Lag = []
D = []
K = 10

Val,C,I,RMSE,R2 = Model_Test(X,y,AL_obs.copy(),K=K,Test = 'OLS',Task='Predict_Return')#,fit_intercept=False)
AL_obs[Y]=Val
try:
    Daily['AL_data_est'] = (Daily[X[0]])*C[0]+(Daily[X[1]])*C[1]+I
except:
    Daily['AL_data_est'] = (Daily[X[0]])*C[0]+I
    pass
Data['AL_data_est']=Daily['AL_data_est'].resample('H').interpolate()
Data['Study_Season']=Daily['Study_Season'].resample('H').interpolate()
# Data['Snow_Class']=Daily['Snow_Class'].resample('H').ffill()
Data.loc[Data['Study_Season']<1,'Study_Season']=np.nan

# Data['AL_data_est']*=Data['Study_Season']
Data.loc[Data['AL_data_est']<=0,'AL_data_est']=0
# Data.loc[Data['AL_data_est']>0,'Study_Season']=1

T = Daily.loc[Daily.index.year>=2008].resample('Y').max()[['AL_data_est']]
T['Year']=T.index.year
T = T.set_index('Year')
try:
    AL = AL.drop(columns='AL_data_est')
except:
    pass
AL = AL.join(T)
AL = AL.loc[AL['AL_data_est'].isnull()==False].copy()

# print(Daily.loc[Daily['Snow_Season']=='Snow Free','AL_data_est'].resample('Y').min())
print()
print((AL.loc[AL.index>=2009,['AL_data_est']+AL_s].corr()**2))#['AL_data_est'])

print()

print(AL[AL_s].mean())
print(Data['Active_Layer_1'].describe())

AL Comparisson
Reindeer Depot  (Thaw tube)
[0.55424765]
0.8333514057700677

Lousy Point (Thaw tube)
[0.39270084]
0.8487907694800139

Active_Layer_1
RMSE 0.02326933048654617
std RMSE 0.006058995031414219
r2 0.925445526931973
coef [0.12209143]
int -0.07497931278118095

                             AL_data_est  Reindeer Depot  (Thaw tube)  \
AL_data_est                     1.000000                     0.833351   
Reindeer Depot  (Thaw tube)     0.833351                     1.000000   
Lousy Point (Thaw tube)         0.848791                     0.655218   
Taglu (grid)                    0.001206                     0.004664   

                             Lousy Point (Thaw tube)  Taglu (grid)  
AL_data_est                                 0.848791      0.001206  
Reindeer Depot  (Thaw tube)                 0.655218      0.004664  
Lousy Point (Thaw tube)                     1.000000      0.007456  
Taglu (grid)                                0.007456      1.000000  

Reindeer Depot  (Tha

In [57]:
print(param)
print(AL_obs[['DOY','Season_Root','Season_Sum','Active_Layer_1']].corr()['Active_Layer_1']**2)
y = ['Active_Layer_1']

X = ['DOY']
Val,C1,I1,MSE,R2 = Model_Test(X,y,AL_obs.copy(),K=K,Test = 'OLS',Task='Predict_Return')

Daily['DSSM'] = Daily.index.dayofyear-152
Sub = Daily.loc[((Daily.index.year==2017)&(Daily.Study_Season>=1))]

print(C1*np.arange(150,175)+I1)

print(param)
X = [param]
Val,C,I,MSE,R2 = Model_Test(X,y,AL_obs.copy(),K=K,Test = 'OLS',Task='Predict_Return')

print()
print()
print(C*np.arange(0,10,)**.5+I)

fig,ax=plt.subplots(figsize=(4,4)) 
plt.plot(Sub.index,Sub['DOY']*C1+I1,label='Linear r$^2$ = 0.94')
plt.plot(Sub.index,(Sub[X])*C+I,label='EQ. 2  r$^2$ = 0.93')

plt.scatter(AL_obs.index, AL_obs['Active_Layer_1'],color='grey',edgecolor='k')
ax.set_title('Thaw Depth Estimation')

ax.set_ylabel('Thaw Depth (m)')
ax.legend()

plt.grid()
# plt.ylim(0,0.65)
xt = plt.xticks()[0]
plt.xticks(xt[::2])#,ha='right')
fig.autofmt_xdate(ha='center')
# print(plt.xticks())
plt.tight_layout()
plt.savefig(G_Drive+'/PhD_Thesis/Chapter3_Figs/AL_fig.png',dpi=600)

Season_Root
DOY               0.962242
Season_Root       0.950564
Season_Sum        0.962092
Active_Layer_1    1.000000
Name: Active_Layer_1, dtype: float64

Active_Layer_1
RMSE 0.02012297971743496
std RMSE 0.006195651834781041
r2 0.9424498854788228
coef [0.00347315]
int -0.3787794038548565
[0.14219306 0.14566621 0.14913936 0.15261251 0.15608566 0.15955881
 0.16303196 0.16650511 0.16997826 0.17345141 0.17692456 0.18039771
 0.18387086 0.18734401 0.19081716 0.19429031 0.19776346 0.20123661
 0.20470976 0.20818291 0.21165606 0.21512921 0.21860236 0.22207551
 0.22554866]
Season_Root

Active_Layer_1
RMSE 0.02326933048654617
std RMSE 0.006058995031414219
r2 0.925445526931973
coef [0.12209143]
int -0.07497931278118095


[-0.07497931  0.04711212  0.09768404  0.13648925  0.16920355  0.19802542
  0.22408239  0.24804425  0.2703474   0.29129498]


<IPython.core.display.Javascript object>

# Fill Params

In [22]:
# H = 48
# C = pd.DataFrame(data={
#     'Temp_5_1':np.zeros(H),
#     'Temp_5_2':np.zeros(H),
#     'Temp_15_1':np.zeros(H),
#     'Temp_15_2':np.zeros(H)
# },
#      index=np.arange(1,H+1))
# for h in range(0,H):
#     for c in C.columns:
#         Data['Rolling_Temp'] = Data['Air_Temp'].shift(h+1)#.mean()
#         C.iloc[h][c] = (Data[['Rolling_Temp',c]].corr()['Rolling_Temp'][-1]).copy()
# fig,ax=plt.subplots(figsize=(4,4))
# C.index=C.index+1
# C.plot(ax=ax)
# plt.grid()
# # C.diff().plot(ax=ax)
# # C.loc[C.diff()['Temp_5_2']<=0]
# # print(Data.loc[Data['Study_Season']!=1,'Season_Sum'].max())
# plt.figure()
print(Data.loc[Data['Study_Season']==1].groupby('Snow_Class').count()['Air_Temp'])
Data.loc[((Data['Study_Season']==1)&(Data['Snow_Class']=='Significant')),'snow_cover'].resample('Y').count().dropna()
# plt.plot(Data.loc[Data.index.year==2016,'snow_cover'])

Snow_Class
Patchy          1323
Significant       60
Snow Free      31489
Snow Pack         60
Name: Air_Temp, dtype: int64


2009-12-31 00:00:00-07:00    10
2010-12-31 00:00:00-07:00     0
2011-12-31 00:00:00-07:00     0
2012-12-31 00:00:00-07:00     0
2013-12-31 00:00:00-07:00    11
2014-12-31 00:00:00-07:00     0
2015-12-31 00:00:00-07:00     0
2016-12-31 00:00:00-07:00    11
2017-12-31 00:00:00-07:00     0
2018-12-31 00:00:00-07:00     0
2019-12-31 00:00:00-07:00     0
2020-12-31 00:00:00-07:00    28
Freq: A-DEC, Name: snow_cover, dtype: int64

# Final?

In [23]:
L = [3,6,12,24,48]
Data['Level_1']=Data['soil_temperature_level_1'].rolling(str(24)+'H').mean()
Data['Level_2']=Data['soil_temperature_level_2'].rolling(str(24)+'H').mean()
Data['Level_3']=Data['soil_temperature_level_3'].rolling(str(24)+'H').mean()

Data['Level_1_VWC']=Data['volumetric_soil_water_layer_1'].rolling(str(24)+'H').mean()
Data['Level_2_VWC']=Data['volumetric_soil_water_layer_2'].rolling(str(24)+'H').mean()

Data['Rolling_RainfallD3']=Data['Rainfall'].rolling(str(3)+'D').sum()
Data['Rolling_RainfallD1']=Data['Rainfall'].rolling(str(1)+'D').sum()

Lags = []
for l in L:
#     Lags.append('Shift_Air_Temp_'+str(l))
#     Data['Shift_Air_Temp_'+str(l)] = Data['Air_Temp'].shift(l)#str(l)+'H')#.mean()
    Lags.append('Rolling_Air_Temp_'+str(l))
    Data['Rolling_Air_Temp_'+str(l)] = Data['Air_Temp'].rolling(str(l)+'H').mean()
for d in ['Rolling_RainfallD1','Rolling_RainfallD3']:
    Lags.append(d)
    
Y = 'Test'

SE_Rule = 3

Met = [
    'Net_SW',
    'Net_LW',
#     'Net_RN',
    'Air_Temp',
#     'Rainfall'
    ]
Met = Met + Lags

ERA5 = [
    'soil_temperature_level_1',
    'soil_temperature_level_2',
    'soil_temperature_level_3',
    'volumetric_soil_water_layer_1',
    'volumetric_soil_water_layer_2',
    ]

AllX = ERA5+Met#+Lag_Rain
# AllX = Met#+Lag_Rain

print(len(AllX))

Mods = {
}
for y,Z in zip([['Table_1'],['Temp_15_1'],['Temp_15_2'],['Temp_5_1'],['Temp_5_2']],[1,4,5,2,3]):
    X = AllX
    res = {
        'X':X,
        'Score':np.ones(len(X))/len(X)
    }
    Results = pd.DataFrame(data=res)
    K = 30
    Track = {'i':[],
#              'Z':[],
            'Rem':[],
            'RMSE':[],
            'RMSES':[],
            'R2':[],
            'R2S':[]
            }
    for i in range(len(X)):
        X = list(Results.X.values)
        Val,C,I,MSE,R2 = Model_Test(X,y,Data.loc[~Data[X].T.isnull().any()],K=K,
                             Test = 'OLS',Scale=True,Task='Predict_Return',Verbose=False)
        MSE = np.array(MSE)**.5
        Results['Score'] = C**2/(C**2).sum()
        Results = Results.sort_values('Score')
        Rem = Results.loc[Results['Score']==Results['Score'].min(),'X'].values[0]
        Results = Results.loc[Results['Score']>Results['Score'].min()]
        Track['i'].append(len(X))
        Track['R2'].append(np.mean(np.array(R2)))
        Track['R2S'].append(np.std(np.array(R2))/(MSE.shape[0]**.5))
        Track['RMSE'].append(MSE.mean())
        Track['RMSES'].append(MSE.std()/(MSE.shape[0]**.5))
        Track['Rem'].append(Rem)

    Track = pd.DataFrame(data=Track)
    Track = Track.set_index('i')
    Track['RMSESS'] = (Track['RMSE'] + Track['RMSES']*SE_Rule)
    Track['R2SS'] = (Track['R2'] - Track['R2S']*SE_Rule)
    Mods[y[0]] = {}
#     Sel = Track.loc[Track['RMSE']<Track['RMSESS'].min(),['RMSE','RMSES','R2']][-1:]
#     Sel = Track.loc[Track['RMSE']/Track['RMSE'].min()>1.05,['RMSE','RMSES','R2']][0:1]
    Sel = Track.loc[Track['R2']/Track['R2'].max()>=0.95,['RMSE','RMSES','R2','R2S','R2SS']][-1:]
#     Sel = Track.loc[Track['R2SS']/Track['R2SS'].max()>0.98,['RMSE','RMSES','R2','R2S','R2SS']][-1:]
    M = Track.loc[Track.index<=Sel.index.values[0],'Rem'].values
    
    Mods[y[0]]['Stats']=Sel
    Mods[y[0]]['X']=M
    Mods[y[0]]['Z']=Z
    print(y[0])
    print()
    print(M)
    print()
    print(Sel.round(2))
    print('Max: ',Track['R2'].max().round(2))
    

    print()
    print()

15
Table_1

['soil_temperature_level_3' 'volumetric_soil_water_layer_1'
 'volumetric_soil_water_layer_2']

   RMSE  RMSES    R2  R2S  R2SS
i                              
3  0.01    0.0  0.93  0.0  0.92
Max:  0.96


Temp_15_1

['volumetric_soil_water_layer_2' 'soil_temperature_level_3' 'Air_Temp'
 'Rolling_Air_Temp_6' 'Net_SW']

   RMSE  RMSES    R2  R2S  R2SS
i                              
5  0.47   0.01  0.89  0.0  0.88
Max:  0.93


Temp_15_2

['volumetric_soil_water_layer_2' 'soil_temperature_level_3' 'Air_Temp'
 'Rolling_Air_Temp_6' 'Net_SW']

   RMSE  RMSES    R2   R2S  R2SS
i                               
5  0.53   0.01  0.86  0.01  0.84
Max:  0.9


Temp_5_1

['soil_temperature_level_3' 'Air_Temp' 'Rolling_Air_Temp_6']

   RMSE  RMSES    R2  R2S  R2SS
i                              
3  0.81   0.01  0.88  0.0  0.87
Max:  0.91


Temp_5_2

['Net_LW' 'Net_SW' 'Rolling_Air_Temp_6' 'soil_temperature_level_1'
 'Air_Temp' 'Rolling_Air_Temp_3']

   RMSE  RMSES    R2  R2S  R2SS
i        

In [24]:
Rec = {'Y':[],
      'R2':[]}
for x in AllX:
    Rec[x]=[]
    
for Y in Mods.keys():
    Rec['Y'].append(Y)
    X = list(Mods[Y]['X'])
    y = [Y]
    res = {
    'X':X,
    'Score':np.ones(len(X))/len(X),
    'Z':np.ones(len(X))
    }
    Results = pd.DataFrame(data=res)
    Val,C,I,MSE,R2 = Model_Test(X,y,Data.loc[~Data[X].T.isnull().any()],K=K,
                             Test = 'OLS',Scale=True,Task='Predict_Return',Verbose=False)
    Results['Score'] = np.abs(C)/(np.abs(C)).sum()
    Results['Score'] = Results['Score'].round(2)
#     for i,row in Results.iterrows():
#         
    for c in AllX:
#         v = 0
        if Results.loc[Results['X']==c,'Score'].shape[0]>0:
            v = Results.loc[Results['X']==c,'Score'].values[0]
            v = str(int(v*100))+'%'
        else:
            v = '--'
#             pass
        Rec[c].append(v) 
    Rec['R2'].append(Mods[Y]['Stats']['R2'].round(2).values[0])

# print(Rec)
Rec = pd.DataFrame(data=Rec)
# Rec

Rec = Rec.rename(columns={'soil_temperature_level_1':'Ts Level 1',# (0 - 7 cm)',
                                      'soil_temperature_level_2':'Ts Level 2',# (7 - 28 cm)',
                                      'soil_temperature_level_3':'Ts Level 3',# (7 - 28 cm)',
                                      'volumetric_soil_water_layer_1':'Theta Level 1',# (7 - 28 cm)',
                                      'volumetric_soil_water_layer_2':'Theta Level 2',# (7 - 28 cm)',
                                      'Air_Temp':'Ta',
                                        'Net_SW':'SW',
                                        'Net_LW':'LW',
                                        'Rolling_Air_Temp_48':'Mean Ta 48 hr',
                                        'Rolling_Air_Temp_24':'Mean Ta 24 hr',
                                        'Rolling_Air_Temp_12':'Mean Ta 12 hr',
                                        'Rolling_Air_Temp_6':'Mean Ta 6 hr',
                                        'Rolling_Air_Temp_3':'Mean Ta 3 hr',
                                        'Rolling_RainfallD3':'Total Rainfall 3 D',
                                        'Rolling_RainfallD1':'Total Rainfall 1 D'})
Rec['Y'] = Rec['Y'].replace({'Temp_5_1':'Polygon Center 5 cm',
                                   'Temp_5_2':'Polygon Rim 5 cm',
                                   'Temp_15_1':'Polygon Center 15 cm',
                                   'Temp_15_2':'Polygon Rim 15 cm',
                                    'Table_1':'Wtd'
                                  })
Rec.set_index(['Y','R2']).T
# print(AllX)

Y,Wtd,Polygon Center 15 cm,Polygon Rim 15 cm,Polygon Center 5 cm,Polygon Rim 5 cm
R2,0.93,0.89,0.86,0.88,0.88
Ts Level 1,--,--,--,--,13%
Ts Level 2,--,--,--,--,--
Ts Level 3,20%,16%,18%,11%,--
Theta Level 1,45%,--,--,--,--
Theta Level 2,35%,7%,8%,--,--
SW,--,20%,15%,--,8%
LW,--,--,--,--,8%
Ta,--,23%,25%,34%,31%
Mean Ta 3 hr,--,--,--,--,16%
Mean Ta 6 hr,--,34%,34%,55%,24%


In [25]:
fig,ax=plt.subplots(len(Mods.keys()),sharex=True,figsize=(6,len(Mods.keys())*3))
j = 0
for Y in Mods.keys():
#     print(Mods[y]['X'])
    X = list(Mods[Y]['X'])
    y = [Y]
    res = {
    'X':X,
    'Score':np.ones(len(X))/len(X)
    }
    Results = pd.DataFrame(data=res)
    print(Mods[Y]['Stats'])
    Val = Model_Test(X,y,Data.loc[~Data[X].T.isnull().any()],
                                K=K,Test = 'OLS',Scale=True,Task='Predict',Verbose=False)
#     Val,C,I,MSE,R2 = Model_Test(X,y,Data.loc[~Data[X].T.isnull().any()],K=K,
#                              Test = 'OLS',Scale=True,Task='Predict_Return',Verbose=False)
    
#     Results['Score'] = np.abs(C)/(np.abs(C)).sum()
#     Results = Results.sort_values('Score')
#     print(Results.sort_values(by='Score'))
    Data[Y+'_OLS']=np.nan
    Data.loc[~Data[X].T.isnull().any(),Y+'_OLS']=Val
                     
    DNA = Data[y+[Y+'_OLS']].dropna()
    print()
    print('r2: ',metrics.r2_score(DNA[y],DNA[Y+'_OLS']))
    print('RMSE: ',metrics.mean_squared_error(DNA[y],DNA[Y+'_OLS'])**.5)


    for y in range(2009,2020):
        Yr = Data.loc[Data.index.year==y].resample('D').mean()
        if y == 2017:
            ax[j].plot(Yr['DOY'],Yr[Y+'_OLS'],color='r',linewidth=5)
        else:
            ax[j].plot(Yr['DOY'],Yr[Y+'_OLS'],color='b',linewidth=1)
    ax[j].plot(Data['DOY'],Data[Y],color='k')
    ax[j].set_title(Y+' '+ str(Mods[Y]['Stats']['R2'].round(2).values[0]))


    ax[j].axvspan(147,174,facecolor=[.97,.97,.97,.65],edgecolor='k')
    ax[j].axvspan(256,295,facecolor=[.97,.97,.97,.65],edgecolor='k')
    ax[j].set_xlim(130,310)
    j +=1

<IPython.core.display.Javascript object>

       RMSE     RMSES        R2      R2S      R2SS
i                                                 
3  0.008395  0.000202  0.933515  0.00403  0.921426

r2:  0.9348390975423464
RMSE:  0.008402939484781127
       RMSE     RMSES        R2       R2S      R2SS
i                                                  
5  0.466173  0.005114  0.894321  0.003626  0.883442

r2:  0.8963295474999626
RMSE:  0.46521418274008397
      RMSE     RMSES        R2       R2S      R2SS
i                                                 
5  0.52723  0.010495  0.864002  0.006702  0.843896

r2:  0.8662014009231631
RMSE:  0.5264793003267267
       RMSE     RMSES        R2       R2S     R2SS
i                                                 
3  0.806926  0.007279  0.883755  0.003215  0.87411

r2:  0.8856357203319244
RMSE:  0.8053131754095524
       RMSE    RMSES        R2       R2S      R2SS
i                                                 
6  1.164361  0.01892  0.875505  0.004187  0.862945

r2:  0.877945380525764
R

In [26]:
# print(Data['Study_Season'].resample('Y').max())

# print(Data_d.loc[Data_d['AL_data_est']>0.15]
Data['DOY']=Data.index.dayofyear
D = Data.resample('D').mean()
print(D.loc[D['Temp_5_1_OLS']>0].resample('Y').first()[['AL_data_est','Temp_5_1_OLS','Temp_15_2_OLS','DOY']].mean())
print(D.loc[D['Temp_15_1_OLS']>0].resample('Y').first()[['AL_data_est','Temp_5_1_OLS','Temp_15_2_OLS','DOY']].mean())
print(D.loc[D['AL_data_est']>0.05].resample('Y').first()[['Temp_5_1_OLS','Temp_15_2_OLS','DOY']].mean())
print(D.loc[D['AL_data_est']>0.15].resample('Y').first()[['Temp_5_1_OLS','Temp_15_2_OLS','DOY']].mean())

AL_data_est        0.047923
Temp_5_1_OLS       0.876190
Temp_15_2_OLS     -0.925901
DOY              155.846154
dtype: float64
AL_data_est        0.175426
Temp_5_1_OLS       3.960630
Temp_15_2_OLS      0.366207
DOY              173.153846
dtype: float64
Temp_5_1_OLS       0.100277
Temp_15_2_OLS     -1.349317
DOY              149.166667
dtype: float64
Temp_5_1_OLS       1.980942
Temp_15_2_OLS     -0.721007
DOY              163.500000
dtype: float64


In [27]:
Y = 'Test'
SE_Rule = 1
Met = [
    'Net_LW',
    'Net_SW',
    'Wind_Speed',
    'Air_Temp',
    'surface_solar_radiation_downwards_hourly',
#     'Rainfall'
    ]
ERA5_Met = [
    'temperature_2m',
    'dewpoint_temperature_2m',
    'Wind_10m',
    'HH',
    ]
AllX = Met+ERA5_Met
print(len(AllX))
Mods = {}
for y in [['u*'],['PPFD_Avg'],['VPD']]:
    X = AllX
    res = {
        'X':X,
        'Score':np.ones(len(X))/len(X)
    }
    Results = pd.DataFrame(data=res)
    K = 30
    Track = {'i':[],
            'Rem':[],
            'RMSE':[],
            'RMSES':[],
             'Int':[],
             'Coef':[],
            'R2':[],
            'R2S':[]}
    for i in range(len(X)):
        X = list(Results.X.values)
        Val,C,I,MSE,R2 = Model_Test(X,y,Data.loc[~Data[X].T.isnull().any()],K=K,
                             Test = 'OLS',Scale=True,Task='Predict_Return',Verbose=False)
        MSE = np.array(MSE)**.5
        Results['Score'] = C**2/(C**2).sum()
        Results = Results.sort_values('Score')
        Rem = Results.loc[Results['Score']==Results['Score'].min(),'X'].values[0]
        Results = Results.loc[Results['Score']>Results['Score'].min()]
        Track['i'].append(len(X))
        Track['Int'].append(np.mean(np.array(I)))
        Track['Coef'].append(np.mean(np.array(C)))
        Track['R2'].append(np.mean(np.array(R2)))
        Track['R2S'].append(np.std(np.array(R2)))
        Track['RMSE'].append(MSE.mean())
        Track['RMSES'].append(MSE.std())#/(MSE.shape[0]**.5))
        Track['Rem'].append(Rem)

    Track = pd.DataFrame(data=Track)
    Track = Track.set_index('i')
    Track['RMSESS'] = (Track['RMSE'] + Track['RMSES']*SE_Rule)
    Track['R2SS'] = (Track['R2'] - Track['R2S']*SE_Rule)
    Mods[y[0]] = {}
    Sel = Track.loc[Track['RMSE']<Track['RMSESS'].min(),['RMSE','RMSES','R2','R2S','Coef','Int']][-1:]
    Sel = Track.loc[Track['R2']/Track['R2'].max()>0.98,['RMSE','RMSES','R2']][-1:]
    M = Track.loc[Track.index<=Sel.index.values[0],'Rem'].values
    Mods[y[0]]['Stats']=Sel
    Mods[y[0]]['X']=M
    print(y)
    print(M)
    print(Sel)
#     print(Track)
    print()

9
['u*']
['Wind_Speed']
       RMSE     RMSES       R2
i                             
1  0.045001  0.003854  0.89794

['PPFD_Avg']
['Net_SW']
        RMSE     RMSES       R2
i                              
1  38.631624  2.556721  0.99004

['VPD']
['temperature_2m' 'dewpoint_temperature_2m' 'Air_Temp']
         RMSE     RMSES        R2
i                                
3  131.354226  8.698229  0.876402



In [28]:
X = ['Wind_Speed']
y = ['u*']
Val,C,I,MSE,R2 = Model_Test(X,y,Data.loc[~Data[X].T.isnull().any()],K=K,
                     Test = 'OLS',Scale=False,Task='Predict_Return',Verbose=True)

X = ['Net_SW']
y = ['PPFD_Avg']
Data['Net_SW_T'] = Data['Net_SW'].copy()
Data.loc[Data['Net_SW_T']<0,'Net_SW_T']=0
Val,C,I,MSE,R2 = Model_Test(X,y,Data.loc[~Data[X].T.isnull().any()],K=K,
                     Test = 'OLS',Scale=False,Task='Predict_Return',Verbose=True)


u*
RMSE 0.04500149440521988
std RMSE 0.0038542468485703867
r2 0.8948652491619089
coef [0.07370356]
int 0.021367641498866184

PPFD_Avg
RMSE 38.63162448677514
std RMSE 2.5567209462702647
r2 0.9904007390827857
coef [2.38028641]
int 3.848866329466529


In [29]:
fig,ax=plt.subplots(len(Mods.keys()),sharex=True,figsize=(6,len(Mods.keys())*3))
j = 0
for Y in Mods.keys():
#     print(Mods[y]['X'])
    X = list(Mods[Y]['X'])
    y = [Y]
    print(Mods[Y]['Stats'])
    Val = Model_Test(X,y,Data.loc[~Data[X].T.isnull().any()],
                                K=K,Test = 'OLS',Scale=True,Task='Predict',Verbose=True)
    Data[Y+'_OLS']=np.nan
    Data.loc[~Data[X].T.isnull().any(),Y+'_OLS']=Val
                     
    DNA = Data[y+[Y+'_OLS']].dropna()
    print()
    print('r2: ',metrics.r2_score(DNA[y],DNA[Y+'_OLS']))
    print('RMSE: ',metrics.mean_squared_error(DNA[y],DNA[Y+'_OLS'])**.5)


    for y in range(2009,2020):
        Yr = Data.loc[Data.index.year==y].resample('D').mean()
        if y == 2017:
            ax[j].plot(Yr['DOY'],Yr[Y+'_OLS'],color='r',linewidth=5)
        else:
            ax[j].plot(Yr['DOY'],Yr[Y+'_OLS'],color='b',linewidth=1)
    ax[j].plot(Data['DOY'],Data[Y],color='k')
    ax[j].set_title(Y+' '+ str(Mods[Y]['Stats']['R2'].round(2).values[0]))
    
    ax[j].axvspan(147,174,facecolor=[.97,.97,.97,.65],edgecolor='k')
    ax[j].axvspan(256,295,facecolor=[.97,.97,.97,.65],edgecolor='k')
    j +=1

<IPython.core.display.Javascript object>

       RMSE     RMSES       R2
i                             
1  0.045001  0.003854  0.89794

u*
RMSE 0.04500149440521988
std RMSE 0.0038542468485703893
r2 0.8948652491619089
coef [0.13501616]
int 0.3044481420598676

r2:  0.8998657373147907
RMSE:  0.045041291088142706
        RMSE     RMSES       R2
i                              
1  38.631624  2.556721  0.99004

PPFD_Avg
RMSE 38.63162448677515
std RMSE 2.556720946270257
r2 0.9904007390827857
coef [389.66543789]
int 333.84664621212124

r2:  0.990258864514813
RMSE:  38.64921631423895
         RMSE     RMSES        R2
i                                
3  131.354226  8.698229  0.876402

VPD
RMSE 131.3542258113198
std RMSE 8.698228786674118
r2 0.8750078566662682
coef [ 145.81555108 -250.75086217  355.68524691]
int 335.0730372832665

r2:  0.8699919742076736
RMSE:  136.92272930204703


In [30]:
# plt.figure()
# plt.scatter(Data['PPFD_Avg_OLS'],Data['Net_SW'])

In [31]:


Y = 'Test'

# SE_Rule = 2

Met = [
    
    'Air_Temp',
#     'Rainfall'
    ]
ERA5_Met = [
    'temperature_2m',
    'dewpoint_temperature_2m',
    'W_Dir_ERA5',
#     'Wind_10m',
#     'HH',
    ]


AllX = Met+ERA5_Met
print(len(AllX))

Mods = {
}
for y in [['VPD']]:
    X = AllX
    res = {
        'X':X,
        'Score':np.ones(len(X))/len(X)
    }
    Results = pd.DataFrame(data=res)
    K = 10
    Track = {'i':[],
            'Rem':[],
            'RMSE':[],
            'RMSES':[],
            'R2':[],
            'R2S':[]}
    for i in range(len(X)):
        X = list(Results.X.values)
        Val,C,MSE,R2 = Model_Test(X,y,Data.loc[~Data[X].T.isnull().any()],K=K,
                             Test = 'RF',Task='Predict_Return',Verbose=False,min_samples_split=2)
        MSE = np.array(MSE)**.5
        Results['Score'] = C#/C.sum()
        Results = Results.sort_values('Score')
        Rem = Results.loc[Results['Score']==Results['Score'].min(),'X'].values[0]
        Results = Results.loc[Results['Score']>Results['Score'].min()]
        Track['i'].append(len(X))
        Track['R2'].append(np.mean(np.array(R2)))
        Track['R2S'].append(np.std(np.array(R2)))
        Track['RMSE'].append(MSE.mean())
        Track['RMSES'].append(MSE.std())#/(MSE.shape[0]**.5))
        Track['Rem'].append(Rem)

    Track = pd.DataFrame(data=Track)
    Track = Track.set_index('i')
    Track['RMSESS'] = (Track['RMSE'] + Track['RMSES']*SE_Rule)
    Track['R2SS'] = (Track['R2'] - Track['R2S']*SE_Rule)
    Mods[y[0]] = {}
    Sel = Track.loc[Track['RMSE']<Track['RMSESS'].min(),['RMSE','RMSES','R2','R2S']][-1:]
#     Sel = Track.loc[Track['R2']/Track['R2'].max()>0.95,['RMSE','RMSES','R2']][-1:]
    M = Track.loc[Track.index<=Sel.index.values[0],'Rem'].values
    Mods[y[0]]['Stats']=Sel
    Mods[y[0]]['X']=M
    print(y)
    print(M)
    print(Sel)
    print(Track)
    
    print()

4
['VPD']
['W_Dir_ERA5' 'temperature_2m' 'dewpoint_temperature_2m' 'Air_Temp']
         RMSE     RMSES        R2       R2S
i                                          
4  104.457011  5.074687  0.921623  0.013935
                       Rem        RMSE      RMSES        R2       R2S  \
i                                                                       
4               W_Dir_ERA5  104.457011   5.074687  0.921623  0.013935   
3           temperature_2m  111.506629   5.081424  0.910742  0.015311   
2  dewpoint_temperature_2m  122.689410   6.846274  0.891676  0.021198   
1                 Air_Temp  183.017237  15.410314  0.759012  0.047165   

       RMSESS      R2SS  
i                        
4  109.531698  0.907688  
3  116.588053  0.895431  
2  129.535684  0.870478  
1  198.427552  0.711847  



In [32]:
fig,ax=plt.subplots(len(Mods.keys()),sharex=True,figsize=(6,len(Mods.keys())*3))
if len(Mods.keys())==1:
    ax = [ax]
j = 0
for Y in Mods.keys():
#     print(Mods[y]['X'])
    X = list(Mods[Y]['X'])
    y = [Y]
    print(Mods[Y]['Stats'])
    Val = Model_Test(X,y,Data.loc[~Data[X].T.isnull().any()],
                                K=K,Test = 'RF',Scale=True,Task='Predict',Verbose=False,min_samples_split=2)
    Data[Y+'_RF']=np.nan
    Data.loc[~Data[X].T.isnull().any(),Y+'_RF']=Val
                     
    DNA = Data[y+[Y+'_RF']].dropna()
    print()
    print('r2: ',metrics.r2_score(DNA[y],DNA[Y+'_RF']))
    print('RMSE: ',metrics.mean_squared_error(DNA[y],DNA[Y+'_RF'])**.5)


    for y in range(2009,2020):
        Yr = Data.loc[Data.index.year==y].resample('D').mean()
        if y == 2017:
            ax[j].plot(Yr['DOY'],Yr[Y+'_RF'],color='r',linewidth=5)
        else:
            ax[j].plot(Yr['DOY'],Yr[Y+'_RF'],color='b',linewidth=1)
    ax[j].plot(Data['DOY'],Data[Y],color='k')
    ax[j].set_title(Y+' '+ str(Mods[Y]['Stats']['R2'].round(2).values[0]))


    ax[j].axvspan(147,174,facecolor=[.97,.97,.97,.65],edgecolor='k')
    ax[j].axvspan(256,295,facecolor=[.97,.97,.97,.65],edgecolor='k')
    j +=1

<IPython.core.display.Javascript object>

         RMSE     RMSES        R2       R2S
i                                          
4  104.457011  5.074687  0.921623  0.013935

r2:  0.9863968838571187
RMSE:  44.29040674787846


# Outputs

In [33]:
cols =['fch4','fco2','ch4_flux','co2_flux','Air_Temp','Rainfall','Wind_Speed','Wind_Direction',
        'Net_SW','Net_LW','Net_RN','Daytime','temperature_2m','dewpoint_temperature_2m',
       'volumetric_soil_water_layer_1','volumetric_soil_water_layer_2',
#        'volumetric_soil_water_layer_3',
#        'volumetric_soil_water_layer_4',
       'soil_temperature_level_1','soil_temperature_level_2',
       'soil_temperature_level_3',#'soil_temperature_level_4',
       'DOY',
       'NDSI_FI_LCP','NDVI_FI_LCP',#'NDSI_fill','NDVI_fill',
       'GPP_FI_LCP','GPP_Ix',
       'snow_cover',
#        'NDSI_interp','GPP_interp','NDVI_interp',
       'Polygon','Rim','Polygon_Obs',
       'Rim_Obs',
       'VPD_RF',
       'Wind_Speed',
      'Net_RN',
       'Temp_5_1_OLS',
       'Temp_15_2_OLS',
       'Temp_15_1_OLS',
       'Temp_5_2_OLS',
       'VPD_RF',
       'PPFD_Avg_OLS',
       'u*_OLS',
       'Table_1_OLS',
       'AL_data_est',
       'Study_Season','Warm','Snow_Class',
#        'Snow_Season','Green_Season','Warm',
#        'Freezing','Green','Snow',
#        'Ts_15_1_OLS','Ts_5_1_OLS,','Ts_15_2_OLS','Ts_5_2_OLS',#'VPD_OLS',
#        'PPFD_AWS_RF','u*_est_RF','WTD_RF','AL_data_est_RF',
       'NEE_FSO','NME_FSO','Season_Sum','Season_Root',#'Season_Sum2',
#        'Season',#,'Season2','Season3',
      'Daytime',
      ]

RN = {'Wind_Speed':'wind_speed',
      'Net_RN':'NR_Wm2_Avg',
       'Temp_5_1_OLS':'Temp_5_1',
       'Temp_15_2_OLS':'Temp_15_2',
       'Temp_15_1_OLS':'Temp_15_1',
       'Temp_5_2_OLS':'Temp_5_2',
       'VPD_RF':'VPD',
       'PPFD_Avg_OLS':'PPFD_Avg',
       'u*_OLS':'u*',
       'Table_1_OLS':'Table_1',
       'AL_data_est':'Active_Layer_1',
     }


# 58.8%	25.6%

Data['Polygon_Obs'] = Data['Polygon'].copy()
# Data['Polygon']=Data['Polygon'].median()
Data['Polygon'] = 0.588 # ALF derrived values
Data['Rim_Obs'] = Data['Rim'].copy()
# Data['Rim']=Data['Rim'].median()
Data['Rim'] = 0.256 # ALF derrived values


for col in RN:
#     print(columns)
    Data[RN[col]+'_FO'] = Data[RN[col]].copy()
    Data[RN[col]+'_Diff'] = Data[RN[col]+'_FO']-Data[col]
    cols.append(RN[col]+'_FO')
    cols.append(RN[col]+'_Diff')

# Path = 'G:/My Drive/FishIsland_Outputs/Network_Outputs/Arctic_Sci_8Vars/'
    
Export = Data.copy()
Export.index.name = 'datetime'
Export.index = Export.index.tz_localize(None)
Export = Export.loc[((Export.index.year>=2009)&(Export.index.year<=2019))]
# Export[cols].rename(columns=RN).to_csv(Output_Path+'Network_Outputs/Arctic_Sci_8Vars/AWS_FI_Test.csv')
Export[cols].rename(columns=RN).to_csv(Output_Path+'Network_Outputs/Arctic_Sci_8Vars/AWS_FI_Final.csv')
# Export[cols].rename(columns=RN).to_csv('C:\\Users\\wesle\\NetworkAnalysis\\FishIsland/AWS_FI_Test.csv')
print('Done')

Done


# WX Summary Stats


In [34]:
# print(AL.columns)
# print(AL[['Peak_Day','Peak_Height','Peak_NDVI']].corr()**2)
# print(AL[['Peak_Day_BL','Peak_Height_BL']].corr()**2)
# print(AL[['Peak_Height_BL','Peak_Height']].corr()**2)
# print()
# print(AL['Peak_NDVI'].median())
# # print(np.round(AL['Peak_Height'].max()-AL['Peak_Height'].min(),2))
# # print(np.round(AL['Peak_Height'].max()-AL['Peak_Height'].min(),2))

# print(AL['Start_Day'])
# AL['Peak_Date'] = pd.to_datetime(AL.index * 1000 + AL['Peak_Day'], format='%Y%j')
# AL['Peak_Date_BL'] = pd.to_datetime(AL.index * 1000 + AL['Peak_Day_BL'], format='%Y%j')
# AL['Start_Date'] = pd.to_datetime(AL.index * 1000 + AL['Start_Day'], format='%Y%j')
# AL['End_Date'] = pd.to_datetime(AL.index * 1000 + AL['End_Day'], format='%Y%j')
# Yrly = AL[['Peak_Height','Peak_Date','Peak_Date_BL','Peak_Height',]]#.sort_values(by='Season_Sum')
# print(Yrly)
# Yrly = AL[['Start_Date','End_Date','Season_Sum']].reset_index(drop=True)#.sort_values(by='Season_Sum').reset_
# print(Yrly)
# 

In [35]:
# plt.figure()
# for y in range(2009,2020):
#     Y = Daily.loc[Daily.index.year==y]
#     plt.plot(Y.index.dayofyear,Y['NDSI_fill'])

In [36]:
# Data['MM-DD'] = Data.index.strftime('%m-%d')
# Daily = Data.groupby('MM-DD').mean()
# Dailymn = Data.groupby('MM-DD').min()
# Dailymx = Data.groupby('MM-DD').max()
# fig,ax = plt.subplots(2,sharex=True,figsize=(7,7))
# from scipy import stats
# # ax[0].errorbar(Daily.index,Daily['Air_Temp'],yerr=Dailys['Air_Temp'],color='r',ecolor='grey')
# ax[0].plot(Daily.index,Daily['Air_Temp'],color='k')
# ax[0].plot(Daily.index,Dailymn['Air_Temp'],color='b')
# ax[0].plot(Daily.index,Dailymx['Air_Temp'],color='r')

# # ax[0].plot(Daily.index,Daily['Air_Temp']+Dailys['Air_Temp']/(11**.5)*stats.t.ppf(1-0.025,11),color='r')#,ecolor='grey')
# ax[0].grid()

# ax[1].plot(Daily.index,Daily['Net_RN'],color='k')
# ax[1].plot(Daily.index,Dailymn['Net_RN'],color='b')
# ax[1].plot(Daily.index,Dailymx['Net_RN'],color='r')
# ax[1].grid()

# # Xt = ax[0].get_xticks()
# # fig.canvas.draw()
# # Xtl = ax[0].get_xticklabels()

# P = np.arange(0,365,15)#[0,14,31,45,61,75,92,106,123,137,153,167,183]
# # P = [0,14,31,45,61,75,92,106,123,137,153,167,183]
# ax[-1].set_xticks(P)
# ax[-1].set_xlim(0,365)
# print(Dailymx.loc[Dailymx['Air_Temp']==Dailymx['Air_Temp'].min(),['DOY']])
# # print(Dailymx)
# fig.autofmt_xdate()

In [37]:
v = 'Wind_Speed'
Mw = Data.loc[((Data.index.year<=2019)&(Data.index.year>=2009)&(Data['Study_Season']==1)),v].mean()
Sw = Data.loc[((Data.index.year<=2019)&(Data.index.year>=2009)&(Data['Study_Season']==1)),v].std()
Storm = Mw + Sw*2
print(Storm)

7.373146406951779


In [38]:
# Monthly = Data.groupby('Month')[['Air_Temp']]
# Monthly
# Data['Month']
plt.rcParams['axes.axisbelow'] = True

# Daily = Data.resample('D').mean()

Annual = Data.loc[((Data.index.year<=2019)&(Data.index.year>=2009))].resample('Y').mean()#['Air_Temp']
Annualmx = Data.loc[((Data.index.year<=2019)&(Data.index.year>=2009))].resample('Y').max()#['Air_Temp']
Season = Data.loc[((Data.index.year<=2019)&(Data.index.year>=2009)&
                   (Data.Study_Season==1)
                  )].resample('Y').mean()

Season = Data.loc[((Data.index.year<=2019)&(Data.index.year>=2009)&
                   (Data.Study_Season==1)
                  )].resample('Y').mean()#['Air_Temp']

Season[['NDVI','AL_data_est','Tmax']] = Data.loc[((Data.index.year<=2019)&(Data.index.year>=2009)&
                   (Data.Study_Season==1)
                  )].resample('Y').max()[['NDVI_FI_LCP','AL_data_est','Air_Temp']]
SeasonS = Data.loc[((Data.index.year<=2019)&(Data.index.year>=2009)&
                    (Data.Study_Season==1)
                   )].resample('Y').std()#['Air_Temp']

SeasonC = Data.loc[((Data.index.year<=2019)&(Data.index.year>=2009)&
                   (Data.Study_Season==1)
                   )].resample('Y').count()#['Air_Temp']

Season[['Rainfall','Study_Season']] = Data.loc[((Data.index.year<=2019)&(Data.index.year>=2009)&
                               (Data.Study_Season==1)
                              )].resample('Y').sum()[['Rainfall','Study_Season']]
Season.loc[Season.index==Season.index[0],'Rainfall']=np.nan

Season['WC'] = Data.loc[((Data.index.year<=2019)&(Data.index.year>=2009)&
                               (Data.Study_Season==1)&(Data.Wind_Speed>=Storm)
                              )].resample('Y').count()[['Wind_Speed']]


fig,axes=plt.subplots(2,2,figsize=(6,6),sharex=True)



ax = axes[0,0]
ax.bar(Season.index,Season['Rainfall'],width=200,edgecolor='k',color='#4dc7f7')
# ax.set_xlim(axes[0].get_xlim())
ax.set_title('a. Total Precipitation',loc='left')
ax.set_ylabel('mm')
ax.grid()


ax = axes[0,1]
ax.bar(Season.index,Season['WC'],width=200,edgecolor='k',color='#f5b942', capsize=5)
ax.set_title('b. High Wind Conditions',loc='left')
ax.set_ylabel('hours')
ax.grid()


ax = axes[1,0]
ax.bar(Season.index,Season['AL_data_est'],width=200,edgecolor='k',color='#e5a7e8', capsize=5)
ax.set_title('c. Maxium Thaw Depth',loc='left')
ax.set_ylabel('m')
ax.grid()


ax = axes[1,1]
ax.bar(Season.index,Season['NDVI'],width=200,edgecolor='k',color='#ccff99')
# ax.set_xlim(ax[1].get_xlim())
ax.set_title('d. Maximum NDVI',loc='left')
ax.set_ylabel('mm')
ax.grid()



# Course = Daily.groupby(Daily.index.dayofyear).mean()

# ax[3].plot(Course['NDVI_FI_LCP'])

plt.suptitle('Fish Island Snow-free Seasons (2009 - 2019)')
plt.tight_layout()
plt.savefig(G_Drive+'PhD_Thesis/Chapter3_Figs/'+'Climatology.png')

<IPython.core.display.Javascript object>

In [39]:

D = Data.loc[((Data.index.year<=2019)&(Data.index.year>=2009)&(Data['Month']<=8)&(Data['Month']>=5))]

plt.figure()
plt.scatter(D['AL_data_est'],D['NDVI_FI_LCP'])


<IPython.core.display.Javascript object>

<matplotlib.collections.PathCollection at 0x1fe016642b0>

In [40]:
# print(Daily.resample('Y').max()[['AL_data_est','Season_Root','Season_Sum']])
# Temp
# AL

# Data.loc[Data['Study_Season']==1].resample('Y').max()['AL_data_est']
# # Temp

Temp = FDF.copy()#loc[FDF.index>2008].copy()

Temp['Study_Start'] = (Data.loc[Data['Study_Season']==1].resample('Y').min()['Date'].values)
Temp['Study_End'] = (Data.loc[Data['Study_Season']==1].resample('Y').max()['Date'].values)
Temp['Study_Start']=pd.to_datetime(Temp['Study_Start']).dt.strftime('%b-%d')
Temp['Study_End']=pd.to_datetime(Temp['Study_End']).dt.strftime('%b-%d')
Temp['AL'] = (Data.loc[Data['Study_Season']==1].resample('Y').max()['AL_data_est'].round(3).values)
Temp['S'] = (Data.loc[Data['Study_Season']==1].resample('Y').max()['Season_Sum'].values)
# AL[AL_s]
# AL[AL_s]
# Temp[[]]
# Temp

Temp['Study_Start_doy'] = (Data.loc[Data['Study_Season']==1].resample('Y').min()['DOY'].values)
Temp['Study_End_doy'] = (Data.loc[Data['Study_Season']==1].resample('Y').max()['DOY'].values)
# # # Data['Season_Root']

Temp['Duration']=Temp['Study_End_doy']-Temp['Study_Start_doy']
Temp[['Study_Start','Study_End','AL','Duration']]
# Temp['Study_Start_doy'].max()-Temp['Study_Start_doy'].min()
# Temp['Study_End_doy'].max()-Temp['Study_End_doy'].min()

# # Data.loc[Data['Study_Season']==1].resample('Y').min()['Date'].values
# Temp

# print(Data.loc[Data.index.year==2013,['AL_data_est','snow_cover']].dropna())

Unnamed: 0,Study_Start,Study_End,AL,Duration
2008,Jun-02,Sep-20,,110
2009,Jun-06,Sep-19,0.502,105
2010,Jun-05,Sep-21,0.521,108
2011,Jun-01,Sep-24,0.516,115
2012,Jun-03,Oct-09,0.56,128
2013,Jun-07,Sep-27,0.512,112
2014,Jun-05,Sep-24,0.528,111
2015,May-25,Sep-19,0.533,117
2016,May-27,Sep-28,0.538,124
2017,Jun-01,Oct-08,0.55,129


In [41]:
### print((Data_y[['Air_Temp','Rainfall','NR_Wm2_Avg','Temp_5_1','Temp_5_2','Temp_15_1','Temp_15_2','Table_1','Active_Layer_1']].corr()**2).round(2))#,'NR_Wm2_Avg']])
Data['MM-DD'] = Data.index.strftime('%m-%d')
DataT = Data.loc[((Data.index.month>=4)&(Data.index.month<=11))]

Data_d = DataT.groupby('MM-DD').mean()
Data_d['Rainfall'] = DataT.groupby('MM-DD').sum()['Rainfall']
Data_d_mx = DataT.groupby('MM-DD').max()
Data_d_mn = DataT.groupby('MM-DD').min()
# print((Data_d[['Air_Temp','Rainfall','NR_Wm2_Avg','Temp_5_1','Temp_5_2','Temp_15_1','Temp_15_2','Table_1','Active_Layer_1']].corr()**2).round(2))#,'NR_Wm2_Avg']])

fig,axes = plt.subplots(3,2,figsize=(7.25,7),sharex=True)#(18.2*cm_2_in, 13*cm_2_in),sharex=True)



ecolor='k'
ax = axes[0,0]
ax.plot(Data_d['Temp_5_1_OLS']*0,color='k')
ax.plot(Data_d['Air_Temp'],color='#648ccc')
ax.fill_between(Data_d.index,Data_d_mn['Air_Temp'],Data_d_mx['Air_Temp'],
                facecolor='#648ccc1A',edgecolor=ecolor,linestyle=':')
ax.set_axisbelow(True)
ax.grid(color='gray', linestyle='dashed')
ax.set_title('a. Air Temperature',loc='left')
ax.set_ylabel('$^\circ$C')
ax.set_ylim(-20,29)

ax = axes[0,1]
# ax.plot(Data_d['Temp_5_1_OLS']*0,color='k')
ax.plot(Data_d['snow_cover'],color='#648ccc')
ax.fill_between(Data_d.index,Data_d_mn['snow_cover'],Data_d_mx['snow_cover'],
                facecolor='#648ccc1A',edgecolor=ecolor,linestyle=':')
ax.set_axisbelow(True)
ax.grid(color='gray', linestyle='dashed')
ax.set_title('b. Snow Cover',loc='left')
ax.set_ylabel('%')
ax.set_ylim(0,100)


ax = axes[1,0]
ax.plot(Data_d['Temp_5_1_OLS']*0,color='k')
ax.plot(Data_d['Temp_5_1_OLS'],color='#648ccc')
ax.fill_between(Data_d.index,Data_d_mn['Temp_5_1_OLS'],Data_d_mx['Temp_5_1_OLS'],
                facecolor='#648ccc1A',edgecolor=ecolor,linestyle=':')
ax.set_axisbelow(True)
ax.grid(color='gray', linestyle='dashed')
ax.set_title('c. Polygon Center Ts 5cm',loc='left')
ax.set_ylabel('$^\circ$C')
ax.set_ylim(-15,20)

ax = axes[1,1]
ax.plot(Data_d['Temp_5_1_OLS']*0,color='k')
ax.plot(Data_d['Temp_15_1_OLS'],color='#648ccc')
ax.fill_between(Data_d.index,Data_d_mn['Temp_15_1_OLS'],Data_d_mx['Temp_15_1_OLS'],
                facecolor='#648ccc1A',edgecolor=ecolor,linestyle=':')
ax.set_axisbelow(True)
ax.grid(color='gray', linestyle='dashed')
ax.set_title('d. Polygon Center Ts 15cm',loc='left')
ax.set_ylabel('$^\circ$C')
ax.set_ylim(-15,20)




ax = axes[2,0]
ax.plot(Data_d['Temp_5_1_OLS']*0,color='k')
ax.plot(Data_d['Table_1_OLS'],color='#648ccc')
ax.fill_between(Data_d.index,Data_d_mn['Table_1_OLS'],Data_d_mx['Table_1_OLS'],
                facecolor='#648ccc1A',edgecolor=ecolor,linestyle=':')
ax.set_axisbelow(True)
ax.grid(color='gray', linestyle='dashed')
ax.set_title('e. Water Table Depth',loc='left')
ax.set_ylabel('m')

for label in ax.get_xticklabels():
    label.set_ha("center")
    label.set_rotation(45)


D_d = DataT.resample('D').mean()
D_d['MM-DD'] = DataT.resample('D').first()['MM-DD']

ax = axes[2,1]


ax.plot(Data_d['AL_data_est'],color='#648ccc')
ax.fill_between(Data_d.index,Data_d_mn['AL_data_est'],Data_d_mx['AL_data_est'],
                facecolor='#648ccc1A',edgecolor=ecolor,linestyle=':')
ax.set_axisbelow(True)
ax.grid(color='gray', linestyle='dashed')
ax.set_ylabel('m')



ax.set_axisbelow(True)
ax.grid(color='gray', linestyle='dashed')
ax.set_title('f. Thaw Depth',loc='left')

for label in ax.get_xticklabels():
    label.set_ha("center")
    label.set_rotation(45)
    
    
for aa in axes:
    for a in aa: 
#         a.plot(['06-23','06-23'],[-100,100])
#         a = b.twinx()
        
        a.axvspan('06-23','06-23',facecolor=[.97,.97,.97,.0],edgecolor='r')
        a.axvspan('09-13','09-13',facecolor=[.97,.97,.97,.0],edgecolor='r')
    
#     ax.axvspan(End,Xl2+1,facecolor=[.97,.97,.97,.65],edgecolor='k')


P = [0,14,30,45,61,75,92,105,123,136,153,167,183,197,210,228]
# # # P = [0,14,31,45,61,75,92,106,123,137,153,167,183]
ax.set_xticks(P[1::2])
ax.set_xlim(14,228)

plt.suptitle('Daily Conditions at Fish Island, 2009 - 2019')

plt.tight_layout()

# plt.savefig('G:\\My Drive\\PhD_Thesis\\Chapter3_FiData/Drivers_OLS.jpg',dpi=600)
plt.savefig(G_Drive+'PhD_Thesis/Chapter3_Figs/'+'DailyValues.png')

<IPython.core.display.Javascript object>

In [42]:
Data.loc[((Data.index.year<=2019)&(Data.index.year>=2009)
#           &(Data.index.month<=8)&(Data.index.year>=6)
         )].resample('Y').sum()['Rainfall']

Data.loc[((Data.index.year<=2019)&(Data.index.year>=2010)
#           &(Data.index.month<=8)&(Data.index.year>=6)
         )].groupby('Month').sum()['Rainfall']/10


RD = Data.loc[((Data.index.year<=2019)&(Data.index.year>=2010)
#           &(Data.index.month<=8)&(Data.index.year>=6)
         )].resample('D').mean()[['Rainfall','Month']]
print(RD.loc[RD['Rainfall']>0].groupby('Month').count()['Rainfall']/RD.groupby('Month').count()['Rainfall'])

Month
1.0          NaN
2.0          NaN
3.0     0.003226
4.0     0.006667
5.0     0.077419
6.0     0.230000
7.0     0.345161
8.0     0.493548
9.0     0.473333
10.0    0.138710
11.0         NaN
12.0         NaN
Name: Rainfall, dtype: float64


In [43]:

# Annualmx = Data.loc[((Data.index.year<=2019)&(Data.index.year>=2009))].resample('Y').max()#['Air_Temp']
# Summer = Data.loc[((Data.index.year<=2019)&(Data.index.year>=2009)&
#                    (Data.index.month>=6&(Data.index.month<=9))
# #                   (Data.index.month<=8)&(Data.index.month>=6)


Data['MM-YY'] = Data.index.strftime('%m-%y')

#                   )].resample('Y').mean()#['Air_Temp']
v='Wind_Speed'
plt.figure(figsize=(4,4))
# Season.boxplot(column=v)
Data.loc[((Data.index.year<=2019)&(Data.index.year>=2009)&(Data['Study_Season']==1))].boxplot(column=v)
# Data.boxplot(column=v)


# Data

# print(8.5/1e3*3600)

# Data.loc[((Data.index.year<=2019)&(Data.index.year>=2009)&(Data['Study_Season']==1))].std()[v]/(
# Data.loc[((Data.index.year<=2019)&(Data.index.year>=2009)&(Data['Study_Season']==1))].count()[v]**.5)*stats.t.ppf(1-0.025,
                                                                                                      
# Data.loc[((Data.index.year<=2019)&(Data.index.year>=2009)&(Data['Study_Season']==1))].count()[v])



# plt.figure()
# plt.plot(Data.groupby(Data.index.dayofyear).median()[v])

<IPython.core.display.Javascript object>

<AxesSubplot:>

In [44]:
Data_d.loc[Data_d['Temp_15_1_OLS']>0].index
# Data_d.loc[Data_d['Air_Temp']>0].index
# # Data_d.loc[Data_d['Air_Temp']==Data_d['Air_Temp'].max()].index
# # Data_d['Air_Temp'].max()
# # Data_d_mn.loc[Data_d_mn['Air_Temp']>0].index

print(Data.loc[Data['Study_Season']==1,['Temp_5_1_OLS','Temp_5_2_OLS','Temp_15_1_OLS','Temp_15_2_OLS']].describe())
from scipy.stats import ttest_rel


A,B = 'Temp_15_1_OLS','Temp_15_2_OLS'
D = Data.loc[Data['Study_Season']==1,[A,B]].dropna()
ttest_rel(D[A].values,D[B].values)

       Temp_5_1_OLS  Temp_5_2_OLS  Temp_15_1_OLS  Temp_15_2_OLS
count  32932.000000  32932.000000   32932.000000   32932.000000
mean       4.363508      6.936123       1.812821       1.856532
std        2.957901      3.597503       1.852111       1.870455
min       -3.270393     -2.387224      -4.592681      -4.415352
25%        2.201491      4.304601       0.704504       0.710821
50%        4.221547      6.516410       1.990529       2.033403
75%        6.395300      9.155127       3.088795       3.159994
max       16.258440     22.291132       7.762643       7.850912


Ttest_relResult(statistic=-49.854500884255046, pvalue=0.0)

In [45]:
# print((Data_y[['Air_Temp','Rainfall','NR_Wm2_Avg','Temp_5_1','Temp_5_2','Temp_15_1','Temp_15_2','Table_1','Active_Layer_1']].corr()**2).round(2))#,'NR_Wm2_Avg']])
Data['MM-DD'] = Data.index.strftime('%m-%d')
DataT = Data.loc[((Data.index.month>=4)&(Data.index.month<=11))]

Data_d = DataT.groupby('MM-DD').mean()
Data_d_mx = DataT.groupby('MM-DD').max()
Data_d_mn = DataT.groupby('MM-DD').min()
# print((Data_d[['Air_Temp','Rainfall','NR_Wm2_Avg','Temp_5_1','Temp_5_2','Temp_15_1','Temp_15_2','Table_1','Active_Layer_1']].corr()**2).round(2))#,'NR_Wm2_Avg']])

fig,axes = plt.subplots(3,figsize=(7.25,7),sharex=True)#(18.2*cm_2_in, 13*cm_2_in),sharex=True)

ecolor='k'
ax = axes[0]
ax.plot(Data_d['Air_Temp'],color='#648ccc')
ax.fill_between(Data_d.index,Data_d_mn['Air_Temp'],Data_d_mx['Air_Temp'],
                facecolor='#648ccc1A',edgecolor=ecolor,linestyle=':')
ax.set_axisbelow(True)
ax.grid(color='gray', linestyle='dashed')
ax.set_title('a. Air Temperature',loc='left')
ax.set_ylabel('$^\circ$C')
ax.set_ylim(-29,29)


# ax = axes[0,1]
ax.plot(Data_d['Temp_5_1_OLS'],color='#648ccc')
ax.fill_between(Data_d.index,Data_d_mn['Temp_5_1_OLS'],Data_d_mx['Temp_5_1_OLS'],
                facecolor='#648ccc1A',edgecolor=ecolor,linestyle=':')
ax.set_axisbelow(True)
ax.grid(color='gray', linestyle='dashed')
ax.set_title('b. Polygon Center Ts 5cm',loc='left')
ax.set_ylabel('$^\circ$C')
ax.set_ylim(-29,29)

# ax = axes[1,0]
ax.plot(Data_d['Temp_15_1_OLS'],color='#648ccc')
ax.fill_between(Data_d.index,Data_d_mn['Temp_15_1_OLS'],Data_d_mx['Temp_15_1_OLS'],
                facecolor='#648ccc1A',edgecolor=ecolor,linestyle=':')
ax.set_axisbelow(True)
ax.grid(color='gray', linestyle='dashed')
ax.set_title('c. Polygon Center Ts 15cm',loc='left')
ax.set_ylabel('$^\circ$C')
ax.set_ylim(-29,29)

ax = axes[1]
ax.plot(Data_d['Table_1_OLS'],color='#648ccc')
ax.fill_between(Data_d.index,Data_d_mn['Table_1_OLS'],Data_d_mx['Table_1_OLS'],
                facecolor='#648ccc1A',edgecolor=ecolor,linestyle=':')
ax.set_axisbelow(True)
ax.grid(color='gray', linestyle='dashed')
ax.set_title('d. Water Table Depth',loc='left')
ax.set_ylabel('$^\circ$C')

# ax = axes[2,0]
# ax.plot(Data_d['snow_cover'],color='#648ccc')
# ax.fill_between(Data_d.index,Data_d_mn['snow_cover'],Data_d_mx['snow_cover'],
#                 facecolor='#648ccc1A',edgecolor=ecolor,linestyle=':')
# ax.set_axisbelow(True)
# ax.grid(color='gray', linestyle='dashed')
# ax.set_title('e. Snow Cover',loc='left')
# ax.set_ylabel('%')
for label in ax.get_xticklabels():
    label.set_ha("center")
    label.set_rotation(45)

    
    
# ax = axes[2,1]
# ax.plot(Data_d['NDVI_FI_LCP'],color='#648ccc')
# ax.fill_between(Data_d.index,Data_d_mn['NDVI_FI_LCP'],Data_d_mx['NDVI_FI_LCP'],
#                 facecolor='#648ccc1A',edgecolor=ecolor,linestyle=':')
# ax.set_axisbelow(True)
# ax.grid(color='gray', linestyle='dashed')
# ax.set_title('f. NDVI',loc='left')

for label in ax.get_xticklabels():
    label.set_ha("center")
    label.set_rotation(45)
    
    


P = [0,14,30,45,61,75,92,105,123,136,153,167,183,197,210,228]
# # # P = [0,14,31,45,61,75,92,106,123,137,153,167,183]
ax.set_xticks(P[1::2])
ax.set_xlim(14,228)


plt.tight_layout()

# plt.savefig('G:\\My Drive\\PhD_Thesis\\Chapter3_FiData/Drivers_OLS.jpg',dpi=600)
plt.savefig(G_Drive+'PhD_Thesis/Chapter3_Figs/'+'DailyValues.png')

<IPython.core.display.Javascript object>

In [46]:
D = Data.loc[((Data.index.year<=2019)&(Data.index.year>=2009)&
                   (Data.Study_Season==1)#&(Data.Wind_Speed>=10)
                  ),'Wind_Speed'].resample('D').max()
D.resample('Y').max()#['Wind_Speed']
D.loc[D>10].resample('Y').count()

2012-12-31 00:00:00-07:00    5
2013-12-31 00:00:00-07:00    3
2014-12-31 00:00:00-07:00    4
2015-12-31 00:00:00-07:00    3
2016-12-31 00:00:00-07:00    2
2017-12-31 00:00:00-07:00    4
2018-12-31 00:00:00-07:00    2
2019-12-31 00:00:00-07:00    4
Freq: A-DEC, Name: Wind_Speed, dtype: int64