In [1]:
import os
import scipy.stats as stats
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import datetime as dt
import pytz
from sklearn.utils import resample
%matplotlib inline
import warnings

from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor as MPR
from sklearn.preprocessing import Normalizer,StandardScaler,MinMaxScaler
from sklearn.pipeline import Pipeline
from sklearn import metrics
from sklearn.metrics import classification_report,confusion_matrix


from scipy.optimize import curve_fit

from matplotlib.ticker import NullFormatter



# Silence every warning for the whole session (this also hides pandas /
# sklearn deprecation and convergence warnings).
warnings.filterwarnings('ignore')
# Base directory of the project data; every input path below is built from it.
Root = 'C:/FishIsland_2017/'


# Sub-folder (relative to Root) holding the flux result files.
FluxFolder='FluxResults/ProperWindSpeeds/'

# Flux output files passed to Compile (file names suggest 10 Hz and 1 Hz runs).
TenHz_Path = Root+FluxFolder+'10Hz/eddypro_10Hz_full_output_2018-03-25T172145_adv.csv'
OneHz_Path = Root+FluxFolder+'1Hz/eddypro_1Hz_full_output_2018-03-25T142146_adv.csv'

# Met-station and soil-station tables passed to Compile.
Met_Path = Root+'MetStationData/CR1000_ClimateData_Updated.txt'
Soil_Path = Root+'SoilStationData/Soil_Data.csv'




In [None]:

def Light_Response(PPFD,alpha,beta,gamma):
    """Saturating-exponential light response with a constant offset.

    Evaluates ``-(beta+gamma)*(1-exp(-alpha*PPFD/(beta+gamma))) + gamma``.
    At ``PPFD == 0`` the result is ``gamma``; for very large ``PPFD`` it
    tends to ``-beta``.

    Parameters
    ----------
    PPFD : scalar or array-like
        Light level(s) at which to evaluate the curve.
    alpha, beta, gamma : float
        Curve parameters (initial slope, saturation level and offset).

    Returns
    -------
    Same shape as ``PPFD``.
    """
    plateau = beta+gamma
    saturation = 1-np.exp((-alpha*PPFD)/plateau)
    return(-plateau*saturation+gamma)

def Light_Response_Temp(X,alpha,beta,r1,r2,r3):
    """Saturating-exponential light response with a temperature-driven offset.

    Same curve as ``Light_Response`` but the constant offset is replaced by
    ``1/(r1*r2**temp + r3)``.

    Parameters
    ----------
    X : tuple
        ``(PPFD, temp)``; each a scalar or equally shaped arrays.
    alpha, beta : float
        Light-response parameters.
    r1, r2, r3 : float
        Parameters of the temperature term.

    Returns
    -------
    Same shape as the broadcast of ``PPFD`` and ``temp``.
    """
    PPFD,temp = X
    offset = 1/(r1*r2**temp+r3)
    plateau = beta+offset
    saturation = 1-np.exp((-alpha*PPFD)/plateau)
    return(-plateau*saturation+offset)

def Light_Response_Temp2(X,alpha,beta,theta,r1,r2,r3):
    """Non-rectangular-hyperbola light response plus a temperature term.

    Evaluates::

        -(a - sqrt(a**2 - 4*alpha*beta*theta*PPFD)) / (2*theta) + 1/(r1*r2**temp + r3)

    with ``a = alpha*PPFD + beta``.

    The division by ``2*theta`` is written explicitly: the expression
    ``-1/2*theta*(...)`` multiplies by ``theta/2`` because of operator
    precedence, which only coincides with the intended curve at
    ``theta == 1``.

    Parameters
    ----------
    X : tuple
        ``(PPFD, temp)``; each a scalar or equally shaped arrays.
    alpha, beta : float
        Light-response parameters.
    theta : float
        Curvature parameter; must be non-zero.
    r1, r2, r3 : float
        Parameters of the temperature term.

    Returns
    -------
    Same shape as the broadcast of ``PPFD`` and ``temp``. Entries are NaN
    where the discriminant under the square root is negative (array input).
    """
    PPFD,temp = X
    light = alpha*PPFD+beta
    root = (light**2-4*alpha*beta*theta*PPFD)**.5
    uptake = (light-root)/(2*theta)
    offset = 1/(r1*r2**temp+r3)
    return(-uptake+offset)

#

In [None]:
def SimpleANN(Data,keys,node,iters,Verbose=True,Plot=False):
    """Train an ensemble of small MLP regressors and average their predictions.

    For each of ``iters`` repetitions a ``StandardScaler`` + ``MLPRegressor``
    pipeline is fitted on a random 90/10 train/test split (the repetition
    number is used as ``random_state`` for both the split and the network),
    scored on the held-out 10 %, and used to predict every row of ``Data``
    whose predictor columns are all present.

    Parameters
    ----------
    Data : pandas.DataFrame
        Table holding the target and predictor columns.
    keys : list of str
        ``keys[0]`` is the target column, ``keys[1:]`` are the predictors.
    node : int or sequence of int
        Passed to ``MLPRegressor(hidden_layer_sizes=...)``.
    iters : int
        Number of repetitions (ensemble members); must be at least 1.
    Verbose : bool
        Print the final training loss and test r2 of each member, and the
        mean MAE at the end.
    Plot : bool
        Scatter the held-out observations against the predictions.

    Returns
    -------
    tuple
        ``(mean r2, mean MAE, averaged prediction, std of MAE)``. The
        prediction has one entry per row of ``Data[keys[1:]].dropna()``.

    Raises
    ------
    ValueError
        If ``iters`` is smaller than 1.
    """
    if iters < 1:
        raise ValueError('iters must be at least 1')

    Data_2 = Data[keys].dropna()
    X = Data_2[keys[1:]].values
    y = Data_2[keys[0]].values
    # Rows with complete predictors (the target may be missing); this does
    # not change between repetitions, so build it once.
    X_fill = Data[keys[1:]].dropna().values

    Metric = []
    Metric2 = []
    Pred = np.zeros(len(X_fill))
    for r in range(0,iters):
        # NOTE(review): scikit-learn documents early_stopping /
        # validation_fraction / learning_rate as applying to the 'sgd' and
        # 'adam' solvers; they are kept so the estimator is configured
        # exactly as before.
        ANN=  Pipeline([('scaling', StandardScaler()),
                ('MPR',  MPR(hidden_layer_sizes=(node),max_iter = 1000,activation='logistic',solver ='lbfgs',
                learning_rate = 'adaptive',early_stopping=True,tol=1e-5,validation_fraction=.1,
                             random_state = r,learning_rate_init =1e-3))])
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=r)

        ANN.fit(X_train,y_train)
        y_pred = ANN.predict(X_test)
        if Plot == True:
            # x holds the observations and y the predictions, so label
            # the axes accordingly.
            plt.scatter(y_test,y_pred)
            plt.xlabel('true')
            plt.ylabel('pred')
            plt.xlim(y.min(),y.max())
            plt.ylim(y.min(),y.max())
        Metric2.append(metrics.mean_absolute_error(y_test,y_pred))
        Metric.append(metrics.r2_score(y_test,y_pred))
        if Verbose == True:
            print(ANN.named_steps.MPR.loss_,metrics.r2_score(y_test,y_pred))
        Pred += ANN.predict(X_fill)
    # Ensemble mean over all repetitions.
    Pred /= iters
    Metric = np.asanyarray(Metric)
    Metric2 = np.asanyarray(Metric2)
    if Verbose == True:
        print(Metric2.mean())
    return(Metric.mean(),Metric2.mean(),Pred,Metric2.std())


In [None]:
class Compile:
    """Load, merge and quality-filter the flux, met-station and soil tables.

    Attributes set by ``__init__``
    ------------------------------
    Fluxes : list of str
        The flux columns every filter operates on.
    RawData : pandas.DataFrame
        Merged table that the filter methods modify in place. For every flux
        ``<name>`` there is a ``<name>_drop`` column set to 1 where a filter
        removed the value.
    VeryRawData : pandas.DataFrame
        Copy of the merged table taken before any filter or derived column.

    Attributes set later
    --------------------
    Data : pandas.DataFrame
        Copy of ``RawData`` after the u* filter (``Ustar_Drop``).
    uThresh, Pct, BootStraps, uFilterData, uThresh_SampSize
        Results of ``ustar_Bins``.
    bins : numpy.ndarray
        Bin edges of whichever ``*_Bins`` method ran last.
    """

    def __init__(self,Flux_Paths,Met,Soil):
        """Read all input files, merge them on time and add derived columns.

        Parameters
        ----------
        Flux_Paths : sequence of two paths
            ``Flux_Paths[0]`` is tagged ``Hz = 10`` and ``Flux_Paths[1]``
            ``Hz = 1``.
        Met, Soil : path
            Met-station and soil-station tables.
        """
        self.Fluxes = ['H','LE','co2_flux','ch4_flux']
        flux_csv = dict(delimiter = ',',skiprows = 0,parse_dates={'datetime':[1,2]},header = 1,na_values = -9999)
        Flux_10 = self.Format(pd.read_csv(Flux_Paths[0],**flux_csv),v=1,drop = [0,1])
        Flux_1 = self.Format(pd.read_csv(Flux_Paths[1],**flux_csv),v=1,drop = [0,1])
        Flux_10['Hz']=10
        Flux_1['Hz'] = 1
        # pd.concat replaces DataFrame.append, which newer pandas no longer has.
        Flux = pd.concat([Flux_1,Flux_10])
        Met = self.Format(pd.read_csv(Met,delimiter = ',',skiprows = 1,parse_dates={'datetime':[0]},header = 0),v=2,drop = [0])
        Soil = self.Format(pd.read_csv(Soil,delimiter = ',',skiprows = 0,parse_dates={'datetime':[0]},header = 0),v=0,drop = [0])

        self.RawData = pd.concat([Flux,Met,Soil],axis = 1, join = 'outer')
        self.VeryRawData = self.RawData.copy()
        self.StorageCorrection()
        for var in self.Fluxes:
            self.RawData[var+'_drop'] = 0
        self.RawData['Minute'] = self.RawData.index.hour*60+self.RawData.index.minute
        self.Wind_Bins(30)
        self.PPFD_Bins(100)
        self.RawData['Day'] = np.floor(self.RawData['DOY'])

        # NOTE(review): the column is called 'UTC' but holds the index
        # interpreted as UTC and converted to US/Mountain - confirm intent.
        Mt = pytz.timezone('US/Mountain')
        self.RawData['UTC'] = self.RawData.index.tz_localize(pytz.utc).tz_convert(Mt)
        self.Rain_Check([.5,0])

    def Format(self,df,v,drop):
        """Index a freshly read table by its ``datetime`` column as floats.

        Parameters
        ----------
        df : pandas.DataFrame
            Table with a ``datetime`` column.
        v : int
            Number of leading rows to discard.
        drop : list of int
            Positions of the columns to remove after indexing.

        Returns
        -------
        pandas.DataFrame
            Float table indexed by a ``DatetimeIndex``.
        """
        # Positional slice; equivalent to the removed ``.ix[v:]`` on the
        # default integer index produced by read_csv.
        df = df.iloc[v:]
        df = df.set_index(pd.DatetimeIndex(df.datetime))
        df = df.drop(df.columns[drop],axis=1)
        df = df.astype(float)
        return(df)

    def Date_Drop(self,Date,Vars):
        """Remove data strictly between ``Date[0]`` and ``Date[1]``.

        With ``Vars == 'All'`` the rows are deleted; otherwise only the
        column named ``Vars`` is set to NaN in that window.
        """
        inside = (self.RawData.index>Date[0])&(self.RawData.index<Date[1])
        if Vars == 'All':
            self.RawData = self.RawData.drop(self.RawData.loc[inside].index)
        else:
            self.RawData.loc[inside,[Vars]]=np.nan

    def Wind_Bins(self,Bins):
        """Add ``Dir``: ``wind_dir`` cut into ``Bins``-wide sectors from 0 to 360, labelled by sector mid-point."""
        self.bins = np.arange(0,360.1,Bins)
        self.RawData['Dir'] = pd.cut(self.RawData['wind_dir'],bins=self.bins,labels = (self.bins[0:-1]+self.bins[1:])/2)

    def ustar_Bins(self,Bins,LightFilter = {'Var':'PPFD_Avg','Thresh':10},
               uFilter={'Var':'co2_flux','Plot':False},BootStraps={'Repetitions':100,'n_samples':10000}):
        """Estimate a u* threshold from low-light data, with bootstrap percentiles.

        Rows with ``LightFilter['Var'] <= LightFilter['Thresh']`` are cut into
        u* quantile bins. The threshold is the first bin whose mean flux
        reaches 95 % of the mean of all higher bins; it falls back to 0.1 if
        no bin qualifies.

        Parameters
        ----------
        Bins : int
            Number of quantile edges used to bin u*.
        LightFilter : dict
            ``{'Var': column, 'Thresh': upper limit}`` selecting the rows used.
        uFilter : dict
            ``{'Var': flux column, 'Plot': bool}``.
        BootStraps : dict
            ``{'Repetitions': int, 'n_samples': int}`` for the bootstrap.

        Sets ``uFilterData``, ``bins``, ``uThresh_SampSize``, ``uThresh``,
        ``BootStraps`` (dict of the resampled tables) and ``Pct`` (5/50/95th
        percentile of the bootstrapped thresholds).
        """
        flux_var = uFilter['Var']

        def Rcalc(Grp,thrsh=0.95):
            # Ratio of each bin mean to the mean of all higher bins.
            Ratios=[]
            for G in Grp.index:
                m1 = Grp[flux_var][Grp.index==G].values[0]
                m2 = Grp[flux_var][Grp.index>G].mean()
                Ratios.append(m1/m2)
            Ratios = np.asanyarray(Ratios)
            hits = np.where(Ratios>=thrsh)[0]
            if len(hits) == 0:
                print('Could not find u* thersh, defaulting to 0.1')
                return(0.1)
            return(Grp.index[hits][0])

        # .copy() so adding the bin column does not write into a slice of RawData.
        low_light = self.RawData[self.RawData[LightFilter['Var']]<=LightFilter['Thresh']].copy()
        self.uFilterData = low_light
        self.bins = low_light['u*'].quantile(np.arange(0,Bins,1)/Bins).values
        low_light['u*bin'] = pd.cut(low_light['u*'],bins=self.bins,labels = (self.bins[0:-1]+self.bins[1:])/2)
        # Only the two columns actually used are aggregated; this also keeps
        # non-numeric columns out of mean()/std().
        by_bin = low_light.groupby('u*bin')
        Grp = by_bin[['u*',flux_var]].mean()
        GrpC = by_bin.size()
        # axis=0 divides every bin (row) by its own sqrt(n); a plain ``/``
        # would align the counts with the column labels and yield NaN.
        GrpSE = by_bin[['u*',flux_var]].std().div(GrpC**.5,axis=0)
        self.uThresh_SampSize = GrpC.sum()

        self.uThresh = Rcalc(Grp)
        self.BootStraps = {}
        for i in range(BootStraps['Repetitions']):
            Samp = resample(self.RawData,replace=True,n_samples=BootStraps['n_samples'])
            Samp = Samp[Samp[LightFilter['Var']]<=LightFilter['Thresh']].copy()
            bins = Samp['u*'].quantile(np.arange(0,Bins,1)/Bins).values
            Samp['u*bin'] = pd.cut(Samp['u*'],bins=bins,labels = (bins[0:-1]+bins[1:])/2)
            self.BootStraps[str(i)] = Samp
        # Use this instance's samples (not a module-level object).
        Ge = []
        for i in self.BootStraps:
            G = self.BootStraps[i].groupby('u*bin')[[flux_var]].mean()
            Ge.append(Rcalc(G))
        Ge = np.asanyarray(Ge)
        self.Pct = {'5%':np.percentile(Ge,[5]),'50%':np.percentile(Ge,[50]),'95%':np.percentile(Ge,[95])}
        if uFilter['Plot'] == True:
            plt.figure(figsize=(6,5))
            # Error bars are the standard error of the plotted flux.
            plt.errorbar(Grp['u*'],Grp[flux_var],yerr=GrpSE[flux_var],label = 'Mean +- 1SE')
            def Vlines(var,c,l):
                plt.plot([var,var],[Grp[flux_var].min(),Grp[flux_var].max()],
                         color = c,label=l)
            Vlines(self.uThresh,c='red',l='Mean')
            Vlines(self.Pct['5%'],c='green',l='5%')
            Vlines(self.Pct['50%'],c='yellow',l='50%')
            Vlines(self.Pct['95%'],c='blue',l='95%')
            plt.legend()
            plt.title('u* Thershold & Bootstrapped 95% CI')
            plt.grid()

    def PPFD_Bins(self,Bins):
        """Add ``Photon_Flux``: ``PPFD_Avg`` cut into ``Bins``-wide classes, labelled by class mid-point."""
        self.bins = np.arange(0,self.RawData['PPFD_Avg'].max()+1,Bins)
        self.RawData['Photon_Flux'] = pd.cut(self.RawData['PPFD_Avg'],bins=self.bins,labels = (self.bins[0:-1]+self.bins[1:])/2)

    def Rain_Check(self,thresh):
        """Discard fluxes recorded while ``Rain_mm_Tot`` exceeds a limit.

        ``thresh[0]`` applies to every flux except ``ch4_flux``; ``thresh[1]``
        applies to ``ch4_flux``. Also adds ``Rain_diff``, the first difference
        of ``Rain_mm_Tot``.
        """
        self.RawData['Rain_diff'] = self.RawData['Rain_mm_Tot'].diff()
        for var in self.Fluxes:
            limit = thresh[1] if var=='ch4_flux' else thresh[0]
            self.RawData.loc[self.RawData['Rain_mm_Tot']>limit,[var,var+'_drop']]=[np.nan,1]

    def Spike_Removal(self,z_thresh,AltData=None):
        """Remove spikes from every flux column with a median/MAD test.

        On the non-missing values of a column the second difference ``di`` is
        formed; points with ``di`` outside
        ``median(di) +- z_thresh*MAD/0.6745`` are set to NaN. Afterwards any
        value whose previous and next neighbours are both missing is removed
        as well.

        Parameters
        ----------
        z_thresh : float
            Width of the acceptance band in robust standard deviations.
        AltData : pandas.DataFrame, optional
            Table to clean instead of ``self.RawData``. It must contain the
            columns listed in ``self.Fluxes``; it is modified in place and
            returned.

        Returns
        -------
        None when cleaning ``self.RawData``; otherwise ``AltData``.
        """
        def Remove(frame,var):
            series = frame[var].dropna()
            # Forward difference: shift the backward difference up by one
            # position (the last entry keeps its backward difference).
            fwd = series.diff()
            fwd.iloc[:-1] = fwd.iloc[1:].values
            di = fwd.diff()
            MD = di.median()
            MAD = np.abs(di-MD).median()
            band = z_thresh*MAD/0.6745
            flagged = (di<MD-band)|(di>MD+band)
            Droppers = series.index[flagged.values]

            VAR = frame[var].copy()
            VAR.loc[Droppers] = np.nan
            # A point with NaN forward and backward differences has no valid
            # neighbour on either side.
            dina = VAR.diff()
            dina.iloc[:-1] = dina.iloc[1:].values
            dina2 = VAR.diff()
            NaMid = VAR.index[(dina.isna()&dina2.isna()).values]
            VAR.loc[NaMid] = np.nan
            return(VAR)

        if AltData is None:
            for var in self.Fluxes:
                self.RawData[var]=Remove(self.RawData,var)
        else:
            for var in self.Fluxes:
                AltData[var]=Remove(AltData,var)
            return(AltData)

    def Wind_Filter(self,width,center=215):
        """Discard fluxes whose ``wind_dir`` lies strictly within ``center +- width``."""
        blocked = (self.RawData['wind_dir']>center-width)&(self.RawData['wind_dir']<center+width)
        for var in self.Fluxes:
            self.RawData.loc[blocked,[var,var+'_drop']]=[np.nan,1]

    def StorageCorrection(self,Raw=True):
        """Add ``fco2``/``fch4`` = flux + storage term, on ``RawData`` (``Raw=True``) or ``Data``."""
        target = self.RawData if Raw else self.Data
        target['fco2'] = target['co2_flux']+target['co2_strg']
        target['fch4'] = target['ch4_flux']+target['ch4_strg']

    def Signal_Check(self,thresh):
        """Keep an unfiltered copy of ``ch4_flux`` and drop it where ``rssi_77_mean < thresh``."""
        self.RawData['ch4_noSSFilter'] = self.RawData['ch4_flux']
        self.RawData.loc[self.RawData['rssi_77_mean']<thresh,['ch4_flux','ch4_flux_drop']] = [np.nan,1]

    def QC_Check(self,thresh):
        """Drop fluxes whose ``qc_<flux>`` flag is ``>= thresh`` and flag every missing flux."""
        for var in self.Fluxes:
            self.RawData.loc[self.RawData['qc_'+var]>=thresh,[var,var+'_drop']]=[np.nan,1]
            self.RawData.loc[self.RawData[var].isna(),[var+'_drop']]=1

    def Ustar_Drop(self,Override=None):
        """Create ``self.Data`` from ``RawData`` with low-turbulence fluxes removed.

        Fluxes with ``u* < self.uThresh`` are set to NaN and flagged, then the
        storage-corrected columns are recomputed on ``self.Data``.

        Parameters
        ----------
        Override : float, optional
            Threshold to use (and store in ``self.uThresh``) instead of the
            one found by ``ustar_Bins``. One of the two must have been
            provided.
        """
        self.Data = self.RawData.copy()
        if Override is not None:
            self.uThresh = Override
        for var in self.Fluxes:
            self.Data.loc[self.Data['u*']<self.uThresh,[var,var+'_drop']]=[np.nan,1]
        self.StorageCorrection(Raw=False)

## Initialize everything and parse the data
Comp = Compile([TenHz_Path,OneHz_Path],Met_Path,Soil_Path)

# Time windows to remove, paired one-to-one with the entries of Var below:
# 'All' deletes the rows, a column name blanks only that column.
Drop = [['2017-06-01 15:00:00','2017-06-23 15:00:00'],
['2017-08-28 00:00:00','2017-09-11 16:00:00'],
['2017-09-12 07:30:00','2017-09-12 15:00:00']]
Var = ['All','co2_flux','co2_flux']
for drop,var in zip(Drop,Var):
    Comp.Date_Drop(drop,Vars=var)
# Remaining filters, applied in this order to Comp.RawData:
# QC flag >= 2, spike removal, then the wind sector 215 +- 30.
Comp.QC_Check(2)
Comp.Spike_Removal(z_thresh=5.5)
Comp.Wind_Filter(30)


In [None]:
# Comp.ustar_Bins(30,{'Var':'PPFD_Avg','Thresh':20},{'Var':'ch4_flux','Plot':True},
#                 {'Repetitions':100,'n_samples':10000})

# print(Comp.uThresh,Comp.Pct,Comp.uThresh_SampSize)

# Estimate the u* threshold from co2_flux on rows with PPFD_Avg <= 10,
# using 30 quantile edges and 100 bootstrap resamples of 10000 rows each.
# The resampling is not seeded, so the percentiles vary between runs.
Comp.ustar_Bins(30,{'Var':'PPFD_Avg','Thresh':10},{'Var':'co2_flux','Plot':True},
                {'Repetitions':100,'n_samples':10000})

# Threshold, its bootstrapped 5/50/95th percentiles, and the sample size used.
print(Comp.uThresh,Comp.Pct,Comp.uThresh_SampSize)

# plt.ylim(-10,10)

In [None]:
# Apply the u* filter; this (re)creates Comp.Data from Comp.RawData.
Comp.Ustar_Drop()
# Work on a copy: the in-place unit scaling below would otherwise also rescale
# Comp.Data, and any later cell that copies Comp.Data and scales fch4 again
# would end up a factor of 1000 too large.
Data = Comp.Data.copy()
# Range of Table_1 and summed Rain_mm_Tot over the preceding 48 records.
Data['WtrTbl_Range'] = Data['Table_1'].rolling(48).max()-Data['Table_1'].rolling(48).min()
Data['Rain_1D'] = Data['Rain_mm_Tot'].rolling(48).sum()
# Hour of day, used as a predictor.
Data['Time']=Data.index.hour
# Scale fch4 by 1000 (the later axis labels report it in nmol).
Data['fch4']*=1000

In [None]:
# Rows where the flux and both drivers are present.
Temp = Data[['fco2','PPFD_Avg','Temp_2_5_1']].dropna()
# Exponential light response (default starting values). The result is not used
# for the figure or the gap filling below; it is kept for inspection.
popt, pcov = curve_fit(Light_Response_Temp, (Temp['PPFD_Avg'].values,Temp['Temp_2_5_1'].values,), Temp['fco2'].values)
# Hyperbolic light response, started from previously found parameter values.
popt2, pcov2 = curve_fit(Light_Response_Temp2, (Temp['PPFD_Avg'].values,Temp['Temp_2_5_1'].values,),
                       Temp['fco2'].values,p0=(0.00716274,1.52597427,1,2.69368876,0.86723787,0.65435465))

pred2 = Light_Response_Temp2((Temp['PPFD_Avg'],Temp['Temp_2_5_1']),popt2[0],popt2[1],popt2[2],popt2[3],popt2[4],popt2[5])

r2 = (metrics.r2_score(Temp['fco2'].values,pred2))

# Observations and fitted values against light.
fig = plt.figure(figsize=(8,5.8))
plt.scatter(Temp['PPFD_Avg'],Temp['fco2'],color='green',label='Observations',s=15)
plt.scatter(Temp['PPFD_Avg'],pred2,color=(0.5,0.5,0.5),label='Estimated: r$^2$ = '+str(np.round(r2,2)),s=15)

def add_subplot_axes(ax,rect,axisbg='w'):
    """Add an inset Axes positioned relative to an existing Axes.

    Parameters
    ----------
    ax : matplotlib Axes
        Parent axes.
    rect : sequence of four floats
        ``[x, y, width, height]`` in axes-fraction units of ``ax``: the
        lower-left corner and the size of the inset as a fraction of the
        parent's size.
    axisbg : colour
        Background colour of the inset.

    Returns
    -------
    The new Axes, created on the figure that owns ``ax``.
    """
    fig = ax.figure
    box = ax.get_position()
    # Lower-left corner: axes fraction -> display pixels -> figure fraction.
    inax_position  = ax.transAxes.transform(rect[0:2])
    infig_position = fig.transFigure.inverted().transform(inax_position)
    x = infig_position[0]
    y = infig_position[1]
    width = box.width*rect[2]
    height = box.height*rect[3]
    # 'facecolor' is the current name of the former 'axisbg' keyword.
    subax = fig.add_axes([x,y,width,height],facecolor=axisbg)
    x_labelsize = subax.get_xticklabels()[0].get_size()
    y_labelsize = subax.get_yticklabels()[0].get_size()
    subax.xaxis.set_tick_params(labelsize=x_labelsize)
    subax.yaxis.set_tick_params(labelsize=y_labelsize)
    return subax

ChamberData=pd.read_csv('C:\\FishIsland_2017/ChamberFluxes.csv')
plt.grid()
# Reuse the Axes the scatter plots were drawn on. Newer Matplotlib releases
# make fig.add_subplot(111) create a second, empty Axes on top of it, which
# hides the data and leaves the legend without entries.
ax = fig.gca()
# Inset with the chamber-flux box plot in the upper right corner.
rect = [0.75,0.695,0.2,0.33]
ax1 = add_subplot_axes(ax,rect)
ax1.boxplot(ChamberData['Flux'].dropna())
ax1.get_xaxis().set_visible(False)
plt.sca(ax1)
plt.yticks([0,1,2])
ax.set_title('GEP + ER Fit',fontsize = 24,loc='left')
ax1.set_title('Chamber Fluxes',fontsize=20,y=1.05)
ax.legend(loc='lower left',fontsize = 16)

# The inset is the current Axes here, so this switches on its grid.
plt.grid()
plt.tight_layout()
plt.savefig('C:/Users/wesle/CompsProj/GapFilling.png',dpi=500)

# Model prediction for every record, used to fill gaps in fco2.
# pred2 is rebound to the full-length prediction as well.
Data['TempFill']=pred2 = Light_Response_Temp2((Data['PPFD_Avg'],
                       Data['Temp_2_5_1']),popt2[0],popt2[1],popt2[2],popt2[3],popt2[4],popt2[5])
Data['fco2_filled'] = Data['fco2'].fillna(Data['TempFill'])


# Node Search

In [None]:
# keys = ['fch4','wind_speed','NR_Wm2_Avg','VWC_1','AirTC_Avg','Time']

from multiprocessing import Pool

def f(x):
    """Return ``x`` multiplied by itself (worker function for the Pool demo)."""
    squared = x*x
    return squared

if __name__ == '__main__':
    # The context manager shuts the worker processes down once the map
    # has returned, instead of leaving five idle workers behind.
    with Pool(5) as p:
        print(p.map(f, [1, 2, 3]))


# keys = ['fch4','Rain_1D','u*','air_pressure','NR_Wm2_Avg','WtrTbl_Range','Temp_15_1','Active_Layer_1','Time']
 
    
    
# Nodes = np.arange(2,10,1)
# P = []
# S = []
# for node in Nodes:
#     r2,mae,pred,std = SimpleANN(Data,
#                     keys=keys,
#                     node=node,iters=15,Verbose=False)
#     P.append(mae)
#     S.append(S)
    
# P = np.asanyarray(P)
# S = np.asanyarray(S)
# print(P)
# plt.figure()
# plt.eroorbar(Nodes,P,yerr=S)

# nodes=Nodes[np.where(P == P.min())]

# Results = SimpleANN(Data,keys=keys,node=nodes,iters=15,Verbose=False)
# pred = Results[2]
# Score = Results[0]
# Data['Fill'] = np.nan
# Temp = Data[keys[1:]].dropna()
# Temp['Fill'] = pred
# Data['Fill'] = Data['Fill'].fillna(Temp['Fill'])
# print(Score)

In [None]:

# keys = ['fch4','wind_speed','NR_Wm2_Avg','VWC_1','AirTC_Avg','Time']

# P = []
# keys = ['fch4','Rain_1D','u*','air_pressure','PPFD_Avg','WtrTbl_Range',
#         'Temp_15_1','Temp_2_5_1','Active_Layer_1','fco2_filled','Time']


# keys = ['fch4','Rain_1D','u*','air_pressure','PPFD_Avg','WtrTbl_Range',
#         'Temp_15_1','Temp_2_5_1','Active_Layer_1','Time']
# keys = ['fch4','PPFD_Avg', 'Active_Layer_1', 'u*']

# Node Search



In [None]:
# NOTE(review): `Score` and the `Data['Fill']` column are only assigned in
# commented-out code in this file; on a fresh kernel this cell raises unless
# they are produced elsewhere - confirm where they come from.
# RGBA colour reused by later figures.
Red = (.95,.25,.1,.45)
plt.figure(figsize=(5.5,5.5))
# Observed fch4 against the network estimate.
plt.scatter(Data['fch4'],Data['Fill'],label='r$^2$ = '+str(np.round(Score,3)),color=Red,s=15)
plt.xlabel('Observed',fontsize = 16)
plt.ylabel('Estimated',fontsize = 16)
plt.grid()
# Same limits on both axes so the 1:1 line is the diagonal.
plt.xlim(Data['fch4'].min()-5,Data['fch4'].max()+5)
plt.ylim(Data['fch4'].min()-5,Data['fch4'].max()+5)
plt.plot([Data['fch4'].min(),Data['fch4'].max()],[Data['fch4'].min(),Data['fch4'].max()],
        color='black',label='1:1')
plt.legend(fontsize = 16)
plt.title('Neural Network',fontsize=24)
plt.tight_layout()
plt.savefig('C:/Users/wesle/CompsProj/GapFilling_NN.png',dpi=500)

# Fill gaps in the observed flux with the network estimate.
Data['fch4_filled'] = Data['fch4'].fillna(Data['Fill'])

In [None]:
# keys = ['fch4','Rain_1D','u*','air_pressure','PPFD_Avg','WtrTbl_Range',
#         'Temp_15_1','Temp_2_5_1','Active_Layer_1','Time']

def K_search(RunKeys,SearchKeys,K_min=None):
    """Score each candidate predictor when added to the current key set.

    For every entry of ``SearchKeys`` that is not already part of the key
    list being tried, ``SimpleANN`` is run on the module-level ``Data`` with
    ``RunKeys + [candidate]`` (3 hidden nodes, 10 repetitions).

    Parameters
    ----------
    RunKeys : list of str
        Target followed by the predictors selected so far. Not modified.
    SearchKeys : iterable of str
        Candidate predictors.
    K_min : unused

    Returns
    -------
    tuple
        ``(array of mean absolute errors, list of candidates tried)`` in
        matching order.
    """
    base_keys = RunKeys.copy()
    trial_keys = RunKeys
    scores = []
    tried = []
    for candidate in SearchKeys:
        if candidate in trial_keys:
            continue
        trial_keys = base_keys.copy()
        trial_keys.extend([candidate])
        tried.append(candidate)
        outcome = SimpleANN(Data,keys=trial_keys,node=3,iters=10,Verbose=False)
        scores.append(outcome[1])
    return(np.asanyarray(scores),tried)

def Loop_Search(RunKey,keys):
    """One forward-selection step: add the candidate with the lowest MAE.

    Parameters
    ----------
    RunKey : list of str
        Target followed by the predictors selected so far. Not modified.
    keys : list of str
        ``keys[1:]`` are the candidate predictors.

    Returns
    -------
    tuple
        ``(extended key list, chosen candidate, MAE of every candidate,
        candidates tried)``.
    """
    extended = RunKey.copy()
    MAE, K = K_search(RunKey,keys[1:])
    best_position = np.where(MAE == MAE.min())[0][0]
    K_min = K[best_position]
    extended.append(K_min)
    return(extended,K_min,MAE,K)
    

# Target first, then the candidate predictors. Defined here so the cell runs
# on a fresh kernel; the list is the one given in the comment at the top of
# this cell.
keys = ['fch4','Rain_1D','u*','air_pressure','PPFD_Avg','WtrTbl_Range',
        'Temp_15_1','Temp_2_5_1','Active_Layer_1','Time']

# Four rounds of forward selection. The per-round results keep their own names
# because the plotting cell reads K..K4, MAE..MAE4 and K_min..K_min3.
RunKey = [keys[0]]
RunKey,K_min,MAE,K = Loop_Search(RunKey,keys)
print(K_min)

RunKey,K_min2,MAE2,K2 = Loop_Search(RunKey,keys)
print(K_min2)

RunKey,K_min3,MAE3,K3 = Loop_Search(RunKey,keys)
print(K_min3)

RunKey,K_min4,MAE4,K4 = Loop_Search(RunKey,keys)
print(K_min4)

print(RunKey)


# plt.sca(ax[0,0])
# plt.xticks(rotation=20)
# plt.sca(ax[1,0])
# plt.xticks(rotation=20)

In [None]:
def Fmt(K):
    """Turn column names into axis-tick labels, in place.

    ``2_5`` becomes ``2.5``, ``_15`` becomes `` 15``, a ``_1`` that ends a
    name component is removed, ``_Avg`` is removed and the remaining
    underscores become spaces. A ``_1`` that is followed directly by a letter
    or digit (as in ``Rain_1D``) is left alone so the name is not mangled.

    Parameters
    ----------
    K : list of str
        Names to format; the list is modified in place.

    Returns
    -------
    The same list object.
    """
    import re
    for i,k in enumerate(K):
        k = k.replace('2_5','2.5')
        k = k.replace('_15',' 15')
        k = re.sub(r'_1(?![0-9A-Za-z])','',k)
        k = k.replace('_Avg','')
        k = k.replace('_',' ')
        K[i]=k
    return(K)
# Format the candidate names of each selection round for use as tick labels
# (Fmt edits the lists in place).
K=Fmt(K)
K2=Fmt(K2)
K3=Fmt(K3)
K4=Fmt(K4)

# One bar chart of MAE per selection round, all sharing the y-range of the
# first panel.
fig,ax=plt.subplots(2,2,figsize=(10,7))
ax[0,0].bar(K,MAE)
plt.sca(ax[0,0])
# The lower y-limit is fixed at 7; the other panels copy the resulting range.
plt.ylim(bottom=7)
yl = ax[0,0].get_ylim()
ax[0,0].grid()
# ax[0,0].xaxis.set_major_formatter(NullFormatter())
ax[0,0].set_title('One Factor',fontsize = 16)
ax[0,0].set_ylabel('MAE nmol $m^{-2} s^{-1}$',fontsize = 16)
plt.sca(ax[0,0])
plt.xticks(rotation=35,fontsize=12)
L = ax[0,0].xaxis.get_ticklabels()


# Round 2: titled with the predictor chosen in round 1.
ax[0,1].bar(K2,MAE2)
ax[0,1].set_ylim(ax[0,0].get_ylim())
ax[0,1].grid()
ax[0,1].set_title(K_min+'+',fontsize = 16)
plt.sca(ax[0,1])
plt.xticks(rotation=35,fontsize=12)
# ax[1,0].xaxis.set_major_formatter(NullFormatter())

# Round 3: titled with the predictors chosen in rounds 1 and 2.
ax[1,0].bar(K3,MAE3)
ax[1,0].set_ylim(ax[0,0].get_ylim())
ax[1,0].grid()
ax[1,0].set_title(K_min+'+\n'+K_min2+'+',fontsize = 14)
ax[1,0].set_ylabel('MAE nmol $m^{-2} s^{-1}$',fontsize=16)
plt.sca(ax[1,0])
plt.xticks(rotation=35,fontsize=12)

# Round 4: titled with the predictors chosen in rounds 1 to 3.
ax[1,1].bar(K4,MAE4)
ax[1,1].set_ylim(ax[0,0].get_ylim())
ax[1,1].set_title(K_min+'+\n'+K_min2+'+'+K_min3+'+',fontsize = 16)
ax[1,1].grid()
plt.sca(ax[1,1])
plt.xticks(rotation=35,fontsize=12)

plt.tight_layout()
plt.savefig('C:/Users/wesle/CompsProj/Feature_Selection.png',dpi=500)
plt.show()

In [None]:
# Daily means of the gap-filled fluxes, shown as two bar charts.
# NOTE(review): `Red` is defined in the neural-network scatter cell and
# `fch4_filled` is created there too; this cell needs that cell to have run.
Green = (0,1,.1,.8)
Daily = Data.resample('D').mean()
fig,ax = plt.subplots(2,1,figsize = (10,7.5))

# Flux rate -> daily total: 1e-6 * molar mass (g/mol) * seconds per day.
Daily['fco2_filled']=Daily['fco2_filled']* 1e-6 * 44.0095 *3600*24

Daily['fch4_filled']=Daily['fch4_filled']* 1e-6 * 16.04246 *3600*24

# Mean daily CH4 total weighted by 28e-3 plus the mean daily CO2 total.
# NOTE(review): 28 looks like a CH4 warming-potential factor and 1e-3 a
# mg -> g conversion - confirm.
Mn = Daily['fch4_filled'].mean()*28*1e-3+Daily['fco2_filled'].mean()

# NOTE(review): 81 and 365-81 look like day counts (measured period vs. the
# rest of the year) - confirm.
print(-Mn*81/(365-81))
print(Daily['fch4_filled'].count())
print(Daily['fch4_filled'].mean()*28*1e-3+Daily['fco2_filled'].mean())

ax[0].bar(Daily.index,Daily['fco2_filled'],color = Green,edgecolor = 'black',label='Daily F$_{CO2}$')
ax[0].set_xlim('2017-06-22','2017-09-14')
# Constant column holding the mean, drawn as a horizontal line.
Daily['Mean']=Daily['fco2_filled'].mean()
ax[0].plot(Daily['Mean'],color='black',label = 'Mean: '+ str(np.round(Daily['Mean'].mean(),2)))
ax[0].grid()
ax[0].set_ylabel('g m$^{-2}$ d$^{-1}$',fontsize = 16)
ax[0].legend(loc=4,fontsize = 16)


ax[1].bar(Daily.index,Daily['fch4_filled'],color = Red,edgecolor = 'black',label = 'Daily F$_{CH4}$')
ax[1].set_xlim('2017-06-22','2017-09-14')
# 'Mean' is overwritten with the CH4 mean for the second panel.
Daily['Mean']=Daily['fch4_filled'].mean()
ax[1].plot(Daily['Mean'],color='black',label = 'Mean: ' + str(np.round(Daily['Mean'].mean(),2)))
ax[1].grid()
ax[1].set_ylabel('mg m$^{-2}$ d$^{-1}$',fontsize = 16)
ax[1].legend(loc = 1,fontsize = 16)

# print(Daily['fco2_filled'].mean(),Daily['fco2'].mean())
plt.tight_layout()
plt.savefig('C:/Users/wesle/CompsProj/GHG_Balance.png',dpi=500)
plt.show()

In [None]:
def Scatter(Var,xl=None,yl=None):
    """Scatter one column of the module-level ``Fix`` table against ``NoFix``.

    Parameters
    ----------
    Var : str
        Column present in both tables; also used as the title.
    xl, yl : sequence of two numbers, optional
        Axis limits ``(low, high)``. Each is applied independently when given.
    """
    plt.figure(figsize=(7,7))
    plt.scatter(Fix[Var],NoFix[Var])
    plt.xlabel('Fix')
    plt.ylabel('NoFix')
    if xl is not None:
        plt.xlim(xl[0],xl[1])
    if yl is not None:
        plt.ylim(yl[0],yl[1])
    plt.title(Var)

# Scatter('Wind')
# # Scatter('u*')
# Scatter('co2_strg',[-2,2],[-2,2])
# Scatter('qc_co2_flux')#,[-5,5],[-5,5])

# Opens an empty figure; nothing is plotted in this cell, so the legend
# call below has no entries to show.
plt.figure(figsize=(15,10))
plt.legend()
# Scatter('H')
# Scatter('LE')


In [None]:
# plt.figure(figsize=(10,10))


def _Read_Taglu_Folder(folder,skiprows):
    """Read every Excel file below ``folder`` into one time-indexed table.

    Parameters
    ----------
    folder : str
        Directory to walk (sub-directories included).
    skiprows : list of int
        Row numbers skipped in each workbook's first sheet.

    Returns
    -------
    pandas.DataFrame
        All files concatenated in the order found, indexed by ``TIMESTAMP``.
    """
    frames = []
    for root,Dir,files in os.walk(folder):
        for file in files:
            # os.path.join works for nested directories too, where the
            # walked root has no trailing separator.
            df = pd.read_excel(os.path.join(root,file),sheet_name=0,skiprows=skiprows)
            frames.append(df.set_index(pd.DatetimeIndex(df.TIMESTAMP)))
    return(pd.concat(frames))

# NOTE(review): the two folders skip different header rows
# ([0,2,3] vs [0,1,3]) - confirm this matches the file layouts.
Daily = _Read_Taglu_Folder(Root+'TagluStationData/Daily/',[0,2,3])
Hourly = _Read_Taglu_Folder(Root+'TagluStationData/Hourly/',[0,1,3])

# Blank every row whose snow depth is outside 0..1.
Daily.loc[Daily['Snow_Depth']<0] = np.nan
Daily.loc[Daily['Snow_Depth']>1] = np.nan
# Displayed as the cell output; Daily itself is not modified by this line.
Daily['Snow_Depth'].ffill()

In [None]:
# Monthly = Data.ffill().resample('M').mean()


# Blank every row whose snow depth is outside 0..0.7, then carry the last valid
# depth forward. The fill is assigned back: ffill() returns a new Series and
# does nothing when its result is discarded.
Daily.loc[Daily['Snow_Depth']<0] = np.nan
Daily.loc[Daily['Snow_Depth']>.7] = np.nan
Daily['Snow_Depth'] = Daily['Snow_Depth'].ffill()

# Day-of-year climatology of the hourly and daily station records. A separate
# name is used so the flux table `Data` is not overwritten by this cell.
TagluData=Hourly.resample('D').mean()
TagluData=TagluData.groupby(TagluData.index.dayofyear).mean()
DailyData=Daily.groupby(Daily.index.dayofyear).mean()

fig,ax = plt.subplots(2,2,figsize=(10,10))

ax1 = ax[0,0]
ax2 = ax[0,1]
ax3 = ax[1,0]
ax4 = ax[1,1]

ax1.plot(TagluData['AirTemp'])
ax1.grid()
ax1.set_title('Air Temperature')

ax2.plot(TagluData['Net_Ttl_Rad_Avg'])
ax2.grid()
ax2.set_title('Net Radiation')

ax3.plot(DailyData['Snow_Depth'])
ax3.set_title('Snow Depth')
ax3.grid()

ax4.plot(TagluData['SoilMoist(3)'])
ax4.grid()
ax4.set_title('Soil Moisture')

In [None]:
# Full snow-depth record of the Taglu daily table.
plt.figure(figsize=(10,10))
plt.plot(Daily['Snow_Depth'])

In [None]:
# Start again from the u*-filtered flux table (independent copy).
Data=Comp.Data.copy()

print(Data['fch4'].mean())

# fco2 = Daily['fco2']*1e-6 * 44.0095 *3600*24
# fch4 = Daily['fch4']*1e-3 * 16.04246 *3600*24
# print(fco2.mean())
# print(fch4.mean())

def QuickRegress(Data,time,periods,X,y):
    """Fit ``y ~ X`` on time-averaged data and predict from smoothed ``X``.

    The linear regression is fitted on the means of ``X`` and ``y`` over
    bins of length ``time``; its r2 and p-value are printed. The prediction
    applies the fitted line to the ``periods``-record rolling mean of ``X``
    at the original resolution.

    ``Data`` is not modified: the rolling mean is kept local instead of
    being written back into the predictor column.

    Parameters
    ----------
    Data : pandas.DataFrame
        Time-indexed table containing columns ``X`` and ``y``.
    time : str
        Resampling rule passed to ``DataFrame.resample``.
    periods : int
        Window length of the rolling mean applied to ``X``.
    X, y : str
        Predictor and target column names.

    Returns
    -------
    pandas.Series
        Predicted ``y`` on the index of ``Data`` (NaN where the rolling
        window is incomplete).
    """
    # Only the two columns involved are averaged.
    V = Data[[X,y]].resample(time).mean().dropna()
    lr = stats.linregress(V[X],V[y])
    print()
    print(lr[2]**2,lr[3])

    smoothed = Data[X].rolling(periods).mean()
    Pred = smoothed*lr[0]+lr[1]
    return(Pred)
    
    
# print(Data['PPFD_Avg'])
    
# TimeScaleSearch(Data,'Table_1','fch4')

# print(Data['PPFD_Avg'])

# Data['PPFD_Avg_rolling']=Data['PPFD_Avg'].rolling(5*2).range()





# TimeScaleSearch(Data,'VWC_1','fch4')
# TimeScaleSearch(Data,'PPFD_Avg','fch4')
# TimeScaleSearch(Data,'Temp_15_1','fch4')
# TimeScaleSearch(Data,'Table_1','fch4')
# TimeScaleSearch(Data,'Active_Layer_1','fch4')

# One single-predictor regression per entry: averaging interval, rolling
# window and predictor column, all against fch4.
Time=['30T','1H','4H','3D','3D']
Periods = [1,8,2,24,24]
x = ['u*','Temp_15_1','PPFD_Avg','Active_Layer_1','Table_1']
Preds = [QuickRegress(Data,time,periods,X,'fch4') for time,periods,X in zip(Time,Periods,x)]

# Ensemble mean: divide by the number of regressions actually run. A record is
# NaN when any single prediction is NaN.
P = Preds[0].copy()
for extra in Preds[1:]:
    P = P+extra
P = P/len(Preds)

Data['Pred'] = P
Score = Data[['fch4','Pred']].dropna()

print()
r2 =(metrics.r2_score(Score['fch4'].values,Score['Pred'].values))

plt.figure(figsize=(10,10))
plt.scatter(Data['fch4'],P,label=str(np.round(r2,2)))
plt.xlabel('True')
plt.ylabel('Pred')
plt.xlim(Data['fch4'].min(),Data['fch4'].max())
plt.ylim(Data['fch4'].min(),Data['fch4'].max())
plt.legend()

print()

# Derived predictors: range of Table_1 over 24 records, pressure anomaly
# against its 48-record mean, summed Rain_mm_Tot over 48 records, hour of day.
Data['Wtable_Range']=Data['Table_1'].rolling(24).max()-Data['Table_1'].rolling(24).min()
Data['Pressure_D']=Data['air_pressure'].rolling(48).mean()-Data['air_pressure']
Data['Rain_1D']=Data['Rain_mm_Tot'].rolling(48).sum()
Data['Time'] = Data.index.hour
Data['fch4']*=1000