##  Optimize and run a Dense Neural Network for gap filling and feature identification

** With a few tweaks to RepRunner, an LSTM can be run instead

In [75]:
import time
import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from itertools import combinations
from functools import partial
from multiprocessing import Pool
from sklearn.preprocessing import StandardScaler
from sklearn import metrics
from sklearn.model_selection import RepeatedKFold
from sklearn.model_selection import train_test_split

## Personal Modules
import ReadStandardTimeFill as RSTF
import importlib
import DenseNet as Dense
importlib.reload(Dense)
importlib.reload(RSTF)

%matplotlib notebook
%config IPCompleter.greedy=True

from scipy.optimize import minimize, curve_fit
from scipy.stats import norm
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Matern, WhiteKernel, ConstantKernel
from matplotlib import cm

from scipy import stats
from statsmodels.stats.multicomp import pairwise_tukeyhsd

from ipywidgets import FloatProgress
from IPython.display import display
import os

from keras.models import model_from_json
import ReadStandardTimeFill as RSTF
# pool.close()

In [76]:
def TTV_Split(iteration,Memory,X,y,params,X_fill):
    """Split the data into train/test/validation sets and train one model.

    The split seed and the model index are derived from ``iteration`` and
    ``params['splits_per_mod']`` and written back into ``params`` under
    ``'seed'`` and ``'iteration'``.  When model saving is enabled, weight
    saving is switched on as well.

    Parameters
    ----------
    iteration : int
        Index of the current repetition.
    Memory : float
        Forwarded unchanged to ``Dense.Train_Steps``.
    X, y : array-like
        Features and targets to split.
    params : dict
        Run settings; mutated in place (``'Save'``, ``'seed'``, ``'iteration'``).
    X_fill : array-like
        Forwarded unchanged to ``Dense.Train_Steps``.

    Returns
    -------
    tuple
        ``(Dense.Train_Steps(...) result, y_val, params)``.
    """
    save_opts = params['Save']
    if save_opts['Model'] == True:
        # Saving a model implies saving its weights too.
        save_opts['Weights'] = True

    per_model = params['splits_per_mod']
    # Position inside the current group of splits, scaled to 0-100, is the seed.
    params['seed'] = int(iteration % per_model / per_model * 100)
    # Number of complete groups of splits so far.
    params['iteration'] = int(iteration / per_model)
    seed = params['seed']

    # Hold out 10% for testing, then 11% of the remainder for validation.
    X_rest, X_test, y_rest, y_test = train_test_split(
        X, y, test_size=0.1, random_state=seed)
    X_train, X_val, y_train, y_val = train_test_split(
        X_rest, y_rest, test_size=0.11, random_state=seed)

    trained = Dense.Train_Steps(params, X_train, X_test, X_val, y_train, y_test,
                                y_val, X_fill=X_fill, Memory=Memory)
    return (trained, y_val, params)

def RunReps(Model,params,Runs):
    """Train ``params['K']`` repetitions and record their validation scores.

    Data are read from the module-level ``Path`` and scaled with
    ``RST.Scale(params['Y'], Model)``.  For every repetition the validation
    predictions and targets are transformed back with ``RST.YScaled`` and the
    MSE and R2 are stored in ``Runs`` together with the iteration and seed
    that ``TTV_Split`` wrote into ``params``.

    Parameters
    ----------
    Model : list
        Passed to ``RST.Scale`` as the second argument (the caller passes the
        list of input variable names).
    params : dict
        Run settings; reads ``'proc'``, ``'Y'`` and ``'K'``.
    Runs : pandas.DataFrame
        Must contain the columns ``'MSE'``, ``'R2'``, ``'iteration'`` and
        ``'seed'`` and at least ``params['K']`` rows.  Updated in place.

    Returns
    -------
    pandas.DataFrame
        The same ``Runs`` object with rows ``0 .. K-1`` filled in.
    """
    RST = RSTF.ReadStandardTimeFill(Path)
    # Memory share handed to the trainer: (floor(100/proc) - 5/proc) percent,
    # expressed as a fraction.
    offset = 5/params['proc']
    Memory = (math.floor(100/params['proc'])- offset) * .01
    RST.Scale(params['Y'],Model)
    # Multiplying by 1.0 yields fresh float copies of the scaled arrays.
    y = RST.y*1.0
    X = RST.X*1.0
    X_fill = RST.X_fill*1.0

    # Resolve column positions once so each result is written with a single
    # positional assignment.  The previous chained form
    # ``Runs['MSE'].iloc[i] = ...`` writes through a temporary Series, which
    # raises SettingWithCopyWarning and may leave ``Runs`` unchanged.
    col = {name: Runs.columns.get_loc(name)
           for name in ('MSE', 'R2', 'iteration', 'seed')}

    for i in range(params['K']):
        results = TTV_Split(i,Memory,X,y,params,X_fill)
        # results[0][1] is presumably the validation prediction returned by
        # Dense.Train_Steps -- TODO confirm against DenseNet.
        Yval = RST.YScaled.inverse_transform(results[0][1].reshape(-1,1))
        y_val = RST.YScaled.inverse_transform(results[1].reshape(-1,1))
        Runs.iloc[i, col['MSE']] = metrics.mean_squared_error(y_val,Yval)
        Runs.iloc[i, col['R2']] = metrics.r2_score(y_val,Yval)
        params = results[2]
        Runs.iloc[i, col['iteration']] = params['iteration']
        Runs.iloc[i, col['seed']] = params['seed']
    return(Runs)

In [88]:
# Variable handed to Dense.Params as the fill target.
FillVar = 'fch4'

# cwd = os.getcwd()
# print(cwd)
# NOTE: hard-coded, machine-specific working directory.  The relative data
# path below and the weights folder used by Load_Model / Load_Weights are
# resolved against it.
os.chdir('C:/Users/wesle/NetworkAnalysis/')
# Input variables; passed to RunReps below and reused by Fix / Map.
Vars = ['VWC','Sedge','Temp']
N = 34

# Read as a module-level global inside RunReps.
Path = 'Data_Footprints_2018-06-12.csv'
Runs,params = Dense.Params('Full',FillVar,MP=False)
# Placeholder columns; RunReps fills them in per repetition.
Runs['iteration'] = 0
Runs['seed'] = 0

params['N'] = N
# TTV_Split turns on weight saving whenever model saving is enabled.
params['Save']['Model'] = True
# Name used to build the saved .json / .h5 file names.
params['Model'] = 'Kitty'#'+'.join(Model)
print(params)

# Keep one row per repetition (the first K) and drop the 'Model' column.
Runs = Runs.iloc[0:params['K']].drop('Model',axis=1)
Runs['N'] = params['N']
Runs = RunReps(Vars,params,Runs)


{'proc': 1, 'K': 20, 'epochs': 200, 'Y': 'fch4', 'splits_per_mod': 5, 'Save': {'Weights': False, 'Model': True}, 'N': 34, 'Model': 'Kitty'}
Saved model to disk


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._setitem_with_indexer(indexer, value)


In [89]:
# Show the per-repetition MSE, R2, iteration and seed recorded by RunReps.
print(Runs)

     N        MSE        R2  iteration  seed
0   34  58.105648  0.637330          0     0
1   34  55.064653  0.579932          0    20
2   34  41.304629  0.602318          0    40
3   34  54.076921  0.635919          0    60
4   34  55.498765  0.441665          0    80
5   34  52.764736  0.670665          1     0
6   34  52.838554  0.596914          1    20
7   34  39.542484  0.619284          1    40
8   34  56.733242  0.618035          1    60
9   34  50.708917  0.489852          1    80
10  34  54.862845  0.657570          2     0
11  34  54.596033  0.583507          2    20
12  34  37.108802  0.642715          2    40
13  34  53.355769  0.640774          2    60
14  34  56.319812  0.433405          2    80
15  34  55.755354  0.651999          3     0
16  34  52.920369  0.596290          3    20
17  34  40.537682  0.609702          3    40
18  34  53.364893  0.640713          3    60
19  34  58.249193  0.413994          3    80


In [90]:
def Load_Model(params):
    """Rebuild a saved model architecture from its JSON description.

    The file is read from
    ``<cwd>/<params['Y']>/Weights/<params['Model']>.json``.

    Parameters
    ----------
    params : dict
        Reads ``'Y'`` (sub-folder name) and ``'Model'`` (file stem).

    Returns
    -------
    The object returned by ``model_from_json``.  No weights are loaded here;
    see ``Load_Weights``.

    Raises
    ------
    FileNotFoundError
        If the JSON file does not exist.
    """
    json_path = os.getcwd()+'/'+params['Y']+'/Weights/'+params['Model']+'.json'
    # The context manager closes the handle even if read() raises.
    with open(json_path, 'r') as json_file:
        loaded_model_json = json_file.read()
    return model_from_json(loaded_model_json)
def Load_Weights(loaded_model,i,s):
    """Load the saved weights for one (iteration, seed) pair and compile.

    The weights file is
    ``<cwd>/<params['Y']>/Weights/<params['Model']>_<i>_<s>.h5``.
    NOTE: ``params`` is read from module scope, not passed as an argument.

    Parameters
    ----------
    loaded_model :
        Model object to receive the weights; modified in place.
    i, s :
        Iteration and seed used in the file name.

    Returns
    -------
    The same ``loaded_model`` object, compiled with mean-squared-error loss
    and the ``adam`` optimizer.
    """
    weights_dir = os.getcwd()+'/'+params['Y']+'/Weights/'
    weights_file = params['Model']+'_'+str(i)+'_'+str(s)+'.h5'
    loaded_model.load_weights(weights_dir+weights_file)
    loaded_model.compile(loss='mean_squared_error', optimizer='adam')
    print("Loaded model from disk")
    return loaded_model
# print(Model)
## Allows for calculation of partial derivatives & mapping over one dimension by fixing all other dimensions
def Fix(RST,Model,params,Vars,d,Fixed_Vals,n=100):
    """Build an input grid that varies one variable and fixes all others.

    ``RST.Scale(params['Y'], Vars)`` is called first; ``Fixed_Vals`` is then
    transformed with ``RST.XScaled`` and used for every column except ``d``,
    which sweeps ``n`` evenly spaced values between the minimum and maximum
    of that column in ``RST.X``.

    Parameters
    ----------
    RST :
        Data object providing ``Scale``, ``X`` and ``XScaled``.
    Model :
        Unused; kept so existing callers keep working.
    params : dict
        Reads ``'Y'``.
    Vars : list
        Variable names, in the column order of ``RST.X``.
    d :
        Name of the variable to vary; must appear in ``Vars``.
    Fixed_Vals : sequence
        One value per entry of ``Vars``, passed through
        ``RST.XScaled.transform``.
    n : int, optional
        Number of grid points (default 100).

    Returns
    -------
    tuple
        ``(Xnu, RST)`` where ``Xnu`` has shape ``(n, RST.X.shape[1])``.

    Raises
    ------
    ValueError
        If ``d`` is not in ``Vars``.
    """
    di = [i for i,x in enumerate(Vars) if x == d]
    if not di:
        # Fail early with a clear message instead of numpy's
        # "zero-size array" error from min() on an empty selection.
        raise ValueError("Variable %r is not in Vars %r" % (d, list(Vars)))
    RST.Scale(params['Y'],Vars)
    # Multiplying by 1.0 gives a float copy, so RST.X itself is never touched.
    X = RST.X*1.0
    Fixers = RST.XScaled.transform([Fixed_Vals])
    column = X[:,di]
    print(column.shape)
    grid = np.linspace(column.min(),column.max(),n)[:,np.newaxis]
    print(grid.shape)
    Xnu = np.zeros((n,X.shape[1]))
    # Broadcast the fixed row over all grid points, then insert the sweep.
    Xnu[:,:] = Fixers
    Xnu[:,di] = grid
    print(X.shape)
    return(Xnu,RST)

In [91]:

from matplotlib.mlab import griddata
def Map(X,dx,params,Runs):
    """Plot the ensemble-mean prediction along ``dx`` and its derivative.

    The saved weights for every (iteration, seed) pair in ``Runs`` are loaded
    into the model in turn, ``X`` is predicted with each, and the predictions
    are averaged.  Inputs and predictions are transformed back with
    ``RST.XScaled`` / ``RST.YScaled`` before plotting.
    NOTE: ``RST`` and ``Vars`` are read from module scope.

    Two figures are drawn: the prediction against ``dx``, and a finite
    difference estimate of d(prediction)/d(``dx``).

    Parameters
    ----------
    X : numpy.ndarray
        Scaled inputs, one column per entry of ``Vars``.
    dx : str
        Name of the column to plot against.
    params : dict
        Reads ``'Y'`` and whatever ``Load_Model`` requires.
    Runs : pandas.DataFrame
        Provides the ``'iteration'`` and ``'seed'`` columns.

    Returns
    -------
    None
    """
    EmptyModel = Load_Model(params)
    predictions = []
    for i,s in zip(Runs['iteration'],Runs['seed']):
        # The same model object is reused; each call overwrites its weights.
        Model = Load_Weights(EmptyModel,i,s)
        predictions.append(Model.predict(X))
    results = np.asanyarray(predictions).mean(axis=0)
    print(results.shape)
    results = RST.YScaled.inverse_transform(results.reshape(-1,1))
    X = RST.XScaled.inverse_transform(X)

    plt.figure()
    data = pd.DataFrame(X,columns=Vars)
    # Flatten the (n, 1) prediction array so it is stored as a plain column.
    data['Pred'] = results.ravel()
    plt.plot(data[dx],data['Pred'],label=X[:,0].mean())
    plt.grid()

    data = data.sort_values(by=dx)
    data = data.groupby(dx).mean().reset_index()

    # Slope estimate: mean of the 1-step and 3-step differences of the
    # prediction divided by the same mean for dx.  The result is stored under
    # this column name; the original only looked the column up, which raised
    # KeyError because it had never been created.
    deriv_col = 'd'+params['Y']+'/d'+dx
    data[deriv_col] = ((data['Pred'].diff()+data['Pred'].diff(3))/2)\
             /((data[dx].diff()+data[dx].diff(3))/2)

    plt.figure()
    plt.plot(data[dx],data[deriv_col])
    plt.grid()
    plt.show()

# Architecture only; Map loads the weights of each run itself.
Model = Load_Model(params)
DataFile = 'Data_Footprints_2018-06-12.csv'
RST = RSTF.ReadStandardTimeFill(DataFile)

# Values at which the non-varied variables can be held.  Only Fixed_Vals is
# used below.
# NOTE(review): Fixed_Valsbot uses quantile(1), identical to Fixed_Valstop;
# a low quantile may have been intended -- confirm.
Fixed_Valsbot = RST.Master[Vars].quantile(1)
Fixed_Vals = RST.Master[Vars].quantile(.75)
Fixed_Valstop = RST.Master[Vars].quantile(1)

# Fixed_Vals['PPFD_Avg']=RST.Master['PPFD_Avg'].quantile(.95)
print(Fixed_Vals)
# Variable swept by Fix and used as the x-axis in Map.
dx = 'Sedge'
X,RST=Fix(RST,Model,params,Vars,dx,Fixed_Vals.values)
Map(X,dx,params,Runs)
# print(X)

VWC      0.571063
Sedge    0.059629
Temp     8.608750
Name: 0.75, dtype: float64
(870, 1)
(100, 1)
(870, 3)
Loaded model from disk
Loaded model from disk
Loaded model from disk
Loaded model from disk
Loaded model from disk
Loaded model from disk
Loaded model from disk
Loaded model from disk
Loaded model from disk
Loaded model from disk
Loaded model from disk
Loaded model from disk
Loaded model from disk
Loaded model from disk
Loaded model from disk
Loaded model from disk
Loaded model from disk
Loaded model from disk
Loaded model from disk
Loaded model from disk
(100, 1)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [92]:
#     score = loaded_model.evaluate(X, y, verbose=1)

In [169]:
# Time = time.time()
# def RunLoop(Runs):
    
#     f = FloatProgress(min=0, max=Runs['N'].count()) # instantiate the bar
#     display(f) # display the bar
# #     print(Runs)
#     for N in Runs['N'].unique():   
#         f.value+=params['K']
#         params['T'] = 0
#         params['N']=N
#         Results = RunReps(FullModel,params,pool)
#         MSE = Results[0]
#         R2 = Results[1]
#         Runs.loc[Runs['N']==N,'MSE']=MSE
#         Runs.loc[Runs['N']==N,'R2']=R2
# #         print(Runs.loc[Runs['N']==N])
#     return(Runs)

In [184]:
# data = (data.groupby('VWC').mean())
# print(data)

In [93]:
# plt.figure()