In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import optuna
from sklearn.neighbors import LocalOutlierFactor

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam

In [2]:
cowpea = pd.read_excel('../data/prepared.xlsx', sheet_name='Cowpea')    ; cowpea['crop']   = 'cowpea'
maize = pd.read_excel('../data/prepared.xlsx', sheet_name='Maize')      ; maize['crop']    = 'maize'
rice = pd.read_excel('../data/prepared.xlsx', sheet_name='Rice')        ; rice['crop']     = 'rice'
chickpea = pd.read_excel('../data/prepared.xlsx', sheet_name='Chickpea'); chickpea['crop'] = 'chickpea'
mustard = pd.read_excel('../data/prepared.xlsx', sheet_name='Mustard')  ; mustard['crop']  = 'mustard'

In [3]:
data = pd.concat([cowpea, rice, maize, chickpea, mustard], axis=0).reset_index(drop=True)

# Remove outliers
clf = LocalOutlierFactor(n_neighbors=20)
new_data = []
org_cols = data.columns
for i, outlier_label in enumerate(clf.fit_predict(data[['GSR', 'CT']])):
    if outlier_label==1:
        new_data.append(data.iloc[i,:])
data = pd.DataFrame(new_data, columns=org_cols)
data = data[(data['GSR']<300) | (data['Rn']>150)]
data = data[(data['Rn']<500) | (data['crop']!='cowpea')]
data = data[(data['Rn']<400) | (data['GSR']>500) | (data['crop']!='rice')]
data = data[(data['Rn']<300) | (data['GSR']>375)]

data.loc[:, 'Time'] = data.loc[:, 'Time'].apply(lambda x: x.hour)
data.loc[:, 'timesin'] = np.sin(data.loc[:, 'Time'] * (2 * np.pi) / 12)
data.loc[:, 'timecos'] = np.cos(data.loc[:, 'Time'] * (2 * np.pi) / 12)
df = pd.get_dummies(data[['crop']], drop_first=True)
data = pd.concat([df, data], axis=1)

In [4]:
feature_cols = [c for c in data.columns if c not in ['ST_5cm','ST_10cm','ST_15cm','Date','Time','crop','Rn']]
X = data[feature_cols]
y = data['Rn']

In [5]:
print(X.shape)
print(y.shape)

(285, 8)
(285,)


In [6]:
def create_model(trial):
    n_layers = trial.suggest_int('num_layers', 1, 6, 1)
    model = Sequential()
    model.add(Dense(trial.suggest_int('num_hidden_units', 8, 136, 8), input_shape=(len(feature_cols),), activation='relu'))
    for i in range(n_layers):
        model.add(Dense(trial.suggest_int('num_hidden_units', 8, 136, 8), activation='relu'))
    model.add(Dense(1))
    
    return model

In [7]:
def objective(trial):
    model = create_model(trial)
    optimizer = Adam(learning_rate=0.009)
    
    model.compile(optimizer=optimizer, loss='mse', metrics=[tf.keras.metrics.RootMeanSquaredError()])
    
    # Model summary
    # print(model.summary())
    
    history = model.fit(X, y, validation_split=0.2, epochs=3000, batch_size=512, verbose=0, )
    
    print("Training loss:", np.min(history.history['loss']))
    print("Validation loss:", np.min(history.history['val_loss']))

    if 'root_mean_squared_error' in history.history.keys():
        print("Training loss:", np.min(history.history['root_mean_squared_error']))
        print("Validation loss:", np.min(history.history['val_root_mean_squared_error']))
    
    return np.min(history.history['val_root_mean_squared_error'])

In [8]:
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=20)

print(study.best_params)

[I 2020-10-18 20:59:47,670] A new study created in memory with name: no-name-7b7a3333-a904-4f30-b47e-fd174b6c1b94


Training loss: 1093.0751953125
Validation loss: 1284.215087890625
Training loss: 33.06168746948242
Validation loss: 35.83594512939453


[I 2020-10-18 21:00:42,363] Trial 0 finished with value: 35.83594512939453 and parameters: {'num_layers': 2, 'num_hidden_units': 88}. Best is trial 0 with value: 35.83594512939453.


Training loss: 1638.5501708984375
Validation loss: 1788.971923828125
Training loss: 40.479007720947266
Validation loss: 42.296241760253906


[I 2020-10-18 21:01:37,359] Trial 1 finished with value: 42.296241760253906 and parameters: {'num_layers': 6, 'num_hidden_units': 32}. Best is trial 0 with value: 35.83594512939453.


Training loss: 2065.72607421875
Validation loss: 1199.529052734375
Training loss: 45.450260162353516
Validation loss: 34.63421630859375


[I 2020-10-18 21:02:31,598] Trial 2 finished with value: 34.63421630859375 and parameters: {'num_layers': 5, 'num_hidden_units': 16}. Best is trial 2 with value: 34.63421630859375.


Training loss: 1871.291259765625
Validation loss: 1505.330078125
Training loss: 43.2584228515625
Validation loss: 38.798583984375


[I 2020-10-18 21:03:25,579] Trial 3 finished with value: 38.798583984375 and parameters: {'num_layers': 5, 'num_hidden_units': 24}. Best is trial 2 with value: 34.63421630859375.


Training loss: 1258.5758056640625
Validation loss: 1305.3184814453125
Training loss: 35.47641372680664
Validation loss: 36.12919235229492


[I 2020-10-18 21:04:18,721] Trial 4 finished with value: 36.12919235229492 and parameters: {'num_layers': 2, 'num_hidden_units': 112}. Best is trial 2 with value: 34.63421630859375.


Training loss: 2208.16357421875
Validation loss: 1944.607421875
Training loss: 46.9911003112793
Validation loss: 44.09770202636719


[I 2020-10-18 21:05:13,488] Trial 5 finished with value: 44.09770202636719 and parameters: {'num_layers': 6, 'num_hidden_units': 16}. Best is trial 2 with value: 34.63421630859375.


Training loss: 1288.72412109375
Validation loss: 1443.695556640625
Training loss: 35.8988037109375
Validation loss: 37.995994567871094


[I 2020-10-18 21:06:06,453] Trial 6 finished with value: 37.995994567871094 and parameters: {'num_layers': 2, 'num_hidden_units': 88}. Best is trial 2 with value: 34.63421630859375.


Training loss: 2107.791015625
Validation loss: 1913.0765380859375
Training loss: 45.91068649291992
Validation loss: 43.738731384277344


[I 2020-10-18 21:06:59,666] Trial 7 finished with value: 43.738731384277344 and parameters: {'num_layers': 4, 'num_hidden_units': 24}. Best is trial 2 with value: 34.63421630859375.


Training loss: 1074.787109375
Validation loss: 1502.041259765625
Training loss: 32.7839469909668
Validation loss: 38.75617599487305


[I 2020-10-18 21:07:53,262] Trial 8 finished with value: 38.75617599487305 and parameters: {'num_layers': 5, 'num_hidden_units': 96}. Best is trial 2 with value: 34.63421630859375.


Training loss: 1412.5081787109375
Validation loss: 1126.55615234375
Training loss: 37.583351135253906
Validation loss: 33.564208984375


[I 2020-10-18 21:08:45,765] Trial 9 finished with value: 33.564208984375 and parameters: {'num_layers': 2, 'num_hidden_units': 112}. Best is trial 9 with value: 33.564208984375.


Training loss: 1688.0821533203125
Validation loss: 1438.3067626953125
Training loss: 41.08627700805664
Validation loss: 37.92501449584961


[I 2020-10-18 21:09:37,972] Trial 10 finished with value: 37.92501449584961 and parameters: {'num_layers': 1, 'num_hidden_units': 136}. Best is trial 9 with value: 33.564208984375.


Training loss: 1865.5423583984375
Validation loss: 1549.9150390625
Training loss: 43.191925048828125
Validation loss: 39.368961334228516


[I 2020-10-18 21:10:31,301] Trial 11 finished with value: 39.368961334228516 and parameters: {'num_layers': 3, 'num_hidden_units': 56}. Best is trial 9 with value: 33.564208984375.


Training loss: 1208.6866455078125
Validation loss: 1418.2607421875
Training loss: 34.766170501708984
Validation loss: 37.6598014831543


[I 2020-10-18 21:11:25,159] Trial 12 finished with value: 37.6598014831543 and parameters: {'num_layers': 4, 'num_hidden_units': 128}. Best is trial 9 with value: 33.564208984375.


Training loss: 2106.3203125
Validation loss: 1285.69873046875
Training loss: 45.8946647644043
Validation loss: 35.85663986206055


[I 2020-10-18 21:12:17,880] Trial 13 finished with value: 35.85663986206055 and parameters: {'num_layers': 1, 'num_hidden_units': 56}. Best is trial 9 with value: 33.564208984375.


Training loss: 1823.903076171875
Validation loss: 1775.5633544921875
Training loss: 42.70718002319336
Validation loss: 42.13743591308594


[I 2020-10-18 21:13:12,321] Trial 14 finished with value: 42.13743591308594 and parameters: {'num_layers': 3, 'num_hidden_units': 64}. Best is trial 9 with value: 33.564208984375.


Training loss: 1113.258056640625
Validation loss: 1313.9063720703125
Training loss: 33.36552047729492
Validation loss: 36.24784469604492


[I 2020-10-18 21:14:05,436] Trial 15 finished with value: 36.24784469604492 and parameters: {'num_layers': 5, 'num_hidden_units': 112}. Best is trial 9 with value: 33.564208984375.


Training loss: 990.3875122070312
Validation loss: 1455.240234375
Training loss: 31.470422744750977
Validation loss: 38.14760971069336


[I 2020-10-18 21:14:57,735] Trial 16 finished with value: 38.14760971069336 and parameters: {'num_layers': 4, 'num_hidden_units': 120}. Best is trial 9 with value: 33.564208984375.


Training loss: 2456.85546875
Validation loss: 1359.73974609375
Training loss: 49.56667709350586
Validation loss: 36.87464904785156


[I 2020-10-18 21:15:49,218] Trial 17 finished with value: 36.87464904785156 and parameters: {'num_layers': 3, 'num_hidden_units': 8}. Best is trial 9 with value: 33.564208984375.


Training loss: 2202.802001953125
Validation loss: 1732.7283935546875
Training loss: 46.934017181396484
Validation loss: 41.62605285644531


[I 2020-10-18 21:16:39,780] Trial 18 finished with value: 41.62605285644531 and parameters: {'num_layers': 1, 'num_hidden_units': 40}. Best is trial 9 with value: 33.564208984375.


Training loss: 1826.6649169921875
Validation loss: 1503.4007568359375
Training loss: 42.739501953125
Validation loss: 38.773712158203125


[I 2020-10-18 21:17:34,137] Trial 19 finished with value: 38.773712158203125 and parameters: {'num_layers': 6, 'num_hidden_units': 80}. Best is trial 9 with value: 33.564208984375.


{'num_layers': 2, 'num_hidden_units': 112}
