# ***EE chula model2***

# Implementation
- number of layers = 4 (3 hidden layers + 1 output layer)
- number of hidden layers = 3
- layer 1 = 64 units, activation function is relu
- layer 2 = 64 units, activation function is relu
- layer 3 = 64 units, activation function is relu
- loss = Mean Squared Error (MSE) = 0.03
- metrics = Mean Absolute Error (MAE) 
- optimizer = Adam with learning rate 0.001 (RMSPropOptimizer is left commented out in `create_model`)
- kernel_initializer = not set explicitly in the code, so the Keras `Dense` default (glorot_uniform) is used
- early stopping: stop training when the validation score stops improving (after about 500 epochs)

# Feature
Measurement data from the solar cells on the rooftop of the EE building (8 kW capacity), collected from Jan 2017 to Jun 2018 through the CUBEMS portal.

- datetime
- date
- time
- I is solar irradiance (W/m²)
- T is temperature (°C × 10)
- UV is UV index (UV index × 10)
- WS is wind speed (m/s × 10)
- RH is relative humidity (%)
- P is solar power (W*min)

# Result
- MSE = 0.03 on the test set


# Note
- loss = mean_squared_error, mean_absolute_error, mean_absolute_percentage_error, mean_squared_logarithmic_error, squared_hinge, hinge, logcosh
- optimizer = SGD (stochastic gradient descent), RMSProp, Adagrad, Adadelta, Adam, Adamax, Nadam, TFOptimizer
- metrics = mae, acc

In [2]:
from __future__ import absolute_import, division, print_function

import os

import tensorflow as tf
from tensorflow import keras
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

tf.__version__

'1.9.0'

# Load data for train

In [3]:
# Read the training CSV and keep the datetime column aside before the
# object-dtype columns are dropped.
train = pd.read_csv('data/Train_data/dataset_rev4_train.csv')
buffer_datetime_train = train['datetime']
# Drop object-dtype columns, then replace missing values with 0.
train = train.select_dtypes(exclude=['object']).fillna(0)

# Load data for test

In [4]:
# Read the test CSV and keep the datetime column aside before the
# object-dtype columns are dropped.
test = pd.read_csv('data/test_data/dataset_rev4_test.csv')
buffer_datetime_test = test['datetime']
# Drop object-dtype columns, then replace missing values with 0.
test = test.select_dtypes(exclude=['object']).fillna(0)

In [5]:
# Report the (rows, columns) shape of both frames.
for caption, frame in (('dimension of train:', train), ('dimension of test:', test)):
    print(caption, frame.shape)

dimension of train: (131586, 6)
dimension of test: (16147, 5)


In [6]:
# Column names remaining in the training frame (label P included).
print("features:", train.columns.tolist())

features: ['I', 'T', 'UV', 'WS', 'RH', 'P']


# Remove outliers

In [7]:
from sklearn.ensemble import IsolationForest

# Fit an isolation forest on every column of `train` (the label P included)
# and predict one flag per row: 1 marks an inlier, -1 marks an outlier.
clf = IsolationForest(max_samples = 100, random_state = 42)
clf.fit(train)
y_noano = pd.DataFrame(clf.predict(train), columns = ['Top'])

# Build the inlier mask once and reuse it, instead of re-deriving the same
# index several times (the original also evaluated it once and discarded it).
inlier_mask = (y_noano['Top'] == 1).values
n_outliers = int((y_noano['Top'] == -1).sum())

# Keep only the inlier rows and renumber them from 0.
# NOTE: this overwrites `train`, so re-running the cell filters the data again.
train = train[inlier_mask].reset_index(drop = True)

print("Number of Outliers:", n_outliers)
print("Number of rows without outliers:", train.shape[0])

Number of Outliers: 13159
Number of rows without outliers: 118427


# Normalize
When input data features have values with different ranges, each feature should be scaled independently.

In [8]:
import warnings
# Silences every warning for the rest of the session.
# NOTE(review): presumably added to hide scaler conversion warnings — confirm it is still needed.
warnings.filterwarnings('ignore')

# All training columns, and the same list without the label P.
col_train = list(train.columns)
col_train_bis = list(train.columns)
col_train_bis.remove('P')

# Plain ndarrays instead of np.matrix: np.matrix is deprecated and current
# scikit-learn releases refuse it as estimator input.
mat_train = train.values
mat_test  = test.values

mat_new = train.drop('P',axis = 1).values
# reshape(-1, 1) infers the row count, so this keeps working when the number
# of rows left after outlier removal changes (it was hard-coded to 118427).
mat_y = train.P.values.reshape(-1, 1)

# Scaler for the label alone, kept so predictions can be mapped back to the
# original P range with prepro_y.inverse_transform.
prepro_y = MinMaxScaler()
prepro_y.fit(mat_y)

# Scaler for the full training frame (features + label).
prepro = MinMaxScaler()
prepro.fit(mat_train)

# Scaler for the feature columns only. It is fitted on the TRAINING features,
# so the test frame is scaled with training statistics.
prepro_test = MinMaxScaler()
prepro_test.fit(mat_new)

train = pd.DataFrame(prepro.transform(mat_train),columns = col_train)
test  = pd.DataFrame(prepro_test.transform(mat_test),columns = col_train_bis)

# Create training_set and prediction_set

In [10]:
# Column groups used by the rest of the notebook.
LABEL = "P"
COLUMNS = col_train        # every training column, label included
FEATURES = col_train_bis   # input columns only (label removed)
feature_cols = FEATURES

# Scaled training frame with all columns, and the scaled label series.
training_set = train[COLUMNS]
prediction_set = train[LABEL]

In [11]:
# Show the container type of the feature frame and of the label series.
for obj in (training_set, prediction_set):
    print(type(obj))

<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.series.Series'>


# Split into train and test sets

In [13]:
# Hold out 33% of the scaled training rows; the held-out part is what the
# later cells pass to fit() as validation data and to evaluate().
train_data, test_data, train_labels, test_labels = train_test_split(
    training_set[FEATURES],
    prediction_set,
    test_size=0.33,
    random_state=42,
)

In [14]:
# Container type of each of the four splits.
for split in (train_data, test_data, train_labels, test_labels):
    print(type(split))

<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>


In [15]:
# Shape of each of the four splits.
for split in (train_data, test_data, train_labels, test_labels):
    print(split.shape)

(79346, 5)
(39081, 5)
(79346,)
(39081,)


In [16]:
# Replace each pandas object with its underlying NumPy array.
train_data, test_data, train_labels, test_labels = [
    split.values for split in (train_data, test_data, train_labels, test_labels)
]

# Shuffle the training set

In [17]:
# Shuffle features and labels with one shared permutation so rows stay aligned.
# The permutation comes from a seeded generator (same seed as the random_state
# values used for IsolationForest and train_test_split) so the row order is
# reproducible after a kernel restart; the original drew it from the unseeded
# global NumPy state.
order = np.random.RandomState(42).permutation(train_labels.shape[0])
train_data = train_data[order]
train_labels = train_labels[order]

In [18]:
# Final array shapes that are fed to the model.
for template, split in (("Training set: {}", train_data), ("Testing set:  {}", test_data)):
    print(template.format(split.shape))

Training set: (79346, 5)
Testing set:  (39081, 5)


# Create model

In [35]:
def create_model(input_dim=None, learning_rate=0.001):
  """Build and compile the fully connected regression network.

  The network is three Dense(64, relu) hidden layers followed by a single
  linear output unit. It is compiled with MSE loss, the Adam optimizer and
  MAE as the reported metric.

  Args:
    input_dim: number of input features. When None (the default) it is taken
      from the notebook-level `train_data.shape[1]`, which is what the
      original zero-argument version always did.
    learning_rate: learning rate passed to Adam (default 0.001).

  Returns:
    A compiled tf.keras Sequential model with freshly initialised weights.
  """
  if input_dim is None:
    input_dim = train_data.shape[1]

  model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(64, activation=tf.nn.relu, input_shape=(input_dim,)),
    tf.keras.layers.Dense(64, activation=tf.nn.relu),
    tf.keras.layers.Dense(64, activation=tf.nn.relu),
    tf.keras.layers.Dense(1)
  ])

  # `lr` is the argument name in the TF 1.x Keras optimizers used here.
  optimizer = tf.keras.optimizers.Adam(lr=learning_rate)

  model.compile(loss='mse',
                optimizer=optimizer,
                metrics=['mae'])
  return model


# Create a basic model instance
model = create_model()
# Print the layer-by-layer summary (output shapes and parameter counts).
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_48 (Dense)             (None, 64)                384       
_________________________________________________________________
dense_49 (Dense)             (None, 64)                4160      
_________________________________________________________________
dense_50 (Dense)             (None, 64)                4160      
_________________________________________________________________
dense_51 (Dense)             (None, 1)                 65        
Total params: 8,769
Trainable params: 8,769
Non-trainable params: 0
_________________________________________________________________


# Save checkpoints during training

In [21]:
# Single checkpoint file; each save overwrites the previous one.
checkpoint_path = "training_1/cp.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)

# Callback that writes the model weights (not the full model) and logs each save.
cp_callback = tf.keras.callbacks.ModelCheckpoint(
    checkpoint_path,
    verbose=1,
    save_weights_only=True,
)

# Train a fresh model for 10 epochs, validating on the held-out split.
fit_options = dict(
    epochs=10,
    validation_data=(test_data, test_labels),
    callbacks=[cp_callback],
)
model = create_model()
model.fit(train_data, train_labels, **fit_options)

Train on 79346 samples, validate on 39081 samples
Epoch 1/10

Epoch 00001: saving model to training_1/cp.ckpt
Epoch 2/10

Epoch 00002: saving model to training_1/cp.ckpt
Epoch 3/10

Epoch 00003: saving model to training_1/cp.ckpt
Epoch 4/10

Epoch 00004: saving model to training_1/cp.ckpt
Epoch 5/10

Epoch 00005: saving model to training_1/cp.ckpt
Epoch 6/10

Epoch 00006: saving model to training_1/cp.ckpt
Epoch 7/10

Epoch 00007: saving model to training_1/cp.ckpt
Epoch 8/10

Epoch 00008: saving model to training_1/cp.ckpt
Epoch 9/10

Epoch 00009: saving model to training_1/cp.ckpt
Epoch 10/10

Epoch 00010: saving model to training_1/cp.ckpt


<tensorflow.python.keras.callbacks.History at 0x7f01d4cd15f8>

In [22]:
# List the files in the checkpoint directory (IPython shell escape; `{checkpoint_dir}` is interpolated from Python).
!ls {checkpoint_dir}

checkpoint  cp.ckpt.data-00000-of-00001  cp.ckpt.index


# Create an untrained model

In [25]:
# A freshly created model has untrained weights, so this is the baseline error.
model = create_model()

# evaluate() returns [loss, mae] because the model is compiled with metrics=['mae'].
[loss, mae] = model.evaluate(test_data, test_labels)
# The MAE is in min-max-scaled units of P, not a currency amount, so the
# stray "$" prefix has been removed from the message.
print("Testing set Mean Abs Error: {:7.2f}".format(mae))

Testing set Mean Abs Error: $   0.34


# Load the weights from the checkpoint, and re-evaluate:

In [26]:
# Load the weights saved by the checkpoint callback into the untrained model.
model.load_weights(checkpoint_path)
[loss, mae] = model.evaluate(test_data, test_labels)
# The MAE is in min-max-scaled units of P, not a percentage, so the trailing
# "%" has been removed from the message.
print("Restored model, MAE: {:5.2f}".format(mae))

Restored model, MAE:  0.03%


# Checkpoint callback options

In [27]:
# The epoch number is substituted into the file name via `str.format`,
# so each save gets its own file.
checkpoint_path = "training_2/cp-{epoch:04d}.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)

# Weights-only checkpoint, written every 5 epochs.
checkpoint_options = dict(verbose=1, save_weights_only=True, period=5)
cp_callback = tf.keras.callbacks.ModelCheckpoint(checkpoint_path, **checkpoint_options)

# Train a fresh model for 50 epochs without per-epoch progress output.
model = create_model()
model.fit(
    train_data,
    train_labels,
    epochs=50,
    callbacks=[cp_callback],
    validation_data=(test_data, test_labels),
    verbose=0,
)


Epoch 00005: saving model to training_2/cp-0005.ckpt

Epoch 00010: saving model to training_2/cp-0010.ckpt

Epoch 00015: saving model to training_2/cp-0015.ckpt

Epoch 00020: saving model to training_2/cp-0020.ckpt

Epoch 00025: saving model to training_2/cp-0025.ckpt

Epoch 00030: saving model to training_2/cp-0030.ckpt

Epoch 00035: saving model to training_2/cp-0035.ckpt

Epoch 00040: saving model to training_2/cp-0040.ckpt

Epoch 00045: saving model to training_2/cp-0045.ckpt

Epoch 00050: saving model to training_2/cp-0050.ckpt


<tensorflow.python.keras.callbacks.History at 0x7f01d4d385c0>

In [28]:
import pathlib

# Each checkpoint has an "*.index" file; order them oldest to newest by
# modification time, then strip the ".index" suffix to get the checkpoint prefix.
index_files = sorted(
    pathlib.Path(checkpoint_dir).glob("*.index"),
    key=lambda path: path.stat().st_mtime,
)
checkpoints = [path.with_suffix('') for path in index_files]
# The most recently written checkpoint is last.
latest = str(checkpoints[-1])
checkpoints

[PosixPath('training_2/cp-0030.ckpt'),
 PosixPath('training_2/cp-0035.ckpt'),
 PosixPath('training_2/cp-0040.ckpt'),
 PosixPath('training_2/cp-0045.ckpt'),
 PosixPath('training_2/cp-0050.ckpt')]

In [29]:
# Rebuild the architecture, then load the most recent checkpoint's weights.
model = create_model()
model.load_weights(latest)
[loss, mae] = model.evaluate(test_data, test_labels)
# The MAE is in min-max-scaled units of P, not a percentage, so the trailing
# "%" has been removed from the message.
print("Restored model, MAE: {:5.2f}".format(mae))

Restored model, MAE:  0.03%


# Manually save weights

In [31]:
# Save the weights
manual_checkpoint = './checkpoints/my_checkpoint'
model.save_weights(manual_checkpoint)

# Restore the weights into a freshly built model
model = create_model()
model.load_weights(manual_checkpoint)

[loss, mae] = model.evaluate(test_data, test_labels)
# The MAE is in min-max-scaled units of P, not a percentage, so the trailing
# "%" has been removed from the message.
print("Restored model, MAE: {:5.2f}".format(mae))

Restored model, MAE:  0.03%


# Save the entire model
The entire model can be saved to a file that contains the weight values, the model's configuration, and even the optimizer's configuration

In [36]:
# Train a fresh model for 5 epochs (no validation data is passed here).
model = create_model()
model.fit(train_data, train_labels, epochs=5)
# Save entire model to a HDF5 file
model.save('my_model.h5')

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [37]:
# Recreate the exact same model, including weights and optimizer.
new_model = keras.models.load_model('my_model.h5')
# Print the layer summary of the reloaded model.
new_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_52 (Dense)             (None, 64)                384       
_________________________________________________________________
dense_53 (Dense)             (None, 64)                4160      
_________________________________________________________________
dense_54 (Dense)             (None, 64)                4160      
_________________________________________________________________
dense_55 (Dense)             (None, 1)                 65        
Total params: 8,769
Trainable params: 8,769
Non-trainable params: 0
_________________________________________________________________


# Check its error (MAE):

In [39]:
# Evaluate the model reloaded from the HDF5 file on the held-out split.
[loss, mae] = new_model.evaluate(test_data, test_labels)
# The MAE is in min-max-scaled units of P, not a percentage, so the trailing
# "%" has been removed from the message.
print("Restored model, MAE: {:5.2f}".format(mae))

Restored model, MAE:  0.03%
