In [None]:
import math
import os
from collections import OrderedDict, Iterable

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import six, csv
import tensorflow as tf
from numpy import mean
from numpy import std
from sklearn.metrics import precision_score
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.callbacks import Callback
from tqdm import tqdm

from keras import backend as K
from keras import optimizers
from keras.callbacks import CSVLogger
from keras.callbacks import ProgbarLogger
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import Flatten
from keras.layers import SpatialDropout1D
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D
from keras.models import Sequential
from keras.models import load_model

# CNN Model (Regression)

### Import dataset

In [None]:
# Load the pre-computed arrays from .npy files in the working directory.
# "sc_" arrays are presumably the scaled model inputs/targets - TODO confirm.
sc_x_train, sc_y_train = np.load('sc_x_train_sm3.npy'), np.load('sc_y_train_sm3.npy')
sc_x_test, sc_y_test = np.load('sc_x_test.npy'), np.load('sc_y_test.npy')
# Targets without the "sc_" prefix; y_train is later used to fit the output scaler.
y_test, y_train = np.load('y_test.npy'), np.load('y_train.npy')
# NOTE: allow_pickle=True unpickles objects on load - only use with trusted files.
y_test_class = np.load('y_test_class.npy', allow_pickle=True)

In [None]:
# Collapse both target arrays to 1-D before they are handed to the model,
# then show the resulting shapes (train first, test second).
sc_y_train, sc_y_test = sc_y_train.flatten(), sc_y_test.flatten()
print(sc_y_train.shape)
print(sc_y_test.shape)

### Define model

In [None]:
# Custom Keras metric: root mean squared error
def rmse(y_true, y_pred):
    """Return sqrt(mean((y_pred - y_true)^2)) computed with Keras backend ops."""
    squared_error = K.square(y_pred - y_true)
    return K.sqrt(K.mean(squared_error))

In [None]:
# saving results per batch
class NBatchCSVLogger(Callback):
    """Callback that streams the results of every training batch to a csv file.

    One row is written (and flushed) at the end of each batch. The columns are
    ``batch`` followed by the log keys seen on the first batch, sorted
    alphabetically; a key missing from a later batch is written as ``NA``.

    Args:
        filename: path of the csv file to write.
        separator: delimiter placed between csv fields.
        append: if True, rows are added to an existing file and the header is
            written only when the file is missing or empty; if False, the file
            is overwritten at the start of training.
    """

    def __init__(self, filename, separator=',', append=False):
        self.sep = separator
        self.filename = filename
        self.append = append
        self.writer = None
        self.keys = None
        self.append_header = True
        self.csv_file = None
        # Python 2 on Windows needs binary mode for csv files.
        self.file_flags = 'b' if six.PY2 and os.name == 'nt' else ''
        # On Python 3 the csv module expects files opened with newline='',
        # otherwise blank lines can appear between rows on some platforms.
        self._open_args = {} if six.PY2 else {'newline': ''}
        super(NBatchCSVLogger, self).__init__()

    def on_train_begin(self, logs=None):
        """Open the csv file and decide whether a header row is still needed."""
        if self.append:
            if os.path.exists(self.filename):
                with open(self.filename, 'r' + self.file_flags, **self._open_args) as f:
                    # A non-empty first line means a header is already there.
                    self.append_header = not bool(len(f.readline()))
            mode = 'a'
        else:
            mode = 'w'
        self.csv_file = open(self.filename, mode + self.file_flags, **self._open_args)

    def on_batch_end(self, batch, logs=None):
        """Write one csv row with the batch index and this batch's log values."""
        logs = logs or {}

        def handle_value(k):
            # Strings and scalars (including 0-d arrays) are written as they
            # are; other iterables are rendered as a quoted "[a, b, ...]" list.
            is_zero_dim_ndarray = isinstance(k, np.ndarray) and k.ndim == 0
            if isinstance(k, six.string_types):
                return k
            elif isinstance(k, Iterable) and not is_zero_dim_ndarray:
                return '"[%s]"' % (', '.join(map(str, k)))
            else:
                return k

        # The column set is fixed by the first batch that is logged.
        if self.keys is None:
            self.keys = sorted(logs.keys())

        if not self.writer:
            class CustomDialect(csv.excel):
                delimiter = self.sep
            self.writer = csv.DictWriter(self.csv_file,
                                         fieldnames=['batch'] + self.keys, dialect=CustomDialect)
            if self.append_header:
                self.writer.writeheader()

        row_dict = OrderedDict({'batch': batch})
        # Missing keys become 'NA' instead of raising KeyError.
        row_dict.update((key, handle_value(logs.get(key, 'NA'))) for key in self.keys)
        self.writer.writerow(row_dict)
        # Flush after every row so the file is usable while training runs.
        self.csv_file.flush()

    def on_train_end(self, logs=None):
        """Close the csv file (if it was opened) and drop the writer."""
        if self.csv_file is not None:
            self.csv_file.close()
            self.csv_file = None
        self.writer = None

In [None]:
# fit and evaluate a model
def evaluate_model(trainX, trainy, testX, testy):
    """Build, train, save and evaluate one 1-D CNN regression model.

    Args:
        trainX: training inputs, indexed as (samples, timesteps, features).
        trainy: training targets.
        testX: inputs used both as validation data while fitting and for the
            final evaluation.
        testy: targets matching ``testX``.

    Returns:
        The list returned by ``model.evaluate``: ``[loss (mse), rmse]``.

    Side effects:
        Overwrites ``training_data.log`` (per-epoch log), ``batch_logs.csv``
        (per-batch log), ``weights.hdf5`` and ``model.hdf5`` in the working
        directory on every call.
    """
    verbose, epochs, batch_size = 0, 100, 100
    n_timesteps, n_features = trainX.shape[1], trainX.shape[2]

    model = Sequential()
    model.add(Conv1D(filters=20, kernel_size=2, activation='relu', input_shape=(n_timesteps,n_features), kernel_initializer = 'he_normal'))
    model.add(SpatialDropout1D(0.5))
    model.add(Conv1D(filters=10, kernel_size=2, activation='relu', kernel_initializer = 'he_normal'))
    model.add(MaxPooling1D(pool_size=3))
    model.add(Flatten())
    model.add(Dropout(0.5))
    model.add(Dense(20, activation='relu', kernel_initializer = 'he_normal'))
    model.add(Dense(1))
    model.compile(loss='mse', metrics =[rmse], optimizer='adam')

    # fit network, logging per epoch (CSVLogger) and per batch (NBatchCSVLogger)
    # in the same run so that both log files describe the saved model.
    # NOTE(review): the test set doubles as validation data here - confirm
    # that this is intended.
    csv_logger = CSVLogger('training_data.log')
    out_batch = NBatchCSVLogger("batch_logs.csv", separator=',', append=False)
    history = model.fit(trainX, trainy, epochs=epochs, batch_size=batch_size, validation_data = (testX, testy), callbacks=[csv_logger, out_batch], verbose=verbose)

    # save everything for later
    wfname = "weights.hdf5"
    fname = "model.hdf5"
    model.save_weights(wfname,overwrite = True)
    model.save(fname, overwrite = True)

    # evaluate model: returns [loss, rmse] because one metric was compiled in
    scores = model.evaluate(testX, testy, batch_size=batch_size, verbose=verbose)

    return scores

### Evaluation

In [None]:
def summarize_results(scores):
    """Print the raw scores, then their mean and standard deviation."""
    print(scores)
    summary = ('RMSE:mean', mean(scores), 'std+/-', std(scores))
    print(*summary)

In [None]:
# running evaluation for a number of times to summarize the results
def run_experiment(repeats=10):
    """Train and evaluate the model ``repeats`` times.

    Each repetition calls ``evaluate_model`` on the scaled train/test arrays
    and prints its RMSE (element 1 of the returned ``[loss, rmse]`` list).

    Args:
        repeats: number of independent train/evaluate repetitions.

    Returns:
        A list with one RMSE per repetition. A fresh list is built on every
        call, so repeated calls do not accumulate results from earlier runs.
    """
    run_scores = []
    for r in range(repeats):
        score = evaluate_model(sc_x_train, sc_y_train, sc_x_test, sc_y_test)
        # score[1] is the rmse metric; score[0] is the mse loss
        print('>#%d: %.3f' % (r+1, score[1]))
        run_scores.append(score[1])
    return run_scores

scores = run_experiment()

In [None]:
# Arrange the per-run scores as a single column (n_runs, 1).
re_scores = np.asarray(scores).reshape(len(scores), 1)
re_scores

In [None]:
# rescale the results
# An RMSE is a difference between values, not a value itself, so converting it
# back to the original units only needs the scaler's range (max - min).
# inverse_transform would also add the data minimum, which inflates the error.
# Assumes the scaled targets came from a default-range (0, 1) MinMaxScaler fit
# on y_train - TODO confirm.

scaler_out = MinMaxScaler()
scaler_out.fit(y_train)
rmse1 = re_scores * scaler_out.data_range_

In [None]:
# summarize results: print the rescaled per-run RMSE values with their mean and std
summarize_results(rmse1)

### Plotting last fitting results

In [None]:
# Reload the model written by the last repetition. The custom rmse metric is
# passed in so the saved, compiled model can be restored.
fname = "model.hdf5"
model = load_model(fname, custom_objects=dict(rmse=rmse))

# Training logs of that run: one table per epoch, one per batch
history, historyb = pd.read_csv("training_data.log"), pd.read_csv("batch_logs.csv")

In [None]:
# Per-epoch log read from training_data.log (written by CSVLogger during fit)
history

In [None]:
# Per-batch log read from batch_logs.csv
historyb

In [None]:
# summary() prints the layer table itself and returns None, so wrapping it in
# print() would only add a stray "None" line.
model.summary()

In [None]:
# Show the weights of the first filter of the model's first layer in grayscale.
top_layer = model.layers[0]
plt.imshow(np.squeeze(top_layer.get_weights()[0][..., 0]), cmap='gray')

In [None]:
# Loss per epoch: training vs validation, taken from the per-epoch log

epochs = range(1, 1 + len(history['epoch']))  # 1-based epoch numbers
loss_train, loss_val = history['loss'], history['val_loss']
plt.plot(epochs, loss_train, 'g', label='Training loss')
plt.plot(epochs, loss_val, 'b', label='Validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Training and Validation loss')
plt.legend()
plt.show()

In [None]:
# RMSE per epoch: training vs validation, taken from the per-epoch log

epochs = range(1, 1 + len(history['epoch']))  # 1-based epoch numbers
rmse_train, rmse_val = history['rmse'], history['val_rmse']
plt.plot(epochs, rmse_train, 'g', label='Training RMSE')
plt.plot(epochs, rmse_val, 'b', label='Validation RMSE')
plt.xlabel('Epochs')
plt.ylabel('RMSE')
plt.title('Training and Validation RMSE')
plt.legend()
plt.show()

In [None]:
# Number of rows in the per-batch log
len(historyb['batch'])

In [None]:
# check runs per batch (rmse)

rmse_train = historyb['rmse']
# Plot against the row index of the log rather than the logged batch number.
batch = range(0,len(historyb['batch']))
plt.plot(batch, rmse_train, 'g', label='Training RMSE')
plt.title('Training RMSE')
# evaluate_model trains with batch_size = 100, so the label states 100.
plt.xlabel('Batches (100)')
plt.ylabel('RMSE')
plt.legend()
plt.show()

### Checking

In [None]:
# Shape of the test inputs that are fed to the model below
sc_x_test.shape

In [None]:
# Run the reloaded model on the test inputs; the outputs are still in the
# model's (scaled) target space and are inverse-transformed in the next cell.
model_prediction = model.predict(sc_x_test, verbose = 0)

In [None]:
# Map the model predictions back to the units of y_train using a MinMaxScaler
# fit on y_train, then show the first two rows.

scaler_out = MinMaxScaler().fit(y_train)
report = scaler_out.inverse_transform(model_prediction)
print(report[:2])

In [None]:
# First two test targets, for comparison with the two predictions printed above
y_test[0:2]

In [None]:
# Compare predictions with targets for the first 21 test patterns

target = list(y_test.ravel())
prediction = list(report.ravel())
patterns = range(21)
targe = target[:21]
pred = prediction[:21]
plt.plot(patterns, pred, 'g', label='Model predictions')
plt.plot(patterns, targe, 'b', label='Target values')
plt.xticks(patterns)
plt.xlabel('Patterns')
plt.ylabel('Values')
plt.title('Target vs Predicted values')
plt.legend()
plt.show()

### Compare to classification problem

In [None]:
# convert prediction set to classes

# Inclusive upper bound of every class except the last, open-ended one.
class_upper_bounds = [12, 35, 55, 150, 250]
class_names = np.array(["good", "moderate", "unhealthy for sensitive groups",
                        "unhealthy", "very unhealthy", "hazardous"], dtype='O')

pred_class = np.empty((report.shape[0],report.shape[1]), dtype = 'O')

# Largest prediction; kept because later cells may refer to it.
maxi = max(report[:,0])

# np.digitize(..., right=True) returns i with bounds[i-1] < x <= bounds[i]:
# 0 for x <= 12, 1 for 12 < x <= 35, ..., 5 for x > 250.
# A regression output below 0 therefore lands in "good" instead of being
# left unset (None), which would break the class comparison further down.
pred_class[:, 0] = class_names[np.digitize(report[:, 0], class_upper_bounds, right=True)]

In [None]:
# Predicted class labels (first column of pred_class) as a plain Python list
predict = pred_class[:, 0].tolist()

In [None]:
# Micro-averaged precision of the derived classes against the true classes,
# printed as a percentage.
pre = precision_score(
    y_test_class,
    predict,
    labels=['good', 'hazardous', 'moderate', 'unhealthy', 'unhealthy for sensitive groups', 'very unhealthy'],
    average='micro',
)
print('> %.3f' % (100.0 * pre))

### Visualize extreme cases

In [None]:
# extreme cases from 150 onwards
# Flatten every test sample's 2-D window into a single row.
x_test_ex = sc_x_test.reshape(len(sc_x_test), sc_x_test.shape[1] * sc_x_test.shape[2])
print(x_test_ex.shape)

In [None]:
# Put the targets next to the flattened inputs so that rows can be filtered
# by target value while inputs and targets stay aligned.
checking = np.concatenate((x_test_ex, y_test), axis = 1)
# The target column sits right after the input columns; its index is derived
# from the data instead of being hard-coded.
n_inputs = x_test_ex.shape[1]
extreme = checking[:, n_inputs:n_inputs + 1]

In [None]:
# Keep only the rows whose target exceeds 150 (the "extreme" cases).
checking2 = checking[extreme[:,0]>150]
print(checking2.shape)
# Strip the target column again; the number of input columns is taken from
# x_test_ex rather than hard-coded.
checking3 = checking2[:, :x_test_ex.shape[1]]
print(checking3.shape)

In [None]:
# Restore the per-sample 2-D layout of sc_x_test so the rows can be fed to the model.
checking4 = checking3.reshape((len(checking3),) + sc_x_test.shape[1:3])
checking4.shape

In [None]:
# Model outputs for the extreme-case inputs only (still in the scaled target space)
extreme_predict = model.predict(checking4, verbose = 0)

In [None]:
# Inverse-transform the extreme-case predictions with scaler_out (the MinMaxScaler fit on y_train)
report2 = scaler_out.inverse_transform(extreme_predict)

In [None]:
# Targets of the extreme cases: the column right after the flattened inputs
# (index derived from x_test_ex instead of being hard-coded).
checking5 = checking2[:, x_test_ex.shape[1]:x_test_ex.shape[1] + 1]

In [None]:
# Length of the target list built in the prediction-plot cell; it is re-bound to the extreme cases below
len(target)

In [None]:
# Length of the prediction list built in the prediction-plot cell; it is re-bound to the extreme cases below
len(prediction)

In [None]:
# plotting extreme predictions

target = list(checking5.flatten())
prediction = list(report2.flatten())
targe = target
pred =prediction
# One x position per extreme case; derived from the data so the plot still
# works when the number of extreme cases changes.
patterns = range(len(target))
plt.plot(patterns, pred, 'g', label='Model predictions')
plt.plot(patterns, targe, 'b', label='Target values')
plt.xticks(patterns)
plt.title('Target vs Predicted values')
plt.xlabel('Patterns')
plt.ylabel('Values')
plt.legend()
plt.show()