In [1]:
import numpy as np
import pandas as pd

# import tensorflow as tf
# from tensorflow.keras import datasets, layers, models

import skimage.measure
from scipy import signal

from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler  
from sklearn.decomposition import PCA
from sklearn.model_selection import ShuffleSplit

from matplotlib import pyplot as plt
import os
from multiprocessing import Pool

In [2]:
# All data files live in the `.data` folder of the current working directory.
my_path = os.getcwd()

# Training inputs: one archive holding the pressure fields and the past surge
# series of both cities.
X_train = np.load(my_path+'/.data/X_train_surge_new.npz')
X_train_slp, X_train_surge1, X_train_surge2 = (
    X_train[key] for key in ('slp', 'surge1_input', 'surge2_input')
)
# Indexed by city number (1 or 2); slot 0 is a placeholder so that
# X_train_surge[city] works with 1-based city ids.
X_train_surge = [None, X_train_surge1, X_train_surge2]

# Training targets, converted from a DataFrame to a plain array.
Y_train = np.array(pd.read_csv(my_path+'/.data/Y_train_surge.csv'))

# Test inputs (same archive layout as X_train is assumed -- TODO confirm).
X_test = np.load(my_path+'/.data/X_test_surge_new.npz')

In [3]:
def surge_prediction_metric(dataframe_y_true, dataframe_y_pred):
    """Weighted squared-error score summed over both cities.

    Both arguments must be indexable by the column names
    ``surge1_t0`` ... ``surge1_t9`` and ``surge2_t0`` ... ``surge2_t9`` and
    expose ``.values`` (e.g. pandas DataFrames).

    For each city the squared errors of the 10 columns are multiplied by
    weights decreasing linearly from 1 to 0.1 and averaged over all entries;
    the returned score is the sum of the two city scores.
    """
    horizon_weights = np.linspace(1, 0.1, 10).reshape(1, -1)

    def city_score(prefix):
        # Columns <prefix>_t0 ... <prefix>_t9, in that order.
        columns = [f'{prefix}_t{step}' for step in range(10)]
        residuals = dataframe_y_true[columns].values - dataframe_y_pred[columns].values
        return (horizon_weights * residuals**2).mean()

    return city_score('surge1') + city_score('surge2')

def metric_one_surge(y_true,y_pred):
    """Weighted mean squared error for a single city.

    ``y_true`` and ``y_pred`` must broadcast against a (1, 10) weight row,
    i.e. their last axis holds the 10 predicted values. Weights decrease
    linearly from 1 (first column) to 0.1 (last column).
    """
    time_weights = np.linspace(1, 0.1, 10).reshape(1, -1)
    squared_error = (y_true - y_pred) ** 2
    return (time_weights * squared_error).mean()

### Utility functions for handling the metric's time weights

In [4]:
# should be applied to Y_train before learning
def transform(y):
    """Scale targets by the square root of the metric weights.

    Meant to be applied to Y_train before learning: the squared error on the
    scaled targets then equals the weighted squared error on the raw ones.
    ``y`` must broadcast against a (1, 10) row.
    """
    root_weights = np.sqrt(np.linspace(1, 0.1, 10))[np.newaxis]
    return root_weights * y

# should be applied to Y_pred after test
def inverse_transform(y):
    """Undo the square-root weight scaling on predictions.

    Meant to be applied to the predictions of a model trained on scaled
    targets: each of the 10 columns is divided by the square root of its
    metric weight. ``y`` must broadcast against a (1, 10) row.
    """
    root_weights = np.sqrt(np.linspace(1, 0.1, 10))[np.newaxis]
    inverse_weights = 1 / root_weights
    return inverse_weights * y

## Use CNN only on pressure

In [35]:
# 3x3 binomial approximation of a Gaussian blur: the outer product of
# [1, 2, 1] with itself, normalised so that the coefficients sum to 1.
kernel = np.outer([1, 2, 1], [1, 2, 1])
kernel = kernel / kernel.sum()

def dimReduceConv(img, nbsteps):
    """Shrink a 2-D image with `nbsteps` rounds of blur + 2x2 max pooling.

    Each round convolves the image with the module-level `kernel`
    (`signal.convolve2d` with its default arguments) and then reduces every
    2x2 block to its maximum with `skimage.measure.block_reduce`.

    With ``nbsteps == 0`` the input is returned unchanged.
    """
    reduced = img
    for _ in range(nbsteps):
        # gaussian convolution
        smoothed = signal.convolve2d(reduced, kernel)
        # max pooling
        reduced = skimage.measure.block_reduce(smoothed, (2,2), np.max)
    return reduced
    

Test convolution

In [36]:
# Smoke test of the reduction on one pressure field: element [0][0] of
# X_train_slp (presumably first sample, first time step -- TODO confirm axes).
img = X_train_slp[0][0]
print(img)
# Six blur + pooling rounds; the reduced image is kept in `imgr`.
imgr = dimReduceConv(img,6)

[[102389.25 102458.25 102514.25 ... 101276.25 101275.25 101271.25]
 [102432.25 102509.25 102572.25 ... 101268.25 101266.25 101268.25]
 [102467.25 102552.25 102624.25 ... 101255.25 101255.25 101256.25]
 ...
 [101414.25 101423.25 101407.25 ... 100039.25 100005.25 100019.25]
 [101986.25 102006.25 101961.25 ... 100100.25 100076.25 100096.25]
 [102685.25 102702.25 102688.25 ... 100334.25 100330.25 100362.25]]


Convolve the entire dataset

In [37]:
def slp_to_flat_images(slp):
    """Merge the first two axes of a 4-D array.

    An input of shape (a, b, c, d) is returned with shape (a*b, c, d), i.e. as
    one flat sequence of (c, d) images.
    """
    n_samples, n_fields, height, width = slp.shape
    return slp.reshape(n_samples * n_fields, height, width)

class Preprocess:
    """Turn 4-D pressure data into scaled, low-dimensional feature rows.

    Pipeline applied to an array of shape (samples, fields, height, width):

    1. flatten the first two axes into a list of images;
    2. standardise the image columns (first scaler);
    3. reduce every image with `dimReduceConv` and flatten it;
    4. concatenate the reduced images belonging to the same sample;
    5. standardise the resulting feature matrix (second scaler).

    `fit_transform` fits both scalers; `transform` re-applies them.
    """

    def __init__(self):
        # One scaler pair per instance. As class attributes the scalers were
        # shared by every Preprocess object, so fitting one instance silently
        # changed the behaviour of all the others.
        self._scaler1 = StandardScaler()
        self._scaler2 = StandardScaler()

    def _scale_images(self, images, fit):
        """Standardise image columns; fit the first scaler when `fit` is true."""
        rows = images.reshape(-1, images.shape[-1])
        scaled = self._scaler1.fit_transform(rows) if fit else self._scaler1.transform(rows)
        return scaled.reshape(images.shape)

    def _reduce_images(self, images, nb_samples):
        """Reduce every image and group the results into one row per sample."""
        reduced = np.stack([dimReduceConv(img, self.nblevels).flatten() for img in images])
        # Images are ordered sample by sample, so a row-major reshape
        # concatenates the reduced images of each sample. The number of images
        # per sample is taken from the data instead of being fixed to 40.
        return reduced.reshape(nb_samples, -1)

    def fit_transform(self,slp,nb_levels):
        """Fit both scalers on `slp` and return its reduced feature matrix.

        Parameters
        ----------
        slp : 4-D array, (samples, fields, height, width)
        nb_levels : number of blur + pooling rounds passed to `dimReduceConv`;
            remembered for later `transform` calls.

        Returns
        -------
        2-D array with one row per sample.
        """
        #reshape
        list_flat_images = slp_to_flat_images(slp)

        #normalize
        list_flat_images = self._scale_images(list_flat_images, fit=True)

        self.nblevels = nb_levels

        # convolve and regroup per sample
        slp_LD = self._reduce_images(list_flat_images, len(slp))

        #renormalize
        return self._scaler2.fit_transform(slp_LD)

    def transform(self,slp):
        """Apply the fitted pipeline to new data of the same layout.

        Raises AttributeError when called before `fit_transform`.
        """
        if getattr(self, 'nblevels', None) is None:
            raise AttributeError("Preprocess.transform() called before fit_transform()")

        flat = self._scale_images(slp_to_flat_images(slp), fit=False)
        slp_LD = self._reduce_images(flat, len(slp))
        return self._scaler2.transform(slp_LD)

In [41]:
# prepare validation dataset (hold out a test part of the labelled data)
nbs = 5 #number of splits to do the average on
test_s = 0.1 # size of test sample

# parameters for test
city=1
nbConv=3

# The sample count comes from the loaded data rather than a hard-coded 5599,
# and inputs/targets are checked to describe the same samples.
nb_samples = len(X_train_slp)
assert len(Y_train) == nb_samples and len(X_train_surge[city]) == nb_samples, \
    "pressure fields, surge inputs and targets must have the same number of samples"

# Fixed seed: the same splits are drawn on every run, so scores obtained with
# different hyper-parameters are comparable.
ss = ShuffleSplit(n_splits=nbs,test_size=test_s,random_state=0)
X_ind = np.zeros(nb_samples)  # only its length matters to ShuffleSplit

# Target columns of Y_train: column 0 is skipped, columns 1-10 belong to
# city 1 and the remaining ones to city 2.
target_cols = slice(1, 11) if city==1 else slice(11, None)

x_train = [None] * nbs
x_test = [None] * nbs

y_train = [None] * nbs
y_test = [None] * nbs


for s, (train_index, test_index) in enumerate(ss.split(X_ind)):
    #split data
    print(s)
    slp_train = X_train_slp[train_index]
    slp_test = X_train_slp[test_index]

    surge_input_train = X_train_surge[city][train_index]
    surge_input_test = X_train_surge[city][test_index]

    #preprocessing (convolutions and scaling) done once for all
    # fitted on the training part only, then re-applied to the held-out part
    prepro = Preprocess()
    slp_train_LD = prepro.fit_transform(slp_train, nbConv)
    slp_test_LD = prepro.transform(slp_test)

    x_train[s] = np.concatenate((surge_input_train, slp_train_LD), axis=1)
    x_test[s] = np.concatenate((surge_input_test, slp_test_LD), axis=1)

    print(x_train[s].shape)

    # Training targets are weight-scaled for learning; held-out targets stay
    # raw because predictions are mapped back before scoring.
    y_train[s] = transform(Y_train[train_index, target_cols])
    y_test[s] = Y_train[test_index, target_cols]
  

0


KeyboardInterrupt: 

In [None]:
# function to call to cross-validate one hyper-parameter setting
def testParams(learningRate=1e-5, hiddenLayers=None,slv='sgd',activ_fun='relu'):
    """Train and score an MLPRegressor on every prepared split.

    Reads the notebook globals `nbs`, `x_train`, `y_train`, `x_test` and
    `y_test`.

    Parameters
    ----------
    learningRate : float
        NOTE: despite its name this value is passed to MLPRegressor as
        `alpha` (the L2 regularisation strength), not as the optimiser step
        size (`learning_rate_init`). Kept as is to preserve results.
    hiddenLayers : tuple of int or None
        Hidden layer sizes. This argument used to be ignored; it is now
        honoured. None (the default) keeps the previous behaviour: a single
        hidden layer of ``(10 + n_features) // 2`` units.
    slv : str
        MLPRegressor `solver`.
    activ_fun : str
        MLPRegressor `activation`.

    Returns
    -------
    list of float
        `metric_one_surge` error of each split, in split order.
    """
    errors = []

    for s in range(nbs):
        print(s)

        print(x_train[s].shape)

        if hiddenLayers is None:
            # default heuristic: mean of input width and the 10 outputs
            layer_sizes = ((10+x_train[s].shape[1])//2,)
        else:
            layer_sizes = hiddenLayers

        #learn
        clf = MLPRegressor(solver=slv, hidden_layer_sizes=layer_sizes,
                           alpha=learningRate, max_iter=1000, activation=activ_fun)
        clf.fit(x_train[s],y_train[s])

        #predict (undo the target scaling before scoring against raw targets)
        y_pred = inverse_transform(clf.predict(x_test[s]))

        error = metric_one_surge(y_test[s],y_pred)
        errors.append(error)
        print("error :",error)

    print(learningRate,errors)
    return errors

In [None]:
# Run the cross-validation once with the default hyper-parameters.
testParams()