In [5]:
import numpy as np
import pandas as pd
import pickle
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
import dataholders
import utils
import filters


path = '../data/data.pickle'

temp_path = '../data/raw/uft_flight07.mat'
wind_path = '../data/raw/actos_flight07.mat'

temp_path2 = '../data/raw/uft_flight16.mat'
wind_path2 = '../data/raw/actos_flight16.mat'

In [6]:
from keras.layers import Dense, Dropout, LSTM, Flatten, Conv1D, MaxPool1D, AvgPool1D
from keras.models import Sequential
import keras.backend as K

In [7]:
from sklearn.model_selection import train_test_split

In [8]:
def convert_to_ml(temperature, N, mean=0., std=1.):
    """
    Converts a temperature record to a standard (X, Y) machine learning problem
    
    Args:
        temperature: array to be predicted
        N: int, how many past values to use for predictions
    
    Returns:
        X, Y: array
        mean, std: float
    """
    if mean == 0. and std == 1.:
        mean, std = temperature.mean(), temperature.std()
    
    temperature = (temperature - mean) / (std)
    rolled = utils.rolling_window(temperature, N)
    X = rolled[:-1,:]
    Y = temperature[N:]
    return X, Y, mean, std

In [14]:
def holder_to_ml(holder, N, mean1=0., std1=1., mean2=0., std2=1.):
    X1, Y1, mean1, std1 = convert_to_ml(holder.T1, N, mean1, std1)
    X2, Y2, mean2, std2 = convert_to_ml(holder.T2, N, mean2, std2)
    
    LWC = utils.rolling_window(holder.lwc, N)[:-1,:]
    v1 = utils.rolling_window(holder.v1, N)[:-1, :]
    v2 = utils.rolling_window(holder.v2, N)[:-1, :]
    v3 = utils.rolling_window(holder.v3, N)[:-1, :]
    
    v1 = (v1 - v1.mean()) / v1.std()
    v2 = (v2 - v2.mean()) / v2.std()
    v3 = (v3 - v3.mean()) / v3.std()
    
    X = np.dstack([X1, X2, LWC, v1, v2, v3])
    Y = np.stack([Y1, Y2], axis=1)
    
    return X, Y, mean1, std1, mean2, std2
    

In [47]:
N = 20

In [48]:
holder = dataholders.TempWindData(temp_path, wind_path)

In [49]:
holder.smooth_temperatures('mean', 5)
holder.T1 = holder.T1_smooth
holder.T2 = holder.T2_smooth

In [50]:
X, Y, mean1, std1, mean2, std2 = holder_to_ml(holder, N)

In [51]:
X.shape

(663548, 20, 6)

In [52]:
model = Sequential()
model.add(Conv1D(10, kernel_size=3, activation='relu', input_shape=(N, 6)))
model.add(Conv1D(10, kernel_size=5, activation='relu'))
model.add(Flatten())
model.add(Dense(2))

model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_19 (Conv1D)           (None, 18, 10)            190       
_________________________________________________________________
conv1d_20 (Conv1D)           (None, 14, 10)            510       
_________________________________________________________________
flatten_8 (Flatten)          (None, 140)               0         
_________________________________________________________________
dense_8 (Dense)              (None, 2)                 282       
Total params: 982
Trainable params: 982
Non-trainable params: 0
_________________________________________________________________


In [55]:
model = Sequential()
model.add(Conv1D(50, kernel_size=5, activation='relu', padding='same', input_shape=(N, 6)))
model.add(Conv1D(20, kernel_size=5, activation='relu', padding='same'))
model.add(MaxPool1D(2, padding='same'))
model.add(Conv1D(40, kernel_size=3, activation='relu', padding='same'))
model.add(MaxPool1D(2, padding='same'))
model.add(Conv1D(80, kernel_size=3, activation='relu', padding='same'))
model.add(Flatten())
model.add(Dense(2))

model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_27 (Conv1D)           (None, 20, 50)            1550      
_________________________________________________________________
conv1d_28 (Conv1D)           (None, 20, 20)            5020      
_________________________________________________________________
max_pooling1d_7 (MaxPooling1 (None, 10, 20)            0         
_________________________________________________________________
conv1d_29 (Conv1D)           (None, 10, 40)            2440      
_________________________________________________________________
max_pooling1d_8 (MaxPooling1 (None, 5, 40)             0         
_________________________________________________________________
conv1d_30 (Conv1D)           (None, 5, 80)             9680      
_________________________________________________________________
flatten_11 (Flatten)         (None, 400)               0         
__________

In [58]:
model.compile('adam', loss='mse', metrics=['mae'])

In [57]:
history = model.fit(X, Y, batch_size=10000, epochs=30)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30

KeyboardInterrupt: 

In [27]:
plt.plot(np.log(history.history['mean_absolute_error']))
plt.show()

In [31]:
plt.plot(np.log(history.history['loss']))
plt.show()

In [59]:
preds = model.predict(X, batch_size=1024)

In [60]:
model.evaluate(X, Y, 2048)



[0.00026607756749490727, 0.0092687813478710031]

In [62]:
plt.rcParams['figure.figsize'] = [20,8]

plt.plot(mean1 + std1*preds[:,0], c='b')
plt.plot(mean1 + std1*Y[:,0], c='g')

plt.plot(mean1 + std1*preds[:,1], c='m')
plt.plot(mean1 + std1*Y[:,1], c='g')
plt.show()

# Ideas:

0. Use T2, lwc, sonic(1/2/3)

Rebuild the pipeline for easier Neptune/Floyd use

In [73]:
holder2 = dataholders.TempWindData(temp_path2, wind_path2)
X_test, Y_test, _,_,_,_ = holder_to_ml(holder2, N, mean1, std1, mean2, std2)

In [74]:
model.evaluate(X_test, Y_test, batch_size=2048)



[0.00031170767597643324, 0.0084031297358643491]

In [75]:
preds_test = model.predict(X_test, batch_size=1024)

In [76]:
plt.rcParams['figure.figsize'] = [20,8]

plt.plot(mean1 + std1*preds_test[:,0], c='b')
plt.plot(mean1 + std1*Y_test[:,0], c='g')

plt.plot(mean1 + std1*preds_test[:,1], c='m')
plt.plot(mean1 + std1*Y_test[:,1], c='g')
plt.show()

In [114]:
plt.plot(holder.T1)
plt.plot(holder.T2)
plt.show()