In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from problem import get_train_data
from problem import get_test_data
from catboost import CatBoost, CatBoostClassifier
from keras import layers, models, optimizers, losses, metrics
import pandas as pd
import tensorflow as tf
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.preprocessing import StandardScaler
from keras import backend as K

In [2]:
def compute_rolling_std(X_df, feature, time_window, center=True):
    """
    For a given dataframe, compute the standard deviation over
    a defined period of time (time_window) of a defined feature

    Parameters
    ----------
    X : dataframe
    feature : str
        feature in the dataframe we wish to compute the rolling std from
    time_window : str
        string that defines the length of the time window passed to `rolling`
    center : bool
        boolean to indicate if the point of the dataframe considered is
        center or end of the window
    """
    name = "_".join([feature, time_window, "std"])
    X_df[name] = X_df[feature].rolling(time_window, center=center).std()
    X_df[name] = X_df[name].ffill().bfill()
    X_df[name] = X_df[name].astype(X_df[feature].dtype)
    return X_df


def compute_rolling_mean(X_df, feature, time_window, center=True):
    """
    For a given dataframe, compute the mean over
    a defined period of time (time_window) of a defined feature

    Parameters
    ----------
    X : dataframe
    feature : str
        feature in the dataframe we wish to compute the rolling mean from
    time_window : str
        string that defines the length of the time window passed to `rolling`
    center : bool
        boolean to indicate if the point of the dataframe considered is
        center or end of the window
    """
    name = "_".join([feature, time_window, "mean"])
    X_df[name] = X_df[feature].rolling(time_window, center=center).mean()
    X_df[name] = X_df[name].ffill().bfill()
    X_df[name] = X_df[name].astype(X_df[feature].dtype)
    return X_df

def compute_rolling_variables(X_df, feature, time_window, center=True):
    X_df = compute_rolling_mean(X_df, feature, time_window, center)
    #X_df = compute_rolling_std(X_df, feature, time_window, center)
    return X_df

def clip_column(X_df, column, min, max):
    X_df[column] = X_df[column].clip(min, max)
    return X_df

In [3]:
def transform1(X):
    X = clip_column(X, 'Beta', 0, 250)
    X = clip_column(X, 'Np_nl', 0, 100)
    X = clip_column(X, 'Np', 0, 500)

    return X

In [4]:
X, y = get_train_data()
Xt, yt = get_test_data()
X = transform1(X)
Xt = transform1(Xt)
X.iloc[0:5]


Unnamed: 0,B,Bx,Bx_rms,By,By_rms,Bz,Bz_rms,Na_nl,Np,Np_nl,...,Range F 8,Range F 9,V,Vth,Vx,Vy,Vz,Beta,Pdyn,RmsBob
1997-10-01 00:00:00,6.584763,3.753262,2.303108,0.96614,2.602693,-5.179685,2.668414,2.290824,23.045732,24.352797,...,2757919000.0,2472087000.0,378.313934,80.613098,-351.598389,-138.521454,6.956387,7.64134,5.487331e-15,0.668473
1997-10-01 00:10:00,6.036456,0.693559,1.810752,-0.904843,2.16557,-1.944006,2.372931,2.119593,23.000492,20.993362,...,3365612000.0,3087122000.0,350.421021,69.919327,-331.012146,-110.970787,-21.269474,9.149856,4.783776e-15,0.753848
1997-10-01 00:20:00,5.653682,-4.684786,0.893058,-2.66883,0.768677,1.479302,1.069266,2.876815,20.676191,17.496399,...,1675611000.0,1558640000.0,328.324493,92.194435,-306.114899,-117.035202,-13.018987,11.924199,3.719768e-15,0.282667
1997-10-01 00:30:00,5.461768,-4.672382,1.081638,-2.42563,0.765681,1.203713,0.934445,2.851195,20.730188,16.747108,...,1589037000.0,1439569000.0,319.436859,94.230705,-298.460938,-110.403969,-20.350492,16.032987,3.525211e-15,0.304713
1997-10-01 00:40:00,6.177846,-5.23011,1.046126,-2.872561,0.635256,1.50501,0.850657,3.317076,20.675701,17.524536,...,1812308000.0,1529260000.0,327.545929,89.292595,-307.30307,-111.865845,-12.313167,10.253789,3.694283e-15,0.244203


In [5]:
Ten=[]
for i in range(X.shape[0]-6):
    Ten.append(tf.convert_to_tensor(X.iloc[i:i+6]))
X_train = tf.stack(Ten, axis=0)
X_train.shape

TensorShape([509828, 6, 33])

In [6]:
Ten = []
for i in range(X.shape[0]-6):
    Ten.append(tf.convert_to_tensor(y.iloc[i:i+6].mean()))
y_train = tf.stack(Ten, axis=0)
y_train.shape

TensorShape([509828])

In [7]:
Ten=[]
for i in range(Xt.shape[0]-6):
    Ten.append(tf.convert_to_tensor(Xt.iloc[i:i+6]))
X_test = tf.stack(Ten, axis=0)
X_test.shape

TensorShape([205568, 6, 33])

In [8]:
Ten = []
for i in range(Xt.shape[0]-6):
    Ten.append(tf.convert_to_tensor(yt.iloc[i:i+6].mean()))
y_test = tf.stack(Ten, axis=0)
y_test.shape

TensorShape([205568])

In [16]:
K.clear_session()
model = models.Sequential([
    layers.Conv1D(2000, 2, activation='relu', input_shape=(6, 33)),
    layers.Conv1D(400, 2, activation='relu'),
    layers.Flatten(),
    layers.Dense(2, activation='sigmoid')
])

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d (Conv1D)             (None, 5, 2000)           134000    
                                                                 
 conv1d_1 (Conv1D)           (None, 4, 400)            1600400   
                                                                 
 flatten (Flatten)           (None, 1600)              0         
                                                                 
 dense (Dense)               (None, 2)                 3202      
                                                                 
Total params: 1,737,602
Trainable params: 1,737,602
Non-trainable params: 0
_________________________________________________________________


In [17]:
model.compile(optimizer='adam', loss='mean_squared_error', metrics=metrics.MeanAbsoluteError())

In [18]:
model.fit(X_train, y_train, epochs=5, validation_data=(X_test, y_test))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x1bbe49a9400>

In [19]:
pred = model.predict(X_test)



In [23]:
np.unique(pred, return_counts=True)

(array([0.], dtype=float32), array([411136], dtype=int64))