In [5]:
import pandas as pd

from CMEpredict import *

In [6]:
# ---- Run configuration ----
# Model family; the code below branches on 'gru' and 'lstm'.
# NOTE(review): this shadows the builtin `type`; the name is kept because the
# rest of the notebook reads it.
type = 'gru'
# Selects the data files, the model/output file names and the feature ordering.
# Presumably a prediction window in hours -- TODO confirm.
time_window = 12
# 1 -> train a new model and save it; anything else -> load `model_file`.
train_again = 1

# String pieces shared by the derived paths.
_window_tag = str(time_window)
_run_prefix = './' + type + '-' + _window_tag

# Input data.
train_data_file = './normalized_training_' + _window_tag + '.csv'
test_data_file = './normalized_testing_' + _window_tag + '.csv'
# Artifacts named after the model family and time window.
result_file = _run_prefix + '-output.csv'
model_file = _run_prefix + '-model.h5'

# Index of the first feature column in each row (used by load_data and by the
# zero-value check when results are assembled).
start_feature = 4
# Number of features fed to the model and the probability cut-off used to
# assign the 'P'/'N' label, both looked up for this model/time-window pair.
n_features, thresh = get_n_features_thresh(type, time_window)

# Settings forwarded to load_data and to the model builders.
mask_value = 0
series_len = 20

# Training / inference hyper-parameters.
epochs = 20
batch_size = 256
nclass = 2

# Obtain `model`: either train from scratch and save it, or load a saved one.
if train_again == 1:
    # Train
    print('loading training data...')
    X_train, y_train, nb_train = load_data(datafile=train_data_file,
                                            series_len=series_len,
                                            start_feature=start_feature,
                                            n_features=n_features,
                                            mask_value=mask_value,
                                            type=type,
                                            time_window=time_window)

    # 'balanced' weights each class inversely to its frequency in y_train.
    # Keyword arguments are required here: recent scikit-learn releases make
    # `classes` and `y` keyword-only, and older releases accept keywords too.
    class_weights = class_weight.compute_class_weight(class_weight='balanced',
                                                      classes=np.unique(y_train),
                                                      y=y_train)
    # Keras expects a {class_index: weight} mapping.
    # Assumes the labels in y_train are encoded as 0 and 1 -- TODO confirm.
    class_weight_ = {0: class_weights[0], 1: class_weights[1]}
    print('done loading training data...')

    if type == 'gru':
        model = gru(n_features, series_len)
    elif type == 'lstm':
        model = lstm(n_features, series_len)
    else:
        # Without this branch an unknown type would either hit a NameError on
        # `model` below or silently reuse a stale `model` left in the kernel.
        raise ValueError("unsupported model type {!r}; expected 'gru' or 'lstm'".format(type))
    print('training the model, wait until it is finished...')
    model.compile(loss='binary_crossentropy',
                    optimizer='RMSprop',
                    metrics=['accuracy'])

    history = model.fit(X_train,
                        y_train,
                        epochs=epochs,
                        batch_size=batch_size,
                        verbose=False,
                        shuffle=True,
                        class_weight=class_weight_)
    print('finished...')
    # Persist the trained model so later runs can set train_again != 1.
    model.save(model_file)
else:
    print('loading model...')
    model = load_model(model_file)
    print('done loading...')

# Test
print('loading testing data')
# Same loader and settings as the training branch, pointed at the test file.
# y_test and nb_test are not read again in this cell.
X_test, y_test, nb_test = load_data(
    datafile=test_data_file,
    series_len=series_len,
    start_feature=start_feature,
    n_features=n_features,
    mask_value=mask_value,
    type=type,
    time_window=time_window,
)
print('done loading testing data...')

print('predicting testing data...')
# `prob` is compared against `thresh` further down to assign P/N labels.
prob = model.predict(
    X_test,
    batch_size=batch_size,
    verbose=False,
    steps=None,
)
print('done predicting...')
print('writing prediction results into file...')
# NOTE(review): despite the message above, nothing in this cell writes to
# `result_file`; the results are only displayed.

# Re-read the raw test CSV so each prediction can be shown next to its row.
df = pd.read_csv(test_data_file, header=None)
df_values0 = df.values

# Column order applied to the raw test rows, keyed by (model type, time window).
# In every entry the first four columns stay in place and the remaining 18
# columns (indices 4..21) are permuted.
# Presumably the permutation ranks features per model -- TODO confirm.
# NOTE(review): the ('gru', 60) and ('lstm', 60) orders are identical; confirm
# this is intended and not a copy/paste slip.
_COLUMN_ORDER = {
    ('gru', 12): [0, 1, 2, 3, 11, 13, 7, 8, 15, 18, 21, 6, 9, 10, 17, 5, 16, 4, 12, 19, 20, 14],
    ('gru', 24): [0, 1, 2, 3, 11, 13, 15, 5, 20, 9, 21, 7, 8, 6, 17, 18, 10, 14, 4, 12, 16, 19],
    ('gru', 36): [0, 1, 2, 3, 11, 5, 13, 20, 9, 21, 15, 8, 7, 4, 6, 14, 12, 17, 10, 18, 16, 19],
    ('gru', 48): [0, 1, 2, 3, 11, 5, 13, 20, 9, 14, 8, 7, 21, 6, 4, 15, 12, 17, 16, 10, 18, 19],
    ('gru', 60): [0, 1, 2, 3, 11, 5, 13, 20, 7, 15, 8, 14, 6, 21, 4, 9, 12, 10, 19, 18, 16, 17],
    ('lstm', 12): [0, 1, 2, 3, 11, 13, 20, 7, 15, 8, 21, 6, 18, 5, 10, 9, 17, 16, 19, 12, 14, 4],
    ('lstm', 24): [0, 1, 2, 3, 20, 11, 13, 9, 15, 14, 8, 7, 5, 21, 6, 17, 18, 10, 12, 16, 4, 19],
    ('lstm', 36): [0, 1, 2, 3, 11, 20, 13, 5, 14, 8, 15, 7, 9, 21, 6, 4, 12, 17, 18, 10, 16, 19],
    ('lstm', 48): [0, 1, 2, 3, 11, 5, 20, 13, 9, 14, 7, 15, 8, 6, 4, 21, 12, 17, 18, 16, 10, 19],
    ('lstm', 60): [0, 1, 2, 3, 11, 5, 13, 20, 7, 15, 8, 14, 6, 21, 4, 9, 12, 10, 19, 18, 16, 17],
}

_column_order = _COLUMN_ORDER.get((type, time_window))
if _column_order is None:
    # Fail loudly instead of leaving `df_values` undefined (or stale from a
    # previous run of this cell) for an unsupported configuration.
    raise ValueError(
        'no column order defined for type={!r}, time_window={!r}'.format(type, time_window))
df_values = df_values0[:, _column_order]
# Headers for the result table: the predicted label followed by the 22 columns
# of each (reordered) test row.
# NOTE(review): these names are fixed, while the column order of `df_values`
# depends on the model type and time window -- verify that the feature headers
# match the data for every configuration.
columns = ['Predicted Label', 'Label', 'Timestamp', 'NOAA AR NUM', 'HARP NUM',
           'TOTUSJH', 'TOTPOT', 'TOTUSJZ', 'ABSNJZH', 'SAVNCPP', 'USFLUX', 'AREA_ACR',
           'MEANPOT', 'R_VALUE', 'SHRGT45', 'MEANGAM', 'MEANJZH', 'MEANGBT', 'MEANGBZ',
           'MEANJZD', 'MEANGBH', 'MEANSHR', 'MEANALP']

# Columns that must all be non-zero for a row to be kept.
feature_cols = range(start_feature, start_feature + n_features)

w = []
# Position in `prob` of the next kept row.
# Assumes load_data emitted exactly one prediction per row that survives the
# filters below, in file order -- TODO confirm against load_data.
idx = 0
for row in df_values:
    line = row.tolist()
    # Skip padding rows and rows whose trailing columns exceed the cut-offs.
    # NOTE(review): the negative indices are applied after the column reorder,
    # so which source columns they hit depends on the model type/time window;
    # presumably this mirrors the filtering inside load_data -- confirm.
    if (line[0] == 'padding'
            or float(line[-5]) >= 3500
            or float(line[-4]) >= 65536
            or abs(float(line[-1]) - float(line[-2])) > 70):
        continue
    # if one of the physical feature values is missing, then discard it.
    if any(float(line[k]) == 0.0 for k in feature_cols):
        continue
    # 'P' when the predicted probability reaches the threshold, otherwise 'N'.
    label = 'P' if prob[idx] >= thresh else 'N'
    idx += 1
    w.append([label] + line)

data = pd.DataFrame(w, columns=columns)
display(data)
print('done...')

loading training data...
done loading training data...
training the model, wait until it is finished...
finished...
loading testing data
done loading testing data...
predicting testing data...
done predicting...
writing prediction results into file...


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,13,14,15,16,17,18,19,20,21,22
0,Predicted Label,Label,Timestamp,NOAA AR NUM,HARP NUM,TOTUSJH,TOTPOT,TOTUSJZ,ABSNJZH,SAVNCPP,...,R_VALUE,SHRGT45,MEANGAM,MEANJZH,MEANGBT,MEANGBZ,MEANJZD,MEANGBH,MEANSHR,MEANALP
1,N,N,2016-02-11T22:58:09.90Z,12497,6327,0.4536,0.6945,1.029,-1.6783,-0.2331,...,0.5194,-0.7577,0.3925,-0.0754,1.2374,-0.2081,-0.0909,0.6876,0.1579,1.1869
2,N,N,2016-02-11T23:10:09.90Z,12497,6327,0.4527,0.6918,0.9703,-1.6798,-0.2328,...,1.1634,-0.7672,0.403,-0.1284,1.2349,-0.2254,-0.0821,0.6851,0.1416,1.1826
3,N,N,2016-02-11T23:22:09.90Z,12497,6327,0.4665,0.6852,1.1235,-1.8024,-0.2295,...,1.1432,-0.8262,0.4127,-0.1012,1.2517,-0.223,-0.0793,0.6877,0.1457,1.1898
4,N,N,2016-02-11T23:34:10.00Z,12497,6327,0.4679,0.6866,1.0207,-1.7795,-0.2292,...,1.2066,-0.8189,0.4183,-0.1595,1.2329,-0.2252,-0.0777,0.6774,0.1143,1.1921
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
287,N,P,2016-04-17T23:34:41.00Z,12529,6483,0.7163,0.7819,0.263,-0.5814,1.3702,...,-0.0546,-0.1928,0.6014,0.4485,-0.6397,0.9327,1.2833,0.3667,0.5072,-0.5989
288,N,P,2016-04-17T23:46:41.00Z,12529,6483,0.7198,0.7709,0.2952,-0.6123,1.3693,...,0.1997,-0.2029,0.5919,0.4514,-0.6519,0.9399,1.2744,0.3666,0.5179,-0.5941
289,N,P,2016-04-17T23:58:41.00Z,12529,6483,0.7131,0.7757,0.3388,-0.622,1.3751,...,0.0527,-0.206,0.5842,0.4705,-0.6525,0.9489,1.2723,0.3666,0.5335,-0.5994
290,N,P,2016-04-18T00:10:41.00Z,12529,6483,0.6889,0.769,0.531,-0.654,1.3667,...,0.124,-0.2166,0.5667,0.4584,-0.6636,0.9428,1.2669,0.3631,0.516,-0.6131


done...
