In [0]:
# Imports required for the Colab / Google Drive setup
import os
from google.colab import drive

In [4]:
# Get data and files from Google Drive. To fetch them from GitHub instead,
# comment out the two lines below and uncomment the remaining ones.
drive.mount('/content/drive')
# Work from the project folder so relative imports and data paths resolve there.
os.chdir('/content/drive/My Drive/storm_forecast')
# !git clone https://github.com/ramp-kits/storm_forecast
# NOTE(review): `git clone` creates ./storm_forecast relative to the current
# directory, so the absolute path below may need to be 'storm_forecast' -
# confirm before enabling.
# os.chdir('/storm_forecast')
# !pip install -r requirements.txt
# !python download_data.py


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
# Install the ramp-workflow package from its GitHub repository
!pip install git+https://github.com/paris-saclay-cds/ramp-workflow

Collecting git+https://github.com/paris-saclay-cds/ramp-workflow
  Cloning https://github.com/paris-saclay-cds/ramp-workflow to /tmp/pip-req-build-gnphr0ks
Building wheels for collected packages: ramp-workflow
  Running setup.py bdist_wheel for ramp-workflow ... [?25l- \ | / - done
[?25h  Stored in directory: /tmp/pip-ephem-wheel-cache-08de0e_a/wheels/35/a2/c3/7969a73ddfefc0dcad3709cb7a81f52fb90348df9bb9b8c455
Successfully built ramp-workflow
Installing collected packages: ramp-workflow
Successfully installed ramp-workflow-0.2.0+49.gc22e3ee


In [0]:
# Feature extractor class
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, MinMaxScaler

class FeatureExtractor(object):
    """Turn the raw storm DataFrame into normalised inputs for the regressor.

    Two kinds of input columns are handled:

    * ``scalar_fields`` - one column each, scaled with a scikit-learn
      ``MinMaxScaler`` fitted on the training frame.
    * ``spatial_fields`` - for each field, every column whose name starts with
      ``"<field>_"`` is gathered and reshaped to a
      ``GRID_SIZE x GRID_SIZE`` grid per row, then min-max scaled with the
      global minimum / maximum of that field seen during ``fit``.

    ``fit`` also records the minimum and maximum of the target so that
    ``transform`` can hand them to the regressor alongside the features.
    """

    # Side length of the square grid each spatial field is reshaped to.
    GRID_SIZE = 11

    def __init__(self):
        self.scalar_fields = ['instant_t', 'windspeed', 'latitude', 'longitude',
                              'hemisphere', 'Jday_predictor', 'initial_max_wind',
                              'max_wind_change_12h', 'dist2land']
        self.spatial_fields = ["u", "v", "sst", "slp", "hum", "z", "vo700"]
        # Per-field statistics computed in fit(). All four columns are declared
        # up front instead of relying on implicit enlargement via .loc.
        # "mean" and "std" are recorded but transform() only uses "min"/"max".
        self.scaling_values = pd.DataFrame(index=self.spatial_fields,
                                           columns=["mean", "std", "min", "max"],
                                           dtype=float)
        self.scalar_norm = MinMaxScaler()

    def _field_grid(self, X_df, field):
        """Return the columns of one spatial field as an (n_rows, G, G) array.

        Columns are selected by the literal prefix ``"<field>_"``. A prefix
        match (rather than a regex substring search) keeps a short field name
        such as ``"u"`` from also picking up unrelated columns that merely
        contain ``"u_"``.

        Raises:
            ValueError: if the number of matching columns is not
                ``GRID_SIZE * GRID_SIZE``.
        """
        prefix = field + "_"
        f_cols = X_df.columns[X_df.columns.str.startswith(prefix)]
        n_cells = self.GRID_SIZE * self.GRID_SIZE
        if len(f_cols) != n_cells:
            raise ValueError(
                "expected %d columns starting with %r, found %d"
                % (n_cells, prefix, len(f_cols)))
        return X_df[f_cols].values.reshape(-1, self.GRID_SIZE, self.GRID_SIZE)

    def fit(self, X_df, y):
        """Learn the scaling statistics from the training data.

        Args:
            X_df: DataFrame holding the scalar columns and the spatial
                ``"<field>_*"`` columns.
            y: target values; only their minimum and maximum are stored.

        Returns:
            self, so calls can be chained.
        """
        y_values = np.asarray(y)
        self.y_max = np.amax(y_values)
        self.y_min = np.amin(y_values)
        for field in self.spatial_fields:
            grid = self._field_grid(X_df, field)
            # NaN-aware reductions: missing grid cells do not affect the stats.
            self.scaling_values.loc[field, "mean"] = np.nanmean(grid)
            self.scaling_values.loc[field, "std"] = np.nanstd(grid)
            self.scaling_values.loc[field, "min"] = np.nanmin(grid)
            self.scaling_values.loc[field, "max"] = np.nanmax(grid)
        self.scalar_norm.fit(X_df[self.scalar_fields])
        return self

    def transform(self, X_df):
        """Scale a frame with the statistics learned in ``fit``.

        Args:
            X_df: DataFrame with the same columns as the one given to ``fit``.

        Returns:
            ``[[norm_data, norm_scalar], y_max, y_min]`` where ``norm_data``
            has shape (n_rows, G, G, n_spatial_fields) with NaNs replaced by
            0, ``norm_scalar`` is the output of the fitted ``MinMaxScaler``
            on the scalar columns, and ``y_max`` / ``y_min`` are the target
            extrema recorded by ``fit``.
        """
        field_grids = []
        for field in self.spatial_fields:
            lo = self.scaling_values.loc[field, "min"]
            hi = self.scaling_values.loc[field, "max"]
            span = hi - lo
            # A constant (or all-NaN) training field has no usable range;
            # fall back to 1 so we never divide by zero or NaN.
            if not span > 0:
                span = 1.0
            scaled = (self._field_grid(X_df, field) - lo) / span
            # Missing grid cells are fed to the network as 0.
            scaled[np.isnan(scaled)] = 0
            field_grids.append(scaled)
        norm_data = np.stack(field_grids, axis=-1)
        norm_scalar = self.scalar_norm.transform(X_df[self.scalar_fields])
        return [[norm_data, norm_scalar], self.y_max, self.y_min]


In [0]:
#Regressor Class
from keras.layers import Concatenate, Dropout, BatchNormalization, Conv2D, Activation, Dense, Input, MaxPooling2D, Flatten, Dropout, PReLU
from keras.models import Model
from keras.callbacks import EarlyStopping
from keras.regularizers import l1, l2, l1_l2
from sklearn.base import BaseEstimator
from keras.optimizers import RMSprop, Nadam, Adam, SGD

import numpy as np
class Regressor(BaseEstimator):
    """Keras CNN that predicts the target as a correction to a baseline column.

    The network takes two inputs: an (11, 11, 7) stack of spatial grids and a
    vector of 9 scalar features. It is trained on the difference between the
    min-max scaled target and column ``BASELINE_COL`` of the scalar input;
    ``predict`` adds that column back and undoes the target scaling.

    ``fit`` and ``predict`` both expect ``X`` in the form
    ``[[spatial_grids, scalar_features], y_max, y_min]``.
    """

    # Index of the scalar-input column used as the prediction baseline.
    # With the FeatureExtractor defined in this notebook, scalar_fields[1]
    # is 'windspeed'.
    BASELINE_COL = 1

    def __init__(self):
        """Build and compile the network, then print its layer summary."""
        l2_weight = 0.002
        l1_weight = 0.002
        model_in = Input(shape=(11, 11, 7))
        scalar_in = Input(shape=(9,))

        # Convolutional trunk over the spatial grids.
        model = BatchNormalization()(model_in)
        model = Conv2D(36, (5, 5), padding="same")(model)
        model = PReLU()(model)
        model = MaxPooling2D()(model)

        model = BatchNormalization()(model)
        model = Conv2D(128, (3, 3), padding="same")(model)
        model = PReLU()(model)
        model = MaxPooling2D()(model)

        model = BatchNormalization()(model)
        model = Conv2D(192, (3, 3), padding="same")(model)
        model = PReLU()(model)
        model = Flatten()(model)

        model = Dense(256, kernel_regularizer=l1_l2(l1_weight, l2_weight))(model)
        model = PReLU()(model)

        # The scalar input is concatenated three times, as in the original.
        # NOTE(review): presumably to give it more weight next to the 256
        # image features - confirm this is intentional.
        model = Concatenate()([model, scalar_in, scalar_in, scalar_in])
        model = BatchNormalization()(model)

        # Fully connected head; each layer gets its own regulariser instance.
        for units in (64, 32, 16, 16):
            model = Dense(units, kernel_regularizer=l1_l2(l1_weight, l2_weight))(model)
            model = PReLU()(model)

        model = Dense(1)(model)

        self.cnn_model = Model([model_in, scalar_in], model)
        self.cnn_model.compile(loss="mse", optimizer=Adam())

        # summary() prints the table itself and returns None, so wrapping it
        # in print() only appended a stray "None" line.
        self.cnn_model.summary()

    def fit(self, X, y):
        """Train the network.

        Args:
            X: ``[[spatial_grids, scalar_features], y_max, y_min]``.
            y: target values, one per row.

        Returns:
            self, so calls can be chained.
        """
        inputs, y_max, y_min = X
        _, scalars = inputs
        self.y_max = y_max
        self.y_min = y_min
        # Flatten to 1-D so a column-vector target cannot broadcast against
        # the baseline column into an (n, n) matrix.
        y_values = np.asarray(y, dtype=float).ravel()
        # Learn the residual between the scaled target and the baseline column.
        target = (y_values - y_min) / (y_max - y_min) - scalars[:, self.BASELINE_COL]
        callbacks = [EarlyStopping(monitor='val_loss', min_delta=.001, patience=10)]
        self.cnn_model.fit(inputs, target, epochs=100, batch_size=200, verbose=1,
                           validation_split=.1, callbacks=callbacks)
        return self

    def predict(self, X):
        """Predict targets in the original (unscaled) units.

        Args:
            X: ``[[spatial_grids, scalar_features], y_max, y_min]``. The
                ``y_max`` / ``y_min`` entries are ignored here; the values
                stored by ``fit`` are used to undo the target scaling.

        Returns:
            1-D array of predictions.
        """
        inputs, _, _ = X
        _, scalars = inputs
        residual = self.cnn_model.predict(inputs).ravel()
        scaled = residual + scalars[:, self.BASELINE_COL]
        return scaled * (self.y_max - self.y_min) + self.y_min
    

In [0]:
# Load the training and test splits (features and targets).
# `problem` is not defined in this notebook; presumably it is the problem.py
# module found in the project directory entered via os.chdir - TODO confirm.
from problem import get_train_data, get_test_data
data_train, y_train = get_train_data()
data_test, y_test = get_test_data()

In [0]:
# Train the model.
# The feature extractor is fitted on the training frame first so that the
# same scaling statistics can be reused when transforming the test frame.
training_features = FeatureExtractor()
training_features.fit(data_train, y_train)
# X is [[spatial grids, scalar features], y_max, y_min] as returned by transform().
X = training_features.transform(data_train)
reg = Regressor()
reg.fit(X, y_train)

In [30]:
# Evaluate on the test set: transform it with the extractor fitted on the
# training data, then report the root-mean-square error of the predictions.
X = training_features.transform(data_test)
test_errors = reg.predict(X) - y_test
test_rmse = np.sqrt(np.mean(test_errors ** 2))
print('The RMSE of the test dataset is %2.3f' % test_rmse)

The RMSE of the test dataset is 20.007
