In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf

In [2]:
df = pd.read_csv('temp_data2.csv')
df['latitude'] = df['latitude'].apply(lambda x: x[:-1])
df['longitude'] = df['longitude'].apply(lambda x: x[:-1])
df[['latitude', 'longitude']] = df[['latitude', 'longitude']].astype(float)
date_time = pd.to_datetime(df.pop('time'), format='%Y%m%d%H')
df.head()

Unnamed: 0,id,latitude,longitude,speed
0,0,128.0,904.0,60
1,0,134.0,906.0,75
2,0,140.0,907.0,85
3,0,145.0,908.0,65
4,0,150.0,909.0,60


In [3]:
cyc_id_ds = df.id.unique()
np.random.shuffle(cyc_id_ds)
n = len(cyc_id_ds)
train_ids = cyc_id_ds[0:int(n*0.7)]
val_ids = cyc_id_ds[int(n*0.7):int(n*0.9)]
test_ids = cyc_id_ds[int(n*0.9):]

train_df = df[0:int(n*0.7)]

In [4]:
train_df_collection = []
test_df_collection = []
val_df_collection = []

cid = df.pop('id')
MEAN = df.mean()
STD = df.std()
df = (df - MEAN) / STD
df = df.assign(id=cid)

for _df in [df.loc[df['id'] == id] for id in train_ids]:
    _df.pop('id')
    train_df_collection.append(_df)

for _df in [df.loc[df['id'] == id] for id in test_ids]:
    _df.pop('id')
    test_df_collection.append(_df)

for _df in [df.loc[df['id'] == id] for id in val_ids]:
    _df.pop('id')
    val_df_collection.append(_df)

In [16]:
print(STD)
print(MEAN)

latitude      48.353828
longitude    122.472912
speed         22.389924
dtype: float64
latitude     140.162766
longitude    784.186350
speed         40.016973
dtype: float64


In [18]:

class WindowGenerator:
    def __init__(
        self,
        input_width,
        label_width,
        shift,
        train_df,
        val_df,
        test_df,
        label_columns=None,
    ):
        # Store the raw data.
        self.train_df = train_df
        self.val_df = val_df
        self.test_df = test_df

        # Work out the label column indices.
        self.label_columns = label_columns
        if label_columns is not None:
            self.label_columns_indices = {
                name: i for i, name in enumerate(label_columns)
            }

        self.column_indices = {name: i for i, name in enumerate(train_df.columns)}

        # Work out the window parameters.
        self.input_width = input_width
        self.label_width = label_width
        self.shift = shift

        self.total_window_size = input_width + shift

        self.input_slice = slice(0, input_width)
        self.input_indices = np.arange(self.total_window_size)[self.input_slice]

        self.label_start = self.total_window_size - self.label_width
        self.labels_slice = slice(self.label_start, None)
        self.label_indices = np.arange(self.total_window_size)[self.labels_slice]

    def __repr__(self):
        return "\n".join(
            [
                f"Total window size: {self.total_window_size}",
                f"Input indices: {self.input_indices}",
                f"Label indices: {self.label_indices}",
                f"Label column name(s): {self.label_columns}",
            ]
        )

    def split_window(self, features):
        inputs = features[:, self.input_slice, :]
        labels = features[:, self.labels_slice, :]

        if self.label_columns is not None:
            labels = tf.stack(
                [
                    labels[:, :, self.column_indices[name]]
                    for name in self.label_columns
                ],
                axis=-1,
            )

        # Slicing doesn't preserve static shape information, so set the shapes
        # manually. This way the `tf.data.Datasets` are easier to inspect.
        inputs.set_shape([None, self.input_width, None])
        labels.set_shape([None, self.label_width, None])
        return inputs, labels

    def make_dataset(self, data):
        data = np.array(data, dtype=np.float32)
        ds = tf.keras.utils.timeseries_dataset_from_array(
            data=data,
            targets=None,
            sequence_length=self.total_window_size,
            sequence_stride=1,
        )

        ds = ds.map(self.split_window)
        return ds

    @property
    def train(self):
        return self.make_dataset(self.train_df)

    @property
    def val(self):
        return self.make_dataset(self.val_df)

    @property
    def test(self):
        return self.make_dataset(self.test_df)


    


In [6]:
def create_tensor_timeseries_dataset(input_width, label_width, shift):
    train_ds = None
    for tdf in train_df_collection:
        w = WindowGenerator(input_width=input_width, label_width=label_width, shift=shift, train_df=tdf, val_df=None, test_df=None, label_columns=['speed'])
        if train_ds == None:
            train_ds = w.train
        else:
            train_ds = train_ds.concatenate(w.train)    

    val_ds = None
    for tdf in val_df_collection:
        w = WindowGenerator(input_width=input_width, label_width=label_width, shift=shift, train_df=tdf, val_df=None, test_df=None, label_columns=['speed'])
        if val_ds == None:
            val_ds = w.train
        else:
            val_ds = val_ds.concatenate(
        # Slicing doesn't preserve static shape information, so set the shapes
        # manually. This way the `tf.data.Datasets` are easier to inspect.
w.train)

    test_ds = None
    for tdf in test_df_collection:
        w = WindowGenerator(input_width=input_width, label_width=label_width, shift=shift, train_df=tdf, val_df=None, test_df=None, label_columns=['speed'])
        if test_ds == None:
            test_ds = w.train
        else:
            test_ds = test_ds.concatenate(w.train)
    
    return train_ds, val_ds, test_ds

In [7]:
train_ds, val_ds, test_ds = create_tensor_timeseries_dataset(4, 1, 1)

In [8]:
class Baseline(tf.keras.Model):
  def __init__(self, label_index=None):
    super().__init__()
    self.label_index = label_index

  def call(self, inputs):
    if self.label_index is None:
      return inputs
    result = inputs[:, :, self.label_index]
    return result[:, :, tf.newaxis]

In [9]:
baseline = Baseline(label_index=2)

baseline.compile(loss=tf.keras.losses.MeanSquaredError(),
                 metrics=[tf.keras.metrics.MeanAbsoluteError(), 'accuracy'])

val_performance = {}
performance = {}
prediction_error = {}
val_performance['Baseline'] = baseline.evaluate(val_ds)
performance['Baseline'] = baseline.evaluate(test_ds, verbose=0)



In [10]:
MAX_EPOCHS = 10


def compile_and_fit(model, patience=2):
    early_stopping = tf.keras.callbacks.EarlyStopping(
        monitor="val_loss", patience=patience, mode="min"
    )

    model.compile(
        loss=tf.keras.losses.MeanSquaredError(),
        optimizer=tf.keras.optimizers.Adam(),
        metrics=[
            tf.keras.metrics.MeanAbsoluteError(),
            tf.keras.metrics.RootMeanSquaredError(),
        ],
    )

    history = model.fit(
        train_ds, epochs=MAX_EPOCHS, validation_data=val_ds, callbacks=[early_stopping]
    )
    return history


def get_prediction_error(model, test_ds):
    count = 0
    diff = 0
    for i in test_ds.as_numpy_iterator():
        input, label = i
        prediction = np.array(model(input))

        for i, p in enumerate(prediction):
            p = p * STD["speed"] + MEAN["speed"]
            a = label[i] * STD["speed"] + MEAN["speed"]
            diff += np.abs(a - p)
            count += 1
    
    return diff/count


In [11]:
linear = tf.keras.Sequential([
    tf.keras.layers.Dense(units=4),
    tf.keras.layers.Dense(units=1)
])

history = compile_and_fit(linear)

val_performance['Linear'] = linear.evaluate(val_ds)
performance['Linear'] = linear.evaluate(test_ds, verbose=0)
prediction_error['Linear'] = get_prediction_error(linear, test_ds)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [12]:
dense = tf.keras.Sequential([
    tf.keras.layers.Dense(units=32, activation='relu'),
    tf.keras.layers.Dense(units=32, activation='relu'),
    tf.keras.layers.Dense(units=1)
])

history = compile_and_fit(dense)

val_performance['Dense'] = dense.evaluate(val_ds)
performance['Dense'] = dense.evaluate(test_ds, verbose=0)
prediction_error['Dense'] = get_prediction_error(dense, test_ds)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [13]:
CONV_WIDTH = 4
conv_model = tf.keras.Sequential([
    tf.keras.layers.Conv1D(filters=32, kernel_size=(CONV_WIDTH,), activation='relu'),
    tf.keras.layers.Dense(units=32, activation='relu'),
    tf.keras.layers.Dense(units=1),
])

history = compile_and_fit(conv_model)

val_performance['Conv'] = conv_model.evaluate(val_ds)
performance['Conv'] = conv_model.evaluate(test_ds, verbose=0)
prediction_error['Conv'] = get_prediction_error(linear, test_ds)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10


In [14]:
lstm_model = tf.keras.models.Sequential([
    tf.keras.layers.LSTM(10, recurrent_activation='relu', return_sequences=False),
    tf.keras.layers.Dense(units=1),
])

history = compile_and_fit(lstm_model)

val_performance['LSTM'] = lstm_model.evaluate(val_ds)
performance['LSTM'] = lstm_model.evaluate(test_ds, verbose=0)
prediction_error['LSTM'] = get_prediction_error(lstm_model, test_ds)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [15]:
import pickle
with open('cyclone_intensity_prediction.pickle','wb') as f:
    pickle.dump(lstm_model,f)