In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers       
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt

In [4]:
df = pd.read_csv("data/philly.csv")
df.drop('Sunrise', inplace=True, axis=1)
df.drop('Sunset', inplace=True, axis=1)
df.drop('Sky Conditions', inplace=True, axis=1)
df.drop('Pressure Change', inplace=True, axis=1)
df.drop('Pressure Tendency', inplace=True, axis=1)
df['Precipitation'] = df["Precipitation"].fillna(0)
df.head()

Unnamed: 0,DATE,Dry Bulb Temp,Altitude,Dew Point Temp,Precipitation,Humidity,Station Pressure,Visibility,Wet Bulb Temp,Wind Direction,Wind Speed
0,01.01.2018 00:54,10.0,30.41,-5.0,0.0,50.0,30.38,10.0,7.0,300.0,6.0
1,01.01.2018 01:00,10.0,,-5.0,0.0,50.0,30.35,9.94,7.0,300.0,6.0
2,01.01.2018 01:54,9.0,30.42,-5.0,0.0,52.0,30.39,10.0,7.0,320.0,7.0
3,01.01.2018 02:54,10.0,30.42,-6.0,0.0,48.0,30.39,10.0,7.0,310.0,8.0
4,01.01.2018 03:54,9.0,30.42,-6.0,0.0,50.0,30.39,10.0,6.0,310.0,7.0


In [5]:
df.count()

DATE                40590
Dry Bulb Temp       39447
Altitude            35099
Dew Point Temp      39445
Precipitation       40590
Humidity            39445
Station Pressure    39124
Visibility          39452
Wet Bulb Temp       39122
Wind Direction      38391
Wind Speed          39448
dtype: int64

In [6]:
keys = [
    "Dry Bulb Temp",
    "Altitude",
    "Dew Point Temp",
    "Precipitation",
    "Humidity",
    "Station Pressure",
    "Visibility",
    "Wet Bulb Temp",
    "Wind Direction",
    "Wind Speed",
]

In [7]:
df = df.dropna(how='any',axis=0)
df.count()

DATE                33827
Dry Bulb Temp       33827
Altitude            33827
Dew Point Temp      33827
Precipitation       33827
Humidity            33827
Station Pressure    33827
Visibility          33827
Wet Bulb Temp       33827
Wind Direction      33827
Wind Speed          33827
dtype: int64

In [8]:
df.dtypes

DATE                 object
Dry Bulb Temp       float64
Altitude            float64
Dew Point Temp      float64
Precipitation       float64
Humidity            float64
Station Pressure    float64
Visibility          float64
Wet Bulb Temp       float64
Wind Direction      float64
Wind Speed          float64
dtype: object

In [9]:
for key in keys[0:]:
    print(key)
    df[key] = df[key].astype(float)

Dry Bulb Temp
Altitude
Dew Point Temp
Precipitation
Humidity
Station Pressure
Visibility
Wet Bulb Temp
Wind Direction
Wind Speed


In [10]:
usableFeatures = [keys[i] for i in [0,1,2,3,4,5,9]]
features = df[usableFeatures]
features.index = df["DATE"]
features.head()

Unnamed: 0_level_0,Dry Bulb Temp,Altitude,Dew Point Temp,Precipitation,Humidity,Station Pressure,Wind Speed
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
01.01.2018 00:54,10.0,30.41,-5.0,0.0,50.0,30.38,6.0
01.01.2018 01:54,9.0,30.42,-5.0,0.0,52.0,30.39,7.0
01.01.2018 02:54,10.0,30.42,-6.0,0.0,48.0,30.39,8.0
01.01.2018 03:54,9.0,30.42,-6.0,0.0,50.0,30.39,7.0
01.01.2018 04:54,9.0,30.43,-6.0,0.0,50.0,30.4,9.0


In [11]:
x = features.values # into numpy
scaler = MinMaxScaler()
x_scaled = scaler.fit_transform(x)
temp_avg = df["Dry Bulb Temp"].mean()
temp_std = df["Dry Bulb Temp"].std()

In [12]:
# Create training set and testing set
split = int(len(df) * 0.9)
train_set = x_scaled[:split]
test_set = x_scaled[split:]

In [13]:
# 12-hour prediction using last 48 hours of data
x_train = []
y_train = []
n_past = 24
n_future = 12

for i in range(0, len(train_set)-n_past-n_future+1):
    x_train.append(train_set[i:i+n_past, 0])
    y_train.append(train_set[i+n_past:i+n_past+n_future, 0])
    
x_train, y_train = np.array(x_train), np.array(y_train)
x_train = np.expand_dims(x_train, axis=-1)

In [18]:
class BobNet(keras.Model):
    def __init__(self, inp_shape):  
        super().__init__()
        self.lstm = keras.Sequential([
          layers.Bidirectional(layers.LSTM(units=24, input_shape=inp_shape, return_sequences=True)),
          layers.Dropout(0.2),
          layers.LSTM(units=24, return_sequences=True),
          layers.Dropout(0.2),
          layers.LSTM(units=24),
          layers.Dropout(0.2),
          layers.Dense(units=12, activation="relu")
        ])

    def call(self, x):
        x = self.lstm(x)
        return x  

input_shape = (x_train.shape[1], 1)
model = BobNet(input_shape)
model.compile(loss="mean_squared_error", optimizer="adam", metrics=["accuracy"])
model.fit(x_train, y_train, epochs=25, batch_size=256)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<keras.callbacks.History at 0x7f4d8c2508b0>

In [43]:
# model.save("weather_model")

In [19]:
# This only tests first example
x_test = test_set[:n_past, 0]
x_test = np.expand_dims(x_test, axis=(0, 2))
y_test = test_set[n_past:n_past+n_future, 0]

In [20]:
y_pred = model.predict(x_test)
print("Predicted temperature", y_pred) 
print("Real temperature", y_test)

Predicted temperature [[0.7386519  0.74586457 0.746618   0.75181794 0.7551838  0.75788563
  0.7628144  0.764688   0.7668088  0.76772106 0.76617974 0.76887584]]
Real temperature [0.72826087 0.72826087 0.72826087 0.72826087 0.72826087 0.72826087
 0.72826087 0.72826087 0.73913043 0.73913043 0.72826087 0.72826087]


In [21]:
def convert_to_f(x):
    return (x * temp_std) + temp_avg
y_pred = scaler.fit_transform(y_pred)
y_pred = convert_to_f(y_pred)
y_test = convert_to_f(y_test)
y_pred, y_test

(array([[56.88917, 56.88917, 56.88917, 56.88917, 56.88917, 56.88917,
         56.88917, 56.88917, 56.88917, 56.88917, 56.88917, 56.88917]],
       dtype=float32),
 array([69.4595733 , 69.4595733 , 69.4595733 , 69.4595733 , 69.4595733 ,
        69.4595733 , 69.4595733 , 69.4595733 , 69.64719124, 69.64719124,
        69.4595733 , 69.4595733 ]))

In [22]:
x_test

array([[[0.72826087],
        [0.73913043],
        [0.75      ],
        [0.75      ],
        [0.76086957],
        [0.77173913],
        [0.77173913],
        [0.7826087 ],
        [0.80434783],
        [0.79347826],
        [0.79347826],
        [0.79347826],
        [0.79347826],
        [0.7826087 ],
        [0.77173913],
        [0.77173913],
        [0.77173913],
        [0.76086957],
        [0.75      ],
        [0.73913043],
        [0.72826087],
        [0.73913043],
        [0.73913043],
        [0.73913043]]])