# Weather data Prediction using TF2.0

### Importing libraries.

In [1]:
import os
import datetime

In [None]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns

In [None]:
import tensorflow as tf

In [None]:
# Display the installed TensorFlow version as this cell's output.
tf.__version__

In [None]:
mpl.rcParams['figure.figsize'] = (8, 6)
mpl.rcParams['axes.grid'] = False
%matplotlib inline

### Importing data.

The data comes from [source](https://storage.googleapis.com/tensorflow/tf-keras-datasets/jena_climate_2009_2016.csv.zip). It can be downloaded and placed in the project manually, or it can be fetched directly using the following commands:


In [None]:
# Directory (relative to the notebook) that holds the dataset CSV; it is
# joined with the file name when the data is read.
DATA_PATH = '../Data'

In [None]:
# Build the full path to the CSV first, then load it into a DataFrame.
csv_path = os.path.join(DATA_PATH, 'jena_climate_2009_2016.csv')
df = pd.read_csv(csv_path)

# Preview the first rows.
df.head()

### Basic EDA with preprocessing and making the data ready for input to model


In [None]:
# Sub-sample the 10-minute records to hourly ones: slice [start:stop:step],
# i.e. starting at index 5 keep every 6th row.
# Take an explicit copy so `df` is an independent frame rather than a strided
# slice of the raw data; later cells assign into it (cleaning, new columns),
# and the copy also lets the full-resolution frame be released.
df = df[5::6].copy()

# Store the date-time values in a separate variable for later processing.
date_time = pd.to_datetime(df.pop('Date Time'), format='%d.%m.%Y %H:%M:%S')

df.head()


In [None]:
# Columns to visualise over the full time range.
plot_cols = ['T (degC)', 'p (mbar)', 'rho (g/m**3)']

# Select those columns and index them by timestamp so the x-axis shows dates.
plot_features = df.loc[:, plot_cols]
plot_features.index = date_time

# One subplot per column; bind the returned axes to `_` to suppress the repr.
_ = plot_features.plot(subplots=True)

In [None]:
# Same plot restricted to the first 480 rows, to zoom in on the start of the data.
plot_features = df[plot_cols].iloc[:480]
plot_features.index = date_time.iloc[:480]

_ = plot_features.plot(subplots=True)

Data cleaning

In [None]:
# Summary statistics, transposed so that each feature is one row.
df.describe().T

In [None]:
# Wind velocity over time; makes out-of-range values easy to spot.
plt.scatter(x=date_time, y=df['wv (m/s)'])

In [None]:
# Replace the garbage -9999.0 readings of wv and max. wv with 0.
# The assignment goes through df.loc on the frame itself: writing into a
# Series taken from df (e.g. `wv[mask] = 0.0`) is chained assignment, which
# raises SettingWithCopyWarning and does not update df at all when pandas
# Copy-on-Write is active.
bad_wv = df['wv (m/s)'] == -9999.0
df.loc[bad_wv, 'wv (m/s)'] = 0.0

bad_max_wv = df['max. wv (m/s)'] == -9999.0
df.loc[bad_max_wv, 'max. wv (m/s)'] = 0.0

# Keep the names the original cell exposed, now bound to the cleaned columns.
wv = df['wv (m/s)']
max_wv = df['max. wv (m/s)']

# Sanity check: the minimum should no longer be -9999.0.
df['wv (m/s)'].min()

EDA on the data suggested some useful derived features. The next cell adds those features to the data.

In [None]:
# Remove the wind speed columns from the frame; they are replaced by vector components below.
wv = df.pop('wv (m/s)')
max_wv = df.pop('max. wv (m/s)')

# Wind direction, converted from degrees to radians.
wd_rad = df.pop('wd (deg)') * np.pi / 180

# Direction cosine/sine, computed once and shared by both velocity columns.
wind_cos, wind_sin = np.cos(wd_rad), np.sin(wd_rad)

# x and y components of the wind velocity.
df['Wx'] = wv * wind_cos
df['Wy'] = wv * wind_sin

# x and y components of the maximum wind velocity.
df['max Wx'] = max_wv * wind_cos
df['max Wy'] = max_wv * wind_sin

Converting date time into multiple features

In [None]:
# Seconds since the Unix epoch for every record.
# pd.Timestamp.timestamp treats the naive timestamps as UTC, so the result is
# the same on every machine. datetime.datetime.timestamp would interpret naive
# values in the machine's local time zone, making the features depend on where
# the notebook runs and introducing jumps at local DST changes.
timestamp_sec = date_time.map(pd.Timestamp.timestamp)

# Period lengths in seconds.
day = 24 * 60 * 60
year = (365.2425) * day

# Encode time of day and time of year as sin/cos pairs so the features are periodic.
df['Day sin'] = np.sin(timestamp_sec * (2 * np.pi / day))
df['Day cos'] = np.cos(timestamp_sec * (2 * np.pi / day))
df['Year sin'] = np.sin(timestamp_sec * (2 * np.pi / year))
df['Year cos'] = np.cos(timestamp_sec * (2 * np.pi / year))

Splitting the data into train, validation and test sets in a 70:20:10 ratio

In [None]:
# Map every column name to its positional index.
column_indices = dict(zip(df.columns, range(len(df.columns))))

n = len(df)

# Row positions where the 70:20:10 split boundaries fall.
train_end, val_end = int(n * 0.7), int(n * 0.9)

# Contiguous (unshuffled) slices: first 70% train, next 20% validation, last 10% test.
train_df = df[:train_end]
val_df = df[train_end:val_end]
test_df = df[val_end:]

# Number of features (columns) in the dataset.
num_features = len(df.columns)

Normalizing data

In [None]:
# Per-column statistics computed on the training split only.
train_mean, train_std = train_df.mean(), train_df.std()

# Standardise all three splits with the training statistics.
train_df, val_df, test_df = (
    (split - train_mean) / train_std
    for split in (train_df, val_df, test_df)
)

##### Creating data window

In [None]:
class WindowGenerator():
    """Describes a sliding window over the data: which positions are inputs and which are labels.

    A window spans ``input_width + shift`` consecutive positions. The first
    ``input_width`` positions are the inputs; the last ``label_width``
    positions are the labels.
    """

    def __init__(self, input_width, label_width, shift, train_df=train_df, val_df=val_df, test_df=test_df, label_columns=None):
        """Store the data splits and derive the window index layout.

        Args:
            input_width: Number of positions at the start of the window used as inputs.
            label_width: Number of positions at the end of the window used as labels.
            shift: Number of positions added after the inputs to form the total window.
            train_df, val_df, test_df: Data splits to keep on the instance. The
                defaults are the notebook-level frames as they exist when the
                class is defined.
            label_columns: Optional iterable of label column names. When given,
                ``label_column_indices`` maps each name to its position in it.
        """
        # Data splits.
        self.train_df, self.val_df, self.test_df = train_df, val_df, test_df

        # Label columns and name -> position lookups.
        self.label_columns = label_columns
        if label_columns is not None:
            self.label_column_indices = {col: pos for pos, col in enumerate(label_columns)}
        self.column_indices = {col: pos for pos, col in enumerate(train_df.columns)}

        # Window parameters.
        self.input_width = input_width
        self.label_width = label_width
        self.shift = shift
        self.total_window_size = input_width + shift

        # Inputs occupy the first `input_width` positions of the window.
        self.input_slice = slice(0, input_width)
        self.input_indices = np.arange(self.total_window_size)[self.input_slice]

        # Labels occupy the last `label_width` positions of the window.
        self.label_start = self.total_window_size - self.label_width
        self.labels_slice = slice(self.label_start, None)
        self.label_indices = np.arange(self.total_window_size)[self.labels_slice]

    def __repr__(self):
        """Return a multi-line summary of the window size, indices and label columns."""
        summary_lines = (
            f'Total window size: {self.total_window_size}',
            f'Input indices: {self.input_indices}',
            f'Label indices: {self.label_indices}',
            f'Label column name(s): {self.label_columns}',
        )
        return '\n'.join(summary_lines)

In [None]:
# Quick check of the class: 6 input steps, 1 label step, shifted by 1,
# with temperature as the only label column.
w1 = WindowGenerator(
    input_width=6,
    label_width=1,
    shift=1,
    label_columns=['T (degC)'],
)
print(f'First Window: \n{w1}')