# Weather prediction using deep neural networks

First I need to read the CSV file, and the float number which are represented in the following format: 10,2 convert to 10.2 and also change its type from str to float.

In [1]:
import pandas as pd
import re
import csv

weather_data = pd.read_csv("weather_data.csv",sep=';')

features = ['Day','Tmax','Tmin','Tmean'] # the actal day when the measure was, Temperature max, T.min, Average
df = pd.DataFrame(weather_data, columns=features)

for index, row in df.iterrows():
    row[0] = pd.to_datetime(row[0])
    row[0] = row[0].timestamp()
    row[1] = float((row[1]).replace(',','.')) #converting the strings to float numbers
    row[2] = float((row[2]).replace(',','.'))
    row[3] = float((row[3]).replace(',','.'))

print(df)

             Day  Tmax  Tmin  Tmean
0    1.57784e+09     3   0.2    1.6
1    1.57792e+09   3.3  -0.5    1.4
2    1.57801e+09   2.4  -0.7   0.85
3     1.5781e+09     1   1.1   1.05
4    1.57818e+09   3.5  -0.7    1.4
..           ...   ...   ...    ...
293  1.60315e+09  14.8   9.6   12.2
294  1.60324e+09  16.6   8.9  12.75
295  1.60332e+09  15.7  10.3     13
296  1.60341e+09  16.4  10.8   13.6
297   1.6035e+09  17.9  12.1     15

[298 rows x 4 columns]


# Now I have the cleared data I will create an LSTM network
I will use 80% of the data to train, 10% for test and 10% for validation

In [15]:
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Bidirectional, LSTM, Dropout

X = df[[col for col in df.columns if col not in ['Tmean']]]
y = df['Tmean']


X_train, X_other, y_train, y_other = train_test_split(X,y, train_size=0.8, random_state=23)
X_test, X_val, y_test, y_val = train_test_split(X_other, y_other, test_size=0.5, random_state=23)
X_train.shape, X_test.shape, X_val.shape
print(f"Training instances   {X_train.shape}, Training features   {X_train.shape}")
print(f"Validation instances {X_val.shape}, Validation features { X_val.shape}")
print(f"Testing instances    {X_test.shape}, Testing features    {X_test.shape}")


#Convert data to numpy array
X_train = X_train.to_numpy()
y_train = y_train.to_numpy()
X_test = X_test.to_numpy()
y_test = y_test.to_numpy()
X_val= X_val.to_numpy()
y_val = y_val.to_numpy()

#Reshape input data to have 3 dimensions
X_train = X_train.reshape(-1,3,1)
X_test = X_test.reshape(-1,3,1)
X_val= X_val.reshape(-1,3,1)


#Converting data to tensor
X_train_tensor = tf.convert_to_tensor(X_train, dtype=float)
y_train_tensor = tf.convert_to_tensor(y_train,dtype=float)
X_test_tensor = tf.convert_to_tensor(X_test,dtype=float)
y_test_tensor = tf.convert_to_tensor(y_test,dtype=float)
X_val_tensor = tf.convert_to_tensor(X_val,dtype=float)
y_val_tensor = tf.convert_to_tensor(y_val, dtype=float)

EPOCHS = 20
BATCH_SIZE = 1


model = Sequential()
model.add(Bidirectional(LSTM(units=30, return_sequences=True, input_shape = (X_train_tensor.shape[0], X_train_tensor.shape[1],1))))
model.add(Dropout(0.2))
model.add(LSTM(units= 30, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(units= 30, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(units= 30))
model.add(Dropout(0.2))
model.add(Dense(units = 30, activation='relu'))

model.compile(optimizer='adam',loss='mean_squared_error',metrics=['acc'])

model.fit(X_train_tensor,y_train_tensor, epochs=EPOCHS,batch_size=BATCH_SIZE, validation_data=(X_val_tensor,y_val_tensor))


Training instances   (238, 3), Training features   (238, 3)
Validation instances (30, 3), Validation features (30, 3)
Testing instances    (30, 3), Testing features    (30, 3)
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x238bc2d93a0>

# Predicting temperature


In [16]:
predicted_temperature = model.predict(X_test_tensor)
print(f"Predicted temperature for 28.october : {np.mean(predicted_temperature[0],axis=0) }")
print(f"Predicted temperature for 3.november : {np.mean(predicted_temperature[6],axis=0) }")
print(f"Predicted temperature for 24.november : {np.mean(predicted_temperature[29],axis=0)}")

Predicted temperature for 28.october : 12.25357437133789
Predicted temperature for 3.november : 24.650575637817383
Predicted temperature for 24.november : 21.069730758666992
