<a href="https://colab.research.google.com/github/PoonamDevle/weather-prediction/blob/main/predicting_weather.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import tensorflow as tf
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


In [None]:
# Load the hourly export into a DataFrame. skiprows=13 makes pandas ignore the
# first 13 lines of the file (presumably a metadata preamble that precedes the
# column header row -- confirm against the CSV).
df = pd.read_csv('/content/POWER_Point_Hourly_20130101_20221231_022d2604N_084d8536E_LST (1).csv', skiprows= 13)
# Display the loaded frame.
df

In [None]:
# Cast the calendar columns to text so they can be glued into date strings.
for calendar_col in ('YEAR', 'MO', 'DY', 'HR'):
    df[calendar_col] = df[calendar_col].astype(str)

# Day first, then month, then year, separated by slashes.
df['date'] = df['DY'] + '/' + df['MO']
df['DATE'] = df['date'] + '/' + df['YEAR']
# Clock string built from the hour alone; minutes and seconds are fixed at 0.
df['TIME'] = df['HR'] + ':0:0'
df

In [None]:
# Join the date and clock strings with a single space, then parse the result
# (day/month/year hour:minute:second) and use it as the row index.
df['DATETIME'] = df['DATE'].str.cat(df['TIME'], sep=' ')
df.index = pd.to_datetime(df['DATETIME'], format='%d/%m/%Y %H:%M:%S')
df

In [None]:
# Remove the intermediate string columns in place; the parsed timestamp is
# already stored in the index.
df.drop(columns=['date', 'DATE','TIME', 'DATETIME'], inplace = True)

In [None]:
# Display summary statistics of the frame.
df.describe()


In [None]:
# Convert the five measurement columns to integers.
# NOTE(review): astype(int) discards the fractional part of every reading
# (sub-unit precipitation values, for instance, become 0) -- confirm this loss
# of precision is intended before training on these columns.
for measurement in ('T2M', 'T2MDEW', 'PRECTOTCORR', 'WS10M', 'PS'):
    df[measurement] = df[measurement].astype(int)



In [None]:
# Display the rows whose T2M value equals -999, the value that is replaced
# with NaN in the following cell.
df[df['T2M'] == -999]

In [None]:
# Turn every -999 entry into NaN, modifying the frame in place.
df.replace(to_replace=-999.0, value=np.nan, inplace=True)

# Total count of missing cells over all columns.
df.isna().sum().sum()

In [None]:
# Fill the NaN gaps by interpolating down the rows (axis 0).
# Only the numeric columns are interpolated: at this point the frame still
# carries the string-typed YEAR/MO/DY/HR columns, and recent pandas versions
# deprecate (and are documented to eventually reject) interpolating a frame
# that contains object-dtype columns.
numeric_columns = df.select_dtypes(include='number').columns
df[numeric_columns] = df[numeric_columns].interpolate(axis=0)
# Display the rows for one day as a spot check.
df.loc["2022-11-18"]

In [None]:
# Line plot of the T2M column against the datetime index.
df['T2M'].plot()

In [None]:
# Line plot of the T2MDEW column against the datetime index.
df['T2MDEW'].plot()

In [None]:
# Line plot of the WS10M column against the datetime index.
df['WS10M'].plot()

In [None]:
# Line plot of the PRECTOTCORR column against the datetime index.
df['PRECTOTCORR'].plot()

In [None]:
# Line plot of the PS column against the datetime index.
df['PS'].plot()

In [None]:
# Numeric timestamp (as returned by Timestamp.timestamp) for every index
# entry; used below to derive the cyclic time features.
df['Seconds'] = [stamp.timestamp() for stamp in df.index]
df

In [None]:
# Period lengths in seconds.
day = 60 * 60 * 24
year = 365.2425 * day

# Angle of each timestamp within the daily and the yearly cycle.
day_phase = df['Seconds'] * (2 * np.pi / day)
year_phase = df['Seconds'] * (2 * np.pi / year)

# Sine/cosine pairs encode each cycle as a point on the unit circle.
df['Day sin'] = np.sin(day_phase)
df['Day cos'] = np.cos(day_phase)
df['Year sin'] = np.sin(year_phase)
df['Year cos'] = np.cos(year_phase)
df.head()

In [None]:
# The calendar strings and the raw timestamp are no longer needed once the
# sine/cosine features exist; remove them in place.
df.drop(columns=['YEAR', 'MO', 'DY', 'HR', 'Seconds'], inplace=True)
df

In [None]:
def split_data(data, validation_fraction, testing_fraction):
    """Cut ``data`` into consecutive train / validation / test slices.

    The order of ``data`` is preserved: the training slice comes first, the
    validation slice follows it, and the testing slice is the tail.

    Args:
        data: Any sliceable sequence supporting ``len``.
        validation_fraction: Share of ``data`` assigned to validation.
        testing_fraction: Share of ``data`` assigned to testing.

    Returns:
        A ``(train, validation, test)`` tuple of slices of ``data``.
    """
    total = len(data)
    # Boundaries are truncated to whole positions with int().
    train_end = int(total * (1 - validation_fraction - testing_fraction))
    val_end = int(total * (1 - testing_fraction))
    return data[:train_end], data[train_end:val_end], data[val_end:]

In [None]:
# Sliding-window layout (illustrated with a window of 5):
#   X = [[1], [2], [3], [4], [5]]  ->  y = [6]
#   X = [[2], [3], [4], [5], [6]]  ->  y = [7]
#   X = [[3], [4], [5], [6], [7]]  ->  y = [8]

def df_to_X_y(df, window_train=7):
    """Build supervised (window, next-row) pairs from a DataFrame.

    Args:
        df: Frame whose rows are consecutive observations.
        window_train: Number of consecutive rows in each input window.

    Returns:
        ``(X, y)`` as NumPy arrays. ``X[i]`` holds rows ``i`` to
        ``i + window_train - 1`` with all columns; ``y[i]`` holds the first
        five columns of row ``i + window_train``.
    """
    values = df.to_numpy()
    n_windows = len(values) - window_train
    X = [list(values[start:start + window_train]) for start in range(n_windows)]
    y = [
        [values[start + window_train][col] for col in range(5)]
        for start in range(n_windows)
    ]
    return np.array(X), np.array(y)

In [None]:
# Number of consecutive rows fed to the model for each prediction.
WINDOW_SIZE = 7
X1, y1 = df_to_X_y(df, window_train=WINDOW_SIZE)
X1.shape, y1.shape

In [None]:

# Chronological split by fixed positions: the first 70000 windows train the
# model, the next 8000 validate it, and everything after that is the test set.
train_end, val_end = 70000, 78000
X_train1, X_val1, X_test1 = X1[:train_end], X1[train_end:val_end], X1[val_end:]
y_train1, y_val1, y_test1 = y1[:train_end], y1[train_end:val_end], y1[val_end:]
X_train1.shape, y_train1.shape, X_val1.shape, y_val1.shape, X_test1.shape, y_test1.shape


In [None]:
# Mean and standard deviation of each of the first five feature channels,
# measured on the training windows only. The names follow the channel meaning
# used throughout this notebook (0 temp, 1 dew, 2 precip, 3 wind, 4 pressure).
temp_training_mean = X_train1[:, :, 0].mean()
temp_training_std = X_train1[:, :, 0].std()

dew_training_mean = X_train1[:, :, 1].mean()
dew_training_std = X_train1[:, :, 1].std()

precip_training_mean = X_train1[:, :, 2].mean()
precip_training_std = X_train1[:, :, 2].std()

wind_training_mean = X_train1[:, :, 3].mean()
wind_training_std = X_train1[:, :, 3].std()

p_training_mean = X_train1[:, :, 4].mean()
p_training_std = X_train1[:, :, 4].std()


def preprocess(X, stats=None):
    """Standardise the leading feature channels of a window array in place.

    Channel ``k`` (``X[:, :, k]``) is replaced by ``(value - mean_k) / std_k``
    so that every channel is scaled with its own statistics and written back
    to its own position. Channels beyond ``len(stats)`` are left untouched.

    Args:
        X: Array indexed as ``X[sample, step, channel]``. It is modified in
            place, so calling this twice on the same array scales it twice.
        stats: Optional sequence of ``(mean, std)`` pairs, one per channel
            starting at channel 0. When omitted, the module-level training
            statistics are used in the order temperature, dew point,
            precipitation, wind, pressure (channels 0-4).

    Returns:
        The same array object ``X``.
    """
    if stats is None:
        stats = (
            (temp_training_mean, temp_training_std),
            (dew_training_mean, dew_training_std),
            (precip_training_mean, precip_training_std),
            (wind_training_mean, wind_training_std),
            (p_training_mean, p_training_std),
        )
    # Read from and write to the SAME channel index for every statistic.
    for channel, (mean, std) in enumerate(stats):
        X[:, :, channel] = (X[:, :, channel] - mean) / std
    return X

def preprocess_output(y, stats=None):
    """Standardise the target columns of a label array in place.

    Column ``k`` (``y[:, k]``) is replaced by ``(value - mean_k) / std_k`` so
    that every target is scaled with its own statistics and written back to
    its own position.

    Args:
        y: Array indexed as ``y[sample, target]``. It is modified in place,
            so calling this twice on the same array scales it twice.
        stats: Optional sequence of ``(mean, std)`` pairs, one per column
            starting at column 0. When omitted, the module-level training
            statistics are used in the order temperature, dew point,
            precipitation, wind, pressure (columns 0-4).

    Returns:
        The same array object ``y``.
    """
    if stats is None:
        stats = (
            (temp_training_mean, temp_training_std),
            (dew_training_mean, dew_training_std),
            (precip_training_mean, precip_training_std),
            (wind_training_mean, wind_training_std),
            (p_training_mean, p_training_std),
        )
    # Read from and write to the SAME column index for every statistic.
    for column, (mean, std) in enumerate(stats):
        y[:, column] = (y[:, column] - mean) / std
    return y

In [None]:
# Scale the input windows of all three splits. preprocess assigns into the
# array it receives, so the arrays are changed in place and re-running this
# cell would scale them a second time.
preprocess(X_train1)
preprocess(X_val1)
preprocess(X_test1)

# Scale the matching targets the same way (also in place).
preprocess_output(y_train1)
preprocess_output(y_val1)
preprocess_output(y_test1)

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import *
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.losses import MeanSquaredError
from tensorflow.keras.metrics import RootMeanSquaredError
from tensorflow.keras.optimizers import Adam

# Two stacked LSTM layers followed by a small dense head. The input shape is
# (7, 9): 7 time steps per window (matching WINDOW_SIZE) by 9 features per
# step. The final layer emits 5 values, one per target column built by
# df_to_X_y.
model1 = Sequential()
model1.add(InputLayer((7, 9)))
# return_sequences=True so the second LSTM receives the full sequence.
model1.add(LSTM(64, return_sequences=True))
model1.add(LSTM(128))
model1.add(Dense(8, 'relu'))
model1.add(Dense(5, 'linear'))

model1.summary()

# Keep only the best checkpoint seen during training.
# NOTE(review): newer Keras releases reportedly require the checkpoint path to
# end in `.keras` or `.h5`; the later load_model('model1/') call must change
# together with this path -- confirm against the installed version.
cp1 = ModelCheckpoint('model1/', save_best_only=True)
# `metrics` is documented as a list; a bare metric object is rejected by newer
# Keras versions, so it is wrapped explicitly.
model1.compile(
    loss=MeanSquaredError(),
    optimizer=Adam(learning_rate=0.0001),
    metrics=[RootMeanSquaredError()],
)

In [None]:
# Train for 20 epochs on the training windows, evaluating on the validation
# split; the cp1 checkpoint callback is invoked during training.
model1.fit(X_train1, y_train1, validation_data=(X_val1, y_val1), epochs=20, callbacks=[cp1])

In [None]:
from tensorflow.keras.models import load_model
# Rebind model1 to the model stored at 'model1/', the same path that was given
# to the ModelCheckpoint callback.
model1 = load_model('model1/')

In [None]:
#df2 = plot_predictions2(model1, X_train1, y_train1)

In [None]:
def postprocess_temp(arr):
    """Undo the temperature standardisation: ``arr * std + mean``."""
    return arr * temp_training_std + temp_training_mean

def postprocess_p(arr):
    """Undo the pressure standardisation: ``arr * std + mean``."""
    return arr * p_training_std + p_training_mean

def postprocess_dew(arr):
    """Undo the dew-point standardisation: ``arr * std + mean``."""
    return arr * dew_training_std + dew_training_mean

def postprocess_wind(arr):
    """Undo the wind-speed standardisation: ``arr * std + mean``."""
    return arr * wind_training_std + wind_training_mean

def postprocess_precip(arr):
    """Undo the precipitation standardisation: ``arr * std + mean``."""
    return arr * precip_training_std + precip_training_mean

In [None]:
def plot_predictions2(model, X, y, start=0, end=1000):
    """Plot predicted against actual values for the five weather targets.

    The model is run on ``X``; predictions and the targets ``y`` are mapped
    back to their original scale with the ``postprocess_*`` helpers (column 0
    temperature, 1 dew point, 2 precipitation, 3 wind, 4 pressure). One
    figure per variable is shown, predictions in green and actuals in red.

    Args:
        model: Object exposing ``predict(X)`` that returns a 2-D array.
        X: Model inputs.
        y: Standardised targets aligned with ``X``.
        start: First row (inclusive) to plot and return.
        end: Last row (exclusive) to plot and return.

    Returns:
        The ``start:end`` slice of a DataFrame holding a predictions column
        and an actuals column for each variable.
    """
    predictions = model.predict(X)

    temp_preds = postprocess_temp(predictions[:, 0])
    temp_actuals = postprocess_temp(y[:, 0])
    p_preds = postprocess_p(predictions[:, 4])
    p_actuals = postprocess_p(y[:, 4])
    dew_preds = postprocess_dew(predictions[:, 1])
    dew_actuals = postprocess_dew(y[:, 1])
    wind_preds = postprocess_wind(predictions[:, 3])
    wind_actuals = postprocess_wind(y[:, 3])
    precip_preds = postprocess_precip(predictions[:, 2])
    precip_actuals = postprocess_precip(y[:, 2])

    df2 = pd.DataFrame(data={
        'Temperature Predictions': temp_preds,
        'Temperature Actuals': temp_actuals,
        'Pressure Predictions': p_preds,
        'Pressure Actuals': p_actuals,
        'Dew Predictions': dew_preds,
        'Dew Actuals': dew_actuals,
        'Wind Predictions': wind_preds,
        'Wind Actuals': wind_actuals,
        'Precipitation Predictions': precip_preds,
        'Precipitation Actuals': precip_actuals,
    })

    # (predictions column, actuals column, legend position), in display order.
    figure_specs = (
        ('Temperature Predictions', 'Temperature Actuals', 'lower left'),
        ('Dew Predictions', 'Dew Actuals', 'upper left'),
        ('Pressure Predictions', 'Pressure Actuals', 'lower left'),
        ('Wind Predictions', 'Wind Actuals', 'upper left'),
        ('Precipitation Predictions', 'Precipitation Actuals', 'upper left'),
    )
    for predicted_col, actual_col, legend_loc in figure_specs:
        _, axis = plt.subplots()
        axis.plot(df2[predicted_col][start:end], color='green', label=predicted_col)
        axis.plot(df2[actual_col][start:end], color='red', label=actual_col)
        axis.legend(loc=legend_loc)
        plt.show()

    return df2[start:end]

In [None]:
# Plot predictions against actuals on the training split (first 1000 rows by
# default) and display the returned comparison table.
post_processed_df = plot_predictions2(model1, X_train1, y_train1)
post_processed_df


In [None]:
# Same comparison on the test split; this overwrites post_processed_df from
# the previous cell.
post_processed_df = plot_predictions2(model1, X_test1, y_test1)
post_processed_df



In [None]:
# Collect five weather readings from the user and ask the model for a
# prediction.
# NOTE(review): `transform`, `model` and `scaler` are not defined anywhere in
# the visible notebook (the trained network is `model1`; scaling is done by
# `preprocess` / `postprocess_*`), so this cell raises NameError as written --
# confirm and rewire.
# NOTE(review): the input is reshaped to (1, 5), while model1 is declared with
# InputLayer((7, 9)), i.e. 7 steps of 9 features per sample -- a single row of
# five values does not match that shape.
# NOTE(review): the prompt order below (temperature, pressure, dew point,
# wind, precipitation) differs from the channel order assumed elsewhere in the
# notebook (0 temp, 1 dew, 2 precip, 3 wind, 4 pressure) -- verify.
input_data = []
print('Enter the weather parameters of previous days: ')

attr1 = float(input("Enter temperature: "))
attr2 = float(input("Enter surface pressure: "))
attr3 = float(input("Enter dewpoint: "))
attr4 = float(input("Enter wind speed: "))
attr5 = float(input("Enter precipitation: "))

input_data.append(attr1)
input_data.append(attr2)
input_data.append(attr3)
input_data.append(attr4)
input_data.append(attr5)

# Turn the list into a single-row array of five values.
input_data = np.array(input_data)
input_data.shape = (1,5)
print('Input Data', input_data)
input_data = transform(input_data)

# Predict, map the prediction back through the scaler, and display it.
pred1 = model.predict(input_data)
pred2 = scaler.inverse_transform(pred1)
pd.DataFrame(pred2)
