In [116]:
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
from scipy.stats import pearsonr
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score


In [117]:
def prepare_data(data_path: str, city_name: str, x_len: int, y_len: int, train_size: float) -> list:
    """Preprocess data and return train and test sequences.

    Reads the 'datetime' and ``city_name`` columns, drops rows with missing
    values, averages the readings of every calendar day and cuts the daily
    series into overlapping windows: ``x_len`` consecutive daily means form
    one input row and the ``y_len`` daily means that follow form its target.

    Args:
        data_path (str): Path to data file.
        city_name (str): Name of city for which data will be extracted.
        x_len (int): Length of single X sequence.
        y_len (int): Length of single y sequence.
        train_size (float): Specifies proportion of train data to all data.

    Returns:
        (list): ``[X_train, X_test, y_train, y_test]`` as float32 arrays.
            X arrays have shape (n, x_len), y arrays have shape (n, y_len).
            The split is chronological: the first
            ``int(num_sequences * train_size)`` windows go to the train part.

    Raises:
        ValueError: If the daily series is too short to size the window
            arrays (more than one value short of ``x_len + y_len``).
    """

    # Read data
    df: pd.DataFrame = pd.read_csv(data_path, usecols=['datetime', city_name]).dropna()

    # Calculate mean value for each day
    df['date'] = pd.to_datetime(df['datetime'])
    data: np.ndarray = df.groupby(df['date'].dt.date)[city_name].mean().to_numpy()

    # Prepare X and y sequences
    window_len: int = x_len + y_len
    num_sequences: int = data.size + 1 - window_len
    if num_sequences < 0:
        # Fail with an explicit message instead of NumPy's
        # "negative dimensions are not allowed".
        raise ValueError(
            f"Not enough data for city '{city_name}': {data.size} daily values, "
            f"but x_len + y_len = {window_len}."
        )

    # Row i of `indices` holds the positions i, i+1, ..., i+window_len-1, so a
    # single fancy-indexing lookup builds every window at once.
    indices = np.arange(num_sequences)[:, np.newaxis] + np.arange(window_len)
    windows = data[indices].astype(np.float32)
    X = np.ascontiguousarray(windows[:, :x_len])
    y = np.ascontiguousarray(windows[:, x_len:])

    # Return train and test sequences
    train_len = int(num_sequences * train_size)
    return [X[:train_len], X[train_len:], y[:train_len], y[train_len:]]

In [120]:
# Experiment configuration: source file and the city column to read from it.
data_path, city_name = './data/temperature.csv', 'Vancouver'

# Window lengths passed to prepare_data: 4 daily means as input, the
# following 4 daily means as target.
x_len = y_len = 4

# Proportion of the windows assigned to the train part.
train_size = 0.8

X_train, X_test, y_train, y_test = prepare_data(
    data_path=data_path,
    city_name=city_name,
    x_len=x_len,
    y_len=y_len,
    train_size=train_size,
)

In [None]:
def draw_training_process(history, key1='mse') -> None:
    """Draws a plot of training process.

    Plots the natural logarithm of ``history.history[key1]`` (red dashed
    line) and of ``history.history['val_' + key1]`` (green dashed line)
    against the epoch index, then shows the figure.

    Args:
      history (keras.callbacks.History): object that contains information about the training process.
        Only its ``history`` dict is read; it must hold both ``key1`` and ``'val_' + key1``.
      key1 (str): Specifies the key in 'history.history' to plot on the y-axis, for example "mse".

    Returns:
      None

    Raises:
      KeyError: If ``key1`` or ``'val_' + key1`` is missing from ``history.history``.
    """
    # Look the series up before opening a figure so that a missing key does
    # not leave an empty figure behind.
    train_curve = np.log(history.history[key1])
    val_curve = np.log(history.history['val_' + key1])

    plt.figure(figsize=(12, 6))
    plt.plot(train_curve, "r--")
    plt.plot(val_curve, "g--")
    # The curves are log-transformed, so the axis label has to say so.
    plt.ylabel(f"log({key1})")
    plt.xlabel('Epoch')
    plt.title("Training process")
    plt.legend(['train', 'test'], loc='best')

    plt.show()

def _index_of_agreement(observed, predicted):
    """Return Willmott's index of agreement between observed and predicted values."""
    # Both deviation terms are measured from the mean of the OBSERVED values.
    observed_mean = np.mean(observed)
    squared_error = np.sum((predicted - observed) ** 2)
    potential_error = np.sum(
        (np.abs(predicted - observed_mean) + np.abs(observed - observed_mean)) ** 2
    )
    return 1 - squared_error / potential_error


def metrics_and_plots(history, y_true, y_pred, y_len) -> None:
    """Draws plots of weather predictions and calculates evaluation metrics.

    Shows the training curve, then one true-vs-predicted subplot per forecast
    day, and finally prints RMSE, MSE, MAE, R^2, Pearson correlation and the
    index of agreement for every forecast day.

    Args:
      history (keras.callbacks.History): object that contains information about the training process.
      y_true (np.array): A numpy array of shape (n_samples, n_features) that contains true values of weather variables.
      y_pred (np.array): A numpy array of shape (n_samples, n_features) that contains predicted values of weather variables.
      y_len (int): Specifies the number of days (leading columns of y_true / y_pred) to plot and calculate metrics for.

    Returns:
      None
    """
    # draws a plot of training process
    draw_training_process(history, key1='mse')

    # draws plots of weather predictions
    print("Weather predictions")
    # squeeze=False keeps `axes` two-dimensional, so indexing also works for y_len == 1;
    # 6 inches of height per subplot reproduces the original (8, 24) figure for 4 days.
    _, axes = plt.subplots(y_len, 1, figsize=(8, 6 * y_len), sharex=False, squeeze=False)
    for i, ax in enumerate(axes[:, 0]):
        # Use the y_true argument, not a notebook-level global.
        ax.plot(y_true[:, i], label='True values')
        ax.plot(y_pred[:, i], label='Predicted values')
        ax.set_xlabel("Day in year")
        ax.set_ylabel("Temperature (K)")
        ax.legend()
        ax.set_title(f"Day {i+1}")
    plt.show()

    # calculate metrics
    print(f"Metrics for {y_len} consecutive days")
    for i in range(y_len):
        y_true_day = y_true[:, i]
        y_pred_day = y_pred[:, i]
        mse = mean_squared_error(y_true_day, y_pred_day)
        rmse = np.sqrt(mse)
        mae = mean_absolute_error(y_true_day, y_pred_day)
        r2 = r2_score(y_true_day, y_pred_day)
        pearson, _ = pearsonr(y_true_day, y_pred_day)
        ia = _index_of_agreement(y_true_day, y_pred_day)
        print(f"Day {i+1}: RMSE={rmse:.4f}, MSE={mse:.4f}, MAE={mae:.4f}, R^2={r2:.4f}, Pearson={pearson:.4f}, IA={ia:.4f}")
