In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import multiprocessing

In [None]:
def try_formats(date, formats=('%m/%d/%Y', '%m-%d-%Y')):
    """
    Parses a single date value by trying several explicit formats in order

    Args:
      date (str) - the raw date value to parse
      formats (iterable of str) - strptime-style formats to try, in order;
        defaults to month/day/year with either '/' or '-' separators

    Returns:
      the parsed pandas Timestamp for the first format that matches, or
      pd.NaT when the value is None or matches none of the formats
    """
    # pd.to_datetime(None) hands back None rather than NaT; normalise it so
    # every unparseable value is reported the same way.
    if date is None:
        return pd.NaT

    for fmt in formats:
        try:
            return pd.to_datetime(date, format=fmt)
        except ValueError:
            # Wrong layout for this format - fall through to the next one
            continue
    return pd.NaT


df = pd.read_csv("gingers.csv")
df2 = pd.read_csv("climate.csv")

# Price data: drop undated rows, parse day-first dates, then expand to a
# gap-free daily index and fill the new rows by linear interpolation.
# Chained (no inplace=True) so re-running the cell always starts from the CSV.
df = (
    df.dropna(subset=['Date'])
      .assign(Date=lambda frame: pd.to_datetime(frame['Date'], dayfirst=True))
      .set_index('Date')
      .resample('D').asfreq()
      .interpolate(method='linear')
)

# Climate data: dates come in two month-first layouts, so parse them one by
# one with try_formats and drop whatever could not be parsed.
df2 = (
    df2.assign(Date=df2['Date'].apply(try_formats))
       .dropna(subset=['Date'])  # Drop rows with NaT in 'Date'
       .set_index('Date')
       .resample('D').asfreq()
       .interpolate(method='linear')
)

# Later cells pair the two series by position (same slice bounds, same `time`
# axis), which is only meaningful if row i is the same calendar day in both
# frames. Restrict both to the dates they share.
common_dates = df.index.intersection(df2.index)
if common_dates.empty:
    raise ValueError("gingers.csv and climate.csv have no dates in common")
df = df.loc[common_dates]
df2 = df2.loc[common_dates]

time = np.arange(len(df), dtype="float32")
series = df["Average"]
climate = df2["QV2M"]
print(df.shape, df2.shape)

# Only the last expression of a cell is rendered automatically, so show both
# previews explicitly.
display(df.head(20))
display(df2.head(20))


In [None]:
# Dump both series (values together with their index) as plain text
for column in (series, climate):
    print(column)

In [None]:
# Raw price series against its positional time axis
plt.plot(time, series)
plt.show()

print(len(series), len(climate))

# `time` is sized from `df`, so reusing it for `climate` raises a shape error
# whenever the two frames differ in length. Give climate its own axis; when
# the lengths match this is the same axis as `time`.
plt.plot(np.arange(len(climate), dtype="float32"), climate)
plt.show()

In [None]:
def plot_series(time, series, format="-", start=0, end=None):
    """
    Draws one or more time series on a single 10x6 figure

    Args:
      time - the time steps (x-axis values)
      series - the measurements for each time step; pass a tuple of
        series to overlay several lines on the same axes
      format - line style handed to plt.plot
      start - index of the first time step to plot
      end - index one past the last time step to plot (None = to the end)
    """
    plt.figure(figsize=(10, 6))

    # A lone series is treated as a one-element tuple so a single loop
    # covers both the single-line and the overlay case.
    span = slice(start, end)
    curves = series if type(series) is tuple else (series,)
    for curve in curves:
        plt.plot(time[span], curve[span], format)

    # Axis labels and grid
    plt.xlabel("Time")
    plt.ylabel("Value")
    plt.grid(True)

    # Render the figure
    plt.show()


In [None]:
# One figure per series, price first and climate second
for values in (series, climate):
    plot_series(time, values)

In [None]:
# Define the split time (number of leading time steps used for training)
split_time = 2000

# Both series must extend past the split, otherwise the validation set is
# empty and the forecasting cells fail much later with unrelated errors.
assert split_time < min(len(series), len(climate)), (
    f"split_time={split_time} leaves no validation data "
    f"(series={len(series)}, climate={len(climate)})"
)

# Get the train set (.iloc makes the positional slicing of the
# date-indexed series explicit)
time_train = time[:split_time]
x1_train = series.iloc[:split_time]
x2_train = climate.iloc[:split_time]

print(len(x1_train), len(x2_train))

# Get the validation set
time_valid = time[split_time:]
x1_valid = series.iloc[split_time:]
x2_valid = climate.iloc[split_time:]
x2_ = x2_valid  # original name of this slice, kept so existing references still work

In [None]:
# Windowing / batching hyperparameters shared by the cells below
window_size, batch_size = 20, 2   # time steps per input window, windows per batch
shuffle_buffer_size = 2500        # shuffle buffer length

In [None]:
def create_windowed_dataset(series1, series2, window_size, batch_size, seed=None):
    """
    Builds a shuffled, batched TF Dataset of (window, next value) pairs

    Each feature row is a window of `series1` immediately followed by the
    window of `series2` taken at the same positions, so for 1-D inputs a row
    holds 2 * window_size values. The label is the `series1` value that comes
    right after the window.

    Args:
      series1 (array-like) - the series being predicted
      series2 (array-like) - the companion series, position-aligned with series1
      window_size (int) - number of time steps taken from each series per window
      batch_size (int) - number of windows per batch
      seed (int, optional) - seed for the shuffle; None keeps it unseeded

    Returns:
      dataset (TF Dataset) - batches of (features, labels)

    Raises:
      ValueError - if window_size is not positive, if series1 is too short to
        yield a single window, or if series2 is too short to cover every
        series1 window
    """
    series1 = np.asarray(series1)
    series2 = np.asarray(series2)

    n_windows = len(series1) - window_size
    if window_size <= 0 or n_windows <= 0:
        # Without this the dataset would be empty and shuffle() would be
        # handed a buffer size of 0.
        raise ValueError(
            f"series1 has {len(series1)} points; need more than "
            f"window_size={window_size} (which must be positive)"
        )
    if len(series2) < len(series1) - 1:
        # A short series2 would produce truncated (ragged) windows near the end
        raise ValueError(
            f"series2 has {len(series2)} points but must cover the "
            f"{len(series1) - 1} points windowed from series1"
        )

    # Feature rows: series1 window followed by the matching series2 window
    data = np.array([
        np.concatenate((series1[i:i + window_size], series2[i:i + window_size]))
        for i in range(n_windows)
    ])
    # Labels: the value right after each window, kept as a length-1 vector
    labels = series1[window_size:, np.newaxis]

    dataset = tf.data.Dataset.from_tensor_slices((data, labels))
    # The buffer spans the whole dataset, so every epoch is a full reshuffle
    dataset = dataset.shuffle(len(data), seed=seed).batch(batch_size).prefetch(1)
    return dataset




In [None]:
# Training dataset built from the price and climate training slices
dataset = create_windowed_dataset(
    series1=x1_train,
    series2=x2_train,
    window_size=window_size,
    batch_size=batch_size,
)

In [None]:
# Peek at a single batch to confirm the feature / label shapes
for features, labels in dataset.take(1):
    print(f'shape of feature: {features.shape}')
    print(f'shape of label: {labels.shape}')

In [None]:
def model_forecast(model, series, window_size, batch_size):
    """Runs a model over every full sliding window of a series

    Args:
      model (TF Keras Model) - model that accepts data windows
      series (array of float) - the values of the time series
      window_size (int) - number of time steps in each window
      batch_size (int) - number of windows per prediction batch

    Returns:
      forecast (numpy array) - the model's predictions, one per window
    """
    windows = (
        tf.data.Dataset.from_tensor_slices(series)
        # Slide by one step; windows shorter than window_size are discarded
        .window(window_size, shift=1, drop_remainder=True)
        # Each window is itself a dataset - collapse it into one tensor
        .flat_map(lambda win: win.batch(window_size))
        .batch(batch_size)
        .prefetch(1)
    )

    return model.predict(windows)

In [None]:
# Build the model
model_simplernn = tf.keras.models.Sequential([
  # Add a trailing feature axis: (batch, steps) -> (batch, steps, 1).
  # The time axis is left unspecified because the training windows built by
  # create_windowed_dataset hold 2 * window_size values (a `series` window
  # followed by a `climate` window), not window_size.
  tf.keras.layers.Lambda(lambda x: tf.expand_dims(x, axis=-1),
                      input_shape=[None]),
  # "relu" is the documented activation identifier
  tf.keras.layers.SimpleRNN(25, return_sequences=True, activation="relu"),
  tf.keras.layers.SimpleRNN(15, activation="relu"),
  tf.keras.layers.Dense(1, activation="linear"),
])

# Set the training parameters
model_simplernn.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
    loss=tf.keras.losses.MeanSquaredError(),
    metrics=["mse"],
)

# Train the model
history = model_simplernn.fit(dataset, epochs=10)

In [None]:
# Forecast over the whole series. The final point is dropped so that every
# window has a known next value to compare against.
forecast_series = series[:-1]
print(len(forecast_series))

# The model was trained on rows made of a `series` window followed by the
# `climate` window at the same positions, so prediction inputs must use the
# same layout (series-only windows would not match the training data).
price_values = np.asarray(forecast_series)
climate_values = np.asarray(climate)
n_windows = len(price_values) - window_size + 1
forecast_inputs = np.stack([
    np.concatenate((price_values[i:i + window_size],
                    climate_values[i:i + window_size]))
    for i in range(n_windows)
])
forecast = model_simplernn.predict(forecast_inputs, batch_size=batch_size)

# Drop only the trailing unit axis so a single prediction stays 1-D
results = forecast.squeeze(axis=-1)
print(series[window_size:].shape[0], results.shape[0], time[window_size:].shape[0])
plt.plot(time[window_size:], series[window_size:])
plt.show()
plot_series(time[window_size:], (series[window_size:], results))

In [None]:
# Forecast the validation period only. Start window_size steps before the
# split so the first prediction lands on the first validation point.
forecast_start = split_time - window_size
forecast_series = series[forecast_start:-1]

# Use the training layout: a `series` window followed by the `climate`
# window at the same positions.
price_values = np.asarray(forecast_series)
climate_values = np.asarray(climate)[forecast_start:]
n_windows = len(price_values) - window_size + 1
forecast_inputs = np.stack([
    np.concatenate((price_values[i:i + window_size],
                    climate_values[i:i + window_size]))
    for i in range(n_windows)
])
forecast = model_simplernn.predict(forecast_inputs, batch_size=batch_size)

# Drop single dimensional axis
results = forecast.squeeze(axis=-1)

# Plot the results
plot_series(time_valid, (x1_valid, results))

In [None]:
# Build the model
model_gru = tf.keras.models.Sequential([
  # Add a trailing feature axis: (batch, steps) -> (batch, steps, 1).
  # The time axis is left unspecified because the training windows built by
  # create_windowed_dataset hold 2 * window_size values (a `series` window
  # followed by a `climate` window), not window_size.
  tf.keras.layers.Lambda(lambda x: tf.expand_dims(x, axis=-1),
                      input_shape=[None]),
  # "relu" is the documented activation identifier
  tf.keras.layers.GRU(25, return_sequences=True, activation="relu"),
  tf.keras.layers.GRU(15, activation="relu"),
  tf.keras.layers.Dense(1, activation="linear"),
])

# Set the training parameters
model_gru.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
    loss=tf.keras.losses.MeanSquaredError(),
    metrics=["mse"],
)

# Train the model
history = model_gru.fit(dataset, epochs=10)

In [None]:
# Forecast over the whole series. The final point is dropped so that every
# window has a known next value to compare against.
forecast_series = series[:-1]
print(len(forecast_series))

# The model was trained on rows made of a `series` window followed by the
# `climate` window at the same positions, so prediction inputs must use the
# same layout (series-only windows would not match the training data).
price_values = np.asarray(forecast_series)
climate_values = np.asarray(climate)
n_windows = len(price_values) - window_size + 1
forecast_inputs = np.stack([
    np.concatenate((price_values[i:i + window_size],
                    climate_values[i:i + window_size]))
    for i in range(n_windows)
])
forecast = model_gru.predict(forecast_inputs, batch_size=batch_size)

# Drop only the trailing unit axis so a single prediction stays 1-D
results = forecast.squeeze(axis=-1)
print(series[window_size:].shape[0], results.shape[0], time[window_size:].shape[0])
plt.plot(time[window_size:], series[window_size:])
plt.show()
plot_series(time[window_size:], (series[window_size:], results))

In [None]:
# Forecast the validation period only. Start window_size steps before the
# split so the first prediction lands on the first validation point.
forecast_start = split_time - window_size
forecast_series = series[forecast_start:-1]

# Use the training layout: a `series` window followed by the `climate`
# window at the same positions.
price_values = np.asarray(forecast_series)
climate_values = np.asarray(climate)[forecast_start:]
n_windows = len(price_values) - window_size + 1
forecast_inputs = np.stack([
    np.concatenate((price_values[i:i + window_size],
                    climate_values[i:i + window_size]))
    for i in range(n_windows)
])
forecast = model_gru.predict(forecast_inputs, batch_size=batch_size)

# Drop single dimensional axis
results = forecast.squeeze(axis=-1)

# Plot the results
plot_series(time_valid, (x1_valid, results))

In [None]:
# Build the model
model_lstm = tf.keras.models.Sequential([
  # Add a trailing feature axis: (batch, steps) -> (batch, steps, 1).
  # The time axis is left unspecified because the training windows built by
  # create_windowed_dataset hold 2 * window_size values (a `series` window
  # followed by a `climate` window), not window_size.
  tf.keras.layers.Lambda(lambda x: tf.expand_dims(x, axis=-1),
                      input_shape=[None]),
  # "relu" is the documented activation identifier
  tf.keras.layers.LSTM(25, return_sequences=True, activation="relu"),
  tf.keras.layers.LSTM(15, activation="relu"),
  # Linear regression head, matching the SimpleRNN and GRU models. A ReLU
  # output clamps predictions at zero and stops learning if the unit's
  # pre-activation goes negative.
  tf.keras.layers.Dense(1, activation="linear"),
])

# Set the training parameters
model_lstm.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
    loss=tf.keras.losses.MeanSquaredError(),
    metrics=["mse"],
)

# Train the model
history = model_lstm.fit(dataset, epochs=10)

In [None]:
# Forecast over the whole series. The final point is dropped so that every
# window has a known next value to compare against.
forecast_series = series[:-1]
print(len(forecast_series))

# The model was trained on rows made of a `series` window followed by the
# `climate` window at the same positions, so prediction inputs must use the
# same layout (series-only windows would not match the training data).
price_values = np.asarray(forecast_series)
climate_values = np.asarray(climate)
n_windows = len(price_values) - window_size + 1
forecast_inputs = np.stack([
    np.concatenate((price_values[i:i + window_size],
                    climate_values[i:i + window_size]))
    for i in range(n_windows)
])
forecast = model_lstm.predict(forecast_inputs, batch_size=batch_size)

# Drop only the trailing unit axis so a single prediction stays 1-D
results = forecast.squeeze(axis=-1)
print(series[window_size:].shape[0], results.shape[0], time[window_size:].shape[0])
plt.plot(time[window_size:], series[window_size:])
plt.show()
plot_series(time[window_size:], (series[window_size:], results))

In [None]:
# Forecast the validation period only. Start window_size steps before the
# split so the first prediction lands on the first validation point.
forecast_start = split_time - window_size
forecast_series = series[forecast_start:-1]

# Use the training layout: a `series` window followed by the `climate`
# window at the same positions.
price_values = np.asarray(forecast_series)
climate_values = np.asarray(climate)[forecast_start:]
n_windows = len(price_values) - window_size + 1
forecast_inputs = np.stack([
    np.concatenate((price_values[i:i + window_size],
                    climate_values[i:i + window_size]))
    for i in range(n_windows)
])
forecast = model_lstm.predict(forecast_inputs, batch_size=batch_size)

# Drop single dimensional axis
results = forecast.squeeze(axis=-1)

# Plot the results
plot_series(time_valid, (x1_valid, results))