In [1]:
# The Imports
import os
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt

In [2]:
# Load the raw sunspot table from the CSV file in the working directory
Data = pd.read_csv(filepath_or_buffer="Sunspots.csv")

In [3]:
# Preview the first five rows of the loaded table
Data.head(n=5)

Unnamed: 0.1,Unnamed: 0,Date,Monthly Mean Total Sunspot Number
0,0,1749-01-31,96.7
1,1,1749-02-28,104.3
2,2,1749-03-31,116.7
3,3,1749-04-30,92.8
4,4,1749-05-31,141.7


In [4]:
# Count missing values in each column
Data.isna().sum(axis=0)

Unnamed: 0                           0
Date                                 0
Monthly Mean Total Sunspot Number    0
dtype: int64

In [5]:
# Pull the monthly sunspot counts out of the table as a NumPy array and build
# a matching integer time axis with one step per observation.
Sunspots = Data['Monthly Mean Total Sunspot Number']
series = np.array(Sunspots)
# np.arange needs a scalar stop value; passing the array itself raised
# "ValueError: The truth value of an array ... is ambiguous".
time = np.arange(len(series))

In [None]:
# The function to plot series data
def plotter(time, series, format="-", start=0, end=None):
    """Draw ``series`` against ``time`` on the current matplotlib axes.

    Args:
        time: Sequence of x-values; sliced with ``[start:end]``.
        series: Sequence of y-values; sliced with ``[start:end]``.
        format: Matplotlib format string forwarded to the plot call.
        start: First index (inclusive) of the slice to draw.
        end: Last index (exclusive) of the slice to draw; ``None`` means
            "through the end".

    Returns:
        None. The line, axis labels and grid are added to the current axes.
    """
    span = slice(start, end)
    axes = plt.gca()
    axes.plot(time[span], series[span], format)
    axes.set_xlabel("Time (Months)")
    axes.set_ylabel("Sunspots")
    axes.grid(True)

In [None]:
# First look: draw the complete series on a wide figure
plt.figure(figsize=(20, 6))
plotter(time, series, format="-", start=0, end=None)

In [None]:
# Hyperparameters for the windowed datasets:
#   window_size         - number of input time steps in each window
#   batch_size          - number of windows per batch
#   shuffle_buffer_size - buffer size handed to Dataset.shuffle
window_size, batch_size, shuffle_buffer_size = 100, 256, 1000

In [None]:
# Windowing the dataset
def train_data_pipeline(series, window_size, batch_size, shuffle_buffer_size):
    """Build a shuffled, batched tf.data pipeline of (input, target) windows.

    The series is cut into overlapping runs of ``window_size + 1`` consecutive
    values (stride 1, incomplete trailing runs dropped). Each run is split
    into an input made of its first ``window_size`` values and a target made
    of the same run shifted one step forward, so the target at every position
    is the next value of the series.

    Args:
        series: 1-D sequence of values; a trailing feature axis is added.
        window_size: Number of time steps in each input sequence.
        batch_size: Number of (input, target) pairs per training batch.
        shuffle_buffer_size: Buffer size used when shuffling the windows.

    Returns:
        A ``tf.data.Dataset`` yielding ``(inputs, targets)`` batches.
    """
    series = tf.expand_dims(series, axis=-1)
    data = tf.data.Dataset.from_tensor_slices(series)
    data = data.window(window_size + 1, shift=1, drop_remainder=True)
    # Flatten each window into a single tensor using the window's own length.
    # Using batch_size here only worked while batch_size >= window_size + 1;
    # a smaller batch_size would split every window into several fragments.
    data = data.flat_map(lambda w: w.batch(window_size + 1))
    data = data.shuffle(shuffle_buffer_size)
    data = data.map(lambda w: (w[:-1], w[1:]))
    return data.batch(batch_size).prefetch(1)
    
# Materialise the training pipeline from the notebook-level hyperparameters
train = train_data_pipeline(
    series=series,
    window_size=window_size,
    batch_size=batch_size,
    shuffle_buffer_size=shuffle_buffer_size,
)

In [None]:
# Reset Keras' global state before building the network
tf.keras.backend.clear_session()

# Architecture: causal 1-D convolution -> three stacked sequence-returning
# LSTMs -> dense head producing one value per time step, which the final
# Lambda layer multiplies by 400.
model = tf.keras.models.Sequential([
    tf.keras.layers.Conv1D(
        filters=64,
        kernel_size=5,
        strides=1,
        padding='causal',
        activation='sigmoid',
        input_shape=[None, 1],
    ),
    tf.keras.layers.LSTM(64, return_sequences=True),
    tf.keras.layers.LSTM(64, return_sequences=True),
    tf.keras.layers.LSTM(64, return_sequences=True),
    tf.keras.layers.Dense(32, activation="sigmoid"),
    tf.keras.layers.Dense(16, activation="sigmoid"),
    tf.keras.layers.Dense(1),
    tf.keras.layers.Lambda(lambda x: x * 400),
])

model.summary()

In [None]:
# Configure training (Huber loss, Adam optimizer, MAE reported as a metric),
# then fit silently for 500 epochs and keep the per-epoch history
model.compile(
    optimizer='adam',
    loss=tf.keras.losses.Huber(),
    metrics=["mae"],
)
history = model.fit(x=train, epochs=500, verbose=0)

In [None]:
# Plot each recorded training curve in its own figure, skipping the first
# 50 epochs of the history
for history_key, figure_title, y_label in (
    ('loss', 'Model loss', 'Loss'),
    ('mae', 'Model Mae', 'Mae'),
):
    plt.plot(history.history[history_key][50:])
    plt.title(figure_title)
    plt.ylabel(y_label)
    plt.xlabel('Epoch')
    plt.show()

In [None]:
# Okay. That looks good.

# Windowing the data for testing
def test_data_pipeline(series):
    """Build an ordered, batched tf.data pipeline of inference windows.

    Uses the notebook-level ``window_size`` and ``batch_size``. The series is
    cut into overlapping runs of ``window_size`` consecutive values (stride 1,
    incomplete trailing runs dropped) and the runs are batched in order,
    without shuffling.

    Args:
        series: 1-D sequence of values; a trailing feature axis is added.

    Returns:
        A ``tf.data.Dataset`` yielding batches of input windows.
    """
    with_feature_axis = tf.expand_dims(series, axis=-1)
    return (
        tf.data.Dataset.from_tensor_slices(with_feature_axis)
        .window(window_size, shift=1, drop_remainder=True)
        .flat_map(lambda win: win.batch(window_size))
        .batch(batch_size)
        .prefetch(1)
    )

In [None]:
# Run the trained model over every window of the same series and keep only
# the output at the final time step of each window
test = test_data_pipeline(series)
forecast = model.predict(test)[:, -1, 0]

In [None]:
# Forecasting sunspots using the trained model
# Test window i covers series[i : i + window_size], and the model is trained
# to output the next value at every step, so the last-step output of window i
# is the prediction for series[i + window_size]. The ground truth therefore
# starts at index window_size; starting at window_size + 1 shifted the
# forecast one month late relative to the observations.
time_valid = time[window_size:]
series_ori = series[window_size:]
# The final window has no following observation to compare against. Trimming
# to the ground-truth length drops it, and (unlike forecast[:-1]) does not
# remove another element each time this cell is re-run.
forecast = np.reshape(forecast, (-1))[:len(series_ori)]
plt.figure(figsize=(20, 6))
plotter(time_valid, series_ori, start=2000, end=2500)
plotter(time_valid, forecast, start=2000, end=2500)