# Imports

In [1]:
from collections import defaultdict

import IPython
import IPython.display
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf

mpl.rcParams['figure.figsize'] = (8, 6)
mpl.rcParams['axes.grid'] = False

# Function definitions

In [2]:
#new

def split_ts_data(data, val_start, test_start):
    year_min = min(data['Year'])
    year_max = max(data['Year'])
    year_range = year_max-year_min
    
    assert (val_start >= year_min) & (test_start >= year_min) & (val_start <= year_max) & (test_start <= year_max), "Parameter out of bounds"
    assert (val_start > year_min) & (test_start > year_min), "Training set is empty."
    assert val_start < test_start, "Validation set is empty."
    assert year_range > 0, "Data contains less than 2 years."
    
    
    train_data = data[(data['Year']<val_start) & (data['Year']<test_start)]
    val_data = data[(data['Year']>=val_start) & (data['Year']<test_start)]
    test_data = data[data['Year']>=test_start]
    
    return train_data, val_data, test_data

In [3]:
def make_dataset(df, input_width, label_width, shift):
    def create_window(tensor):
        #input -> length of time series used for training
        #shift -> how far off prediction is from last input
        #label -> points to predict
        total_window_size = input_width + shift
        label_start = total_window_size - label_width

        input_bounds = slice(0, input_width)
        label_bounds = slice(label_start, None)

        inputs = tensor[:,input_bounds,:]
        labels = tensor[:,label_bounds,:]

        inputs.set_shape([None, input_width, None])
        labels.set_shape([None, label_width, None])

        return inputs, labels
    
    total_window_size = input_width + shift
    
    arr = np.array(df, dtype=np.float32)
    ds = tf.keras.utils.timeseries_dataset_from_array(
      data=arr,
      targets=None,
      sequence_length=total_window_size,
      sequence_stride=1,
      shuffle=False,
      batch_size=32,)
    
    ds = ds.map(create_window)
    
    return ds

In [4]:
def compile_and_fit(model, epochs, input_optimizer='adam', input_loss='mse'):
    model.compile(optimizer=input_optimizer, loss=input_loss)
    model.fit(train_ds, epochs=epochs, validation_data=val_ds)

# Main code

## Edit parameters here, but do not rename variables

## Read, preprocess data

In [5]:
raw_data = pd.read_csv('newSA3.csv')



#Parameters
validation_start = 2002
test_start = 2006
#



train_df, val_df, test_df = split_ts_data(raw_data, validation_start, test_start)

train_df = train_df[train_df.columns.difference(["Unnamed: 0","Year"])]
val_df = val_df[val_df.columns.difference(["Unnamed: 0","Year"])]
test_df = test_df[test_df.columns.difference(["Unnamed: 0","Year"])]

In [6]:
raw_data.head()

Unnamed: 0.1,Unnamed: 0,Year,10101 m0.4,10102 m0.4,10103 m0.4,10104 m0.4,10201 m0.4,10202 m0.4,10301 m0.4,10302 m0.4,...,70203 f85.,70204 f85.,70205 f85.,80101 f85.,80103 f85.,80105 f85.,80106 f85.,80107 f85.,80108 f85.,80109 f85.
0,1,1991,2603,1593,741,1985,5373,4332,1530,2621,...,4,2,5,145,2,178,196,41,91,116
1,2,1992,2605,1625,740,1962,5504,4503,1522,2615,...,6,5,6,164,6,180,201,49,93,134
2,3,1993,2585,1638,736,1920,5577,4620,1496,2580,...,8,6,7,187,9,191,221,58,103,154
3,4,1994,2557,1643,728,1873,5620,4716,1474,2540,...,12,9,8,199,11,198,230,65,107,171
4,5,1995,2508,1631,714,1803,5632,4772,1442,2470,...,16,13,10,221,15,201,244,69,114,190


In [7]:
train_df.head()

Unnamed: 0,10101 f0.4,10101 f10.14,10101 f15.19,10101 f20.24,10101 f25.29,10101 f30.34,10101 f35.39,10101 f40.44,10101 f45.49,10101 f5.9,...,80109 m45.49,80109 m5.9,80109 m50.54,80109 m55.59,80109 m60.64,80109 m65.69,80109 m70.74,80109 m75.79,80109 m80.84,80109 m85.
0,2400,2347,2277,1820,2152,2315,2185,2146,1859,2495,...,1219,1002,1226,920,784,606,328,180,98,50
1,2392,2344,2194,1830,2067,2339,2204,2139,1954,2480,...,1257,1009,1209,931,794,617,376,200,104,55
2,2382,2354,2126,1813,1977,2329,2225,2139,2037,2451,...,1285,993,1154,963,792,634,413,209,105,59
3,2357,2351,2084,1777,1913,2316,2232,2158,2079,2418,...,1284,988,1123,998,778,649,459,212,112,65
4,2318,2357,2055,1734,1869,2272,2256,2165,2111,2403,...,1277,998,1105,1020,819,648,486,247,116,69


## Create tf datasets

In [8]:
#Parameters
input_width = 5 #data used in prediction
label_width = 1 #points to predict
shift = 5 #how many years away is the last point to predict
#



train_ds = make_dataset(train_df, input_width, label_width, shift)
val_ds = make_dataset(val_df, input_width, label_width, shift)
test_ds = make_dataset(test_df, input_width, label_width, shift)

In [9]:
for e in train_ds:
    print(e)

(<tf.Tensor: shape=(2, 5, 11700), dtype=float32, numpy=
array([[[2400., 2347., 2277., ...,  180.,   98.,   50.],
        [2392., 2344., 2194., ...,  200.,  104.,   55.],
        [2382., 2354., 2126., ...,  209.,  105.,   59.],
        [2357., 2351., 2084., ...,  212.,  112.,   65.],
        [2318., 2357., 2055., ...,  247.,  116.,   69.]],

       [[2392., 2344., 2194., ...,  200.,  104.,   55.],
        [2382., 2354., 2126., ...,  209.,  105.,   59.],
        [2357., 2351., 2084., ...,  212.,  112.,   65.],
        [2318., 2357., 2055., ...,  247.,  116.,   69.],
        [2280., 2360., 2060., ...,  277.,  124.,   74.]]], dtype=float32)>, <tf.Tensor: shape=(2, 1, 11700), dtype=float32, numpy=
array([[[2107., 2335., 2029., ...,  439.,  190.,  103.]],

       [[2077., 2259., 2051., ...,  457.,  221.,  119.]]], dtype=float32)>)


## Create and fit model

In [10]:
#Parameters
num_epochs = 20

model = tf.keras.models.Sequential()
model.add(tf.keras.layers.LSTM(50, return_sequences=False))
model.add(tf.keras.layers.Dense(1))
#



compile_and_fit(model, num_epochs, input_optimizer='adam', input_loss='mse')

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [11]:
model.predict()

TypeError: predict() missing 1 required positional argument: 'x'