Seguir tutorial [aqui](https://www.tensorflow.org/tutorials/structured_data/time_series?hl=pt-br)

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
DRIVE_PATH = "drive/MyDrive/TCC/"
COLLAB_ENV = True

In [29]:
df = pd.read_csv((DRIVE_PATH if COLLAB_ENV else "") + "data/results/time_data.csv", sep=";")

In [30]:
df["time"] = pd.to_datetime(df["time"])

  df["time"] = pd.to_datetime(df["time"])


In [31]:
n = len(df)
train_df = df[0:int(n*0.7)]
val_df = df[int(n*0.7): int(n*0.9)]
test_df = df[int(n*0.9):]

print(len(train_df), len(val_df), len(test_df))

9072 2592 1296


In [32]:
class WindowGenerator():
  def __init__(self, input_width, label_width, shift,
               train_df=train_df, val_df=val_df, test_df=test_df,
               label_columns=None):
    # Make the raw data available to the data window.
    self.train_df = train_df
    self.val_df = val_df
    self.test_df = test_df

    # Get the column index positions of the label features.
    self.label_columns = label_columns
    if label_columns is not None:
      self.label_columns_indices = {name: i for i, name in
                                    enumerate(label_columns)}
    self.column_indices = {name: i for i, name in
                           enumerate(train_df.columns)}

    # Get the row index positions of the full window, the inputs,
    # and the label(s).
    self.input_width = input_width
    self.label_width = label_width
    self.shift = shift

    self.total_window_size = input_width + shift

    self.input_slice = slice(0, input_width)
    self.input_indices = np.arange(self.total_window_size)[self.input_slice]

    self.label_start = self.total_window_size - self.label_width
    self.labels_slice = slice(self.label_start, None)
    self.label_indices = np.arange(self.total_window_size)[self.labels_slice]

  def __repr__(self):
    return '\n'.join([
        f'Total window size: {self.total_window_size}',
        f'Input indices: {self.input_indices}',
        f'Label indices: {self.label_indices}',
        f'Label column name(s): {self.label_columns}'])

In [33]:
ts_w1 = WindowGenerator(input_width = 6,
                       label_width = 1,
                       shift = 1,
                       label_columns=["INTERVAL_READ"])

print(ts_w1)

Total window size: 7
Input indices: [0 1 2 3 4 5]
Label indices: [6]
Label column name(s): ['INTERVAL_READ']


In [34]:
def split_window(self, features):
  inputs = features[:, self.input_slice, :]
  labels = features[:, self.labels_slice, :]
  if self.label_columns is not None:
    labels = tf.stack(
        [labels[:, :, self.column_indices[name]] for name in self.label_columns],
        axis=-1)

  # Reset the shape of the slices.
  inputs.set_shape([None, self.input_width, None])
  labels.set_shape([None, self.label_width, None])

  return inputs, labels

# Add this function to the WindowGenerator class.
WindowGenerator.split_window = split_window

In [37]:
timestamp_s = df["time"].map(pd.Timestamp.timestamp)

In [38]:
day = 24*60*60
year = (365.2425)*day

df['Day sin'] = np.sin(timestamp_s * (2 * np.pi / day))
df['Day cos'] = np.cos(timestamp_s * (2 * np.pi / day))
df['Year sin'] = np.sin(timestamp_s * (2 * np.pi / year))
df['Year cos'] = np.cos(timestamp_s * (2 * np.pi / year))

In [35]:
# Stack three slices, the length of the total window.
example_window = tf.stack([np.array(train_df[:ts_w1.total_window_size].astype("float32")),
                           np.array(train_df[100:100+ts_w1.total_window_size].astype("float32")),
                           np.array(train_df[200:200+ts_w1.total_window_size].astype("float32"))])

example_inputs, example_labels = ts_w1.split_window(example_window)

print('All shapes are: (batch, time, features)')
print(f'Window shape: {example_window.shape}')
print(f'Inputs shape: {example_inputs.shape}')
print(f'Labels shape: {example_labels.shape}')

TypeError: float() argument must be a string or a real number, not 'Timestamp'

In [10]:
train_df[:ts_w1.total_window_size]

Unnamed: 0,time,812A,833A,812A_tweets,833A_tweets,812A_flood,833A_flood
0,2018-12-31 22:00:00-02:00,0.0,0.0,0.0,0.0,0,0
1,2018-12-31 22:10:00-02:00,0.0,0.0,0.0,0.0,0,0
2,2018-12-31 22:20:00-02:00,0.0,0.0,0.0,0.0,0,0
3,2018-12-31 22:30:00-02:00,0.0,0.0,0.0,0.0,0,0
4,2018-12-31 22:40:00-02:00,0.0,0.0,0.0,0.0,0,0
5,2018-12-31 22:50:00-02:00,0.0,0.0,0.0,0.0,0,0
6,2018-12-31 23:00:00-02:00,0.0,0.0,0.0,0.0,0,0


In [15]:
np.array(train_df[:ts_w1.total_window_size].fillna(0))

array([['2018-12-31 22:00:00-02:00', 0.0, 0.0, 0.0, 0.0, 0, 0],
       ['2018-12-31 22:10:00-02:00', 0.0, 0.0, 0.0, 0.0, 0, 0],
       ['2018-12-31 22:20:00-02:00', 0.0, 0.0, 0.0, 0.0, 0, 0],
       ['2018-12-31 22:30:00-02:00', 0.0, 0.0, 0.0, 0.0, 0, 0],
       ['2018-12-31 22:40:00-02:00', 0.0, 0.0, 0.0, 0.0, 0, 0],
       ['2018-12-31 22:50:00-02:00', 0.0, 0.0, 0.0, 0.0, 0, 0],
       ['2018-12-31 23:00:00-02:00', 0.0, 0.0, 0.0, 0.0, 0, 0]],
      dtype=object)