In [1]:
import os

import numpy as np
import pandas as pd
import tensorflow as tf
# Render floats with two decimal places whenever pandas displays them.
pd.set_option("display.float_format", "{:.2f}".format)

import matplotlib.pyplot as plt
import seaborn as sns

# Location of the Jena climate CSV inside the `.keras/datasets` folder of the
# current user's home directory. Building it from the home directory (instead
# of hard-coding one account's absolute path) lets the notebook run unchanged
# on other machines and operating systems.
csv_path = os.path.join(
    os.path.expanduser("~"), ".keras", "datasets", "jena_climate_2009_2016.csv"
)

In [2]:
# Read the CSV and keep every 6th row, starting at positional row 5.
# The full frame is intentionally not bound to a name of its own.
df = pd.read_csv(csv_path).iloc[5::6]

# Remove the "Date Time" column from the feature frame and parse it into
# datetimes using the day.month.year hour:minute:second layout.
date_time = pd.to_datetime(
    df.pop("Date Time"),
    format="%d.%m.%Y %H:%M:%S",
)

# Take the wind-speed columns out of the frame and replace the -9999.0
# readings (flagged by `bad_wv` / `bad_max_wv`) with 0.0.
#
# The cleanup is done on the popped Series itself. Assigning through a Series
# obtained with df[...] is not guaranteed to update `df` (it does not under
# pandas Copy-on-Write), in which case the -9999.0 values would still be
# present when the columns are popped afterwards.
wv = df.pop("wv (m/s)")
bad_wv = wv == -9999.0
wv = wv.mask(bad_wv, 0.0)

max_wv = df.pop("max. wv (m/s)")
bad_max_wv = max_wv == -9999.0
max_wv = max_wv.mask(bad_max_wv, 0.0)

# Wind direction, converted from degrees to radians.
wd_rad = df.pop("wd (deg)") * np.pi / 180

# Replace speed + direction with x/y components of the wind vector,
# for both the regular and the maximum wind speed.
df["Wx"] = wv * np.cos(wd_rad)
df["Wy"] = wv * np.sin(wd_rad)
df["max Wx"] = max_wv * np.cos(wd_rad)
df["max Wy"] = max_wv * np.sin(wd_rad)

# Timestamps expressed as seconds (via pd.Timestamp.timestamp).
timestamp_s = date_time.map(pd.Timestamp.timestamp)

# Period lengths in seconds.
day = 24*60*60
year = (365.2425)*day

# Encode the position within each period as a sin/cos pair so the features
# are periodic. Columns are added in the order: Day sin, Day cos, Year sin,
# Year cos.
for period_name, period_s in (("Day", day), ("Year", year)):
    phase = timestamp_s * (2 * np.pi / period_s)
    df[f"{period_name} sin"] = np.sin(phase)
    df[f"{period_name} cos"] = np.cos(phase)

# Real-input FFT of the temperature column.
fft = tf.signal.rfft(df["T (degC)"])

# One frequency bin per FFT coefficient, counted in cycles per dataset.
f_per_dataset = np.arange(0, len(fft))

# Number of temperature samples; the name treats each sample as one hour
# (assumes the frame holds hourly rows -- TODO confirm against the loading step).
n_samples_h = len(df["T (degC)"])

# Mean Gregorian year length is 365.2425 days, matching the value used for
# `year`. The digits were previously transposed to 365.2524, which slightly
# mis-scaled the frequency axis.
hours_per_year = 24*365.2425
years_per_dataset = n_samples_h / (hours_per_year)

# Rescale the bins from cycles per dataset to cycles per year.
f_per_year = f_per_dataset / years_per_dataset

# Column name -> positional index lookup.
column_indices = dict(zip(df.columns, range(len(df.columns))))

# Split by row position: first 70% train, next 20% validation, last 10% test.
n = len(df)
train_end = int(n*0.7)
val_end = int(n*0.9)
train_df = df.iloc[:train_end]
val_df = df.iloc[train_end:val_end]
test_df = df.iloc[val_end:]
num_features = len(df.columns)

# Normalisation statistics are taken from the training split only and then
# applied to all three splits.
train_mean = train_df.mean()
train_std = train_df.std()

train_df, val_df, test_df = (
    (part - train_mean) / train_std
    for part in (train_df, val_df, test_df)
)

In [3]:
# Display the normalized training split as the cell's output.
train_df

Unnamed: 0,p (mbar),T (degC),Tpot (K),Tdew (degC),rh (%),VPmax (mbar),VPact (mbar),VPdef (mbar),sh (g/kg),H2OC (mmol/mol),rho (g/m**3),Wx,Wy,max Wx,max Wy,Day sin,Day cos,Year sin,Year cos
5,0.95,-1.98,-2.04,-1.92,1.12,-1.30,-1.48,-0.79,-1.48,-1.48,2.22,0.19,0.22,0.11,0.22,0.37,1.37,-0.06,1.43
11,0.96,-2.08,-2.14,-2.06,1.04,-1.33,-1.53,-0.79,-1.54,-1.54,2.33,0.17,0.22,0.11,0.23,0.71,1.22,-0.06,1.43
17,0.99,-2.07,-2.13,-2.05,1.06,-1.33,-1.53,-0.79,-1.53,-1.53,2.32,0.21,0.28,0.11,0.32,1.00,1.00,-0.06,1.43
23,1.00,-2.10,-2.16,-2.10,1.01,-1.34,-1.55,-0.78,-1.55,-1.55,2.36,0.27,0.20,0.25,0.15,1.22,0.71,-0.06,1.43
29,1.06,-2.17,-2.23,-2.19,0.98,-1.35,-1.58,-0.78,-1.58,-1.59,2.45,0.11,0.35,0.05,0.40,1.37,0.37,-0.06,1.43
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
294353,0.17,0.90,0.89,1.57,0.95,0.78,1.98,-0.49,1.98,1.98,-0.88,-0.17,-1.51,-0.16,-1.36,-0.96,1.04,-0.86,-1.15
294359,0.27,0.88,0.85,1.41,0.62,0.74,1.68,-0.28,1.67,1.67,-0.82,-0.82,-0.64,-0.70,-0.48,-0.65,1.25,-0.86,-1.15
294365,0.22,0.74,0.72,1.43,1.09,0.56,1.72,-0.61,1.71,1.71,-0.72,-0.03,0.65,0.01,0.60,-0.31,1.38,-0.86,-1.15
294371,0.21,0.71,0.69,1.41,1.15,0.52,1.69,-0.65,1.69,1.68,-0.69,0.01,-0.10,-0.10,-0.20,0.06,1.41,-0.86,-1.15


In [43]:
# Retrieval practice

# Window geometry. These are defined before the stack below because it reads
# `total_window_size`; defining them afterwards raises NameError on a fresh
# kernel.
input_width = 6
shift = 1
total_window_size = input_width + shift

# Stack three windows of `total_window_size` rows each, starting at row
# positions 0, 100 and 200 of the training split.
example_window = tf.stack([
    np.array(train_df[:total_window_size]),
    np.array(train_df[100:100+total_window_size]),
    np.array(train_df[200:200+total_window_size]),
])

# Slice definitions along the time axis (axis 1).
input_slice = slice(None, input_width)      # first `input_width` steps
label_width = 1
label_start = total_window_size - label_width
labels_slice = slice(label_start, None)     # last `label_width` steps

# Model inputs: the leading steps of every window, all features.
inputs = example_window[:, input_slice]
# Record the known time-axis length as static shape information.
inputs.set_shape([None, input_width, None])

# Labels: the trailing steps of every window, all features for now.
labels = example_window[:, labels_slice]


# Features to keep as prediction targets.
label_columns = ["T (degC)"]

# Column name -> positional index in the training frame.
column_indices = dict(zip(train_df.columns, range(len(train_df.columns))))

# Pick each label column out of the feature axis, then stack the picks back
# together as the last axis.
selected_label_features = []
for label_name in label_columns:
    selected_label_features.append(labels[:, :, column_indices[label_name]])
labels = tf.stack(selected_label_features, axis=-1)

# Record the known time-axis length as static shape information.
labels.set_shape([None, label_width, None])