In [1]:
import warnings
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf

In [4]:
# Load the daily01 NY_Ithaca CSV. The first file row supplies the column names
# (header = 0) and the first file column is used as the row index (index_col = 0).
df = pd.read_csv('./daily01-ithaca/daily01-NY_Ithaca_13_E.csv', header = 0, index_col = 0)

In [5]:
# Preview the raw frame.
df

Unnamed: 0,WBANNO,LST_DATE,CRX_VN,LONGITUDE,LATITUDE,T_DAILY_MAX,T_DAILY_MIN,T_DAILY_MEAN,T_DAILY_AVG,P_DAILY_CALC,...,SOIL_MOISTURE_5_DAILY,SOIL_MOISTURE_10_DAILY,SOIL_MOISTURE_20_DAILY,SOIL_MOISTURE_50_DAILY,SOIL_MOISTURE_100_DAILY,SOIL_TEMP_5_DAILY,SOIL_TEMP_10_DAILY,SOIL_TEMP_20_DAILY,SOIL_TEMP_50_DAILY,SOIL_TEMP_100_DAILY
0,64758,20041027,1.201,-76.25,42.44,,,,,,...,,,,,,,,,,
1,64758,20041028,1.201,-76.25,42.44,12.7,-0.3,6.2,5.0,0.0,...,,,,,,,,,,
2,64758,20041029,1.201,-76.25,42.44,16.3,2.5,9.4,9.7,0.0,...,,,,,,,,,,
3,64758,20041030,1.201,-76.25,42.44,17.5,10.5,14.0,14.5,1.8,...,,,,,,,,,,
4,64758,20041031,1.201,-76.25,42.44,17.0,9.1,13.1,12.6,0.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
304,64758,20231101,2.622,-76.25,42.44,3.5,-2.2,0.6,0.2,3.0,...,0.352,0.328,0.329,0.364,0.021,7.0,7.7,8.7,10.0,11.6
305,64758,20231102,2.622,-76.25,42.44,6.1,-3.3,1.4,1.0,0.0,...,0.352,0.323,0.325,0.357,0.021,6.4,6.9,7.8,9.3,11.3
306,64758,20231103,2.622,-76.25,42.44,12.9,1.4,7.1,7.4,0.0,...,0.345,0.319,0.320,0.327,0.019,6.9,7.0,7.7,8.9,10.9
307,64758,20231104,2.622,-76.25,42.44,13.4,5.7,9.6,9.5,0.0,...,0.338,0.313,0.312,0.315,0.024,8.1,7.9,8.3,8.9,10.6


In [None]:
# LST_DATE stores each day as YYYYMMDD; entries that cannot be parsed become NaT
# (errors='coerce') instead of raising.
date_format = '%Y%m%d'
Date = pd.to_datetime(df['LST_DATE'], format=date_format, errors='coerce')
# Keep the parsed timestamps on the frame so they can serve as the time index later.
df['Time'] = Date

In [None]:
# List the available column names before choosing the model inputs.
df.columns

In [None]:
# Columns used as model inputs: daily temperature, precipitation, solar radiation
# and surface temperature readings.
feature_columns = ['T_DAILY_MAX',
       'T_DAILY_MIN', 'T_DAILY_MEAN', 'T_DAILY_AVG', 'P_DAILY_CALC',
       'SOLARAD_DAILY', 'SUR_TEMP_DAILY_MAX',
       'SUR_TEMP_DAILY_MIN', 'SUR_TEMP_DAILY_AVG']

# Take an explicit copy: `data` later gets a new column and is filled/cleaned,
# and doing that on a selection of `df` is the chained-assignment pattern that
# pandas warns about (SettingWithCopyWarning). With .copy(), `data` is an
# independent frame and `df` is never affected.
data = df[feature_columns].copy()

In [None]:
# Index the feature frame by the parsed observation date (index name: 'Time').
data = data.set_index(df['Time'])

In [None]:
# Column-wise minima: a quick sanity check on value ranges. min() skips NaN by
# default, so missing values are counted separately with isna() below.
data.min()

In [None]:
# Preview the selected feature frame.
data

In [None]:
# Number of missing values in each column.
data.isna().sum()

In [None]:
# Check data types
data.dtypes

In [None]:
# (rows, columns) of the feature frame.
data.shape

In [None]:
# Build the prediction target: for each day, `y` is the following day's T_DAILY_AVG.
import copy
next_day_avg = copy.deepcopy(data['T_DAILY_AVG'].shift(-1))
# Attach the target and discard the final row, whose shifted target is undefined.
data = data.assign(y=next_day_avg).iloc[:-1, :]

In [None]:
# Preview the frame with the new `y` target column.
data

In [None]:
# The row without a next-day target was already removed when `y` was created,
# so no further row is dropped here: slicing off another row would throw away
# a valid sample.
# Forward-fill the remaining gaps down each column. Reassigning (instead of
# inplace=True) keeps the cell free of in-place mutation on a sliced frame.
data = data.ffill(axis = 0)

In [None]:
# Preview the frame after forward filling.
data

In [None]:
# Forward fill cannot populate leading gaps (nothing precedes them), so discard
# every row that still contains a NaN.
data = data.dropna()

In [None]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
scaler.fit(data)
scaled_data = scaler.transform(data)

In [None]:
from sklearn.model_selection import train_test_split
train, test = train_test_split(scaled_data, test_size=0.2, shuffle = False)

In [None]:
# (rows, columns) of the training split.
train.shape

In [None]:
# (rows, columns) of the test split.
test.shape

In [None]:
# splitting data into sequences
def split_sequences(sequences, n_steps):
    """Cut a 2-D array into overlapping windows for sequence models.

    Every window covers ``n_steps`` consecutive rows. The inputs are all
    columns except the last one; the target is the last column taken from
    the final row of the window.

    Args:
        sequences: 2-D array indexable as ``sequences[rows, cols]``, with the
            target stored in the last column.
        n_steps: Number of consecutive rows per window (must be >= 1).

    Returns:
        Tuple ``(X, y)`` of float64 tensors. ``X`` has shape
        ``(n_windows, n_steps, n_columns - 1)`` and ``y`` has shape
        ``(n_windows,)``, where ``n_windows = len(sequences) - n_steps + 1``
        (zero windows when the input is shorter than ``n_steps``).

    Raises:
        ValueError: If ``n_steps`` is smaller than 1.
    """
    if n_steps < 1:
        raise ValueError(f"n_steps must be >= 1, got {n_steps}")

    X, y = [], []
    # The "+ 1" keeps the final window, the one that ends on the last row.
    # The target is read from row `end_ix - 1`, so `end_ix == len(sequences)`
    # is still a valid window end.
    for start_ix in range(len(sequences) - n_steps + 1):
        end_ix = start_ix + n_steps
        X.append(sequences[start_ix:end_ix, :-1])
        y.append(sequences[end_ix - 1, -1])
    return tf.convert_to_tensor(X, dtype=tf.float64), tf.convert_to_tensor(y, dtype=tf.float64)

In [None]:
# Window length: number of consecutive rows fed to the model per sample.
seq_len = 7
train_windows = split_sequences(train, n_steps = seq_len)
test_windows = split_sequences(test, n_steps = seq_len)
# Unpack each (inputs, targets) pair.
X_train, y_train = train_windows
X_test, y_test = test_windows

In [None]:
# Shape of the windowed training inputs.
X_train.shape

In [None]:
# Shape of the matching training targets.
y_train.shape

In [None]:
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import LSTM, Dense, RNN, LSTMCell, Input
from tensorflow.keras.losses import BinaryCrossentropy, MeanSquaredError
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.utils import plot_model

# Two stacked LSTM layers followed by a single-unit regression head.
model = Sequential([
    # The first LSTM must emit the whole sequence so the next LSTM has a
    # time axis to consume.
    LSTM(units=7, activation='relu', input_shape = X_train.shape[1:], return_sequences=True, name = 'lstm_1'),
    # The last LSTM returns only its final state. With return_sequences=True the
    # network would output one value per time step, (batch, steps, 1), which
    # does not line up with the one-target-per-window `y` of shape (batch,).
    LSTM(units=7, activation='relu', name = 'lstm_2', return_sequences=False),
    Dense(units=1, name = 'dense')
], name = 'sequential')


# `metrics` expects a list of metrics; the original passed a bare loss object.
model.compile(loss=MeanSquaredError(), optimizer=Adam(), metrics = ['mse'])

In [None]:
# Print the layer-by-layer summary of the model.
model.summary()

In [None]:
# Train the network; the test windows are passed as validation data so the
# validation loss is reported after every epoch.
model.fit(
    x=X_train,
    y=y_train,
    batch_size=128,
    epochs=100,
    verbose=1,
    validation_data=(X_test, y_test),
)