In [1]:
import warnings
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf

In [2]:
# Check for GPU
import tensorflow as tf
try:
    from google.colab import drive
    IN_COLAB=True
except:
    IN_COLAB=False

if IN_COLAB:
    print("We're running Colab")
else:
    print(tf.config.list_physical_devices())
    print('\nCUDA GPU: ' + str(tf.test.is_gpu_available(cuda_only=True)))

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'), PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.

CUDA GPU: True


# Data Preprocessing

In [3]:
import os
os.chdir('..')

df = pd.read_csv('./daily01-ithaca/daily01-NY_Ithaca_13_E.csv', header = 0, index_col = 0)

In [4]:
df

Unnamed: 0,WBANNO,LST_DATE,CRX_VN,LONGITUDE,LATITUDE,T_DAILY_MAX,T_DAILY_MIN,T_DAILY_MEAN,T_DAILY_AVG,P_DAILY_CALC,...,SOIL_MOISTURE_5_DAILY,SOIL_MOISTURE_10_DAILY,SOIL_MOISTURE_20_DAILY,SOIL_MOISTURE_50_DAILY,SOIL_MOISTURE_100_DAILY,SOIL_TEMP_5_DAILY,SOIL_TEMP_10_DAILY,SOIL_TEMP_20_DAILY,SOIL_TEMP_50_DAILY,SOIL_TEMP_100_DAILY
0,64758,20041027,1.201,-76.25,42.44,,,,,,...,,,,,,,,,,
1,64758,20041028,1.201,-76.25,42.44,12.7,-0.3,6.2,5.0,0.0,...,,,,,,,,,,
2,64758,20041029,1.201,-76.25,42.44,16.3,2.5,9.4,9.7,0.0,...,,,,,,,,,,
3,64758,20041030,1.201,-76.25,42.44,17.5,10.5,14.0,14.5,1.8,...,,,,,,,,,,
4,64758,20041031,1.201,-76.25,42.44,17.0,9.1,13.1,12.6,0.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
304,64758,20231101,2.622,-76.25,42.44,3.5,-2.2,0.6,0.2,3.0,...,0.352,0.328,0.329,0.364,0.021,7.0,7.7,8.7,10.0,11.6
305,64758,20231102,2.622,-76.25,42.44,6.1,-3.3,1.4,1.0,0.0,...,0.352,0.323,0.325,0.357,0.021,6.4,6.9,7.8,9.3,11.3
306,64758,20231103,2.622,-76.25,42.44,12.9,1.4,7.1,7.4,0.0,...,0.345,0.319,0.320,0.327,0.019,6.9,7.0,7.7,8.9,10.9
307,64758,20231104,2.622,-76.25,42.44,13.4,5.7,9.6,9.5,0.0,...,0.338,0.313,0.312,0.315,0.024,8.1,7.9,8.3,8.9,10.6


In [5]:
Date = pd.to_datetime(df.LST_DATE, format='%Y%m%d', errors='coerce')
df['Time'] = Date

In [6]:
df.columns

Index(['WBANNO', 'LST_DATE', 'CRX_VN', 'LONGITUDE', 'LATITUDE', 'T_DAILY_MAX',
       'T_DAILY_MIN', 'T_DAILY_MEAN', 'T_DAILY_AVG', 'P_DAILY_CALC',
       'SOLARAD_DAILY', 'SUR_TEMP_DAILY_TYPE', 'SUR_TEMP_DAILY_MAX',
       'SUR_TEMP_DAILY_MIN', 'SUR_TEMP_DAILY_AVG', 'RH_DAILY_MAX',
       'RH_DAILY_MIN', 'RH_DAILY_AVG', 'SOIL_MOISTURE_5_DAILY',
       'SOIL_MOISTURE_10_DAILY', 'SOIL_MOISTURE_20_DAILY',
       'SOIL_MOISTURE_50_DAILY', 'SOIL_MOISTURE_100_DAILY',
       'SOIL_TEMP_5_DAILY', 'SOIL_TEMP_10_DAILY', 'SOIL_TEMP_20_DAILY',
       'SOIL_TEMP_50_DAILY', 'SOIL_TEMP_100_DAILY', 'Time'],
      dtype='object')

In [7]:
data = df[['T_DAILY_MAX',
       'T_DAILY_MIN', 'T_DAILY_MEAN', 'T_DAILY_AVG', 'P_DAILY_CALC',
       'SOLARAD_DAILY', 'SUR_TEMP_DAILY_MAX',
       'SUR_TEMP_DAILY_MIN', 'SUR_TEMP_DAILY_AVG', 'RH_DAILY_MAX',
       'RH_DAILY_MIN', 'RH_DAILY_AVG']]

In [8]:
data.index = df['Time']

In [9]:
# check for N/A
data.min()

T_DAILY_MAX          -17.90
T_DAILY_MIN          -30.90
T_DAILY_MEAN         -24.40
T_DAILY_AVG          -23.20
P_DAILY_CALC           0.00
SOLARAD_DAILY          0.06
SUR_TEMP_DAILY_MAX   -16.50
SUR_TEMP_DAILY_MIN   -36.00
SUR_TEMP_DAILY_AVG   -23.60
RH_DAILY_MAX          51.50
RH_DAILY_MIN          11.90
RH_DAILY_AVG          36.40
dtype: float64

In [10]:
data

Unnamed: 0_level_0,T_DAILY_MAX,T_DAILY_MIN,T_DAILY_MEAN,T_DAILY_AVG,P_DAILY_CALC,SOLARAD_DAILY,SUR_TEMP_DAILY_MAX,SUR_TEMP_DAILY_MIN,SUR_TEMP_DAILY_AVG,RH_DAILY_MAX,RH_DAILY_MIN,RH_DAILY_AVG
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2004-10-27,,,,,,,,,,,,
2004-10-28,12.7,-0.3,6.2,5.0,0.0,12.67,,,6.5,,,
2004-10-29,16.3,2.5,9.4,9.7,0.0,8.99,,,10.2,,,
2004-10-30,17.5,10.5,14.0,14.5,1.8,4.14,,,13.5,,,
2004-10-31,17.0,9.1,13.1,12.6,0.0,3.42,,,10.8,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
2023-11-01,3.5,-2.2,0.6,0.2,3.0,5.25,9.0,-4.6,0.3,89.9,69.2,83.8
2023-11-02,6.1,-3.3,1.4,1.0,0.0,6.21,12.1,-5.2,0.6,90.0,53.4,75.2
2023-11-03,12.9,1.4,7.1,7.4,0.0,10.68,13.5,-1.1,5.7,63.6,26.1,43.0
2023-11-04,13.4,5.7,9.6,9.5,0.0,5.49,16.5,3.5,8.0,78.3,34.7,50.7


In [11]:
data.isna().sum()

T_DAILY_MAX             76
T_DAILY_MIN             76
T_DAILY_MEAN            76
T_DAILY_AVG             77
P_DAILY_CALC            47
SOLARAD_DAILY           56
SUR_TEMP_DAILY_MAX     438
SUR_TEMP_DAILY_MIN     438
SUR_TEMP_DAILY_AVG      56
RH_DAILY_MAX          2405
RH_DAILY_MIN          2405
RH_DAILY_AVG          2405
dtype: int64

In [12]:
# Check data types
data.dtypes

T_DAILY_MAX           float64
T_DAILY_MIN           float64
T_DAILY_MEAN          float64
T_DAILY_AVG           float64
P_DAILY_CALC          float64
SOLARAD_DAILY         float64
SUR_TEMP_DAILY_MAX    float64
SUR_TEMP_DAILY_MIN    float64
SUR_TEMP_DAILY_AVG    float64
RH_DAILY_MAX          float64
RH_DAILY_MIN          float64
RH_DAILY_AVG          float64
dtype: object

In [13]:
data.shape

(6949, 12)

In [14]:
# forward fill the missing values  
data.ffill(axis = 0, inplace = True) 

In [15]:
data

Unnamed: 0_level_0,T_DAILY_MAX,T_DAILY_MIN,T_DAILY_MEAN,T_DAILY_AVG,P_DAILY_CALC,SOLARAD_DAILY,SUR_TEMP_DAILY_MAX,SUR_TEMP_DAILY_MIN,SUR_TEMP_DAILY_AVG,RH_DAILY_MAX,RH_DAILY_MIN,RH_DAILY_AVG
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2004-10-27,,,,,,,,,,,,
2004-10-28,12.7,-0.3,6.2,5.0,0.0,12.67,,,6.5,,,
2004-10-29,16.3,2.5,9.4,9.7,0.0,8.99,,,10.2,,,
2004-10-30,17.5,10.5,14.0,14.5,1.8,4.14,,,13.5,,,
2004-10-31,17.0,9.1,13.1,12.6,0.0,3.42,,,10.8,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
2023-11-01,3.5,-2.2,0.6,0.2,3.0,5.25,9.0,-4.6,0.3,89.9,69.2,83.8
2023-11-02,6.1,-3.3,1.4,1.0,0.0,6.21,12.1,-5.2,0.6,90.0,53.4,75.2
2023-11-03,12.9,1.4,7.1,7.4,0.0,10.68,13.5,-1.1,5.7,63.6,26.1,43.0
2023-11-04,13.4,5.7,9.6,9.5,0.0,5.49,16.5,3.5,8.0,78.3,34.7,50.7


In [16]:
# drop NaN at the top
data.dropna(inplace = True)

In [17]:
# set target
data['target'] = data['T_DAILY_AVG']

In [18]:
from sklearn.model_selection import train_test_split
train, test = train_test_split(data, test_size=0.2, shuffle = False)

In [20]:
from sklearn.preprocessing import MinMaxScaler
import pickle

scaler = MinMaxScaler()
scaler.fit(train)
train = scaler.transform(train)
test = scaler.transform(test)

with open('./LSTM/models_daily_v2/daily_v2_scaler.pkl', 'wb') as f:
    pickle.dump(scaler, f)

In [21]:
train.shape

(3672, 13)

In [22]:
test.shape

(918, 13)

In [23]:
# splitting data into sequences
def split_sequences(features, target, seq_len, forecast_len):
    X,y = list(), list()
    for i in range(len(features)):
        end_input = i + seq_len
        end_predict = end_input + forecast_len
        if end_predict > len(features)-1:
            break
        seq_x, seq_y = features[i:end_input,:], target[end_input:end_predict]
        X.append(seq_x)
        y.append(seq_y)
    return tf.convert_to_tensor(X, dtype=tf.float64), tf.convert_to_tensor(y, dtype=tf.float64)

# Define Model

In [24]:
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import LSTM, Dense, RNN, LSTMCell, Input, Bidirectional
from tensorflow.keras.losses import BinaryCrossentropy, MeanSquaredError
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.utils import plot_model

class MyModel(tf.keras.Model):

    def __init__(self, input_shape, output_shape, name = 'LSTM-FC'):
        super().__init__(name = name)
        self.input_layer = Input(shape = input_shape, name = 'input')
        self.lstm1 = LSTM(units=30, activation = 'tanh', input_shape = input_shape, return_sequences=False, name = 'lstm_1')
        self.dense1 = Dense(units=20, activation = 'relu', name = 'dense_1')
        self.dense2 = Dense(units=10, activation = 'relu', name = 'dense_2')
        self.dense3 = Dense(units = output_shape, activation = None, name = 'dense_3')
        #self.dropout = tf.keras.layers.Dropout(0.5)

    def call(self, inputs, training=False):
        x = self.lstm1(inputs)
        x = self.dense1(x)
        x = self.dense2(x)
        x = self.dense3(x)
        #if training:
        #  x = self.dropout(x, training=training)
        return x
    
    def summary(self):
        model = Model(inputs = [self.input_layer], outputs = self.call(self.input_layer), name = self.name)
        return model.summary()

# Model Training
## input length : output length = 16:4

In [25]:
# prepare sequences
seq_len = 16
forecast_len = 4
X_train, y_train = split_sequences(train[:,:-1], train[:,-1], seq_len = seq_len, forecast_len = forecast_len)
X_test, y_test = split_sequences(test[:,:-1], test[:,-1],seq_len = seq_len, forecast_len =  forecast_len)
n_features = X_train.shape[2]

In [26]:
X_train.shape

TensorShape([3652, 16, 12])

In [27]:
y_train.shape

TensorShape([3652, 4])

In [28]:
# create model instance
model_name = 'LSTM-FC_16-4'
model = MyModel(input_shape = (seq_len, n_features), output_shape = (forecast_len), name = model_name)
model.summary()

Model: "LSTM-FC_16-4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input (InputLayer)           [(None, 16, 12)]          0         
_________________________________________________________________
lstm_1 (LSTM)                (None, 30)                5160      
_________________________________________________________________
dense_1 (Dense)              (None, 20)                620       
_________________________________________________________________
dense_2 (Dense)              (None, 10)                210       
_________________________________________________________________
dense_3 (Dense)              (None, 4)                 44        
Total params: 6,034
Trainable params: 6,034
Non-trainable params: 0
_________________________________________________________________


In [29]:
# Fit the model
model.compile(loss=MeanSquaredError(), optimizer=Adam(learning_rate = 0.01), metrics = ['mse', 'acc'])
model.fit(X_train, 
          y_train, 
          batch_size=100,
          epochs=30,
          verbose='auto',
          callbacks=None,
          validation_split=0.1,
          shuffle=True)

# save trained model
model.save('./LSTM/models_daily_v2/' + model_name)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30




INFO:tensorflow:Assets written to: ./LSTM/models_daily_v2/LSTM-FC_16-4\assets


INFO:tensorflow:Assets written to: ./LSTM/models_daily_v2/LSTM-FC_16-4\assets


In [30]:
y_hat_train = model.predict(X_train)
y_hat_test = model.predict(X_test)

In [31]:
from sklearn.metrics import mean_squared_error
print('mean_squared_error')
print('train set:', mean_squared_error(y_train, y_hat_train, sample_weight=None))
print('test set:', mean_squared_error(y_test, y_hat_test, sample_weight=None))

mean_squared_error
train set: 0.006819459963292694
test set: 0.006730547450247808


## input length : output length = 24:6

In [32]:
# reset memory
tf.Graph().as_default() 

# prepare sequences
seq_len = 24
forecast_len = 6
X_train, y_train = split_sequences(train[:,:-1], train[:,-1], seq_len = seq_len, forecast_len = forecast_len)
X_test, y_test = split_sequences(test[:,:-1], test[:,-1],seq_len = seq_len, forecast_len =  forecast_len)
n_features = X_train.shape[2]

In [33]:
# create model instance
model_name = 'LSTM-FC_24-6'
model = MyModel(input_shape = (seq_len, n_features), output_shape = (forecast_len), name = model_name)
model.summary()

Model: "LSTM-FC_24-6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input (InputLayer)           [(None, 24, 12)]          0         
_________________________________________________________________
lstm_1 (LSTM)                (None, 30)                5160      
_________________________________________________________________
dense_1 (Dense)              (None, 20)                620       
_________________________________________________________________
dense_2 (Dense)              (None, 10)                210       
_________________________________________________________________
dense_3 (Dense)              (None, 6)                 66        
Total params: 6,056
Trainable params: 6,056
Non-trainable params: 0
_________________________________________________________________


In [34]:
# Fit the model
model.compile(loss=MeanSquaredError(), optimizer=Adam(learning_rate = 0.01), metrics = ['mse', 'acc'])
model.fit(X_train, 
          y_train, 
          batch_size=100,
          epochs=30,
          verbose='auto',
          callbacks=None,
          validation_split=0.1,
          shuffle=True)

# save trained model
model.save('./LSTM/models_daily_v2/' + model_name)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30




INFO:tensorflow:Assets written to: ./LSTM/models_daily_v2/LSTM-FC_24-6\assets


INFO:tensorflow:Assets written to: ./LSTM/models_daily_v2/LSTM-FC_24-6\assets


In [35]:
# evaluate the model
from sklearn.metrics import mean_squared_error

y_hat_train = model.predict(X_train)
y_hat_test = model.predict(X_test)

print('mean_squared_error')
print('train set:', mean_squared_error(y_train, y_hat_train, sample_weight=None))
print('test set:', mean_squared_error(y_test, y_hat_test, sample_weight=None))

mean_squared_error
train set: 0.00755312900193088
test set: 0.0073587745421180165


## input length : output length = 32:8

In [36]:
# reset memory
tf.Graph().as_default() 

# prepare sequences
seq_len = 32
forecast_len = 8
X_train, y_train = split_sequences(train[:,:-1], train[:,-1], seq_len = seq_len, forecast_len = forecast_len)
X_test, y_test = split_sequences(test[:,:-1], test[:,-1],seq_len = seq_len, forecast_len =  forecast_len)
n_features = X_train.shape[2]

In [37]:
# create model instance
model_name = 'LSTM-FC_32-8'
model = MyModel(input_shape = (seq_len, n_features), output_shape = (forecast_len), name = model_name)
model.summary()

Model: "LSTM-FC_32-8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input (InputLayer)           [(None, 32, 12)]          0         
_________________________________________________________________
lstm_1 (LSTM)                (None, 30)                5160      
_________________________________________________________________
dense_1 (Dense)              (None, 20)                620       
_________________________________________________________________
dense_2 (Dense)              (None, 10)                210       
_________________________________________________________________
dense_3 (Dense)              (None, 8)                 88        
Total params: 6,078
Trainable params: 6,078
Non-trainable params: 0
_________________________________________________________________


In [38]:
# Fit the model
model.compile(loss=MeanSquaredError(), optimizer=Adam(learning_rate = 0.01), metrics = ['mse', 'acc'])
model.fit(X_train, 
          y_train, 
          batch_size=100,
          epochs=30,
          verbose='auto',
          callbacks=None,
          validation_split=0.1,
          shuffle=True)

# save trained model
model.save('./LSTM/models_daily_v2/' + model_name)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30




INFO:tensorflow:Assets written to: ./LSTM/models_daily_v2/LSTM-FC_32-8\assets


INFO:tensorflow:Assets written to: ./LSTM/models_daily_v2/LSTM-FC_32-8\assets


In [39]:
# evaluate the model
from sklearn.metrics import mean_squared_error

y_hat_train = model.predict(X_train)
y_hat_test = model.predict(X_test)

print('mean_squared_error')
print('train set:', mean_squared_error(y_train, y_hat_train, sample_weight=None))
print('test set:', mean_squared_error(y_test, y_hat_test, sample_weight=None))

mean_squared_error
train set: 0.008169509256797243
test set: 0.008158481069716755


## input length : output length = 40:10

In [40]:
# reset memory
tf.Graph().as_default() 

# prepare sequences
seq_len = 40
forecast_len = 10
X_train, y_train = split_sequences(train[:,:-1], train[:,-1], seq_len = seq_len, forecast_len = forecast_len)
X_test, y_test = split_sequences(test[:,:-1], test[:,-1],seq_len = seq_len, forecast_len =  forecast_len)
n_features = X_train.shape[2]

In [41]:
# create model instance
model_name = 'LSTM-FC_40-10'
model = MyModel(input_shape = (seq_len, n_features), output_shape = (forecast_len), name = model_name)
model.summary()

Model: "LSTM-FC_40-10"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input (InputLayer)           [(None, 40, 12)]          0         
_________________________________________________________________
lstm_1 (LSTM)                (None, 30)                5160      
_________________________________________________________________
dense_1 (Dense)              (None, 20)                620       
_________________________________________________________________
dense_2 (Dense)              (None, 10)                210       
_________________________________________________________________
dense_3 (Dense)              (None, 10)                110       
Total params: 6,100
Trainable params: 6,100
Non-trainable params: 0
_________________________________________________________________


In [42]:
# Fit the model
model.compile(loss=MeanSquaredError(), optimizer=Adam(learning_rate = 0.01), metrics = ['mse', 'acc'])
model.fit(X_train, 
          y_train, 
          batch_size=100,
          epochs=30,
          verbose='auto',
          callbacks=None,
          validation_split=0.1,
          shuffle=True)

# save trained model
model.save('./LSTM/models_daily_v2/' + model_name)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30




INFO:tensorflow:Assets written to: ./LSTM/models_daily_v2/LSTM-FC_40-10\assets


INFO:tensorflow:Assets written to: ./LSTM/models_daily_v2/LSTM-FC_40-10\assets


In [43]:
# evaluate the model
from sklearn.metrics import mean_squared_error

y_hat_train = model.predict(X_train)
y_hat_test = model.predict(X_test)

print('mean_squared_error')
print('train set:', mean_squared_error(y_train, y_hat_train, sample_weight=None))
print('test set:', mean_squared_error(y_test, y_hat_test, sample_weight=None))

mean_squared_error
train set: 0.00764985931919338
test set: 0.0074693488388376415
