# Regressor

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_absolute_error

from keras.models import Sequential
from keras.layers import Dense, SimpleRNN, LSTM
from keras.callbacks import ModelCheckpoint, EarlyStopping

In [None]:
# Read the semicolon-separated, windows-1251 encoded CSV export.
df = pd.read_csv(
    './Kaliningrad_weather_19102020_15102011.csv',
    sep=';',
    encoding='windows-1251',
)
df.head()  # not the cell's last expression, so nothing is displayed

# Keep only fully populated rows, then separate the target column 'T' from
# the features; 'LocalTime' and 'DD' are removed from the features as well.
df = df.dropna()
y = df['T']
X = df.drop(columns=['T', 'LocalTime', 'DD'])

In [None]:
# Split BEFORE scaling: fitting MinMaxScaler on the full matrix would let the
# minima/maxima of validation and test rows leak into the preprocessing.
# 15% of the rows are held out for testing; 20% of the remainder is used for
# validation. random_state is fixed so the partitions are reproducible.
X_all_train, X_test, y_all_train, y_test = train_test_split(
    X, y, test_size=0.15, random_state=1)
X_train, X_val, y_train, y_val = train_test_split(
    X_all_train, y_all_train, test_size=0.2, random_state=1)

# Learn the per-feature range from the training rows only, then apply the
# same transformation everywhere. Hold-out values may therefore fall slightly
# outside [0, 1], which is expected.
minMaxScaler = MinMaxScaler()
X_train = minMaxScaler.fit_transform(X_train)
X_val = minMaxScaler.transform(X_val)
X_test = minMaxScaler.transform(X_test)

# Keep the remaining names as scaled arrays, as they were before this change.
X_all_train = minMaxScaler.transform(X_all_train)
X = minMaxScaler.transform(X)

In [None]:
# Fully connected regressor: one hidden ReLU layer of 100 units followed by
# a single output unit with no activation.
n_features = X_train.shape[1]
regressorModel = Sequential([
    Dense(100, activation='relu', input_dim=n_features),
    Dense(1),
])
regressorModel.summary()

In [None]:
# Optimise MSE with Adam and track MAE alongside it. `metrics` is passed as a
# list (the documented form); the history keys stay 'mae' / 'val_mae'.
regressorModel.compile(optimizer='adam', loss='mse', metrics=['mae'])

In [None]:
# Stop once validation MAE has not improved for 7 consecutive epochs and roll
# the model back to its best epoch. Without restore_best_weights the model
# keeps the weights of the last epoch, and those are what the later test-set
# evaluation would measure.
early_stop = EarlyStopping(monitor='val_mae', patience=7,
                           mode='min', verbose=1,
                           restore_best_weights=True)

# Write a checkpoint whenever validation MAE reaches a new minimum; the epoch
# number and the score are formatted into the file name.
# NOTE(review): newer Keras releases may reject the '.hdf5' suffix - confirm
# against the installed version before upgrading.
checkpoint = ModelCheckpoint('./regressor-{epoch:02d}-{val_mae:.2f}.hdf5',
                             monitor='val_mae', verbose=1,
                             mode='min', save_best_only=True)

callbacks_list = [early_stop, checkpoint]

In [None]:
# Train for at most 100 epochs in mini-batches of 25, scoring the validation
# split after every epoch and running the configured callbacks.
regressorHistory = regressorModel.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    epochs=100,
    batch_size=25,
    callbacks=callbacks_list,
)

In [None]:
# Per-epoch training and validation loss recorded by fit().
history_log = regressorHistory.history
loss_function = history_log['loss']
val_loss_function = history_log['val_loss']
epochs = range(1, 1 + len(loss_function))  # 1-based epoch numbers for the x axis

# Draw both curves in a fixed order so the legend order stays the same.
for curve, colour, caption in (
    (loss_function, 'blue', 'Ошибка обучения (MSE)'),
    (val_loss_function, 'red', 'Ошибка валидации (MSE)'),
):
    plt.plot(epochs, curve, color=colour, label=caption)

plt.title('Потери на обучающей и валидационной выборках')
plt.xlabel('Эпоха')
plt.ylabel('Ошибка модели (MSE)')
plt.legend()
plt.show()

In [None]:
# Per-epoch MAE on the training and validation splits. `epochs` is reused
# from the loss-plot cell.
history_log = regressorHistory.history
mae = history_log['mae']
val_mae = history_log['val_mae']

for curve, colour, caption in (
    (mae, 'blue', 'MAE (обучение)'),
    (val_mae, 'red', 'MAE (валидация)'),
):
    plt.plot(epochs, curve, color=colour, label=caption)

plt.title('MAE на обучающей и валидационной выборках')
plt.xlabel('Эпоха')
plt.ylabel('MAE')
plt.legend()
plt.show()

In [None]:
# Predict on the held-out test features. The bare name on the last line makes
# the notebook display the resulting array.
y_pred = regressorModel.predict(x=X_test)
y_pred

In [None]:
# Report R^2 and MAE on the test split, rounded to three decimals.
test_r2 = r2_score(y_test, y_pred)
print('R2 Score (Test) = ', round(test_r2, 3))

test_mae = mean_absolute_error(y_test, y_pred)
print('MAE (Test) = ', round(test_mae, 3))

# Simple RNN

In [None]:
# Reload the semicolon-separated, windows-1251 encoded CSV export so this
# section starts from the raw file.
df = pd.read_csv(
    './Kaliningrad_weather_19102020_15102011.csv',
    sep=';',
    encoding='windows-1251',
)
df.head()  # not the cell's last expression, so nothing is displayed

# Keep only fully populated rows, then separate the target column 'T' from
# the features; 'LocalTime' and 'DD' are removed from the features as well.
df = df.dropna()
y = df['T']
X = df.drop(columns=['T', 'LocalTime', 'DD'])

In [None]:
# Scale the 'T' column to [0, 1]. The scaler expects a 2-D input, so the
# series is turned into a single column first and flattened back afterwards.
min_max_scaler = MinMaxScaler()

column = df['T'].values.reshape(-1, 1)
data = min_max_scaler.fit_transform(column).flatten()
data

In [None]:
# Number of consecutive series values per input sample; also used below as
# the time-step dimension of the SimpleRNN input shape.
window = 10

def get_XY(data, window):
    """Cut a series into consecutive, non-overlapping windows with targets.

    Sample ``i`` consists of ``data[i*window:(i+1)*window]`` and its target
    is the value that immediately follows it, ``data[(i+1)*window]``.
    Trailing values that cannot form a complete window followed by a target
    are dropped.

    Args:
        data: 1-D array-like of numbers (any sequence ``np.asarray`` accepts).
        window: Number of consecutive values per sample; must be >= 1.

    Returns:
        Tuple ``(X, Y)``. ``X`` has shape ``(rows, window, 1)`` and ``Y`` holds
        the ``rows`` matching targets. Both are independent copies of the
        input, so modifying them does not touch ``data``.

    Raises:
        ValueError: If ``window`` is smaller than 1.
    """
    if window < 1:
        raise ValueError(f"window must be a positive integer, got {window!r}")

    series = np.asarray(data)

    # Targets sit at indices window, 2*window, ... below len(series).
    Y = series[window::window].copy()
    rows_x = len(Y)

    # The first rows_x * window values form the samples; a plain slice avoids
    # building an index array from a Python range.
    X = series[:window * rows_x].reshape(rows_x, window, 1).copy()
    return X, Y

# Build the windowed samples and their targets from the scaled series, then
# print both shapes as a quick sanity check.
X, y = get_XY(data=data, window=window)
print(X.shape, y.shape)

In [None]:
# Split in original order (no shuffling): the first 70% of the samples for
# training, the next 20% for validation and the final 10% for testing.
n_rows = X.shape[0]
a = int(n_rows * 0.7)
b = int(n_rows * 0.9)

X_train, X_val, X_test = np.split(X, [a, b])
y_train, y_val, y_test = np.split(y, [a, b])

In [None]:
# Recurrent regressor: a 10-unit SimpleRNN over windows of shape (window, 1),
# followed by one linear output unit.
model = Sequential([
    SimpleRNN(10, activation='relu', input_shape=(window, 1)),
    Dense(1, activation='linear'),
])

model.summary()

In [None]:
# Optimise MSE with Adam and track MAE alongside it. `metrics` is passed as a
# list (the documented form); the history keys stay 'mae' / 'val_mae'.
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

In [None]:
# Train for 30 epochs in mini-batches of 32, scoring the validation split
# after every epoch.
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    batch_size=32,
    epochs=30,
)

In [None]:
# Per-epoch training and validation loss recorded by fit().
history_log = history.history
loss_function = history_log['loss']
val_loss_function = history_log['val_loss']
epochs = range(1, 1 + len(loss_function))  # 1-based epoch numbers for the x axis

# Draw both curves in a fixed order so the legend order stays the same.
for curve, colour, caption in (
    (loss_function, 'blue', 'Ошибка обучения (MSE)'),
    (val_loss_function, 'red', 'Ошибка валидации (MSE)'),
):
    plt.plot(epochs, curve, color=colour, label=caption)

plt.title('Потери на обучающей и валидационной выборках')
plt.xlabel('Эпоха')
plt.ylabel('Ошибка модели (MSE)')
plt.legend()
plt.show()


In [None]:
# Predict on the test windows, then map predictions and targets back from the
# scaled range to the original units of 'T'. inverse_transform needs a 2-D
# input, hence the single-column reshape of the targets.
y_pred = model.predict(x=X_test)
y_test_column = np.reshape(y_test, (-1, 1))

y_pred_inv = min_max_scaler.inverse_transform(y_pred)
y_test_inv = min_max_scaler.inverse_transform(y_test_column)

In [None]:
# Report R^2 and MAE on the test split in the original (unscaled) units,
# rounded to three decimals.
test_r2 = r2_score(y_test_inv, y_pred_inv)
print('R2 Score (Test) = ', round(test_r2, 3))

test_mae = mean_absolute_error(y_test_inv, y_pred_inv)
print('MAE (Test) = ', round(test_mae, 3))

In [None]:
# Overlay the actual test values (drawn first) and the model's predictions
# against a 1-based sample index.
actual_index = range(1, len(y_test_inv) + 1)
predicted_index = range(1, len(y_pred_inv) + 1)

plt.plot(actual_index, y_test_inv)
plt.plot(predicted_index, y_pred_inv)

# LSTM

In [None]:
# Reload the semicolon-separated, windows-1251 encoded CSV export so this
# section starts from the raw file.
df = pd.read_csv(
    './Kaliningrad_weather_19102020_15102011.csv',
    sep=';',
    encoding='windows-1251',
)
df.head()  # not the cell's last expression, so nothing is displayed

# Keep only fully populated rows, then separate the target column 'T' from
# the features; 'LocalTime' and 'DD' are removed from the features as well.
df = df.dropna()
y = df['T']
X = df.drop(columns=['T', 'LocalTime', 'DD'])

In [None]:
# Re-fit the existing min_max_scaler on the 'T' column and scale it to [0, 1].
# The scaler expects a 2-D input, so the series is turned into a single
# column first and flattened back afterwards.
column = df['T'].values.reshape(-1, 1)
data = min_max_scaler.fit_transform(column).flatten()
data

In [None]:
# Each sample is `window` consecutive values plus the value that follows, so
# a series of length N yields N - window samples.
window = 20
n_samples = data.shape[0] - window

# Fixed-size training and validation partitions; the test partition takes
# whatever samples remain.
n_train_samples = 32000
n_val_samples = 6500
n_test_samples = n_samples - n_train_samples - n_val_samples

# Fail here with a clear message instead of letting a too-short series
# surface later as a negative array dimension or an empty test set.
if n_test_samples <= 0:
    raise ValueError(
        'Not enough data for the configured split: '
        f'{n_samples} samples available, but train + validation already '
        f'take {n_train_samples + n_val_samples}.'
    )
n_test_samples

In [None]:
# Pre-allocate zero-filled containers for each partition: one row of `window`
# inputs per sample ...
X_train = np.zeros(shape=(n_train_samples, window))
X_val = np.zeros(shape=(n_val_samples, window))
X_test = np.zeros(shape=(n_test_samples, window))

# ... and one scalar target per sample.
y_train = np.zeros(shape=n_train_samples)
y_val = np.zeros(shape=n_val_samples)
y_test = np.zeros(shape=n_test_samples)

In [None]:
# Build every (inputs, target) pair in one vectorised pass instead of nested
# Python loops. Row k of `windows` is data[k : k + window + 1]: its first
# `window` values are the inputs and its last value is the target.
windows = np.lib.stride_tricks.sliding_window_view(data, window + 1)
inputs, targets = windows[:, :window], windows[:, window]

# Contiguous partitions in series order: train, then validation, then test.
train_end = n_train_samples
val_end = train_end + n_val_samples
test_end = val_end + n_test_samples

# .copy() turns the read-only strided views into ordinary writable arrays.
X_train = inputs[:train_end].copy()
y_train = targets[:train_end].copy()

X_val = inputs[train_end:val_end].copy()
y_val = targets[train_end:val_end].copy()

X_test = inputs[val_end:test_end].copy()
y_test = targets[val_end:test_end].copy()

In [None]:
# Append a trailing feature axis of size 1 so each partition has the shape
# (samples, window, 1) declared as the LSTM input shape.
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_val = X_val.reshape((X_val.shape[0], X_val.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

In [None]:
# Recurrent regressor: a 10-unit LSTM over windows of shape (window, 1),
# followed by one linear output unit.
model = Sequential([
    LSTM(10, input_shape=(window, 1)),
    Dense(1, activation='linear'),
])

model.summary()

In [None]:
# Optimise MSE with Adam and track MAE alongside it. `metrics` is passed as a
# list (the documented form); the history keys stay 'mae' / 'val_mae'.
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

In [None]:
# Train for 30 epochs in mini-batches of 32, scoring the validation split
# after every epoch.
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    batch_size=32,
    epochs=30,
)

In [None]:
# Predict on the test windows, then map predictions and targets back from the
# scaled range to the original units of 'T'. inverse_transform needs a 2-D
# input, hence the single-column reshape of the targets.
y_pred = model.predict(x=X_test)
y_test_column = np.reshape(y_test, (-1, 1))

y_pred_inv = min_max_scaler.inverse_transform(y_pred)
y_test_inv = min_max_scaler.inverse_transform(y_test_column)

In [None]:
# Per-epoch MAE on the training and validation splits recorded by fit().
history_log = history.history
mae = history_log['mae']
val_mae = history_log['val_mae']
epochs = range(1, 1 + len(mae))  # 1-based epoch numbers for the x axis

# Draw both curves in a fixed order so the legend order stays the same.
for curve, colour, caption in (
    (mae, 'blue', 'Ошибка обучения (MAE)'),
    (val_mae, 'red', 'Ошибка валидации (MAE)'),
):
    plt.plot(epochs, curve, color=colour, label=caption)

plt.title('Потери на обучающей и валидационной выборках')
plt.xlabel('Эпоха')
plt.ylabel('Ошибка модели (MAE)')
plt.legend()
plt.show()

In [None]:
# Predict on the test windows and undo the min-max scaling so the metrics are
# expressed in the original units of 'T'.
y_pred = model.predict(x=X_test)
y_test_column = np.reshape(y_test, (-1, 1))

y_pred_inv = min_max_scaler.inverse_transform(y_pred)
y_test_inv = min_max_scaler.inverse_transform(y_test_column)

# Report R^2 and MAE on the test split, rounded to three decimals.
test_r2 = r2_score(y_test_inv, y_pred_inv)
print('R2 Score (Test) = ', round(test_r2, 3))

test_mae = mean_absolute_error(y_test_inv, y_pred_inv)
print('MAE (Test) = ', round(test_mae, 3))

In [None]:
# Overlay the actual test values (drawn first) and the model's predictions
# against a 1-based sample index.
actual_index = range(1, len(y_test_inv) + 1)
predicted_index = range(1, len(y_pred_inv) + 1)

plt.plot(actual_index, y_test_inv)
plt.plot(predicted_index, y_pred_inv)