# Develop the RNN Regression Model for the Roasting Quality data. The roasting machine had 5 chambers, and there were 3 temperature sensors in each chamber.The layer height and humanity were also measured. The data was collected every one minute and saved in data_X.csv. The roasting quality was measured every one hour and saved in data_Y.csv. Show the loss of each epoch.
Hint: Some of the measurement data was missing. You have to clean the data first.
The loss can be “mse”, “mae”, or “mape” for regression problem.
Reference:
https://www.kaggle.com/code/conormanley/productqualityprediction

In [65]:
import pandas as pd
from pandas import read_csv
import numpy as np
from keras import datasets
from keras.models import Sequential
from keras.layers import Dense, SimpleRNN, LSTM, Dropout
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler
import datetime
import matplotlib.pyplot as plt

In [66]:
data_X = read_csv ("data_X.csv", parse_dates=['date_time'])

In [67]:
data_Y = read_csv ("data_Y.csv", parse_dates=['date_time'])

In [68]:
data_X["date_hour"] = data_X["date_time"].apply(lambda x: x.strftime("%d-%m-%Y-%H"))

In [69]:
L = list(data_X.groupby("date_hour"))
l = len(L)
List = []
for i in range(l):
    dh = L[i][1]["date_hour"].iloc[0]
    row = L[i][1].drop(["date_time", "date_hour"], axis=1).to_numpy().flatten().tolist()
    row.append(dh)
    List.append(row)
    if (i+1)%2000 == 0:
        print("Processing: %.4f%%"%(100*(i+1)/l))

df = pd.DataFrame(List)
df["date_hour"] = df[1020]
df.drop([1020], axis=1, inplace=True)

Processing: 5.7037%
Processing: 11.4074%
Processing: 17.1111%
Processing: 22.8148%
Processing: 28.5185%
Processing: 34.2222%
Processing: 39.9259%
Processing: 45.6295%
Processing: 51.3332%
Processing: 57.0369%
Processing: 62.7406%
Processing: 68.4443%
Processing: 74.1480%
Processing: 79.8517%
Processing: 85.5554%
Processing: 91.2591%
Processing: 96.9628%


In [70]:
data_Y["date_shifted"] = pd.to_datetime(data_Y["date_time"]) - datetime.timedelta(hours=1)
data_Y["date_shifted"] = pd.to_datetime(data_Y["date_shifted"])
data_Y["date_shifted"] = data_Y["date_shifted"].apply(lambda x: x.strftime("%d-%m-%Y-%H"))

In [71]:
transformed_data = pd.merge(df,data_Y[["date_shifted", "quality"]],left_on="date_hour", right_on="date_shifted", how="inner")

In [72]:
df_X = transformed_data.drop(columns=['date_hour', 'date_shifted', 'quality'])
df_Y = transformed_data.quality

In [73]:
df_X = df_X.values.reshape((df_X.shape[0], 60, 17))
df_Y = df_Y.values.reshape((df_Y.shape[0], 1))
df_X = np.asarray(df_X).astype('float32')
df_Y = np.asarray(df_Y).astype('float32')

In [74]:
X_train, X_test, Y_train, Y_test = train_test_split(df_X, df_Y, test_size=0.2, random_state=0)
print(X_train.shape)
print(Y_train.shape)

(23347, 60, 17)
(23347, 1)


### (a) Using RNN with 50 units.

In [80]:
model = Sequential([LSTM(units = 50, activation = 'relu', input_shape = (60,17))])
model.add(Dense(50, activation = 'relu'))
model.add(Dense(1))

model.compile(loss = 'MSE', optimizer = 'adam', metrics = ['mae'])

In [81]:
model.fit(X_train, Y_train, epochs = 40, batch_size = 64)
loss, mape = model.evaluate(X_test, Y_test)
print("loss = ", loss)
print("mape = ", mape)

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40
loss =  153790.109375
mape =  389.4803771972656


### (b) Try LSTM with different units to achieve better performance if possible.

In [82]:
model = Sequential([LSTM(units = 100, activation = 'relu', input_shape = (60,17))])
model.add(Dense(50, activation = 'relu'))
model.add(Dense(1))

model.compile(loss = 'MSE', optimizer = 'adam', metrics = ['mae'])
model.fit(X_train, Y_train, epochs = 40, batch_size = 64)
loss, mape = model.evaluate(X_test, Y_test)
print("loss = ", loss)
print("mape = ", mape)

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40
loss =  3483.1875
mape =  48.56021499633789


In [83]:
model = Sequential([LSTM(units = 150, activation = 'relu', input_shape = (60,17))])
model.add(Dense(50, activation = 'relu'))
model.add(Dense(1))

model.compile(loss = 'MSE', optimizer = 'adam', metrics = ['mae'])
model.fit(X_train, Y_train, epochs = 40, batch_size = 64)
loss, mape = model.evaluate(X_test, Y_test)
print("loss = ", loss)
print("mape = ", mape)

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40
loss =  153012.59375
mape =  388.48101806640625
