<a href="https://colab.research.google.com/github/BatyrlanBakytbekov/deep_learning/blob/main/alcoholsales_prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Prediction of monthly alcohol sales with RNN deep neural network
# Importing required libraries
import numpy as np
import pandas as pd
from tensorflow import keras
import matplotlib.pyplot as plt
import plotly
import plotly.express as px
import plotly.graph_objects as go
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras import layers
from sklearn.metrics import mean_squared_error

In [None]:
# Load the monthly sales series. DATE is parsed to datetime64 at load time so
# the time axis is a typed timestamp column rather than plain strings.
df = pd.read_csv('alcohol_sales.csv', parse_dates=['DATE'])
df

Unnamed: 0,DATE,SALES
0,1992-01-01,3459
1,1992-02-01,3458
2,1992-03-01,4002
3,1992-04-01,4564
4,1992-05-01,4221
...,...,...
320,2018-09-01,12396
321,2018-10-01,13914
322,2018-11-01,14174
323,2018-12-01,15504


In [None]:
# Visualization of data

def plot_timeseries(date, temp):
    """Draw the sales series as a single black line chart and display it.

    Parameters
    ----------
    date : sequence
        X-axis values, one per observation; passed straight to Plotly.
    temp : sequence
        Y-axis values plotted against ``date``. The parameter name is kept
        for backward compatibility; the trace itself is labelled 'Sales'.

    Returns
    -------
    None
        The figure is rendered with ``fig.show()``.
    """
    # Grid, zero-line and border styling shared by both axes.
    axis_style = dict(
        showgrid=True, gridwidth=1, gridcolor='lightgrey',
        zeroline=True, zerolinewidth=1, zerolinecolor='lightgrey',
        showline=True, linewidth=1, linecolor='black',
    )

    fig = go.Figure()
    fig.add_trace(go.Scatter(x=date,
                             y=temp,
                             mode='lines',
                             name='Sales',
                             opacity=0.8,
                             line=dict(color='black', width=1)))

    fig.update_xaxes(title='Date', **axis_style)
    fig.update_yaxes(title='Sum of sales', **axis_style)

    # White plot background and figure title in a single layout update.
    fig.update_layout(
        plot_bgcolor='white',
        title=dict(text="Monthly alcohol sales", font=dict(color='black')),
    )

    fig.show()

In [None]:
# Execution of visualization function

# Raw (unscaled) sales against their dates
plot_timeseries(df["DATE"], df["SALES"])

In [None]:
# Normalization

# Scale SALES into [0, 1] as a single-feature column vector.
# NOTE(review): the scaler is fitted on the full series, i.e. before the
# train/test split performed later in the notebook, so the test range
# influences the scaling. Fit on the training portion only if a
# leakage-free evaluation is required.
X = df['SALES'].to_numpy().reshape(-1, 1)
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# Show a short preview instead of dumping the whole array into the output.
X_scaled[:5]

array([[0.03431412],
       [0.03423395],
       [0.07784815],
       [0.12290548],
       [0.09540608],
       [0.12009941],
       [0.1150485 ],
       [0.08867153],
       [0.08778963],
       [0.09845266],
       [0.09692937],
       [0.1527299 ],
       [0.        ],
       [0.01843983],
       [0.09051551],
       [0.10791309],
       [0.10230097],
       [0.13348833],
       [0.11440712],
       [0.11432695],
       [0.10687084],
       [0.09660867],
       [0.11777439],
       [0.15577648],
       [0.00352762],
       [0.02773992],
       [0.11320452],
       [0.098613  ],
       [0.11456747],
       [0.15665838],
       [0.10366391],
       [0.13533232],
       [0.10767257],
       [0.09757075],
       [0.14054357],
       [0.15553596],
       [0.02717871],
       [0.03631845],
       [0.09925439],
       [0.07720677],
       [0.13501163],
       [0.15577648],
       [0.09556642],
       [0.15786098],
       [0.0965285 ],
       [0.12210374],
       [0.14591518],
       [0.143

In [None]:
# Train / Test split

# Ordered 80/20 split; shuffling is disabled so the row order of the
# series is preserved in both parts.
train_data, test_data = train_test_split(
    X_scaled,
    shuffle=False,
    test_size=0.2,
)
print(f"{train_data.shape} {test_data.shape}")

(260, 1) (65, 1)


In [None]:
# Reshaping the data

def get_XY(dat, time_steps):
    """Cut a series into non-overlapping input windows and their targets.

    Targets are the elements at positions ``time_steps, 2*time_steps, ...``
    of ``dat``; the inputs are the leading ``time_steps * len(targets)``
    elements reshaped into one window of length ``time_steps`` per target.

    Parameters
    ----------
    dat : numpy.ndarray
        Series to window (indexed along its first axis).
    time_steps : int
        Window length and stride between consecutive targets.

    Returns
    -------
    X : numpy.ndarray
        Array of shape ``(n_targets, time_steps, 1)``.
    Y : numpy.ndarray
        ``dat`` indexed at the target positions.
    """
    # Positions of the values to be predicted
    target_positions = np.arange(time_steps, len(dat), time_steps)
    Y = dat[target_positions]
    n_windows = len(Y)

    # Leading observations, one window per target
    window_positions = np.arange(time_steps * n_windows)
    X = np.reshape(dat[window_positions], (n_windows, time_steps, 1))

    return X, Y

# Window length fed to the network per sample (also read by create_model)
time_steps = 1

trainX, trainY = get_XY(dat=train_data, time_steps=time_steps)
testX, testY = get_XY(dat=test_data, time_steps=time_steps)

In [None]:
# Printing out the shapes of splits
# One shape per line, in the order: trainX, trainY, testX, testY
print(*(arr.shape for arr in (trainX, trainY, testX, testY)), sep="\n")

(259, 1, 1)
(259, 1)
(64, 1, 1)
(64, 1)


In [None]:
# Defining the RNN model structure

# Seed Python's `random`, NumPy and TensorFlow together. Seeding NumPy alone
# does not make Keras weight initialisation or fit-time shuffling repeatable.
keras.utils.set_random_seed(1330)

def create_model(n_steps=None, rnn_units=3):
    """Build the (uncompiled) SimpleRNN regression network.

    Architecture: SimpleRNN(tanh) -> Dense(1, tanh) -> Dense(1, linear),
    fed with input windows of shape ``(n_steps, 1)``.

    Parameters
    ----------
    n_steps : int, optional
        Window length of each input sample. Defaults to the notebook-level
        ``time_steps`` so existing ``create_model()`` calls are unchanged.
    rnn_units : int, optional
        Number of units in the recurrent layer (default 3).

    Returns
    -------
    keras.Sequential
        The assembled, not yet compiled, model.
    """
    if n_steps is None:
        n_steps = time_steps

    model = keras.Sequential(
        [
            # keras.Input replaces InputLayer(input_shape=...), whose
            # `input_shape` argument is deprecated in current Keras.
            keras.Input(shape=(n_steps, 1)),
            layers.SimpleRNN(units=rnn_units, activation="tanh"),
            layers.Dense(units=1, activation="tanh"),
            layers.Dense(units=1, activation="linear"),
        ]
    )
    return model

# Build a fresh, untrained network and print its layer / parameter table
RNN_model = create_model()
RNN_model.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 simple_rnn_4 (SimpleRNN)    (None, 3)                 15        
                                                                 
 dense_8 (Dense)             (None, 1)                 4         
                                                                 
 dense_9 (Dense)             (None, 1)                 2         
                                                                 
Total params: 21
Trainable params: 21
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Compiling the RNN model
# MSE is the optimised loss; MSE and RMSE are additionally tracked as metrics.
RNN_model.compile(loss='mean_squared_error',
                  metrics=['MeanSquaredError', 'RootMeanSquaredError'],
                  optimizer='adam')

# Keep the returned History object: per-epoch loss/metric values remain
# available in `history.history`, and the cell no longer ends in a bare
# `<keras.callbacks.History ...>` repr.
history = RNN_model.fit(trainX,
                        trainY,
                        batch_size=1,
                        epochs=100,
                        verbose=1,
                        shuffle=True,
                        )

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.History at 0x7f45e1f377c0>

In [None]:
# RMSE Evaluation

# Predictions as (n, 1) column vectors, matching the shape of the targets
RNN_pred_train = RNN_model.predict(trainX).reshape(-1, 1)
RNN_pred_test = RNN_model.predict(testX).reshape(-1, 1)

# RMSE is computed as sqrt(MSE) instead of `squared=False`, which recent
# scikit-learn releases deprecated and then removed.
# Both values are on the MinMax-scaled series, not in original sales units.
rmse_train = np.sqrt(mean_squared_error(trainY, RNN_pred_train))
rmse_test = np.sqrt(mean_squared_error(testY, RNN_pred_test))

print("")
print('---------- RNN model Evaluation on Training Data ----------')
print("RMSE =", rmse_train)
print("")

print('---------- RNN model Evaluation on Test Data ----------')
print("RMSE =", rmse_test)
print("")


---------- RNN model Evaluation on Training Data ----------
RMSE = 0.08215653591565394

---------- RNN model Evaluation on Test Data ----------
RMSE = 0.17641128444382775



In [None]:
# Visualization of prediction in relation to initial data

def plot_predicted(true_data, pred_data):
    """Overlay actual and predicted sales in original units and display the chart.

    Both inputs are mapped back through the notebook-level ``scaler`` with
    ``inverse_transform`` before plotting, so they are expected to be in the
    scaled space that scaler produced. The x axis is the observation index.

    Parameters
    ----------
    true_data : array-like
        Scaled actual values (drawn in black).
    pred_data : array-like
        Scaled predicted values (drawn in red).

    Returns
    -------
    None
        The figure is rendered with ``fig.show()``.
    """
    # (scaled values, legend label, line colour) for each trace
    series = (
        (true_data, 'Monthly alcohol sales - Actual (Test)', 'black'),
        (pred_data, 'Monthly alcohol sales - Predicted (Test)', 'red'),
    )

    fig = go.Figure()
    for scaled_values, label, colour in series:
        fig.add_trace(go.Scatter(x=np.arange(len(scaled_values)),
                                 y=scaler.inverse_transform(scaled_values).flatten(),
                                 mode='lines',
                                 name=label,
                                 opacity=0.8,
                                 line=dict(color=colour, width=1)))

    # Grid, zero-line and border styling shared by both axes.
    axis_style = dict(
        showgrid=True, gridwidth=1, gridcolor='lightgrey',
        zeroline=True, zerolinewidth=1, zerolinecolor='lightgrey',
        showline=True, linewidth=1, linecolor='black',
    )
    fig.update_xaxes(title='Observation', **axis_style)
    # The y values are inverse-transformed sales, so label the axis accordingly.
    fig.update_yaxes(title='Sales', **axis_style)

    # Background, title and a horizontal legend above the plot area.
    fig.update_layout(
        plot_bgcolor='white',
        title=dict(text="Monthly alcohol sales number",
                   font=dict(color='black')),
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
    )

    fig.show()

# Plotting the visualization function for RNN prediction

plot_predicted(true_data=testY, pred_data=RNN_pred_test)