In [35]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import GridSearchCV
from scikeras.wrappers import KerasRegressor
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import mean_absolute_error
import pandas as pd
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
import plotly.graph_objs as go

In [36]:

df_2020 = pd.read_csv('grouped/2020/grouped_by_subcat_2020.csv', encoding='UTF-8')
df_2021 = pd.read_csv('grouped/2021/grouped_by_subcat_2021.csv', encoding='UTF-8')
df = pd.concat([df_2021, df_2020], axis=0)

#df = pd.concat([df, df_2020], axis=0)
df = df[df['CATEGORY'] == 'Сковорода']
df
df['DATE'] = pd.to_datetime(df['DATE'])
df = df.groupby(['DATE','CATEGORY']).agg({'QUANTITY': 'sum'}).reset_index()
df = df[['DATE', 'QUANTITY']]
train_data = df
train_data

Unnamed: 0,DATE,QUANTITY
0,2020-01-01,21.0
1,2020-01-02,424.0
2,2020-01-03,292.0
3,2020-01-04,352.0
4,2020-01-05,592.0
...,...,...
726,2021-12-27,429.0
727,2021-12-28,518.0
728,2021-12-29,625.0
729,2021-12-30,812.0


In [37]:
data_real = pd.read_csv('grouped/2023/grouped_by_subcat_2023.csv', encoding='UTF-8')
data_real = data_real[data_real['CATEGORY'] == 'Сковорода']
data_real['DATE'] = pd.to_datetime(data_real['DATE']) 
data_real = data_real.groupby(['DATE','CATEGORY']).agg({'QUANTITY': 'sum'}).reset_index()
data_real = data_real[['DATE', 'QUANTITY']]
data_real

Unnamed: 0,DATE,QUANTITY
0,2023-01-01,11.0
1,2023-01-02,313.0
2,2023-01-03,229.0
3,2023-01-04,224.0
4,2023-01-05,246.0
...,...,...
360,2023-12-27,327.0
361,2023-12-28,436.0
362,2023-12-29,559.0
363,2023-12-30,885.0


In [38]:
df_2022 = pd.read_csv('grouped/2022/grouped_by_subcat_2022.csv', encoding='UTF-8')
df_2022 = df_2022[df_2022['CATEGORY'] == 'Сковорода']
df_2022 = df_2022.groupby(['DATE','CATEGORY']).agg({'QUANTITY': 'sum'}).reset_index()
df_2022 = df_2022[['DATE', 'QUANTITY']]
df_2022
test_data = df_2022
test_data.reset_index(drop=True, inplace=True)
test_data

Unnamed: 0,DATE,QUANTITY
0,2022-01-01,18.0
1,2022-01-02,331.0
2,2022-01-03,167.0
3,2022-01-04,220.0
4,2022-01-05,264.0
...,...,...
360,2022-12-27,474.0
361,2022-12-28,547.0
362,2022-12-29,719.0
363,2022-12-30,1022.0


In [39]:
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_train_data = scaler.fit_transform(train_data['QUANTITY'].values.reshape(-1, 1))
scaled_test_data = scaler.transform(test_data['QUANTITY'].values.reshape(-1, 1))

def create_window_data(data, window_size):
    X, y = [], []
    for i in range(window_size, len(data)):
        X.append(data[i-window_size:i, 0])
        y.append(data[i, 0])
    return np.array(X), np.array(y)

window_size = 60  

X_train, y_train = create_window_data(scaled_train_data, window_size)
X_test, y_test = create_window_data(scaled_test_data, window_size)

X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

model = Sequential()
model.add(LSTM(units=175, return_sequences=True, input_shape=(X_train.shape[1], 1)))
model.add(Dropout(0.25))
model.add(LSTM(units=150, return_sequences=True))
model.add(Dropout(0.25))
model.add(LSTM(units=120))
model.add(Dropout(0.25))
model.add(Dense(units=1))
model.compile(optimizer='adam', loss='mean_squared_error')

early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
model.fit(X_train, y_train, epochs=75, batch_size=32, validation_data=(X_test, y_test), callbacks=[early_stopping])

predicted_sales = model.predict(X_test)
predicted_sales = scaler.inverse_transform(predicted_sales)
predicted_sales

Epoch 1/75



Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.



[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 103ms/step - loss: 0.0308 - val_loss: 0.0122
Epoch 2/75
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 79ms/step - loss: 0.0149 - val_loss: 0.0121
Epoch 3/75
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 90ms/step - loss: 0.0142 - val_loss: 0.0115
Epoch 4/75
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 80ms/step - loss: 0.0124 - val_loss: 0.0131
Epoch 5/75
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 85ms/step - loss: 0.0121 - val_loss: 0.0111
Epoch 6/75
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 75ms/step - loss: 0.0135 - val_loss: 0.0120
Epoch 7/75
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 80ms/step - loss: 0.0156 - val_loss: 0.0107
Epoch 8/75
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 80ms/step - loss: 0.0130 - val_loss: 0.0110
Epoch 9/75
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37

array([[240.72847 ],
       [285.30084 ],
       [393.19183 ],
       [550.97955 ],
       [556.6295  ],
       [400.5401  ],
       [441.10757 ],
       [514.33746 ],
       [463.94675 ],
       [410.90073 ],
       [451.93765 ],
       [510.14322 ],
       [382.84122 ],
       [217.92595 ],
       [215.54286 ],
       [217.42526 ],
       [218.99086 ],
       [276.53677 ],
       [342.1205  ],
       [230.30182 ],
       [125.67464 ],
       [110.62823 ],
       [110.440216],
       [119.06687 ],
       [172.17279 ],
       [227.69919 ],
       [157.96933 ],
       [ 93.732635],
       [ 92.65227 ],
       [ 84.78722 ],
       [111.71923 ],
       [205.74489 ],
       [256.2859  ],
       [151.01405 ],
       [ 97.97308 ],
       [109.14254 ],
       [117.46058 ],
       [147.68604 ],
       [254.33893 ],
       [306.9414  ],
       [178.99817 ],
       [125.2911  ],
       [139.83238 ],
       [136.70963 ],
       [154.5175  ],
       [253.70651 ],
       [298.20255 ],
       [152.5

In [16]:
scaler = MinMaxScaler(feature_range=(0, 1))

data_2023 = test_data

scaled_data_2023 = scaler.fit_transform(data_2023['QUANTITY'].values.reshape(-1, 1))

def create_window_data(data, window_size):
    X = []
    for i in range(window_size, len(data)):
        X.append(data[i-window_size:i, 0])
    return np.array(X)

window_size = 30  
X_2023 = create_window_data(scaled_data_2023, window_size)
X_2023 = np.reshape(X_2023, (X_2023.shape[0], X_2023.shape[1], 1))

predictions = []
input_seq = X_2023[-1] 

for _ in range(60): 
    pred = model.predict(input_seq.reshape(1, window_size, 1))
    predictions.append(pred[0][0])
    input_seq = np.append(input_seq[1:], pred).reshape(window_size, 1)

predictions = scaler.inverse_transform(np.array(predictions).reshape(-1, 1))

prediction_dates = pd.date_range(start='2024-01-01', periods=60)
predicted_sales_df = pd.DataFrame(predictions, columns=['QUANTITY'])
predicted_sales_df['DATE'] = prediction_dates


print("Предсказания на первые два месяца 2024 года:")
print(predicted_sales_df)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 277ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1

In [17]:
predicted_sales_df

Unnamed: 0,QUANTITY,DATE
0,550.654236,2024-01-01
1,292.735229,2024-01-02
2,292.49173,2024-01-03
3,265.642426,2024-01-04
4,286.3367,2024-01-05
5,354.550323,2024-01-06
6,365.918213,2024-01-07
7,274.675934,2024-01-08
8,220.174576,2024-01-09
9,206.468887,2024-01-10


In [40]:
predicted_dates = test_data['DATE'].iloc[window_size:]
predicted_df = pd.DataFrame({'DATE': predicted_dates, 'QUANTITY': predicted_sales.flatten()})
predicted_df.reset_index(drop=True, inplace=True)
predicted_df

Unnamed: 0,DATE,QUANTITY
0,2022-03-02,240.728470
1,2022-03-03,285.300842
2,2022-03-04,393.191833
3,2022-03-05,550.979553
4,2022-03-06,556.629517
...,...,...
300,2022-12-27,308.229126
301,2022-12-28,364.475189
302,2022-12-29,450.840637
303,2022-12-30,712.356750


In [42]:
# Создание Dash приложения

app = dash.Dash(__name__)

app.layout = html.Div([
    html.H1("Прогнозирование продаж"),
    dcc.Graph(id='sales-forecast-graph')
])

@app.callback(
    Output('sales-forecast-graph', 'figure'),
    [Input('sales-forecast-graph', 'id')]
)
def update_graph(_):
    fig = go.Figure()

    fig.add_trace(go.Scatter(x=test_data['DATE'], y=test_data['QUANTITY'], mode='lines', name='Обучение'))

    fig.add_trace(go.Scatter(x=data_real['DATE'], y=data_real['QUANTITY'], mode='lines', name='Реальность'))
    fig.add_trace(go.Scatter(x=predicted_df['DATE'], y=predicted_df['QUANTITY'], mode='lines', name='Прогноз'))

    fig.update_layout(
        title="Прогнозирование продаж",
        xaxis_title="Дата",
        yaxis_title="Количество продаж",
        legend_title="Информация"
    )
    return fig

app.run_server(debug=True, port=5099)

In [8]:
from sklearn.metrics import mean_absolute_error

mae = mean_absolute_error(test_data['QUANTITY'][window_size:], predicted_sales)
print("Mean Absolute Error on Test Data:", mae)

Mean Absolute Error on Test Data: 38.8466798696945
