In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
import numpy as np
from keras.models import Sequential
from keras.layers import LSTM, Dense
from sklearn.model_selection import train_test_split

In [2]:
def create_weka_file(group=False, csv_path='Article_top_Data.csv', epochs=100, batch_size=1):
    """Train and evaluate an LSTM that predicts the store from one day's sales.

    Despite its name, this function writes no file: it reads the sales CSV,
    builds one sample per (Date, StoreNumber) holding the summed Quantity of
    every article Group, trains a single-time-step LSTM classifier on 70% of
    the samples and prints the accuracy reached on the remaining 30%.

    Parameters
    ----------
    group : bool, default False
        When True, the stores are merged into three hard-coded groups
        (ids 0, 1, 2) before training, so the model predicts the store group
        instead of the individual store.
    csv_path : str, default 'Article_top_Data.csv'
        CSV file to read. It must provide the columns ``Date``,
        ``StoreNumber``, ``Group`` and ``Quantity``.
    epochs : int, default 100
        Number of training epochs passed to ``model.fit``.
    batch_size : int, default 1
        Mini-batch size passed to ``model.fit``.

    Returns
    -------
    None
        The unique store ids (before and after label encoding) and the final
        test accuracy are printed.
    """
    features, labels = _load_store_sales(csv_path, group)
    num_classes = len(np.unique(labels))

    # 3-dim format for LSTM: [samples, time steps, features]; every sample is
    # a single time step.
    X = features.reshape(features.shape[0], 1, features.shape[1])

    # one-hot encode the integer class ids
    y = np.eye(num_classes)[labels]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

    # Scale by the largest training quantity only. The label column is never
    # part of the feature matrix, so class ids cannot influence the scale, and
    # no statistic of the held-out rows leaks into preprocessing.
    scale = float(np.max(X_train))
    if scale == 0:
        # all-zero training data: avoid 0/0 -> NaN inputs
        scale = 1.0
    X_train = X_train / scale
    X_test = X_test / scale

    # LSTM construction; the input shape follows the data instead of assuming
    # a fixed number of article groups.
    model = Sequential()
    model.add(LSTM(50, input_shape=X_train.shape[1:]))
    model.add(Dense(num_classes, activation='softmax'))  # softmax for multi-class classification

    # compile model
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    # training
    model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, verbose=2)

    # test set: compare the most probable class with the one-hot ground truth
    predicted = np.argmax(model.predict(X_test), axis=1)
    actual = np.argmax(y_test, axis=1)
    print(int(np.sum(predicted == actual)) / y_test.shape[0])


def _load_store_sales(csv_path, group):
    """Return (features, labels): per-(Date, StoreNumber) quantities per Group and encoded store ids."""
    df = pd.read_csv(csv_path)
    if group:
        # group the similar Stores
        store_groups = {
            0: [1007, 1033, 1053, 1223, 1311],
            1: [1001, 1028, 1098, 1303, 1487],
            2: [1417, 1436, 1504, 2801],
        }
        for group_id, stores in store_groups.items():
            df.loc[df.StoreNumber.isin(stores), 'StoreNumber'] = group_id

    print(df.StoreNumber.unique())
    # map store (or store-group) ids onto consecutive class ids 0..k-1
    df['StoreNumber'] = LabelEncoder().fit_transform(df.StoreNumber)
    print(df.StoreNumber.unique())

    sales = df.groupby(['Date', 'StoreNumber', 'Group'], as_index=False).aggregate({'Quantity': 'sum'})
    # One row per (Date, StoreNumber), one column per Group; a group with no
    # sales on that day counts as 0.
    daily = sales.pivot(index=['Date', 'StoreNumber'], columns='Group', values='Quantity').fillna(0)

    features = daily.to_numpy(dtype=float)
    # The labels come from the index, so they stay out of the feature matrix.
    labels = daily.index.get_level_values('StoreNumber').to_numpy().astype(int)
    return features, labels

In [3]:
# Baseline run: every store is its own class.
create_weka_file(group=False)

[1001 1007 1028 1033 1053 1098 1223 1303 1311 1417 1436 1487 1504 2801]
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13]


2023-07-21 19:48:52.464380: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Epoch 1/100
2968/2968 - 3s - loss: 2.5217 - accuracy: 0.1600 - 3s/epoch - 1ms/step
Epoch 2/100
2968/2968 - 2s - loss: 2.0364 - accuracy: 0.3315 - 2s/epoch - 832us/step
Epoch 3/100
2968/2968 - 3s - loss: 1.8221 - accuracy: 0.3838 - 3s/epoch - 926us/step
Epoch 4/100
2968/2968 - 2s - loss: 1.7217 - accuracy: 0.4164 - 2s/epoch - 835us/step
Epoch 5/100
2968/2968 - 3s - loss: 1.6575 - accuracy: 0.4488 - 3s/epoch - 1ms/step
Epoch 6/100
2968/2968 - 3s - loss: 1.5945 - accuracy: 0.4761 - 3s/epoch - 1ms/step
Epoch 7/100
2968/2968 - 2s - loss: 1.5376 - accuracy: 0.4923 - 2s/epoch - 788us/step
Epoch 8/100
2968/2968 - 3s - loss: 1.4861 - accuracy: 0.5098 - 3s/epoch - 866us/step
Epoch 9/100
2968/2968 - 2s - loss: 1.4398 - accuracy: 0.5249 - 2s/epoch - 814us/step
Epoch 10/100
2968/2968 - 2s - loss: 1.3953 - accuracy: 0.5532 - 2s/epoch - 835us/step
Epoch 11/100
2968/2968 - 2s - loss: 1.3574 - accuracy: 0.5536 - 2s/epoch - 789us/step
Epoch 12/100
2968/2968 - 2s - loss: 1.3236 - accuracy: 0.5694 - 2s/ep

In [4]:
# Grouped run: stores are merged into the three hard-coded store groups first.
create_weka_file(group=True)

[1 0 2]
[1 0 2]
Epoch 1/100
637/637 - 2s - loss: 1.0963 - accuracy: 0.3893 - 2s/epoch - 3ms/step
Epoch 2/100
637/637 - 1s - loss: 1.0714 - accuracy: 0.4364 - 505ms/epoch - 793us/step
Epoch 3/100
637/637 - 0s - loss: 1.0026 - accuracy: 0.6311 - 497ms/epoch - 780us/step
Epoch 4/100
637/637 - 0s - loss: 0.8588 - accuracy: 0.6578 - 488ms/epoch - 766us/step
Epoch 5/100
637/637 - 0s - loss: 0.7605 - accuracy: 0.6688 - 492ms/epoch - 772us/step
Epoch 6/100
637/637 - 0s - loss: 0.7204 - accuracy: 0.6735 - 494ms/epoch - 776us/step
Epoch 7/100
637/637 - 1s - loss: 0.6920 - accuracy: 0.6782 - 543ms/epoch - 852us/step
Epoch 8/100
637/637 - 0s - loss: 0.6733 - accuracy: 0.6970 - 494ms/epoch - 776us/step
Epoch 9/100
637/637 - 0s - loss: 0.6566 - accuracy: 0.6986 - 493ms/epoch - 775us/step
Epoch 10/100
637/637 - 0s - loss: 0.6427 - accuracy: 0.6970 - 489ms/epoch - 767us/step
Epoch 11/100
637/637 - 0s - loss: 0.6333 - accuracy: 0.7080 - 498ms/epoch - 781us/step
Epoch 12/100
637/637 - 0s - loss: 0.6211 