In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder

In [2]:
def create_weka_file(group=False, source_csv='Article_top_Data.csv', output_path=None):
    """Export per-date, per-store sales quantities as a Weka ARFF file.

    The CSV is expected to provide the columns ``Date``, ``StoreNumber``,
    ``Group`` and ``Quantity`` (these are the only columns the function
    reads). Quantities are summed per (Date, StoreNumber, Group) and pivoted
    so that every row is one (Date, StoreNumber) pair and every column is one
    product group; combinations without sales are filled with 0.

    The store identifier is label-encoded and written as a nominal value of
    the form ``store_<n>``. The ``Date`` stays in the frame index and is
    therefore not part of the exported rows.

    Parameters
    ----------
    group : bool, default False
        When truthy, the stores listed in ``store_groups`` below are
        collapsed into three shared identifiers (0, 1, 2) before encoding.
        Stores that are not listed keep their own number and hence get
        their own label.
    source_csv : str, default 'Article_top_Data.csv'
        Path of the CSV file to read.
    output_path : str or None, default None
        Path of the ARFF file to write. ``None`` selects
        ``'store_weka_group.arff'`` when ``group`` is truthy and
        ``'store_weka.arff'`` otherwise.

    Raises
    ------
    ValueError
        If the pivoted table does not have exactly as many columns as there
        are hard-coded attribute names.
    """
    # Imported locally (once), as in the original cell.
    # NOTE(review): the call shape used below (file name first, then rows,
    # `relation=` and `names=`) matches the PyPI `arff` package rather than
    # `liac-arff` -- confirm which one is installed in the environment.
    import arff

    # Stores treated as similar; the key is the shared id they are mapped to.
    store_groups = {
        0: [1007, 1033, 1053, 1223, 1311],
        1: [1001, 1028, 1098, 1303, 1487],
        2: [1417, 1436, 1504, 2801],
    }

    # ARFF attribute names, assigned positionally to the pivoted columns.
    # NOTE(review): pivot returns the Group columns in sorted label order, so
    # this list has to follow that order. 'Mozzarella' is listed before
    # 'Milch', which is not alphabetical -- verify against the real Group
    # labels in the CSV that these two columns are not swapped.
    attribute_names = [
        'StoreNumber', 'Augustiner', 'Avocado', 'BO-Laugenbreze', 'Bananen',
        'Broetchen', 'GL_Sahne', 'Gurken', 'Kiwi_Gruen', 'Mozzarella',
        'Milch', 'Pfandartikel', 'SammelNr', 'Schlaufentragetasche',
        'VL_Eier', 'Zeitschriften',
    ]

    df = pd.read_csv(source_csv)

    if group:
        for group_id, store_numbers in store_groups.items():
            df.loc[df['StoreNumber'].isin(store_numbers), 'StoreNumber'] = group_id
    print(df['StoreNumber'].unique())

    # Map the (possibly grouped) store ids to consecutive integers 0..n-1.
    df['StoreNumber'] = LabelEncoder().fit_transform(df['StoreNumber'])
    print(df['StoreNumber'].unique())

    # Turn the integer codes into strings so the column holds labels, not numbers.
    df['StoreNumber'] = df['StoreNumber'].apply(lambda code: f'store_{code}')
    print(df['StoreNumber'].unique())

    sales = df.groupby(['Date', 'StoreNumber', 'Group'], as_index=False).agg({'Quantity': 'sum'})

    # Rows: one per (Date, StoreNumber); columns: one per product group.
    # Passing `values=` yields flat column labels directly. StoreNumber is then
    # moved out of the index so it becomes the first exported column, while
    # Date remains in the index.
    ts_series = (
        sales.pivot(index=['Date', 'StoreNumber'], columns='Group', values='Quantity')
        .fillna(0)
        .reset_index(level='StoreNumber')
    )

    if len(ts_series.columns) != len(attribute_names):
        raise ValueError(
            f'Expected {len(attribute_names) - 1} product groups in {source_csv!r}, '
            f'found {len(ts_series.columns) - 1}: {list(ts_series.columns[1:])}'
        )
    ts_series.columns = attribute_names

    if output_path is None:
        output_path = 'store_weka_group.arff' if group else 'store_weka.arff'

    arff.dump(output_path,
              ts_series.values,
              relation='stores',
              names=ts_series.columns)

In [3]:
# Export with every store kept as its own label (writes store_weka.arff).
create_weka_file(group=False)

[1001 1007 1028 1033 1053 1098 1223 1303 1311 1417 1436 1487 1504 2801]
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13]
['store_0' 'store_1' 'store_2' 'store_3' 'store_4' 'store_5' 'store_6'
 'store_7' 'store_8' 'store_9' 'store_10' 'store_11' 'store_12' 'store_13']


In [4]:
# Export with similar stores merged into shared labels (writes store_weka_group.arff).
create_weka_file(group=True)

[1 0 2]
[1 0 2]
['store_1' 'store_0' 'store_2']
