In [1]:
import numpy as np
import pandas as pd
from category_encoders import BinaryEncoder
from sklearn.preprocessing import LabelEncoder, MinMaxScaler

import matplotlib.pyplot as plt
%matplotlib inline

In [16]:
# Allow up to 1000 columns to be rendered before pandas truncates a displayed frame.
pd.set_option('display.max_columns', 1000)

# 1 数据预处理

In [2]:
# Load the tab-separated order history; it is expected to contain a 'category' column.
order_cate_month = pd.read_csv("../../data/level1/order-history.txt", sep='\t')

# Raw category label (as written in the source file) -> short category code.
cate_to_code = {
    '烟机': 'CRYJ',
    '灶具': 'CRZJ',
    '消毒柜': 'CRXDG',
    '洗碗机': 'CRXWJ',
    '电热': 'DR',
    '燃热': 'RR',
    '净水': 'JSJ',
    '饮水': 'YSJ',
}

# Fail fast on labels that are missing from the mapping: Series.map would
# otherwise replace them with NaN silently, and the problem would only surface
# later (in the label encoder or as a NaN index entry).
unmapped_labels = set(order_cate_month['category'].dropna().unique()) - set(cate_to_code)
if unmapped_labels:
    raise ValueError(
        "Unmapped category labels: %s" % sorted(map(str, unmapped_labels))
    )

# Bracket access is used instead of attribute access so the column lookup can
# never be shadowed by a DataFrame attribute or method of the same name.
order_cate_month['category'] = order_cate_month['category'].map(cate_to_code)

In [4]:
# Work on an explicit copy of the single 'category' column. Adding a column to
# the bare `order_cate_month[['category']]` selection makes pandas emit
# SettingWithCopyWarning, because it cannot tell whether the parent frame was
# meant to be modified.
category = order_cate_month[['category']].copy()

# Ordinal Encoding
# The fitted encoder is kept in `label_encoder` so it remains available to
# later cells.
label_encoder = LabelEncoder()
category['cate_code'] = label_encoder.fit_transform(category['category'])

# Index the lookup table by category code; rebinding instead of `inplace=True`
# gives the same result without mutating in place.
category = category.set_index('category')

# Binary Encoding
# binary_encoder = BinaryEncoder(cols=['category'])
# category_binary = binary_encoder.fit_transform(category)
# category = pd.concat([category, category_binary], axis=1)
# category.set_index('category', inplace=True)

In [7]:
# Move the category code into the row index (in place), so that only the value
# columns remain as regular columns.
order_cate_month.set_index('category', inplace=True)
# Relabel the remaining columns with month-end timestamps from 2016-01-31 to
# 2018-12-31. The assignment requires the number of columns to equal the number
# of generated dates; pandas raises on a length mismatch.
# NOTE(review): this cell mutates `order_cate_month` in place, so it is not
# safe to re-run without re-running the loading cell first.
order_cate_month.columns = pd.date_range(start='2016-01-31', end='2018-12-31', freq='M')

In [10]:
def row_normalization(df):
    """Min-max scale every row of ``df`` independently.

    A separate ``MinMaxScaler`` is fitted on the values of each row, and the
    row is replaced by its scaled values. The input frame is not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Numeric frame whose rows are scaled one at a time. Index labels are
        used as dictionary keys for the fitted scalers, so they should be
        unique.

    Returns
    -------
    (pandas.DataFrame, dict)
        A float copy of ``df`` holding the scaled values, and a dict mapping
        each index label to the scaler fitted on that row (the input expected
        by ``row_restore``).
    """
    # Cast to float up front. The scaled values are floats, and writing them
    # row by row into integer columns relies on silent upcasting, which pandas
    # has deprecated (FutureWarning, later an error). `astype` also returns a
    # new frame, so the caller's data is left untouched.
    df_bak = df.astype(float)
    scalers = dict()
    for rn in list(df_bak.index):
        # The scaler expects a 2-D (n_samples, n_features) array, so the row is
        # presented as a single feature column.
        row_column = df_bak.loc[rn].values.reshape(-1, 1)
        scaler = MinMaxScaler().fit(row_column)
        df_bak.loc[rn] = scaler.transform(row_column).ravel()
        scalers[rn] = scaler
    return df_bak, scalers


def row_restore(df_normalized, scalers):
    """Undo per-row scaling using the scaler stored for each row label.

    Works on a copy of ``df_normalized``: every row is passed, as a single
    column, through ``inverse_transform`` of the scaler found under that row's
    index label in ``scalers``. Returns the restored copy.
    """
    restored = df_normalized.copy()
    for label in list(restored.index):
        row_scaler = scalers[label]
        # Scalers operate on 2-D input, so present the row as one column.
        as_column = restored.loc[label].values.reshape(-1, 1)
        original_scale = row_scaler.inverse_transform(as_column)
        restored.loc[label] = original_scale.ravel()
    return restored

In [15]:
# Show the frame: as the last expression in the cell it is rendered as output.
order_cate_month

Unnamed: 0_level_0,2016-01-31 00:00:00,2016-02-29 00:00:00,2016-03-31 00:00:00,2016-04-30 00:00:00,2016-05-31 00:00:00,2016-06-30 00:00:00,2016-07-31 00:00:00,2016-08-31 00:00:00,2016-09-30 00:00:00,2016-10-31 00:00:00,...,2018-03-31 00:00:00,2018-04-30 00:00:00,2018-05-31 00:00:00,2018-06-30 00:00:00,2018-07-31 00:00:00,2018-08-31 00:00:00,2018-09-30 00:00:00,2018-10-31 00:00:00,2018-11-30 00:00:00,2018-12-31 00:00:00
category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
CRYJ,28.8001,3.5295,20.0109,13.786,15.6494,12.2787,13.4059,18.5025,19.8374,25.7658,...,25.5676,7.1679,14.6512,5.6166,0.8526,11.7562,13.4868,20.3268,17.2094,15.1807
CRZJ,38.9779,3.8229,21.7193,8.4581,13.7532,9.4777,9.2029,13.4919,17.3669,26.8086,...,30.2834,6.957,13.4579,6.254,-0.2096,8.7197,12.4396,23.3712,16.8041,15.5517
CRXDG,6.1313,0.4546,4.6248,4.1901,5.0774,3.1966,3.6534,3.6352,5.8829,7.2654,...,4.8796,1.8125,4.6609,1.0848,-0.3216,3.5279,4.0124,4.1301,1.6926,1.4649
CRXWJ,0.3973,0.0533,0.3103,0.2698,0.6287,0.4927,0.3527,0.3168,0.9708,1.6751,...,5.1748,0.8913,0.4937,-0.4092,-0.0637,0.7514,1.2591,3.1889,1.2983,1.7356
DR,86.3268,26.2495,58.3809,45.0692,52.1094,47.7539,27.9732,54.9885,39.8295,71.4114,...,79.7048,69.3353,66.6816,25.1697,37.0122,55.8656,72.429,80.6107,78.4249,45.8093
RR,14.2495,5.31,12.1832,12.3467,13.0489,9.2844,7.8842,11.5168,9.6498,18.6803,...,14.7832,12.0148,11.2635,6.0376,8.6216,11.609,14.3133,20.4242,18.5571,9.9498
JSJ,12.8027,3.227,7.8923,4.7224,7.8,6.4379,4.1973,6.7925,7.0164,11.0526,...,13.0406,13.0257,13.3344,11.3069,9.7074,10.5179,13.7273,19.6269,15.6126,6.3717
YSJ,26.5691,8.0639,19.8595,17.8514,19.8307,14.8368,7.9924,15.9952,15.9746,24.9691,...,9.7662,12.2184,11.0708,9.4122,7.9454,9.9526,12.6942,16.2457,12.3084,7.7977
