# Data prepation with daily aggregation
The observation period is split into 4 periods and purchase history is aggregated by client and period and then flattened into a two-dimentional data set.

In [2]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Client data

In [3]:
clients = pd.read_csv('clients2.csv',
                     index_col='client_id',
                     parse_dates=['first_issue_date', 'first_redeem_date'],
                     na_values={'gender': 'U'})\
    .drop(columns='client_id.1')
clients

Unnamed: 0_level_0,first_issue_date,first_redeem_date,age,gender
client_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
000012768d,2017-08-05 15:40:48,2018-01-04 19:30:07,45,
000036f903,2017-04-10 13:54:23,2017-04-23 12:37:56,72,F
00010925a5,2018-07-24 16:21:29,2018-09-14 16:12:49,83,
0001f552b0,2017-06-30 19:20:38,2018-08-28 12:59:45,33,F
00020e7b18,2017-11-27 11:41:45,2018-01-10 17:50:05,73,
...,...,...,...,...
fffe0abb97,2017-11-27 08:56:54,2018-02-11 09:26:08,35,F
fffe0ed719,2017-09-15 08:53:24,2017-12-12 14:50:12,69,
fffea1204c,2018-01-31 16:59:37,2018-03-12 17:02:27,73,F
fffeca6d22,2017-12-28 11:56:13,NaT,77,F


# Age

There are negative values and values above 100 (as high as 1852).

Calculate mean age using clients within reasonable age range:

In [4]:
mean_age = clients.loc[(clients.age < 90) & (clients.age > 10), 'age'].mean()
mean_age

46.37953291808248

In [5]:
clients.loc[(clients.age > 90) | (clients.age < 10), 'age'] = mean_age

# Gender

In [6]:
clients = pd.get_dummies(clients, columns=['gender'])
clients

Unnamed: 0_level_0,first_issue_date,first_redeem_date,age,gender_F,gender_M
client_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
000012768d,2017-08-05 15:40:48,2018-01-04 19:30:07,45.0,0,0
000036f903,2017-04-10 13:54:23,2017-04-23 12:37:56,72.0,1,0
00010925a5,2018-07-24 16:21:29,2018-09-14 16:12:49,83.0,0,0
0001f552b0,2017-06-30 19:20:38,2018-08-28 12:59:45,33.0,1,0
00020e7b18,2017-11-27 11:41:45,2018-01-10 17:50:05,73.0,0,0
...,...,...,...,...,...
fffe0abb97,2017-11-27 08:56:54,2018-02-11 09:26:08,35.0,1,0
fffe0ed719,2017-09-15 08:53:24,2017-12-12 14:50:12,69.0,0,0
fffea1204c,2018-01-31 16:59:37,2018-03-12 17:02:27,73.0,1,0
fffeca6d22,2017-12-28 11:56:13,NaT,77.0,1,0


# Issue and redeem dates

## Clients that have not redeemed

In [7]:
clients['no_redeem'] = clients.first_redeem_date.isna().astype('int')

In [8]:
clients.isna().sum()

first_issue_date         0
first_redeem_date    17546
age                      0
gender_F                 0
gender_M                 0
no_redeem                0
dtype: int64

In [9]:
clients.loc[clients.no_redeem == 1, :]

Unnamed: 0_level_0,first_issue_date,first_redeem_date,age,gender_F,gender_M,no_redeem
client_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
00068fd5dc,2018-12-15 11:14:26,NaT,72.000000,1,0,1
0006fca4bf,2019-02-20 10:47:30,NaT,57.000000,0,0,1
0009e6bafa,2018-10-01 18:40:29,NaT,32.000000,0,0,1
000d599743,2018-08-29 09:08:39,NaT,46.379533,0,0,1
00140e5d34,2019-03-07 13:10:31,NaT,50.000000,1,0,1
...,...,...,...,...,...,...
ffe2d2bdbc,2018-02-16 12:06:15,NaT,82.000000,0,0,1
ffe4b6aa1a,2019-01-19 19:01:44,NaT,56.000000,0,0,1
fff2b6bf63,2018-10-18 19:01:34,NaT,46.000000,0,0,1
fff336ba7b,2017-12-09 11:58:58,NaT,16.000000,0,0,1


## Use the last redeem date to fill missing values

In [10]:
last_redeem = clients.first_redeem_date.max(); last_redeem

Timestamp('2019-11-20 01:14:10')

In [11]:
clients.fillna(last_redeem, inplace=True)

In [12]:
clients.isna().sum()

first_issue_date     0
first_redeem_date    0
age                  0
gender_F             0
gender_M             0
no_redeem            0
dtype: int64

In [13]:
clients['first_issue_date'] = (clients['first_issue_date'] - pd.Timestamp("1970-01-01")).dt.days
clients['first_redeem_date'] = (clients['first_redeem_date'] - pd.Timestamp("1970-01-01")).dt.days
clients

Unnamed: 0_level_0,first_issue_date,first_redeem_date,age,gender_F,gender_M,no_redeem
client_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
000012768d,17383,17535,45.0,0,0,0
000036f903,17266,17279,72.0,1,0,0
00010925a5,17736,17788,83.0,0,0,0
0001f552b0,17347,17771,33.0,1,0,0
00020e7b18,17497,17541,73.0,0,0,0
...,...,...,...,...,...,...
fffe0abb97,17497,17573,35.0,1,0,0
fffe0ed719,17424,17512,69.0,0,0,0
fffea1204c,17562,17602,73.0,1,0,0
fffeca6d22,17528,18220,77.0,1,0,1


### Scaling

In [14]:
contin_vars = ['first_issue_date', 'first_redeem_date', 'age']

In [15]:
scaler = StandardScaler()

In [16]:
scaled = scaler.fit_transform(clients[contin_vars])

In [17]:
clients[contin_vars] = pd.DataFrame(scaled, columns=contin_vars, index=clients.index)
clients

Unnamed: 0_level_0,first_issue_date,first_redeem_date,age,gender_F,gender_M,no_redeem
client_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
000012768d,-0.769796,-0.855610,-0.087012,0,0,0
000036f903,-1.344307,-1.861964,1.615538,1,0,0
00010925a5,0.963560,0.138951,2.309169,0,0,0
0001f552b0,-0.946568,0.072123,-0.843701,1,0,0
00020e7b18,-0.210015,-0.832023,1.678595,0,0,0
...,...,...,...,...,...,...
fffe0abb97,-0.210015,-0.706229,-0.717586,1,0,0
fffe0ed719,-0.568471,-0.946024,1.426365,0,0,0
fffea1204c,0.109158,-0.592228,1.678595,1,0,0
fffeca6d22,-0.057794,1.837174,1.930825,1,0,1


In [18]:
clients.to_csv('clients2_modified.csv')

# Purchase data aggregation

In [3]:
test_purch = pd.read_csv('test_purch.csv', parse_dates=['transaction_datetime'])

In [4]:
train_purch = pd.read_csv('train_purch.csv', parse_dates=['transaction_datetime'])
train_purch

Unnamed: 0,client_id,transaction_id,transaction_datetime,regular_points_received,express_points_received,regular_points_spent,express_points_spent,purchase_sum,store_id,product_id,product_quantity,trn_sum_from_iss,trn_sum_from_red
0,000012768d,7e3e2e3984,2018-12-01 07:12:45,10.0,0.0,0.0,0.0,1007.0,54a4a11a29,9a80204f78,2.0,80.0,
1,000012768d,7e3e2e3984,2018-12-01 07:12:45,10.0,0.0,0.0,0.0,1007.0,54a4a11a29,da89ebd374,1.0,65.0,
2,000012768d,7e3e2e3984,2018-12-01 07:12:45,10.0,0.0,0.0,0.0,1007.0,54a4a11a29,0a95e1151d,1.0,24.0,
3,000012768d,7e3e2e3984,2018-12-01 07:12:45,10.0,0.0,0.0,0.0,1007.0,54a4a11a29,4055b15e4a,2.0,50.0,
4,000012768d,7e3e2e3984,2018-12-01 07:12:45,10.0,0.0,0.0,0.0,1007.0,54a4a11a29,a685f1916b,1.0,22.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
15998947,fffeca6d22,a0bb11a968,2019-03-09 07:47:43,0.4,0.0,0.0,0.0,99.0,a0613e36b5,de437430e2,1.0,19.0,
15998948,fffeca6d22,a0bb11a968,2019-03-09 07:47:43,0.4,0.0,0.0,0.0,99.0,a0613e36b5,ad8fee4200,1.0,45.0,
15998949,fffeca6d22,3b876a5532,2019-03-15 14:46:39,0.8,0.0,0.0,0.0,165.0,a0613e36b5,ad8fee4200,2.0,94.0,
15998950,fffeca6d22,3b876a5532,2019-03-15 14:46:39,0.8,0.0,0.0,0.0,165.0,a0613e36b5,230dbb0c16,1.0,50.0,


In [5]:
test_purch = test_purch\
    .groupby(['client_id','transaction_datetime'],
            as_index=False)\
    .agg({'regular_points_received': 'mean',
          'express_points_received': 'mean',
          'regular_points_spent': 'mean', 
          'express_points_spent': 'mean', 
          'purchase_sum': 'mean',
         'product_id': 'count',
         'product_quantity': 'sum',
         'trn_sum_from_iss': 'sum',
         'trn_sum_from_red': 'sum'})
test_purch.rename(columns = {'product_id': 'n_products'}, inplace=True)
test_purch

Unnamed: 0,client_id,transaction_datetime,regular_points_received,express_points_received,regular_points_spent,express_points_spent,purchase_sum,n_products,product_quantity,trn_sum_from_iss,trn_sum_from_red
0,00010925a5,2018-11-25 10:56:18,1.5,0.0,0.0,0.0,303.0,5,7.0,303.0,0.0
1,00010925a5,2018-12-01 09:17:18,0.8,0.0,0.0,0.0,132.0,3,2.0,132.0,0.0
2,00010925a5,2018-12-03 07:52:46,0.7,0.0,0.0,0.0,149.0,3,3.0,150.0,0.0
3,00010925a5,2018-12-06 09:59:19,1.7,0.0,0.0,0.0,349.0,6,7.0,349.0,0.0
4,00010925a5,2018-12-17 12:51:26,5.8,0.0,0.0,0.0,581.0,8,8.0,582.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...
1209945,fffff6ce77,2019-02-28 12:56:31,1.8,0.0,0.0,0.0,446.0,9,10.0,446.0,0.0
1209946,fffff6ce77,2019-03-05 15:07:03,0.3,0.0,0.0,0.0,247.0,1,1.0,247.0,0.0
1209947,fffff6ce77,2019-03-06 16:26:35,0.1,0.0,0.0,0.0,109.0,2,2.0,110.0,0.0
1209948,fffff6ce77,2019-03-06 16:53:10,0.2,0.0,0.0,0.0,245.0,1,1.0,245.0,0.0


In [6]:
train_purch = train_purch\
    .groupby(['client_id','transaction_datetime'],
            as_index=False)\
    .agg({'regular_points_received': 'mean',
          'express_points_received': 'mean',
          'regular_points_spent': 'mean', 
          'express_points_spent': 'mean', 
          'purchase_sum': 'mean',
         'product_id': 'count',
         'product_quantity': 'sum',
         'trn_sum_from_iss': 'sum',
         'trn_sum_from_red': 'sum'})
train_purch.rename(columns = {'product_id': 'n_products'}, inplace=True)
train_purch

Unnamed: 0,client_id,transaction_datetime,regular_points_received,express_points_received,regular_points_spent,express_points_spent,purchase_sum,n_products,product_quantity,trn_sum_from_iss,trn_sum_from_red
0,000012768d,2018-12-01 07:12:45,10.0,0.0,0.0,0.0,1007.0,19,21.0,1007.0,0.0
1,000012768d,2018-12-16 08:56:01,5.7,0.0,0.0,0.0,574.0,11,14.0,575.0,0.0
2,000012768d,2019-03-08 10:12:03,8.0,0.0,0.0,0.0,803.0,16,13.0,804.0,0.0
3,000012768d,2019-03-14 15:01:47,2.0,0.0,0.0,0.0,419.0,6,6.0,419.0,0.0
4,000036f903,2018-11-28 10:48:36,1.2,0.0,0.0,0.0,241.0,5,4.0,241.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...
2814986,fffeca6d22,2019-02-22 09:34:13,5.0,0.0,0.0,0.0,110.0,3,3.0,111.0,0.0
2814987,fffeca6d22,2019-02-22 09:34:53,6.0,0.0,0.0,0.0,138.0,1,2.0,139.0,0.0
2814988,fffeca6d22,2019-02-28 10:25:24,0.6,0.0,0.0,0.0,128.0,2,3.0,128.0,0.0
2814989,fffeca6d22,2019-03-09 07:47:43,0.4,0.0,0.0,0.0,99.0,3,3.0,100.0,0.0


In [7]:
train_purch.transaction_datetime.min()

Timestamp('2018-11-21 21:02:33')

In [8]:
train_purch.transaction_datetime.max()

Timestamp('2019-03-18 23:19:28')

In [9]:
test_purch.transaction_datetime.min()

Timestamp('2018-11-21 21:02:51')

In [10]:
test_purch.transaction_datetime.max()

Timestamp('2019-03-18 22:08:09')

In [11]:
start = min(
    train_purch.transaction_datetime.min(),
    test_purch.transaction_datetime.min()
)
start

Timestamp('2018-11-21 21:02:33')

In [12]:
finish = max(
    train_purch.transaction_datetime.max(),
    test_purch.transaction_datetime.max()
)
finish

Timestamp('2019-03-18 23:19:28')

Number of intervals:

In [13]:
n_intervals = 4

In [14]:
bins = [start + i * (finish - start) / n_intervals for i in range(0, n_intervals + 1)]
bins

[Timestamp('2018-11-21 21:02:33'),
 Timestamp('2018-12-21 03:36:46.750000'),
 Timestamp('2019-01-19 10:11:00.500000'),
 Timestamp('2019-02-17 16:45:14.250000'),
 Timestamp('2019-03-18 23:19:28')]

In [15]:
test_purch['interval'] = pd.cut(test_purch.transaction_datetime, bins, labels=False)
test_purch

Unnamed: 0,client_id,transaction_datetime,regular_points_received,express_points_received,regular_points_spent,express_points_spent,purchase_sum,n_products,product_quantity,trn_sum_from_iss,trn_sum_from_red,interval
0,00010925a5,2018-11-25 10:56:18,1.5,0.0,0.0,0.0,303.0,5,7.0,303.0,0.0,0
1,00010925a5,2018-12-01 09:17:18,0.8,0.0,0.0,0.0,132.0,3,2.0,132.0,0.0,0
2,00010925a5,2018-12-03 07:52:46,0.7,0.0,0.0,0.0,149.0,3,3.0,150.0,0.0,0
3,00010925a5,2018-12-06 09:59:19,1.7,0.0,0.0,0.0,349.0,6,7.0,349.0,0.0,0
4,00010925a5,2018-12-17 12:51:26,5.8,0.0,0.0,0.0,581.0,8,8.0,582.0,0.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...
1209945,fffff6ce77,2019-02-28 12:56:31,1.8,0.0,0.0,0.0,446.0,9,10.0,446.0,0.0,3
1209946,fffff6ce77,2019-03-05 15:07:03,0.3,0.0,0.0,0.0,247.0,1,1.0,247.0,0.0,3
1209947,fffff6ce77,2019-03-06 16:26:35,0.1,0.0,0.0,0.0,109.0,2,2.0,110.0,0.0,3
1209948,fffff6ce77,2019-03-06 16:53:10,0.2,0.0,0.0,0.0,245.0,1,1.0,245.0,0.0,3


In [16]:
train_purch['interval'] = pd.cut(train_purch.transaction_datetime, bins, labels=False)
train_purch

Unnamed: 0,client_id,transaction_datetime,regular_points_received,express_points_received,regular_points_spent,express_points_spent,purchase_sum,n_products,product_quantity,trn_sum_from_iss,trn_sum_from_red,interval
0,000012768d,2018-12-01 07:12:45,10.0,0.0,0.0,0.0,1007.0,19,21.0,1007.0,0.0,0.0
1,000012768d,2018-12-16 08:56:01,5.7,0.0,0.0,0.0,574.0,11,14.0,575.0,0.0,0.0
2,000012768d,2019-03-08 10:12:03,8.0,0.0,0.0,0.0,803.0,16,13.0,804.0,0.0,3.0
3,000012768d,2019-03-14 15:01:47,2.0,0.0,0.0,0.0,419.0,6,6.0,419.0,0.0,3.0
4,000036f903,2018-11-28 10:48:36,1.2,0.0,0.0,0.0,241.0,5,4.0,241.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...
2814986,fffeca6d22,2019-02-22 09:34:13,5.0,0.0,0.0,0.0,110.0,3,3.0,111.0,0.0,3.0
2814987,fffeca6d22,2019-02-22 09:34:53,6.0,0.0,0.0,0.0,138.0,1,2.0,139.0,0.0,3.0
2814988,fffeca6d22,2019-02-28 10:25:24,0.6,0.0,0.0,0.0,128.0,2,3.0,128.0,0.0,3.0
2814989,fffeca6d22,2019-03-09 07:47:43,0.4,0.0,0.0,0.0,99.0,3,3.0,100.0,0.0,3.0


In [17]:
test_purch.isna().sum()

client_id                  0
transaction_datetime       0
regular_points_received    0
express_points_received    0
regular_points_spent       0
express_points_spent       0
purchase_sum               0
n_products                 0
product_quantity           0
trn_sum_from_iss           0
trn_sum_from_red           0
interval                   0
dtype: int64

In [18]:
train_purch.isna().sum()

client_id                  0
transaction_datetime       0
regular_points_received    0
express_points_received    0
regular_points_spent       0
express_points_spent       0
purchase_sum               0
n_products                 0
product_quantity           0
trn_sum_from_iss           0
trn_sum_from_red           0
interval                   1
dtype: int64

In [19]:
train_purch.loc[train_purch.interval.isna()]

Unnamed: 0,client_id,transaction_datetime,regular_points_received,express_points_received,regular_points_spent,express_points_spent,purchase_sum,n_products,product_quantity,trn_sum_from_iss,trn_sum_from_red,interval
434367,27df8bc8bb,2018-11-21 21:02:33,1.5,0.0,0.0,0.0,224.53,5,8.0,226.0,0.0,


In [20]:
train_purch.fillna(value=0, inplace=True)

In [21]:
train_purch.isna().sum()

client_id                  0
transaction_datetime       0
regular_points_received    0
express_points_received    0
regular_points_spent       0
express_points_spent       0
purchase_sum               0
n_products                 0
product_quantity           0
trn_sum_from_iss           0
trn_sum_from_red           0
interval                   0
dtype: int64

In [22]:
train_purch.dtypes

client_id                          object
transaction_datetime       datetime64[ns]
regular_points_received           float64
express_points_received           float64
regular_points_spent              float64
express_points_spent              float64
purchase_sum                      float64
n_products                          int64
product_quantity                  float64
trn_sum_from_iss                  float64
trn_sum_from_red                  float64
interval                          float64
dtype: object

In [23]:
train_purch.interval = train_purch.interval.astype('int')

In [24]:
train_purch.dtypes

client_id                          object
transaction_datetime       datetime64[ns]
regular_points_received           float64
express_points_received           float64
regular_points_spent              float64
express_points_spent              float64
purchase_sum                      float64
n_products                          int64
product_quantity                  float64
trn_sum_from_iss                  float64
trn_sum_from_red                  float64
interval                            int64
dtype: object

In [25]:
test_purch = test_purch\
    .groupby(['client_id','interval'])\
    .agg({'regular_points_received': 'sum',
          'express_points_received': 'sum',
          'regular_points_spent': 'sum', 
          'express_points_spent': 'sum', 
          'purchase_sum': 'sum',
         'n_products': 'sum',
         'product_quantity': 'sum',
         'trn_sum_from_iss': 'sum',
         'trn_sum_from_red': 'sum'})
test_purch

Unnamed: 0_level_0,Unnamed: 1_level_0,regular_points_received,express_points_received,regular_points_spent,express_points_spent,purchase_sum,n_products,product_quantity,trn_sum_from_iss,trn_sum_from_red
client_id,interval,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
00010925a5,0,11.4,0.0,0.0,0.0,1697.00,28,30.0,1700.0,0.0
00010925a5,1,2.1,0.0,-17.0,0.0,438.00,5,4.0,420.0,438.0
00010925a5,2,4.4,0.0,0.0,0.0,890.00,12,12.0,892.0,0.0
00010925a5,3,13.9,0.0,0.0,0.0,2858.00,33,33.0,2861.0,0.0
00035a21d9,2,8.8,0.0,0.0,0.0,1005.94,13,13.0,1007.0,0.0
...,...,...,...,...,...,...,...,...,...,...
fffea1204c,3,10.9,0.0,-40.0,0.0,1746.41,24,40.0,1708.0,235.0
fffff6ce77,0,42.7,0.0,0.0,0.0,2150.59,17,26.0,2150.0,0.0
fffff6ce77,1,80.8,0.0,0.0,0.0,7785.00,74,114.0,7790.0,0.0
fffff6ce77,2,70.5,0.0,-302.0,0.0,4192.00,68,77.0,3900.0,1055.0


In [26]:
train_purch = train_purch\
    .groupby(['client_id','interval'])\
    .agg({'regular_points_received': 'sum',
          'express_points_received': 'sum',
          'regular_points_spent': 'sum', 
          'express_points_spent': 'sum', 
          'purchase_sum': 'sum',
         'n_products': 'sum',
         'product_quantity': 'sum',
         'trn_sum_from_iss': 'sum',
         'trn_sum_from_red': 'sum'})
train_purch

Unnamed: 0_level_0,Unnamed: 1_level_0,regular_points_received,express_points_received,regular_points_spent,express_points_spent,purchase_sum,n_products,product_quantity,trn_sum_from_iss,trn_sum_from_red
client_id,interval,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
000012768d,0,15.7,0.0,0.0,0.0,1581.00,30,35.0,1582.0,0.0
000012768d,3,10.0,0.0,0.0,0.0,1222.00,22,19.0,1223.0,0.0
000036f903,0,9.4,60.0,0.0,0.0,1957.00,36,35.0,1958.0,0.0
000036f903,1,18.8,0.0,0.0,0.0,2482.00,36,39.0,2484.0,0.0
000036f903,2,13.0,0.0,0.0,0.0,2582.00,46,45.0,2583.0,0.0
...,...,...,...,...,...,...,...,...,...,...
fffe0abb97,3,7.3,0.0,-15.0,-60.0,820.72,11,14.0,747.0,89.0
fffeca6d22,0,10.9,0.0,0.0,0.0,1101.00,14,18.0,1103.0,0.0
fffeca6d22,1,0.4,0.0,0.0,0.0,118.00,2,3.0,120.0,0.0
fffeca6d22,2,1.6,0.0,0.0,0.0,362.00,8,10.0,365.0,0.0


In [27]:
scaler_2 = StandardScaler().fit(train_purch)

In [28]:
scaled_train = scaler_2.transform(train_purch)

In [29]:
train_purch = pd.DataFrame(scaled_train,
                           columns=train_purch.columns,
                           index=train_purch.index)
train_purch.reset_index(inplace=True)
train_purch

Unnamed: 0,client_id,interval,regular_points_received,express_points_received,regular_points_spent,express_points_spent,purchase_sum,n_products,product_quantity,trn_sum_from_iss,trn_sum_from_red
0,000012768d,0,-0.204183,-0.054870,0.330832,0.232968,-0.330435,-0.093396,-0.140969,-0.317685,-0.355603
1,000012768d,3,-0.377653,-0.054870,0.330832,0.232968,-0.463599,-0.351020,-0.517286,-0.454695,-0.355603
2,000036f903,0,-0.395913,14.076622,0.330832,0.232968,-0.190966,0.099822,-0.140969,-0.174188,-0.355603
3,000036f903,1,-0.109839,-0.054870,0.330832,0.232968,0.003772,0.099822,-0.046890,0.026556,-0.355603
4,000036f903,2,-0.286353,-0.054870,0.330832,0.232968,0.040865,0.421853,0.094229,0.064339,-0.355603
...,...,...,...,...,...,...,...,...,...,...,...
486282,fffe0abb97,3,-0.459823,-0.054870,0.095183,-7.285750,-0.612445,-0.705254,-0.634886,-0.636356,-0.165411
486283,fffeca6d22,0,-0.350263,-0.054870,0.330832,0.232968,-0.508481,-0.608645,-0.540806,-0.500492,-0.355603
486284,fffeca6d22,1,-0.669813,-0.054870,0.330832,0.232968,-0.873105,-0.995081,-0.893604,-0.875646,-0.355603
486285,fffeca6d22,2,-0.633293,-0.054870,0.330832,0.232968,-0.782598,-0.801863,-0.728965,-0.782144,-0.355603


In [30]:
scaled_test = scaler_2.transform(test_purch)

In [31]:
test_purch = pd.DataFrame(scaled_test,
                           columns=test_purch.columns,
                           index=test_purch.index)
test_purch.reset_index(inplace=True)
test_purch

Unnamed: 0,client_id,interval,regular_points_received,express_points_received,regular_points_spent,express_points_spent,purchase_sum,n_products,product_quantity,trn_sum_from_iss,trn_sum_from_red
0,00010925a5,0,-0.335046,-0.05487,0.330832,0.232968,-0.287407,-0.157802,-0.258568,-0.272651,-0.355603
1,00010925a5,1,-0.618076,-0.05487,0.063763,0.232968,-0.754407,-0.898472,-0.870084,-0.761153,0.580399
2,00010925a5,2,-0.548079,-0.05487,0.330832,0.232968,-0.586747,-0.673051,-0.681925,-0.581018,-0.355603
3,00010925a5,3,-0.258963,-0.05487,0.330832,0.232968,0.143241,0.003213,-0.188009,0.170435,-0.355603
4,00035a21d9,2,-0.414173,-0.05487,0.330832,0.232968,-0.543742,-0.640848,-0.658406,-0.537129,-0.355603
...,...,...,...,...,...,...,...,...,...,...,...
208594,fffea1204c,3,-0.350263,-0.05487,-0.297565,0.232968,-0.269080,-0.286614,-0.023370,-0.269598,0.146590
208595,fffff6ce77,0,0.617518,-0.05487,0.330832,0.232968,-0.119158,-0.512036,-0.352648,-0.100912,-0.355603
208596,fffff6ce77,1,1.777028,-0.05487,0.330832,0.232968,1.970810,1.323538,1.717098,2.051550,-0.355603
208597,fffff6ce77,2,1.463565,-0.05487,-4.413566,0.232968,0.638061,1.130320,0.846864,0.566962,1.898922


In [32]:
test_purch = test_purch.pivot(index='client_id', columns='interval')
test_purch.fillna(0, inplace=True)
test_purch

Unnamed: 0_level_0,regular_points_received,regular_points_received,regular_points_received,regular_points_received,express_points_received,express_points_received,express_points_received,express_points_received,regular_points_spent,regular_points_spent,...,product_quantity,product_quantity,trn_sum_from_iss,trn_sum_from_iss,trn_sum_from_iss,trn_sum_from_iss,trn_sum_from_red,trn_sum_from_red,trn_sum_from_red,trn_sum_from_red
interval,0,1,2,3,0,1,2,3,0,1,...,2,3,0,1,2,3,0,1,2,3
client_id,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
00010925a5,-0.335046,-0.618076,-0.548079,-0.258963,-0.05487,-0.05487,-0.05487,-0.05487,0.330832,0.063763,...,-0.681925,-0.188009,-0.272651,-0.761153,-0.581018,0.170435,-0.355603,0.580399,-0.355603,-0.355603
00035a21d9,0.000000,0.000000,-0.414173,-0.231573,0.00000,0.00000,-0.05487,-0.05487,0.000000,0.000000,...,-0.658406,-0.728965,0.000000,0.000000,-0.537129,-0.635211,0.000000,0.000000,-0.355603,1.326209
00038f9200,-0.149403,-0.179836,-0.368523,0.489698,-0.05487,-0.05487,-0.05487,-0.05487,0.330832,-0.124756,...,0.447027,0.352948,-0.421492,-0.132970,-0.074197,0.142957,-0.355603,0.375248,-0.355603,-0.355603
0004315e57,-0.517646,-0.164619,-0.167663,0.185364,-0.05487,-0.05487,-0.05487,-0.05487,0.330832,0.330832,...,-0.188009,0.258868,-0.718791,-0.169989,-0.163502,0.228063,-0.355603,-0.355603,1.170208,1.168071
0006fca4bf,0.000000,0.000000,0.000000,0.395354,0.00000,0.00000,0.00000,-0.05487,0.000000,0.000000,...,0.000000,0.047190,0.000000,0.000000,0.000000,-0.103965,0.000000,0.000000,0.000000,-0.355603
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
fff9aa288c,-0.377653,-0.636336,0.227971,-0.660683,-0.05487,-0.05487,-0.05487,-0.05487,0.330832,0.330832,...,0.023670,-0.917124,-0.519955,-0.799699,0.149445,-0.865723,-0.355603,-0.355603,-0.355603,0.054699
fff9f772f6,-0.258963,-0.447649,0.000000,1.579211,-0.05487,-0.05487,0.00000,-0.05487,0.330832,0.330832,...,0.000000,0.776305,-0.390197,-0.624144,0.000000,0.637947,-0.355603,-0.355603,0.000000,-0.355603
fffe0ed719,-0.505473,-0.599816,-0.633293,1.137928,-0.05487,-0.05487,-0.05487,-0.05487,0.330832,0.330832,...,-0.658406,2.610852,-0.555830,-0.703525,-0.790158,2.345414,-0.355603,-0.355603,-0.355603,-0.355603
fffea1204c,-0.587643,-0.584599,-0.411129,-0.350263,-0.05487,-0.05487,-0.05487,-0.05487,0.330832,0.330832,...,0.000150,-0.023370,-0.676047,-0.660781,-0.453550,-0.269598,-0.355603,-0.355603,-0.355603,0.146590


In [33]:
train_purch = train_purch.pivot(index='client_id', columns='interval')
train_purch.fillna(0, inplace=True)
train_purch

Unnamed: 0_level_0,regular_points_received,regular_points_received,regular_points_received,regular_points_received,express_points_received,express_points_received,express_points_received,express_points_received,regular_points_spent,regular_points_spent,...,product_quantity,product_quantity,trn_sum_from_iss,trn_sum_from_iss,trn_sum_from_iss,trn_sum_from_iss,trn_sum_from_red,trn_sum_from_red,trn_sum_from_red,trn_sum_from_red
interval,0,1,2,3,0,1,2,3,0,1,...,2,3,0,1,2,3,0,1,2,3
client_id,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
000012768d,-0.204183,0.000000,0.000000,-0.377653,-0.054870,0.00000,0.00000,-0.05487,0.330832,0.000000,...,0.000000,-0.517286,-0.317685,0.000000,0.000000,-0.454695,-0.355603,0.000000,0.000000,-0.355603
000036f903,-0.395913,-0.109839,-0.286353,-0.265049,14.076622,-0.05487,-0.05487,-0.05487,0.330832,0.330832,...,0.094229,0.211829,-0.174188,0.026556,0.064339,0.141431,-0.355603,-0.355603,-0.355603,-0.355603
0001f552b0,-0.179836,-0.325916,-0.569383,0.748381,-0.054870,-0.05487,-0.05487,-0.05487,0.330832,0.330832,...,-0.399687,-0.376167,-0.218840,-0.412714,-0.627960,-0.077250,-0.355603,-0.355603,-0.355603,-0.355603
00020e7b18,1.734421,1.999192,1.150101,1.095321,-0.054870,-0.05487,-0.05487,-0.05487,0.330832,0.330832,...,1.387820,1.129102,1.998883,1.023787,1.537478,1.373753,-0.355603,-0.355603,7.702989,4.161995
000220a0a7,-0.225486,0.757511,-0.204183,0.112324,-0.054870,-0.05487,-0.05487,-0.05487,0.330832,0.330832,...,-0.093929,0.117749,-0.514612,-0.244410,-0.026492,0.386445,-0.355603,-0.355603,-0.355603,-0.355603
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
fffd5cd0c6,-0.487213,-0.554166,-0.322873,-0.295483,-0.054870,-0.05487,-0.05487,-0.05487,0.330832,0.330832,...,-0.258568,0.282388,-0.416912,-0.645516,-0.498202,0.065102,-0.355603,-0.355603,-0.355603,-0.355603
fffd63dfe3,0.000000,-0.645466,-0.569383,-0.633293,0.000000,-0.05487,-0.05487,-0.05487,0.000000,0.330832,...,-0.611366,-0.776005,0.000000,-0.824124,-0.632158,-0.795501,0.000000,-0.355603,-0.355603,-0.355603
fffd8c9d7d,-0.666769,0.000000,-0.502429,-0.541993,-0.054870,0.00000,-0.05487,-0.05487,0.330832,0.000000,...,-0.752485,-0.587846,-0.881752,0.000000,-0.695129,-0.562699,-0.355603,0.000000,-0.355603,-0.355603
fffe0abb97,-0.493299,-0.648509,-0.450693,-0.459823,-0.054870,-0.05487,-0.05487,-0.05487,0.173733,0.330832,...,-0.681925,-0.634886,-0.666506,-0.830231,-0.582163,-0.636356,-0.141904,0.217110,-0.184644,-0.165411


In [34]:
columns = train_purch.columns.to_flat_index().to_list()
columns = [col[0] + '_' + str(col[1]) for col in columns]

In [35]:
train_purch.columns = columns
train_purch

Unnamed: 0_level_0,regular_points_received_0,regular_points_received_1,regular_points_received_2,regular_points_received_3,express_points_received_0,express_points_received_1,express_points_received_2,express_points_received_3,regular_points_spent_0,regular_points_spent_1,...,product_quantity_2,product_quantity_3,trn_sum_from_iss_0,trn_sum_from_iss_1,trn_sum_from_iss_2,trn_sum_from_iss_3,trn_sum_from_red_0,trn_sum_from_red_1,trn_sum_from_red_2,trn_sum_from_red_3
client_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
000012768d,-0.204183,0.000000,0.000000,-0.377653,-0.054870,0.00000,0.00000,-0.05487,0.330832,0.000000,...,0.000000,-0.517286,-0.317685,0.000000,0.000000,-0.454695,-0.355603,0.000000,0.000000,-0.355603
000036f903,-0.395913,-0.109839,-0.286353,-0.265049,14.076622,-0.05487,-0.05487,-0.05487,0.330832,0.330832,...,0.094229,0.211829,-0.174188,0.026556,0.064339,0.141431,-0.355603,-0.355603,-0.355603,-0.355603
0001f552b0,-0.179836,-0.325916,-0.569383,0.748381,-0.054870,-0.05487,-0.05487,-0.05487,0.330832,0.330832,...,-0.399687,-0.376167,-0.218840,-0.412714,-0.627960,-0.077250,-0.355603,-0.355603,-0.355603,-0.355603
00020e7b18,1.734421,1.999192,1.150101,1.095321,-0.054870,-0.05487,-0.05487,-0.05487,0.330832,0.330832,...,1.387820,1.129102,1.998883,1.023787,1.537478,1.373753,-0.355603,-0.355603,7.702989,4.161995
000220a0a7,-0.225486,0.757511,-0.204183,0.112324,-0.054870,-0.05487,-0.05487,-0.05487,0.330832,0.330832,...,-0.093929,0.117749,-0.514612,-0.244410,-0.026492,0.386445,-0.355603,-0.355603,-0.355603,-0.355603
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
fffd5cd0c6,-0.487213,-0.554166,-0.322873,-0.295483,-0.054870,-0.05487,-0.05487,-0.05487,0.330832,0.330832,...,-0.258568,0.282388,-0.416912,-0.645516,-0.498202,0.065102,-0.355603,-0.355603,-0.355603,-0.355603
fffd63dfe3,0.000000,-0.645466,-0.569383,-0.633293,0.000000,-0.05487,-0.05487,-0.05487,0.000000,0.330832,...,-0.611366,-0.776005,0.000000,-0.824124,-0.632158,-0.795501,0.000000,-0.355603,-0.355603,-0.355603
fffd8c9d7d,-0.666769,0.000000,-0.502429,-0.541993,-0.054870,0.00000,-0.05487,-0.05487,0.330832,0.000000,...,-0.752485,-0.587846,-0.881752,0.000000,-0.695129,-0.562699,-0.355603,0.000000,-0.355603,-0.355603
fffe0abb97,-0.493299,-0.648509,-0.450693,-0.459823,-0.054870,-0.05487,-0.05487,-0.05487,0.173733,0.330832,...,-0.681925,-0.634886,-0.666506,-0.830231,-0.582163,-0.636356,-0.141904,0.217110,-0.184644,-0.165411


In [36]:
test_purch.columns = columns
test_purch

Unnamed: 0_level_0,regular_points_received_0,regular_points_received_1,regular_points_received_2,regular_points_received_3,express_points_received_0,express_points_received_1,express_points_received_2,express_points_received_3,regular_points_spent_0,regular_points_spent_1,...,product_quantity_2,product_quantity_3,trn_sum_from_iss_0,trn_sum_from_iss_1,trn_sum_from_iss_2,trn_sum_from_iss_3,trn_sum_from_red_0,trn_sum_from_red_1,trn_sum_from_red_2,trn_sum_from_red_3
client_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
00010925a5,-0.335046,-0.618076,-0.548079,-0.258963,-0.05487,-0.05487,-0.05487,-0.05487,0.330832,0.063763,...,-0.681925,-0.188009,-0.272651,-0.761153,-0.581018,0.170435,-0.355603,0.580399,-0.355603,-0.355603
00035a21d9,0.000000,0.000000,-0.414173,-0.231573,0.00000,0.00000,-0.05487,-0.05487,0.000000,0.000000,...,-0.658406,-0.728965,0.000000,0.000000,-0.537129,-0.635211,0.000000,0.000000,-0.355603,1.326209
00038f9200,-0.149403,-0.179836,-0.368523,0.489698,-0.05487,-0.05487,-0.05487,-0.05487,0.330832,-0.124756,...,0.447027,0.352948,-0.421492,-0.132970,-0.074197,0.142957,-0.355603,0.375248,-0.355603,-0.355603
0004315e57,-0.517646,-0.164619,-0.167663,0.185364,-0.05487,-0.05487,-0.05487,-0.05487,0.330832,0.330832,...,-0.188009,0.258868,-0.718791,-0.169989,-0.163502,0.228063,-0.355603,-0.355603,1.170208,1.168071
0006fca4bf,0.000000,0.000000,0.000000,0.395354,0.00000,0.00000,0.00000,-0.05487,0.000000,0.000000,...,0.000000,0.047190,0.000000,0.000000,0.000000,-0.103965,0.000000,0.000000,0.000000,-0.355603
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
fff9aa288c,-0.377653,-0.636336,0.227971,-0.660683,-0.05487,-0.05487,-0.05487,-0.05487,0.330832,0.330832,...,0.023670,-0.917124,-0.519955,-0.799699,0.149445,-0.865723,-0.355603,-0.355603,-0.355603,0.054699
fff9f772f6,-0.258963,-0.447649,0.000000,1.579211,-0.05487,-0.05487,0.00000,-0.05487,0.330832,0.330832,...,0.000000,0.776305,-0.390197,-0.624144,0.000000,0.637947,-0.355603,-0.355603,0.000000,-0.355603
fffe0ed719,-0.505473,-0.599816,-0.633293,1.137928,-0.05487,-0.05487,-0.05487,-0.05487,0.330832,0.330832,...,-0.658406,2.610852,-0.555830,-0.703525,-0.790158,2.345414,-0.355603,-0.355603,-0.355603,-0.355603
fffea1204c,-0.587643,-0.584599,-0.411129,-0.350263,-0.05487,-0.05487,-0.05487,-0.05487,0.330832,0.330832,...,0.000150,-0.023370,-0.676047,-0.660781,-0.453550,-0.269598,-0.355603,-0.355603,-0.355603,0.146590


In [37]:
test_purch.to_csv('test_purch_4_periods.csv')

In [38]:
train_purch.to_csv('train_purch_4_periods.csv')