In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/src/script.ipynb
/kaggle/lib/kaggle/gcp.py
/kaggle/input/jiegouhua714/train.parquet
/kaggle/input/jiegouhua714/test.parquet
/kaggle/input/amex-default-prediction/sample_submission.csv
/kaggle/input/amex-default-prediction/train_data.csv
/kaggle/input/amex-default-prediction/test_data.csv
/kaggle/input/amex-default-prediction/train_labels.csv
/kaggle/working/__notebook__.ipynb


In [2]:
import numpy as np  # linear algebra
import pandas as pd  # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import gc
import sys
import pickle
import glob
from sklearn.preprocessing import LabelEncoder

pd.set_option('display.max_columns', None)
import random

random.seed(75)
from tqdm.notebook import tqdm_notebook
from functools import partial, reduce

### warnings setting
import sys
import warnings

if not sys.warnoptions:
    warnings.simplefilter("ignore")
warnings.filterwarnings("ignore", category=DeprecationWarning)

#### model
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score, roc_curve, auc
import catboost
from catboost import Pool, CatBoostClassifier
import lightgbm as lgb
import joblib
import pickle
from tqdm.notebook import tqdm_notebook
import uuid

##### LOGGING Stettings #####
import logging

# Create logger
logger = logging.getLogger()
logger.setLevel(logging.INFO)
# Create STDERR handler
handler = logging.StreamHandler(sys.stderr)
# Create formatter and add it to the handler
formatter = logging.Formatter('%(asctime)s [%(levelname)s] %(name)s - %(message)s', datefmt='%Y-%m-%d %H:%M:%S', )
handler.setFormatter(formatter)
# Set STDERR handler as the only handler
logger.handlers = [handler]

#### plots
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.colors

sns.set(rc={'axes.facecolor': '#f9ecec', 'figure.facecolor': '#f9ecec'})

import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from plotly.offline import init_notebook_mode

### Plotly settings
theme_palette = {
    'base': '#a3d3eb',
    'complementary': '#ebbba3',
    'triadic': '#eba3d3',
    'backgound': '#f6fbfd'
}

temp = dict(layout=go.Layout(font=dict(family="Ubuntu", size=14),
                             height=600,
                             legend=dict(  #traceorder='reversed',
                                 orientation="v",
                                 y=1.15,
                                 x=0.9),
                             plot_bgcolor=theme_palette['backgound'],
                             paper_bgcolor=theme_palette['backgound']))

In [3]:
train = pd.read_parquet('/kaggle/input/jiegouhua714/train.parquet')


In [4]:
## Define some features by category
features = train.drop(['customer_ID', 'S_2'], axis=1).columns.to_list()
cat_vars = ['B_30', 'B_38', 'D_114', 'D_116', 'D_117', 'D_120', 'D_126', 'D_63', 'D_64', 'D_66', 'D_68', 'month',
            'day_of_week']
num_vars = list(filter(lambda x: x not in cat_vars, features))

# devide nums vars by AmEx Categories
delequincy_vars = filter(lambda x: x.startswith('D') and x not in cat_vars, features)
spend_vars = filter(lambda x: (x.startswith('S')) and (x not in cat_vars), features)
payment_vars = filter(lambda x: x.startswith('P') and x not in cat_vars, features)
balance_vars = filter(lambda x: x.startswith('B') and x not in cat_vars, features)
risk_vars = filter(lambda x: x.startswith('R') and x not in cat_vars, features)

with open('features.pkl', 'wb') as f:
    pickle.dump(features, f)

with open('cat_vars.pkl', 'wb') as f:
    pickle.dump(cat_vars, f)

with open('num_vars.pkl', 'wb') as f:
    pickle.dump(num_vars, f)

In [5]:
class BatchGenerator:
    def __init__(self, df, batch_feature='customer_ID', keep_features=features, n_batchs=750):
        self.df = df
        self.batch_feature = batch_feature
        self.keep_feature = list(set([batch_feature] + keep_features))
        self.n_batchs = n_batchs

    def __iter__(self):
        unique_vals = self.df[self.batch_feature].unique()
        batch_size = int(np.ceil(len(unique_vals) / self.n_batchs))
        groups = self.df.groupby(self.batch_feature).groups
        n_batchs = min(self.n_batchs, int(np.ceil(len(unique_vals) / batch_size)))
        for i in range(n_batchs):
            keys = unique_vals[i * batch_size:(i + 1) * batch_size]
            idx = [i for s in keys for i in groups[s]]
            if i == n_batchs - 1:
                keys = unique_vals[(i + 1) * batch_size:]
                idx = idx + [i for s in keys for i in groups[s]]
            yield self.df.loc[idx, self.keep_feature]


week_days = {1: 'Mon', 2: 'Tue', 3: 'Wen', 4: 'Thu', 5: 'Fri', 6: 'Sat', 7: 'Sun'}

In [6]:
def extract_date_vars(df, date_var='S_2', sort_by=['customer_ID', 'S_2'], week_days=week_days):
    # change to datetime
    df[date_var] = pd.to_datetime(df[date_var])
    # sort by custoner ther by date
    df = df.sort_values(by=sort_by)
    # extract some date characteristics
    # year has not a very
    # month
    df['month'] = df[date_var].dt.month
    # day of week
    df['day_of_week'] = df[date_var].apply(lambda x: x.isocalendar()[-1])
    return df


group_names = ["delequincy_vars", "spend_vars", "payment_vars", "balance_vars", "risk_vars"]


In [7]:
def row_rise_aggregation(df,
                         group_vars=[delequincy_vars, spend_vars, payment_vars, balance_vars, risk_vars],
                         group_names=group_names,
                         save=True):
    print('shape before row_rise_aggregation', df.shape)
    for group_name, group_var in zip(group_names, group_vars):
        df[group_name + '_sum'] = df[group_var].sum(axis=1)
        df[group_name + '_mean'] = df[group_var].mean(axis=1)
        df[group_name + '_missing'] = df.isnull().sum(axis=1)
    print('shape after row_rise_aggregation', df.shape)
    if save:
        df.reset_index(drop=False).to_feather(f"row_agg_{str(uuid.uuid4())}.ftr")
        return df['customer_ID'].nunique()
    return df

In [8]:
def column_rise_aggregation(df, num_vars=num_vars, cat_vars=cat_vars, save=True):
    print('shape before column_rise_aggregation', df.shape)
    group_names = filter(lambda x: '_vars' in x, df.columns)
    num_agg = df.groupby("customer_ID")[list(set(list(num_vars) + list(group_names)))].agg(
        ['mean', 'std', 'min', 'max', 'last'])
    num_agg.columns = ['_'.join(x) for x in num_agg.columns]

    cat_agg = df.groupby("customer_ID")[list(set(list(cat_vars) + ['month', 'day_of_week']))].agg(
        ['count', 'last', 'nunique', pd.Series.mode])
    cat_agg.columns = ['_'.join(x) for x in cat_agg.columns]

    mode_cols = filter(lambda x: x.endswith('_mode'), cat_agg.columns)
    for col in mode_cols:
        cat_agg[col] = cat_agg[col].apply(lambda x: random.choice(str(x).strip('[]').split()))
    #concat the two dataframes
    df = pd.concat([num_agg, cat_agg], axis=1)
    del num_agg, cat_agg

    gc.collect()
    print('shape after column_rise_aggregation', df.shape)
    if save:
        df.reset_index(drop=False).to_feather(f"col_agg_{str(uuid.uuid4())}.ftr")
        return len(df)  #df['customer_ID'].nunique()
    return df

In [9]:
# from https://www.kaggle.com/code/ragnar123/amex-lgbm-dart-cv-0-7977
def get_difference(df, num_features):
    res = []
    customer_ids = []
    for customer_id, df in tqdm_notebook(df.groupby(['customer_ID'])):
        # Get the differences
        diff_df = df[num_features].diff(1).iloc[[-1]].values.astype(np.float32)
        # Append to lists
        res.append(diff_df)
        customer_ids.append(customer_id)
    # Concatenate
    res = np.concatenate(res, axis=0)
    # Transform to dataframe
    res = pd.DataFrame(res, columns=[col + '_diff1' for col in df[num_features].columns])
    # Add customer id
    res['customer_ID'] = customer_ids
    print('final shape', res.shape)
    #       df = df.merge(res, on='customer_ID', how='inner')
    #       df.reset_index(drop=False).to_feather(f"diff_{str(uuid.uuid4())}.ftr")
    return res  #df['customer_ID'].nunique()

In [10]:
def save_partition(df, prefix='train'):
    global c
    df.reset_index(drop=True).to_feather(f'{prefix}_{c}.ftr')
    c = c + 1
    return df['customer_ID'].nunique()

In [11]:
N_BATCHS = 100
c = 0

samples_df = BatchGenerator(train, batch_feature='customer_ID', keep_features=features + ['S_2'], n_batchs=N_BATCHS)
processed_elements = sum(map(partial(save_partition, prefix='train'), tqdm_notebook(samples_df, total=N_BATCHS)))

del train
gc.collect()

  0%|          | 0/100 [00:00<?, ?it/s]

18

In [12]:
processed = []
n_paths = 0
for path in tqdm_notebook(glob.glob('train_*.ftr')):
    # apply on train
    sample_df = pd.read_feather(path)
    diff_df = get_difference(sample_df, num_features=num_vars)
    sample_df = sample_df.merge(diff_df, on='customer_ID', how='inner')

    #   if sample_df.shape[1]<300:
    sample_df = extract_date_vars(sample_df)
    all_num_vars = num_vars + list(map(lambda x: x + '_diff1', num_vars))
    sample_df = row_rise_aggregation(sample_df, save=False)
    sample_df = column_rise_aggregation(sample_df, num_vars=all_num_vars, save=False)

    
    for col in tqdm_notebook(num_vars):
        try:
            sample_df[f'{col}_last_mean_diff'] = sample_df[f'{col}_last'] - sample_df[f'{col}_mean']
        except:
            pass

    sample_df = sample_df.reset_index()

    sample_df.reset_index(drop=True).to_feather(path)
    

train = pd.concat(map(lambda sample_df: pd.read_feather(sample_df), tqdm_notebook(glob.glob('train_*.ftr'))))

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55372, 369)
shape after row_rise_aggregation (55372, 384)
shape before column_rise_aggregation (55372, 384)
shape after column_rise_aggregation (4590, 1893)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55244, 369)
shape after row_rise_aggregation (55244, 384)
shape before column_rise_aggregation (55244, 384)
shape after column_rise_aggregation (4590, 1893)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55182, 369)
shape after row_rise_aggregation (55182, 384)
shape before column_rise_aggregation (55182, 384)
shape after column_rise_aggregation (4590, 1897)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55295, 369)
shape after row_rise_aggregation (55295, 384)
shape before column_rise_aggregation (55295, 384)
shape after column_rise_aggregation (4590, 1897)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55807, 369)
shape after row_rise_aggregation (55807, 384)
shape before column_rise_aggregation (55807, 384)
shape after column_rise_aggregation (4590, 1893)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55157, 369)
shape after row_rise_aggregation (55157, 384)
shape before column_rise_aggregation (55157, 384)
shape after column_rise_aggregation (4590, 1897)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55150, 369)
shape after row_rise_aggregation (55150, 384)
shape before column_rise_aggregation (55150, 384)
shape after column_rise_aggregation (4590, 1865)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55155, 369)
shape after row_rise_aggregation (55155, 384)
shape before column_rise_aggregation (55155, 384)
shape after column_rise_aggregation (4590, 1897)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55203, 369)
shape after row_rise_aggregation (55203, 384)
shape before column_rise_aggregation (55203, 384)
shape after column_rise_aggregation (4590, 1893)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55111, 369)
shape after row_rise_aggregation (55111, 384)
shape before column_rise_aggregation (55111, 384)
shape after column_rise_aggregation (4590, 1889)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55069, 369)
shape after row_rise_aggregation (55069, 384)
shape before column_rise_aggregation (55069, 384)
shape after column_rise_aggregation (4590, 1897)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55435, 369)
shape after row_rise_aggregation (55435, 384)
shape before column_rise_aggregation (55435, 384)
shape after column_rise_aggregation (4590, 1897)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55118, 369)
shape after row_rise_aggregation (55118, 384)
shape before column_rise_aggregation (55118, 384)
shape after column_rise_aggregation (4590, 1893)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55386, 369)
shape after row_rise_aggregation (55386, 384)
shape before column_rise_aggregation (55386, 384)
shape after column_rise_aggregation (4590, 1897)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55442, 369)
shape after row_rise_aggregation (55442, 384)
shape before column_rise_aggregation (55442, 384)
shape after column_rise_aggregation (4590, 1897)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55455, 369)
shape after row_rise_aggregation (55455, 384)
shape before column_rise_aggregation (55455, 384)
shape after column_rise_aggregation (4590, 1889)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55576, 369)
shape after row_rise_aggregation (55576, 384)
shape before column_rise_aggregation (55576, 384)
shape after column_rise_aggregation (4590, 1897)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4503 [00:00<?, ?it/s]

final shape (4503, 178)
shape before row_rise_aggregation (54161, 369)
shape after row_rise_aggregation (54161, 384)
shape before column_rise_aggregation (54161, 384)
shape after column_rise_aggregation (4503, 1897)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55555, 369)
shape after row_rise_aggregation (55555, 384)
shape before column_rise_aggregation (55555, 384)
shape after column_rise_aggregation (4590, 1897)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55060, 369)
shape after row_rise_aggregation (55060, 384)
shape before column_rise_aggregation (55060, 384)
shape after column_rise_aggregation (4590, 1893)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55427, 369)
shape after row_rise_aggregation (55427, 384)
shape before column_rise_aggregation (55427, 384)
shape after column_rise_aggregation (4590, 1897)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55460, 369)
shape after row_rise_aggregation (55460, 384)
shape before column_rise_aggregation (55460, 384)
shape after column_rise_aggregation (4590, 1893)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55693, 369)
shape after row_rise_aggregation (55693, 384)
shape before column_rise_aggregation (55693, 384)
shape after column_rise_aggregation (4590, 1893)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55429, 369)
shape after row_rise_aggregation (55429, 384)
shape before column_rise_aggregation (55429, 384)
shape after column_rise_aggregation (4590, 1889)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55260, 369)
shape after row_rise_aggregation (55260, 384)
shape before column_rise_aggregation (55260, 384)
shape after column_rise_aggregation (4590, 1893)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55404, 369)
shape after row_rise_aggregation (55404, 384)
shape before column_rise_aggregation (55404, 384)
shape after column_rise_aggregation (4590, 1897)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55685, 369)
shape after row_rise_aggregation (55685, 384)
shape before column_rise_aggregation (55685, 384)
shape after column_rise_aggregation (4590, 1893)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55082, 369)
shape after row_rise_aggregation (55082, 384)
shape before column_rise_aggregation (55082, 384)
shape after column_rise_aggregation (4590, 1889)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55320, 369)
shape after row_rise_aggregation (55320, 384)
shape before column_rise_aggregation (55320, 384)
shape after column_rise_aggregation (4590, 1897)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55683, 369)
shape after row_rise_aggregation (55683, 384)
shape before column_rise_aggregation (55683, 384)
shape after column_rise_aggregation (4590, 1893)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55463, 369)
shape after row_rise_aggregation (55463, 384)
shape before column_rise_aggregation (55463, 384)
shape after column_rise_aggregation (4590, 1897)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55256, 369)
shape after row_rise_aggregation (55256, 384)
shape before column_rise_aggregation (55256, 384)
shape after column_rise_aggregation (4590, 1897)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55180, 369)
shape after row_rise_aggregation (55180, 384)
shape before column_rise_aggregation (55180, 384)
shape after column_rise_aggregation (4590, 1897)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55287, 369)
shape after row_rise_aggregation (55287, 384)
shape before column_rise_aggregation (55287, 384)
shape after column_rise_aggregation (4590, 1885)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55462, 369)
shape after row_rise_aggregation (55462, 384)
shape before column_rise_aggregation (55462, 384)
shape after column_rise_aggregation (4590, 1893)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55174, 369)
shape after row_rise_aggregation (55174, 384)
shape before column_rise_aggregation (55174, 384)
shape after column_rise_aggregation (4590, 1897)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55158, 369)
shape after row_rise_aggregation (55158, 384)
shape before column_rise_aggregation (55158, 384)
shape after column_rise_aggregation (4590, 1893)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (54990, 369)
shape after row_rise_aggregation (54990, 384)
shape before column_rise_aggregation (54990, 384)
shape after column_rise_aggregation (4590, 1897)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55341, 369)
shape after row_rise_aggregation (55341, 384)
shape before column_rise_aggregation (55341, 384)
shape after column_rise_aggregation (4590, 1893)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55365, 369)
shape after row_rise_aggregation (55365, 384)
shape before column_rise_aggregation (55365, 384)
shape after column_rise_aggregation (4590, 1897)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55180, 369)
shape after row_rise_aggregation (55180, 384)
shape before column_rise_aggregation (55180, 384)
shape after column_rise_aggregation (4590, 1897)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55372, 369)
shape after row_rise_aggregation (55372, 384)
shape before column_rise_aggregation (55372, 384)
shape after column_rise_aggregation (4590, 1897)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55388, 369)
shape after row_rise_aggregation (55388, 384)
shape before column_rise_aggregation (55388, 384)
shape after column_rise_aggregation (4590, 1893)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55187, 369)
shape after row_rise_aggregation (55187, 384)
shape before column_rise_aggregation (55187, 384)
shape after column_rise_aggregation (4590, 1893)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55198, 369)
shape after row_rise_aggregation (55198, 384)
shape before column_rise_aggregation (55198, 384)
shape after column_rise_aggregation (4590, 1897)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55232, 369)
shape after row_rise_aggregation (55232, 384)
shape before column_rise_aggregation (55232, 384)
shape after column_rise_aggregation (4590, 1897)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55213, 369)
shape after row_rise_aggregation (55213, 384)
shape before column_rise_aggregation (55213, 384)
shape after column_rise_aggregation (4590, 1897)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55253, 369)
shape after row_rise_aggregation (55253, 384)
shape before column_rise_aggregation (55253, 384)
shape after column_rise_aggregation (4590, 1897)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55264, 369)
shape after row_rise_aggregation (55264, 384)
shape before column_rise_aggregation (55264, 384)
shape after column_rise_aggregation (4590, 1865)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55350, 369)
shape after row_rise_aggregation (55350, 384)
shape before column_rise_aggregation (55350, 384)
shape after column_rise_aggregation (4590, 1889)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55307, 369)
shape after row_rise_aggregation (55307, 384)
shape before column_rise_aggregation (55307, 384)
shape after column_rise_aggregation (4590, 1893)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55273, 369)
shape after row_rise_aggregation (55273, 384)
shape before column_rise_aggregation (55273, 384)
shape after column_rise_aggregation (4590, 1893)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55728, 369)
shape after row_rise_aggregation (55728, 384)
shape before column_rise_aggregation (55728, 384)
shape after column_rise_aggregation (4590, 1897)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55303, 369)
shape after row_rise_aggregation (55303, 384)
shape before column_rise_aggregation (55303, 384)
shape after column_rise_aggregation (4590, 1897)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55705, 369)
shape after row_rise_aggregation (55705, 384)
shape before column_rise_aggregation (55705, 384)
shape after column_rise_aggregation (4590, 1897)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55260, 369)
shape after row_rise_aggregation (55260, 384)
shape before column_rise_aggregation (55260, 384)
shape after column_rise_aggregation (4590, 1893)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55342, 369)
shape after row_rise_aggregation (55342, 384)
shape before column_rise_aggregation (55342, 384)
shape after column_rise_aggregation (4590, 1881)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55331, 369)
shape after row_rise_aggregation (55331, 384)
shape before column_rise_aggregation (55331, 384)
shape after column_rise_aggregation (4590, 1893)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55253, 369)
shape after row_rise_aggregation (55253, 384)
shape before column_rise_aggregation (55253, 384)
shape after column_rise_aggregation (4590, 1897)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (54987, 369)
shape after row_rise_aggregation (54987, 384)
shape before column_rise_aggregation (54987, 384)
shape after column_rise_aggregation (4590, 1893)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55375, 369)
shape after row_rise_aggregation (55375, 384)
shape before column_rise_aggregation (55375, 384)
shape after column_rise_aggregation (4590, 1893)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55306, 369)
shape after row_rise_aggregation (55306, 384)
shape before column_rise_aggregation (55306, 384)
shape after column_rise_aggregation (4590, 1897)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55224, 369)
shape after row_rise_aggregation (55224, 384)
shape before column_rise_aggregation (55224, 384)
shape after column_rise_aggregation (4590, 1893)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55294, 369)
shape after row_rise_aggregation (55294, 384)
shape before column_rise_aggregation (55294, 384)
shape after column_rise_aggregation (4590, 1893)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55539, 369)
shape after row_rise_aggregation (55539, 384)
shape before column_rise_aggregation (55539, 384)
shape after column_rise_aggregation (4590, 1897)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55473, 369)
shape after row_rise_aggregation (55473, 384)
shape before column_rise_aggregation (55473, 384)
shape after column_rise_aggregation (4590, 1861)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55204, 369)
shape after row_rise_aggregation (55204, 384)
shape before column_rise_aggregation (55204, 384)
shape after column_rise_aggregation (4590, 1897)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55029, 369)
shape after row_rise_aggregation (55029, 384)
shape before column_rise_aggregation (55029, 384)
shape after column_rise_aggregation (4590, 1893)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55534, 369)
shape after row_rise_aggregation (55534, 384)
shape before column_rise_aggregation (55534, 384)
shape after column_rise_aggregation (4590, 1897)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55191, 369)
shape after row_rise_aggregation (55191, 384)
shape before column_rise_aggregation (55191, 384)
shape after column_rise_aggregation (4590, 1893)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55370, 369)
shape after row_rise_aggregation (55370, 384)
shape before column_rise_aggregation (55370, 384)
shape after column_rise_aggregation (4590, 1893)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55514, 369)
shape after row_rise_aggregation (55514, 384)
shape before column_rise_aggregation (55514, 384)
shape after column_rise_aggregation (4590, 1897)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55130, 369)
shape after row_rise_aggregation (55130, 384)
shape before column_rise_aggregation (55130, 384)
shape after column_rise_aggregation (4590, 1897)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55223, 369)
shape after row_rise_aggregation (55223, 384)
shape before column_rise_aggregation (55223, 384)
shape after column_rise_aggregation (4590, 1897)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55423, 369)
shape after row_rise_aggregation (55423, 384)
shape before column_rise_aggregation (55423, 384)
shape after column_rise_aggregation (4590, 1893)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55304, 369)
shape after row_rise_aggregation (55304, 384)
shape before column_rise_aggregation (55304, 384)
shape after column_rise_aggregation (4590, 1897)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55164, 369)
shape after row_rise_aggregation (55164, 384)
shape before column_rise_aggregation (55164, 384)
shape after column_rise_aggregation (4590, 1893)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55314, 369)
shape after row_rise_aggregation (55314, 384)
shape before column_rise_aggregation (55314, 384)
shape after column_rise_aggregation (4590, 1897)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55260, 369)
shape after row_rise_aggregation (55260, 384)
shape before column_rise_aggregation (55260, 384)
shape after column_rise_aggregation (4590, 1893)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55417, 369)
shape after row_rise_aggregation (55417, 384)
shape before column_rise_aggregation (55417, 384)
shape after column_rise_aggregation (4590, 1889)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55593, 369)
shape after row_rise_aggregation (55593, 384)
shape before column_rise_aggregation (55593, 384)
shape after column_rise_aggregation (4590, 1893)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55246, 369)
shape after row_rise_aggregation (55246, 384)
shape before column_rise_aggregation (55246, 384)
shape after column_rise_aggregation (4590, 1897)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55249, 369)
shape after row_rise_aggregation (55249, 384)
shape before column_rise_aggregation (55249, 384)
shape after column_rise_aggregation (4590, 1897)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55459, 369)
shape after row_rise_aggregation (55459, 384)
shape before column_rise_aggregation (55459, 384)
shape after column_rise_aggregation (4590, 1893)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55455, 369)
shape after row_rise_aggregation (55455, 384)
shape before column_rise_aggregation (55455, 384)
shape after column_rise_aggregation (4590, 1893)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55275, 369)
shape after row_rise_aggregation (55275, 384)
shape before column_rise_aggregation (55275, 384)
shape after column_rise_aggregation (4590, 1893)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55629, 369)
shape after row_rise_aggregation (55629, 384)
shape before column_rise_aggregation (55629, 384)
shape after column_rise_aggregation (4590, 1893)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55250, 369)
shape after row_rise_aggregation (55250, 384)
shape before column_rise_aggregation (55250, 384)
shape after column_rise_aggregation (4590, 1897)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55423, 369)
shape after row_rise_aggregation (55423, 384)
shape before column_rise_aggregation (55423, 384)
shape after column_rise_aggregation (4590, 1893)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55021, 369)
shape after row_rise_aggregation (55021, 384)
shape before column_rise_aggregation (55021, 384)
shape after column_rise_aggregation (4590, 1893)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55346, 369)
shape after row_rise_aggregation (55346, 384)
shape before column_rise_aggregation (55346, 384)
shape after column_rise_aggregation (4590, 1893)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55513, 369)
shape after row_rise_aggregation (55513, 384)
shape before column_rise_aggregation (55513, 384)
shape after column_rise_aggregation (4590, 1897)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55776, 369)
shape after row_rise_aggregation (55776, 384)
shape before column_rise_aggregation (55776, 384)
shape after column_rise_aggregation (4590, 1897)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55475, 369)
shape after row_rise_aggregation (55475, 384)
shape before column_rise_aggregation (55475, 384)
shape after column_rise_aggregation (4590, 1889)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55236, 369)
shape after row_rise_aggregation (55236, 384)
shape before column_rise_aggregation (55236, 384)
shape after column_rise_aggregation (4590, 1897)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (54949, 369)
shape after row_rise_aggregation (54949, 384)
shape before column_rise_aggregation (54949, 384)
shape after column_rise_aggregation (4590, 1893)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55249, 369)
shape after row_rise_aggregation (55249, 384)
shape before column_rise_aggregation (55249, 384)
shape after column_rise_aggregation (4590, 1893)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55208, 369)
shape after row_rise_aggregation (55208, 384)
shape before column_rise_aggregation (55208, 384)
shape after column_rise_aggregation (4590, 1889)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55529, 369)
shape after row_rise_aggregation (55529, 384)
shape before column_rise_aggregation (55529, 384)
shape after column_rise_aggregation (4590, 1893)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/4590 [00:00<?, ?it/s]

final shape (4590, 178)
shape before row_rise_aggregation (55104, 369)
shape after row_rise_aggregation (55104, 384)
shape before column_rise_aggregation (55104, 384)
shape after column_rise_aggregation (4590, 1893)


  0%|          | 0/177 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

In [13]:
## Left join with labels:
labels = pd.read_csv('/kaggle/input/amex-default-prediction/train_labels.csv')
print(labels.shape, labels['customer_ID'].nunique())
labels = labels.set_index('customer_ID')
train = train.set_index('customer_ID')
train['target'] = labels['target']
# del labels
gc.collect()
# save result
train.reset_index().to_feather('train_fin_df.ftr')

(458913, 2) 458913


In [14]:
N_BATCHS = 300
c=0

test = pd.read_parquet('/kaggle/input/jiegouhua714/test.parquet')
n_cid = test.customer_ID.nunique()
print(test.shape, n_cid )
samples_df = BatchGenerator(test, batch_feature='customer_ID', keep_features=features+['S_2'], n_batchs=N_BATCHS)
processed_elements = sum(map(partial(save_partition, prefix='test'), tqdm_notebook(samples_df, total=N_BATCHS)))
    
del test
gc.collect()

(11363762, 190) 924621


  0%|          | 0/300 [00:00<?, ?it/s]

18

In [15]:
processed = 0

      
for path in tqdm_notebook(glob.glob('test_*.ftr')):
    # apply on train
    sample_df = pd.read_feather(path)
    diff_df = get_difference(sample_df, num_features=num_vars)
    sample_df = sample_df.merge(diff_df, on='customer_ID', how='inner')
        
 #   if sample_df.shape[1]<300:
    sample_df = extract_date_vars(sample_df)
    all_num_vars = num_vars + list(map(lambda x: x+'_diff1', num_vars))
    sample_df = row_rise_aggregation(sample_df, save=False)
    sample_df = column_rise_aggregation(sample_df, num_vars=all_num_vars, save=False)
        
    print("diff between last and mean transaction")
    for col in tqdm_notebook(num_vars):
        try:
            sample_df[f'{col}_last_mean_diff'] = sample_df[f'{col}_last'] - sample_df[f'{col}_mean']
        except:
            pass

    sample_df = sample_df.reset_index()
        
    sample_df.reset_index(drop=True).to_feather(path)
    print("save processed", path)
        
    processed += sample_df.customer_ID.nunique()
        
test = pd.concat(map(lambda sample_df: pd.read_feather(sample_df), tqdm_notebook(glob.glob('test_*.ftr'))))
test.reset_index(drop=True).to_feather('test_fin_df.ftr')

  0%|          | 0/300 [00:00<?, ?it/s]

  0%|          | 0/2804 [00:00<?, ?it/s]

final shape (2804, 178)
shape before row_rise_aggregation (34462, 369)
shape after row_rise_aggregation (34462, 384)
shape before column_rise_aggregation (34462, 384)
shape after column_rise_aggregation (2804, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_299.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37716, 369)
shape after row_rise_aggregation (37716, 384)
shape before column_rise_aggregation (37716, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_57.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37803, 369)
shape after row_rise_aggregation (37803, 384)
shape before column_rise_aggregation (37803, 384)
shape after column_rise_aggregation (3083, 1885)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_112.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37984, 369)
shape after row_rise_aggregation (37984, 384)
shape before column_rise_aggregation (37984, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_241.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37919, 369)
shape after row_rise_aggregation (37919, 384)
shape before column_rise_aggregation (37919, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_226.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (38004, 369)
shape after row_rise_aggregation (38004, 384)
shape before column_rise_aggregation (38004, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_270.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37747, 369)
shape after row_rise_aggregation (37747, 384)
shape before column_rise_aggregation (37747, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_191.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (38042, 369)
shape after row_rise_aggregation (38042, 384)
shape before column_rise_aggregation (38042, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_40.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37922, 369)
shape after row_rise_aggregation (37922, 384)
shape before column_rise_aggregation (37922, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_263.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37810, 369)
shape after row_rise_aggregation (37810, 384)
shape before column_rise_aggregation (37810, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_29.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37810, 369)
shape after row_rise_aggregation (37810, 384)
shape before column_rise_aggregation (37810, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_21.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (38057, 369)
shape after row_rise_aggregation (38057, 384)
shape before column_rise_aggregation (38057, 384)
shape after column_rise_aggregation (3083, 1889)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_195.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37920, 369)
shape after row_rise_aggregation (37920, 384)
shape before column_rise_aggregation (37920, 384)
shape after column_rise_aggregation (3083, 1885)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_275.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37930, 369)
shape after row_rise_aggregation (37930, 384)
shape before column_rise_aggregation (37930, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_239.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37704, 369)
shape after row_rise_aggregation (37704, 384)
shape before column_rise_aggregation (37704, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_118.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37724, 369)
shape after row_rise_aggregation (37724, 384)
shape before column_rise_aggregation (37724, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_240.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37686, 369)
shape after row_rise_aggregation (37686, 384)
shape before column_rise_aggregation (37686, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_74.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (38000, 369)
shape after row_rise_aggregation (38000, 384)
shape before column_rise_aggregation (38000, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_293.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (38027, 369)
shape after row_rise_aggregation (38027, 384)
shape before column_rise_aggregation (38027, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_242.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37917, 369)
shape after row_rise_aggregation (37917, 384)
shape before column_rise_aggregation (37917, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_234.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37913, 369)
shape after row_rise_aggregation (37913, 384)
shape before column_rise_aggregation (37913, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_250.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37910, 369)
shape after row_rise_aggregation (37910, 384)
shape before column_rise_aggregation (37910, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_228.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37987, 369)
shape after row_rise_aggregation (37987, 384)
shape before column_rise_aggregation (37987, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_216.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37894, 369)
shape after row_rise_aggregation (37894, 384)
shape before column_rise_aggregation (37894, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_189.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37968, 369)
shape after row_rise_aggregation (37968, 384)
shape before column_rise_aggregation (37968, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_171.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37651, 369)
shape after row_rise_aggregation (37651, 384)
shape before column_rise_aggregation (37651, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_253.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37929, 369)
shape after row_rise_aggregation (37929, 384)
shape before column_rise_aggregation (37929, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_223.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37969, 369)
shape after row_rise_aggregation (37969, 384)
shape before column_rise_aggregation (37969, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_38.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37981, 369)
shape after row_rise_aggregation (37981, 384)
shape before column_rise_aggregation (37981, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_210.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37910, 369)
shape after row_rise_aggregation (37910, 384)
shape before column_rise_aggregation (37910, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_231.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37997, 369)
shape after row_rise_aggregation (37997, 384)
shape before column_rise_aggregation (37997, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_163.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37935, 369)
shape after row_rise_aggregation (37935, 384)
shape before column_rise_aggregation (37935, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_140.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37893, 369)
shape after row_rise_aggregation (37893, 384)
shape before column_rise_aggregation (37893, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_111.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37866, 369)
shape after row_rise_aggregation (37866, 384)
shape before column_rise_aggregation (37866, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_117.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37853, 369)
shape after row_rise_aggregation (37853, 384)
shape before column_rise_aggregation (37853, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_254.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37793, 369)
shape after row_rise_aggregation (37793, 384)
shape before column_rise_aggregation (37793, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_204.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (38063, 369)
shape after row_rise_aggregation (38063, 384)
shape before column_rise_aggregation (38063, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_230.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37785, 369)
shape after row_rise_aggregation (37785, 384)
shape before column_rise_aggregation (37785, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_285.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37775, 369)
shape after row_rise_aggregation (37775, 384)
shape before column_rise_aggregation (37775, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_201.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37866, 369)
shape after row_rise_aggregation (37866, 384)
shape before column_rise_aggregation (37866, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_244.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37939, 369)
shape after row_rise_aggregation (37939, 384)
shape before column_rise_aggregation (37939, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_262.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37841, 369)
shape after row_rise_aggregation (37841, 384)
shape before column_rise_aggregation (37841, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_290.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (38076, 369)
shape after row_rise_aggregation (38076, 384)
shape before column_rise_aggregation (38076, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_143.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37816, 369)
shape after row_rise_aggregation (37816, 384)
shape before column_rise_aggregation (37816, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_179.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37960, 369)
shape after row_rise_aggregation (37960, 384)
shape before column_rise_aggregation (37960, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_235.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37982, 369)
shape after row_rise_aggregation (37982, 384)
shape before column_rise_aggregation (37982, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_49.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37922, 369)
shape after row_rise_aggregation (37922, 384)
shape before column_rise_aggregation (37922, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_122.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37865, 369)
shape after row_rise_aggregation (37865, 384)
shape before column_rise_aggregation (37865, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_247.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37890, 369)
shape after row_rise_aggregation (37890, 384)
shape before column_rise_aggregation (37890, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_184.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (38009, 369)
shape after row_rise_aggregation (38009, 384)
shape before column_rise_aggregation (38009, 384)
shape after column_rise_aggregation (3083, 1889)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_77.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37796, 369)
shape after row_rise_aggregation (37796, 384)
shape before column_rise_aggregation (37796, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_59.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37841, 369)
shape after row_rise_aggregation (37841, 384)
shape before column_rise_aggregation (37841, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_214.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (38031, 369)
shape after row_rise_aggregation (38031, 384)
shape before column_rise_aggregation (38031, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_170.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37904, 369)
shape after row_rise_aggregation (37904, 384)
shape before column_rise_aggregation (37904, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_261.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37974, 369)
shape after row_rise_aggregation (37974, 384)
shape before column_rise_aggregation (37974, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_193.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37757, 369)
shape after row_rise_aggregation (37757, 384)
shape before column_rise_aggregation (37757, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_178.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37722, 369)
shape after row_rise_aggregation (37722, 384)
shape before column_rise_aggregation (37722, 384)
shape after column_rise_aggregation (3083, 1889)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_48.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37755, 369)
shape after row_rise_aggregation (37755, 384)
shape before column_rise_aggregation (37755, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_92.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37704, 369)
shape after row_rise_aggregation (37704, 384)
shape before column_rise_aggregation (37704, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_142.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37996, 369)
shape after row_rise_aggregation (37996, 384)
shape before column_rise_aggregation (37996, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_105.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37770, 369)
shape after row_rise_aggregation (37770, 384)
shape before column_rise_aggregation (37770, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_64.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37783, 369)
shape after row_rise_aggregation (37783, 384)
shape before column_rise_aggregation (37783, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_100.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37873, 369)
shape after row_rise_aggregation (37873, 384)
shape before column_rise_aggregation (37873, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_222.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37780, 369)
shape after row_rise_aggregation (37780, 384)
shape before column_rise_aggregation (37780, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_56.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37692, 369)
shape after row_rise_aggregation (37692, 384)
shape before column_rise_aggregation (37692, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_0.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (38022, 369)
shape after row_rise_aggregation (38022, 384)
shape before column_rise_aggregation (38022, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_25.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37841, 369)
shape after row_rise_aggregation (37841, 384)
shape before column_rise_aggregation (37841, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_55.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (38037, 369)
shape after row_rise_aggregation (38037, 384)
shape before column_rise_aggregation (38037, 384)
shape after column_rise_aggregation (3083, 1885)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_43.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37833, 369)
shape after row_rise_aggregation (37833, 384)
shape before column_rise_aggregation (37833, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_233.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37931, 369)
shape after row_rise_aggregation (37931, 384)
shape before column_rise_aggregation (37931, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_87.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37697, 369)
shape after row_rise_aggregation (37697, 384)
shape before column_rise_aggregation (37697, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_3.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37878, 369)
shape after row_rise_aggregation (37878, 384)
shape before column_rise_aggregation (37878, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_127.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37878, 369)
shape after row_rise_aggregation (37878, 384)
shape before column_rise_aggregation (37878, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_264.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37932, 369)
shape after row_rise_aggregation (37932, 384)
shape before column_rise_aggregation (37932, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_95.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37683, 369)
shape after row_rise_aggregation (37683, 384)
shape before column_rise_aggregation (37683, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_62.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37694, 369)
shape after row_rise_aggregation (37694, 384)
shape before column_rise_aggregation (37694, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_148.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37881, 369)
shape after row_rise_aggregation (37881, 384)
shape before column_rise_aggregation (37881, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_124.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37770, 369)
shape after row_rise_aggregation (37770, 384)
shape before column_rise_aggregation (37770, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_156.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37831, 369)
shape after row_rise_aggregation (37831, 384)
shape before column_rise_aggregation (37831, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_206.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37917, 369)
shape after row_rise_aggregation (37917, 384)
shape before column_rise_aggregation (37917, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_53.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37940, 369)
shape after row_rise_aggregation (37940, 384)
shape before column_rise_aggregation (37940, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_12.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37971, 369)
shape after row_rise_aggregation (37971, 384)
shape before column_rise_aggregation (37971, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_136.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37941, 369)
shape after row_rise_aggregation (37941, 384)
shape before column_rise_aggregation (37941, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_287.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37900, 369)
shape after row_rise_aggregation (37900, 384)
shape before column_rise_aggregation (37900, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_207.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37746, 369)
shape after row_rise_aggregation (37746, 384)
shape before column_rise_aggregation (37746, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_209.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37983, 369)
shape after row_rise_aggregation (37983, 384)
shape before column_rise_aggregation (37983, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_96.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37936, 369)
shape after row_rise_aggregation (37936, 384)
shape before column_rise_aggregation (37936, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_61.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37850, 369)
shape after row_rise_aggregation (37850, 384)
shape before column_rise_aggregation (37850, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_72.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37652, 369)
shape after row_rise_aggregation (37652, 384)
shape before column_rise_aggregation (37652, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_106.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37973, 369)
shape after row_rise_aggregation (37973, 384)
shape before column_rise_aggregation (37973, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_147.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37900, 369)
shape after row_rise_aggregation (37900, 384)
shape before column_rise_aggregation (37900, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_66.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37504, 369)
shape after row_rise_aggregation (37504, 384)
shape before column_rise_aggregation (37504, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_132.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37872, 369)
shape after row_rise_aggregation (37872, 384)
shape before column_rise_aggregation (37872, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_187.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37865, 369)
shape after row_rise_aggregation (37865, 384)
shape before column_rise_aggregation (37865, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_91.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37774, 369)
shape after row_rise_aggregation (37774, 384)
shape before column_rise_aggregation (37774, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_86.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (38065, 369)
shape after row_rise_aggregation (38065, 384)
shape before column_rise_aggregation (38065, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_110.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (38004, 369)
shape after row_rise_aggregation (38004, 384)
shape before column_rise_aggregation (38004, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_51.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37716, 369)
shape after row_rise_aggregation (37716, 384)
shape before column_rise_aggregation (37716, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_135.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37989, 369)
shape after row_rise_aggregation (37989, 384)
shape before column_rise_aggregation (37989, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_94.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37812, 369)
shape after row_rise_aggregation (37812, 384)
shape before column_rise_aggregation (37812, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_28.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37935, 369)
shape after row_rise_aggregation (37935, 384)
shape before column_rise_aggregation (37935, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_276.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37670, 369)
shape after row_rise_aggregation (37670, 384)
shape before column_rise_aggregation (37670, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_150.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37961, 369)
shape after row_rise_aggregation (37961, 384)
shape before column_rise_aggregation (37961, 384)
shape after column_rise_aggregation (3083, 1889)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_224.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37935, 369)
shape after row_rise_aggregation (37935, 384)
shape before column_rise_aggregation (37935, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_6.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37725, 369)
shape after row_rise_aggregation (37725, 384)
shape before column_rise_aggregation (37725, 384)
shape after column_rise_aggregation (3083, 1889)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_273.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (38163, 369)
shape after row_rise_aggregation (38163, 384)
shape before column_rise_aggregation (38163, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_15.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37861, 369)
shape after row_rise_aggregation (37861, 384)
shape before column_rise_aggregation (37861, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_232.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37943, 369)
shape after row_rise_aggregation (37943, 384)
shape before column_rise_aggregation (37943, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_188.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37941, 369)
shape after row_rise_aggregation (37941, 384)
shape before column_rise_aggregation (37941, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_26.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37958, 369)
shape after row_rise_aggregation (37958, 384)
shape before column_rise_aggregation (37958, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_176.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37974, 369)
shape after row_rise_aggregation (37974, 384)
shape before column_rise_aggregation (37974, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_259.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37954, 369)
shape after row_rise_aggregation (37954, 384)
shape before column_rise_aggregation (37954, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_82.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37961, 369)
shape after row_rise_aggregation (37961, 384)
shape before column_rise_aggregation (37961, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_154.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (38016, 369)
shape after row_rise_aggregation (38016, 384)
shape before column_rise_aggregation (38016, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_218.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37850, 369)
shape after row_rise_aggregation (37850, 384)
shape before column_rise_aggregation (37850, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_70.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (38087, 369)
shape after row_rise_aggregation (38087, 384)
shape before column_rise_aggregation (38087, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_78.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37898, 369)
shape after row_rise_aggregation (37898, 384)
shape before column_rise_aggregation (37898, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_291.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (38063, 369)
shape after row_rise_aggregation (38063, 384)
shape before column_rise_aggregation (38063, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_125.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37860, 369)
shape after row_rise_aggregation (37860, 384)
shape before column_rise_aggregation (37860, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_99.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (38174, 369)
shape after row_rise_aggregation (38174, 384)
shape before column_rise_aggregation (38174, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_282.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37654, 369)
shape after row_rise_aggregation (37654, 384)
shape before column_rise_aggregation (37654, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_283.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37858, 369)
shape after row_rise_aggregation (37858, 384)
shape before column_rise_aggregation (37858, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_9.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (38068, 369)
shape after row_rise_aggregation (38068, 384)
shape before column_rise_aggregation (38068, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_79.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37905, 369)
shape after row_rise_aggregation (37905, 384)
shape before column_rise_aggregation (37905, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_68.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (38008, 369)
shape after row_rise_aggregation (38008, 384)
shape before column_rise_aggregation (38008, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_246.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37924, 369)
shape after row_rise_aggregation (37924, 384)
shape before column_rise_aggregation (37924, 384)
shape after column_rise_aggregation (3083, 1881)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_151.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37766, 369)
shape after row_rise_aggregation (37766, 384)
shape before column_rise_aggregation (37766, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_202.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37770, 369)
shape after row_rise_aggregation (37770, 384)
shape before column_rise_aggregation (37770, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_138.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37761, 369)
shape after row_rise_aggregation (37761, 384)
shape before column_rise_aggregation (37761, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_115.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37779, 369)
shape after row_rise_aggregation (37779, 384)
shape before column_rise_aggregation (37779, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_173.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (38050, 369)
shape after row_rise_aggregation (38050, 384)
shape before column_rise_aggregation (38050, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_162.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37832, 369)
shape after row_rise_aggregation (37832, 384)
shape before column_rise_aggregation (37832, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_37.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37623, 369)
shape after row_rise_aggregation (37623, 384)
shape before column_rise_aggregation (37623, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_104.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37736, 369)
shape after row_rise_aggregation (37736, 384)
shape before column_rise_aggregation (37736, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_146.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37706, 369)
shape after row_rise_aggregation (37706, 384)
shape before column_rise_aggregation (37706, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_101.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37691, 369)
shape after row_rise_aggregation (37691, 384)
shape before column_rise_aggregation (37691, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_8.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37901, 369)
shape after row_rise_aggregation (37901, 384)
shape before column_rise_aggregation (37901, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_116.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37986, 369)
shape after row_rise_aggregation (37986, 384)
shape before column_rise_aggregation (37986, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_130.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37938, 369)
shape after row_rise_aggregation (37938, 384)
shape before column_rise_aggregation (37938, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_167.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37929, 369)
shape after row_rise_aggregation (37929, 384)
shape before column_rise_aggregation (37929, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_144.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37779, 369)
shape after row_rise_aggregation (37779, 384)
shape before column_rise_aggregation (37779, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_58.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37825, 369)
shape after row_rise_aggregation (37825, 384)
shape before column_rise_aggregation (37825, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_277.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37996, 369)
shape after row_rise_aggregation (37996, 384)
shape before column_rise_aggregation (37996, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_269.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37846, 369)
shape after row_rise_aggregation (37846, 384)
shape before column_rise_aggregation (37846, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_73.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37879, 369)
shape after row_rise_aggregation (37879, 384)
shape before column_rise_aggregation (37879, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_192.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37930, 369)
shape after row_rise_aggregation (37930, 384)
shape before column_rise_aggregation (37930, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_17.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (38123, 369)
shape after row_rise_aggregation (38123, 384)
shape before column_rise_aggregation (38123, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_126.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37840, 369)
shape after row_rise_aggregation (37840, 384)
shape before column_rise_aggregation (37840, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_297.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (38077, 369)
shape after row_rise_aggregation (38077, 384)
shape before column_rise_aggregation (38077, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_30.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37938, 369)
shape after row_rise_aggregation (37938, 384)
shape before column_rise_aggregation (37938, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_168.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37872, 369)
shape after row_rise_aggregation (37872, 384)
shape before column_rise_aggregation (37872, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_217.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37779, 369)
shape after row_rise_aggregation (37779, 384)
shape before column_rise_aggregation (37779, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_108.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37879, 369)
shape after row_rise_aggregation (37879, 384)
shape before column_rise_aggregation (37879, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_157.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37967, 369)
shape after row_rise_aggregation (37967, 384)
shape before column_rise_aggregation (37967, 384)
shape after column_rise_aggregation (3083, 1889)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_181.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37856, 369)
shape after row_rise_aggregation (37856, 384)
shape before column_rise_aggregation (37856, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_16.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37692, 369)
shape after row_rise_aggregation (37692, 384)
shape before column_rise_aggregation (37692, 384)
shape after column_rise_aggregation (3083, 1889)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_278.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37516, 369)
shape after row_rise_aggregation (37516, 384)
shape before column_rise_aggregation (37516, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_274.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37981, 369)
shape after row_rise_aggregation (37981, 384)
shape before column_rise_aggregation (37981, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_229.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37703, 369)
shape after row_rise_aggregation (37703, 384)
shape before column_rise_aggregation (37703, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_128.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37846, 369)
shape after row_rise_aggregation (37846, 384)
shape before column_rise_aggregation (37846, 384)
shape after column_rise_aggregation (3083, 1889)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_31.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (38133, 369)
shape after row_rise_aggregation (38133, 384)
shape before column_rise_aggregation (38133, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_103.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37850, 369)
shape after row_rise_aggregation (37850, 384)
shape before column_rise_aggregation (37850, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_88.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37881, 369)
shape after row_rise_aggregation (37881, 384)
shape before column_rise_aggregation (37881, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_199.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (38005, 369)
shape after row_rise_aggregation (38005, 384)
shape before column_rise_aggregation (38005, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_129.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37873, 369)
shape after row_rise_aggregation (37873, 384)
shape before column_rise_aggregation (37873, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_153.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37866, 369)
shape after row_rise_aggregation (37866, 384)
shape before column_rise_aggregation (37866, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_60.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37814, 369)
shape after row_rise_aggregation (37814, 384)
shape before column_rise_aggregation (37814, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_114.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37961, 369)
shape after row_rise_aggregation (37961, 384)
shape before column_rise_aggregation (37961, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_177.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37929, 369)
shape after row_rise_aggregation (37929, 384)
shape before column_rise_aggregation (37929, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_279.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (38073, 369)
shape after row_rise_aggregation (38073, 384)
shape before column_rise_aggregation (38073, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_7.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37693, 369)
shape after row_rise_aggregation (37693, 384)
shape before column_rise_aggregation (37693, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_194.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (38032, 369)
shape after row_rise_aggregation (38032, 384)
shape before column_rise_aggregation (38032, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_2.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (38040, 369)
shape after row_rise_aggregation (38040, 384)
shape before column_rise_aggregation (38040, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_71.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37702, 369)
shape after row_rise_aggregation (37702, 384)
shape before column_rise_aggregation (37702, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_14.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37925, 369)
shape after row_rise_aggregation (37925, 384)
shape before column_rise_aggregation (37925, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_63.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37631, 369)
shape after row_rise_aggregation (37631, 384)
shape before column_rise_aggregation (37631, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_271.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (38031, 369)
shape after row_rise_aggregation (38031, 384)
shape before column_rise_aggregation (38031, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_256.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37841, 369)
shape after row_rise_aggregation (37841, 384)
shape before column_rise_aggregation (37841, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_137.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37815, 369)
shape after row_rise_aggregation (37815, 384)
shape before column_rise_aggregation (37815, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_18.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37823, 369)
shape after row_rise_aggregation (37823, 384)
shape before column_rise_aggregation (37823, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_80.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37863, 369)
shape after row_rise_aggregation (37863, 384)
shape before column_rise_aggregation (37863, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_298.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37882, 369)
shape after row_rise_aggregation (37882, 384)
shape before column_rise_aggregation (37882, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_1.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (38085, 369)
shape after row_rise_aggregation (38085, 384)
shape before column_rise_aggregation (38085, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_165.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (38213, 369)
shape after row_rise_aggregation (38213, 384)
shape before column_rise_aggregation (38213, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_152.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37998, 369)
shape after row_rise_aggregation (37998, 384)
shape before column_rise_aggregation (37998, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_107.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37888, 369)
shape after row_rise_aggregation (37888, 384)
shape before column_rise_aggregation (37888, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_166.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37851, 369)
shape after row_rise_aggregation (37851, 384)
shape before column_rise_aggregation (37851, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_27.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37916, 369)
shape after row_rise_aggregation (37916, 384)
shape before column_rise_aggregation (37916, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_123.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37780, 369)
shape after row_rise_aggregation (37780, 384)
shape before column_rise_aggregation (37780, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_267.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37945, 369)
shape after row_rise_aggregation (37945, 384)
shape before column_rise_aggregation (37945, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_75.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37754, 369)
shape after row_rise_aggregation (37754, 384)
shape before column_rise_aggregation (37754, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_215.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37960, 369)
shape after row_rise_aggregation (37960, 384)
shape before column_rise_aggregation (37960, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_131.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37758, 369)
shape after row_rise_aggregation (37758, 384)
shape before column_rise_aggregation (37758, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_149.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37816, 369)
shape after row_rise_aggregation (37816, 384)
shape before column_rise_aggregation (37816, 384)
shape after column_rise_aggregation (3083, 1889)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_85.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37781, 369)
shape after row_rise_aggregation (37781, 384)
shape before column_rise_aggregation (37781, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_84.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37907, 369)
shape after row_rise_aggregation (37907, 384)
shape before column_rise_aggregation (37907, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_102.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37926, 369)
shape after row_rise_aggregation (37926, 384)
shape before column_rise_aggregation (37926, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_175.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37758, 369)
shape after row_rise_aggregation (37758, 384)
shape before column_rise_aggregation (37758, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_69.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37965, 369)
shape after row_rise_aggregation (37965, 384)
shape before column_rise_aggregation (37965, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_172.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37820, 369)
shape after row_rise_aggregation (37820, 384)
shape before column_rise_aggregation (37820, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_183.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37829, 369)
shape after row_rise_aggregation (37829, 384)
shape before column_rise_aggregation (37829, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_158.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37972, 369)
shape after row_rise_aggregation (37972, 384)
shape before column_rise_aggregation (37972, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_98.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37935, 369)
shape after row_rise_aggregation (37935, 384)
shape before column_rise_aggregation (37935, 384)
shape after column_rise_aggregation (3083, 1889)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_248.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37960, 369)
shape after row_rise_aggregation (37960, 384)
shape before column_rise_aggregation (37960, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_141.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37709, 369)
shape after row_rise_aggregation (37709, 384)
shape before column_rise_aggregation (37709, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_50.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (38038, 369)
shape after row_rise_aggregation (38038, 384)
shape before column_rise_aggregation (38038, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_182.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37753, 369)
shape after row_rise_aggregation (37753, 384)
shape before column_rise_aggregation (37753, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_251.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37704, 369)
shape after row_rise_aggregation (37704, 384)
shape before column_rise_aggregation (37704, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_213.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (38289, 369)
shape after row_rise_aggregation (38289, 384)
shape before column_rise_aggregation (38289, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_41.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37983, 369)
shape after row_rise_aggregation (37983, 384)
shape before column_rise_aggregation (37983, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_197.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (38111, 369)
shape after row_rise_aggregation (38111, 384)
shape before column_rise_aggregation (38111, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_196.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (38008, 369)
shape after row_rise_aggregation (38008, 384)
shape before column_rise_aggregation (38008, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_164.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37820, 369)
shape after row_rise_aggregation (37820, 384)
shape before column_rise_aggregation (37820, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_281.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37923, 369)
shape after row_rise_aggregation (37923, 384)
shape before column_rise_aggregation (37923, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_220.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (38001, 369)
shape after row_rise_aggregation (38001, 384)
shape before column_rise_aggregation (38001, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_45.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37866, 369)
shape after row_rise_aggregation (37866, 384)
shape before column_rise_aggregation (37866, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_180.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37983, 369)
shape after row_rise_aggregation (37983, 384)
shape before column_rise_aggregation (37983, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_54.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37848, 369)
shape after row_rise_aggregation (37848, 384)
shape before column_rise_aggregation (37848, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_67.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37839, 369)
shape after row_rise_aggregation (37839, 384)
shape before column_rise_aggregation (37839, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_169.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37921, 369)
shape after row_rise_aggregation (37921, 384)
shape before column_rise_aggregation (37921, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_89.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37817, 369)
shape after row_rise_aggregation (37817, 384)
shape before column_rise_aggregation (37817, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_160.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37908, 369)
shape after row_rise_aggregation (37908, 384)
shape before column_rise_aggregation (37908, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_258.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (38025, 369)
shape after row_rise_aggregation (38025, 384)
shape before column_rise_aggregation (38025, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_23.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37919, 369)
shape after row_rise_aggregation (37919, 384)
shape before column_rise_aggregation (37919, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_155.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37966, 369)
shape after row_rise_aggregation (37966, 384)
shape before column_rise_aggregation (37966, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_272.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (38014, 369)
shape after row_rise_aggregation (38014, 384)
shape before column_rise_aggregation (38014, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_13.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37859, 369)
shape after row_rise_aggregation (37859, 384)
shape before column_rise_aggregation (37859, 384)
shape after column_rise_aggregation (3083, 1885)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_205.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37815, 369)
shape after row_rise_aggregation (37815, 384)
shape before column_rise_aggregation (37815, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_227.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (38008, 369)
shape after row_rise_aggregation (38008, 384)
shape before column_rise_aggregation (38008, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_257.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37779, 369)
shape after row_rise_aggregation (37779, 384)
shape before column_rise_aggregation (37779, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_286.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37875, 369)
shape after row_rise_aggregation (37875, 384)
shape before column_rise_aggregation (37875, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_238.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (38061, 369)
shape after row_rise_aggregation (38061, 384)
shape before column_rise_aggregation (38061, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_174.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37768, 369)
shape after row_rise_aggregation (37768, 384)
shape before column_rise_aggregation (37768, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_212.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (38161, 369)
shape after row_rise_aggregation (38161, 384)
shape before column_rise_aggregation (38161, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_295.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37837, 369)
shape after row_rise_aggregation (37837, 384)
shape before column_rise_aggregation (37837, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_145.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37823, 369)
shape after row_rise_aggregation (37823, 384)
shape before column_rise_aggregation (37823, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_185.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (38076, 369)
shape after row_rise_aggregation (38076, 384)
shape before column_rise_aggregation (38076, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_120.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37580, 369)
shape after row_rise_aggregation (37580, 384)
shape before column_rise_aggregation (37580, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_76.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37821, 369)
shape after row_rise_aggregation (37821, 384)
shape before column_rise_aggregation (37821, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_81.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37721, 369)
shape after row_rise_aggregation (37721, 384)
shape before column_rise_aggregation (37721, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_284.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37717, 369)
shape after row_rise_aggregation (37717, 384)
shape before column_rise_aggregation (37717, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_268.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (38157, 369)
shape after row_rise_aggregation (38157, 384)
shape before column_rise_aggregation (38157, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_190.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37720, 369)
shape after row_rise_aggregation (37720, 384)
shape before column_rise_aggregation (37720, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_198.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37748, 369)
shape after row_rise_aggregation (37748, 384)
shape before column_rise_aggregation (37748, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_113.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37924, 369)
shape after row_rise_aggregation (37924, 384)
shape before column_rise_aggregation (37924, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_33.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (38055, 369)
shape after row_rise_aggregation (38055, 384)
shape before column_rise_aggregation (38055, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_221.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37929, 369)
shape after row_rise_aggregation (37929, 384)
shape before column_rise_aggregation (37929, 384)
shape after column_rise_aggregation (3083, 1881)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_10.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37948, 369)
shape after row_rise_aggregation (37948, 384)
shape before column_rise_aggregation (37948, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_249.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (38024, 369)
shape after row_rise_aggregation (38024, 384)
shape before column_rise_aggregation (38024, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_47.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (38009, 369)
shape after row_rise_aggregation (38009, 384)
shape before column_rise_aggregation (38009, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_36.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37938, 369)
shape after row_rise_aggregation (37938, 384)
shape before column_rise_aggregation (37938, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_280.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37952, 369)
shape after row_rise_aggregation (37952, 384)
shape before column_rise_aggregation (37952, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_208.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37878, 369)
shape after row_rise_aggregation (37878, 384)
shape before column_rise_aggregation (37878, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_24.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37845, 369)
shape after row_rise_aggregation (37845, 384)
shape before column_rise_aggregation (37845, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_93.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37934, 369)
shape after row_rise_aggregation (37934, 384)
shape before column_rise_aggregation (37934, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_245.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (38007, 369)
shape after row_rise_aggregation (38007, 384)
shape before column_rise_aggregation (38007, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_252.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37945, 369)
shape after row_rise_aggregation (37945, 384)
shape before column_rise_aggregation (37945, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_288.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37803, 369)
shape after row_rise_aggregation (37803, 384)
shape before column_rise_aggregation (37803, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_211.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37996, 369)
shape after row_rise_aggregation (37996, 384)
shape before column_rise_aggregation (37996, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_42.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37760, 369)
shape after row_rise_aggregation (37760, 384)
shape before column_rise_aggregation (37760, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_292.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37920, 369)
shape after row_rise_aggregation (37920, 384)
shape before column_rise_aggregation (37920, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_121.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37923, 369)
shape after row_rise_aggregation (37923, 384)
shape before column_rise_aggregation (37923, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_186.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37778, 369)
shape after row_rise_aggregation (37778, 384)
shape before column_rise_aggregation (37778, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_19.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37932, 369)
shape after row_rise_aggregation (37932, 384)
shape before column_rise_aggregation (37932, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_296.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (38042, 369)
shape after row_rise_aggregation (38042, 384)
shape before column_rise_aggregation (38042, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_97.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (38019, 369)
shape after row_rise_aggregation (38019, 384)
shape before column_rise_aggregation (38019, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_5.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (38028, 369)
shape after row_rise_aggregation (38028, 384)
shape before column_rise_aggregation (38028, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_243.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37893, 369)
shape after row_rise_aggregation (37893, 384)
shape before column_rise_aggregation (37893, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_35.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (38061, 369)
shape after row_rise_aggregation (38061, 384)
shape before column_rise_aggregation (38061, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_237.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37832, 369)
shape after row_rise_aggregation (37832, 384)
shape before column_rise_aggregation (37832, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_255.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37848, 369)
shape after row_rise_aggregation (37848, 384)
shape before column_rise_aggregation (37848, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_44.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37777, 369)
shape after row_rise_aggregation (37777, 384)
shape before column_rise_aggregation (37777, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_159.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (38042, 369)
shape after row_rise_aggregation (38042, 384)
shape before column_rise_aggregation (38042, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_203.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37822, 369)
shape after row_rise_aggregation (37822, 384)
shape before column_rise_aggregation (37822, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_83.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37717, 369)
shape after row_rise_aggregation (37717, 384)
shape before column_rise_aggregation (37717, 384)
shape after column_rise_aggregation (3083, 1889)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_294.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37801, 369)
shape after row_rise_aggregation (37801, 384)
shape before column_rise_aggregation (37801, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_11.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37959, 369)
shape after row_rise_aggregation (37959, 384)
shape before column_rise_aggregation (37959, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_20.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37990, 369)
shape after row_rise_aggregation (37990, 384)
shape before column_rise_aggregation (37990, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_34.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37850, 369)
shape after row_rise_aggregation (37850, 384)
shape before column_rise_aggregation (37850, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_4.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37858, 369)
shape after row_rise_aggregation (37858, 384)
shape before column_rise_aggregation (37858, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_65.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (38020, 369)
shape after row_rise_aggregation (38020, 384)
shape before column_rise_aggregation (38020, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_219.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37931, 369)
shape after row_rise_aggregation (37931, 384)
shape before column_rise_aggregation (37931, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_39.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37722, 369)
shape after row_rise_aggregation (37722, 384)
shape before column_rise_aggregation (37722, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_52.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37962, 369)
shape after row_rise_aggregation (37962, 384)
shape before column_rise_aggregation (37962, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_133.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (38004, 369)
shape after row_rise_aggregation (38004, 384)
shape before column_rise_aggregation (38004, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_236.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37936, 369)
shape after row_rise_aggregation (37936, 384)
shape before column_rise_aggregation (37936, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_161.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (38081, 369)
shape after row_rise_aggregation (38081, 384)
shape before column_rise_aggregation (38081, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_200.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37872, 369)
shape after row_rise_aggregation (37872, 384)
shape before column_rise_aggregation (37872, 384)
shape after column_rise_aggregation (3083, 1857)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_289.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37926, 369)
shape after row_rise_aggregation (37926, 384)
shape before column_rise_aggregation (37926, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_90.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37889, 369)
shape after row_rise_aggregation (37889, 384)
shape before column_rise_aggregation (37889, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_32.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37758, 369)
shape after row_rise_aggregation (37758, 384)
shape before column_rise_aggregation (37758, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_266.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37840, 369)
shape after row_rise_aggregation (37840, 384)
shape before column_rise_aggregation (37840, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_134.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37985, 369)
shape after row_rise_aggregation (37985, 384)
shape before column_rise_aggregation (37985, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_109.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (38075, 369)
shape after row_rise_aggregation (38075, 384)
shape before column_rise_aggregation (38075, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_46.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37777, 369)
shape after row_rise_aggregation (37777, 384)
shape before column_rise_aggregation (37777, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_139.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37993, 369)
shape after row_rise_aggregation (37993, 384)
shape before column_rise_aggregation (37993, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_22.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37800, 369)
shape after row_rise_aggregation (37800, 384)
shape before column_rise_aggregation (37800, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_265.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37770, 369)
shape after row_rise_aggregation (37770, 384)
shape before column_rise_aggregation (37770, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_260.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (37762, 369)
shape after row_rise_aggregation (37762, 384)
shape before column_rise_aggregation (37762, 384)
shape after column_rise_aggregation (3083, 1897)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_119.ftr


  0%|          | 0/3083 [00:00<?, ?it/s]

final shape (3083, 178)
shape before row_rise_aggregation (38013, 369)
shape after row_rise_aggregation (38013, 384)
shape before column_rise_aggregation (38013, 384)
shape after column_rise_aggregation (3083, 1893)
diff between last and mean transaction


  0%|          | 0/177 [00:00<?, ?it/s]

save processed test_225.ftr


  0%|          | 0/300 [00:00<?, ?it/s]

In [None]:
test = pd.concat(map(lambda sample_df: pd.read_feather(sample_df), tqdm_notebook(glob.glob('test_*.ftr'))))


In [None]:
test.info()
gc.collect()

In [None]:
test.set_index('customer_ID')

In [None]:
train

In [None]:
test.reset_index(drop=True).to_feather('test_fin_df.ftr')

In [None]:
train = pd.concat(map(lambda sample_df: pd.read_feather(sample_df), tqdm_notebook(glob.glob('train_*.ftr'))))

In [None]:
train = pd.read_feather('train_fin_df.ftr')

In [None]:
train

In [None]:
def reduce_size(df):
# Transform float64 columns to float32
    print("reduce float data size")
    cols = list(df.dtypes[df.dtypes == 'float64'].index)
    for col in tqdm_notebook(cols):
        df[col] = df[col].astype(np.float32)
    # Transform int64 columns to int32
    print("reduce cat data size")
    cols = list(df.dtypes[df.dtypes == 'int64'].index)
    for col in tqdm_notebook(cols):
        df[col] = df[col].astype(np.int32)
    return df
        
train = reduce_size(train)
test = reduce_size(test)

In [None]:
def missing_values_table(df):
    # Total missing values by column
    mis_val = df.isnull().sum()

    # Percentage of missing values by column
    mis_val_percent = 100 * df.isnull().sum() / len(df)

    # build a table with the thw columns
    mis_val_table = pd.concat([mis_val, mis_val_percent], axis=1)

    # Rename the columns
    mis_val_table_ren_columns = mis_val_table.rename(
    columns = {0 : 'Missing Values', 1 : '% of Total Values'})

    # Sort the table by percentage of missing descending
    mis_val_table_ren_columns = mis_val_table_ren_columns[
        mis_val_table_ren_columns.iloc[:,1] != 0].sort_values(
    '% of Total Values', ascending=False).round(1)

    # Print some summary information
    print ("Your selected dataframe has " + str(df.shape[1]) + " columns.\n"      
        "There are " + str(mis_val_table_ren_columns.shape[0]) +
          " columns that have missing values.")

    # Return the dataframe with missing information
    return mis_val_table_ren_columns

# Missing values for training data
missing_values_train = missing_values_table(test)
#cm = sns.color_palette('Set2', as_cmap=True)
#missing_values_train[:20]#.style.background_gradient(cmap=cm)
THRESHOLD = 80
print(test.shape)
drop_cols = missing_values_train[missing_values_train['% of Total Values']>THRESHOLD].index.to_list()
print(f"Drop {len(drop_cols)} features with more than {THRESHOLD}% of missing values")

test = test.drop(drop_cols,axis=1)
print("Training data shape after dropping highly missing values columns", test.shape)

In [None]:
corr = train.corrwith(train['target'], axis=0)
corr = corr[corr.notna()].sort_values(key=abs, ascending=False)
THRESHOLD = 0.15
CORR_SELECTION=True
if CORR_SELECTION:
    selected_feats = corr[corr.abs()>THRESHOLD].index
    train = train[list(selected_feats)]
    print(f"Training data shape after dropping uncorrelated features"
          f"(threshold Pearson correlation = {THRESHOLD})", 
          train.shape)

In [None]:
train[list(selected_feats)]

In [None]:
gc.collect()

In [None]:
NaN_Val = np.array(train.isnull().sum())
NaN_prec = np.array((train.isnull().sum() * 100 / len(train)).round(2))
NaN_Col = pd.DataFrame([np.array(list(train.columns)).T,NaN_Val.T,NaN_prec.T,np.array(list(train.dtypes)).T], index=['Features','Num of Missing values','Percentage','DataType']
).transpose()
pd.set_option('display.max_rows', None)
NaN_Col

In [None]:
features = [col for col in train.columns if col != "customer_ID"]


In [None]:
for col in features:
    train[col] = train[col].fillna(train[col].median())

In [None]:
types = train.dtypes
target_col = 'target'

cat_cols = list(types[types.apply(lambda x:not(str(x).startswith('float')))].index)
cat_cols = list(filter(lambda x:x!=target_col, cat_cols))
features = list(train.drop(target_col, axis=1).columns)
gc.collect()
print('len cat_col', len(cat_cols))
print('len features', len(features))
    
with open('features.pkl', 'wb') as f:
    pickle.dump(features, f)

with open('cat_cols.pkl', 'wb') as f:
    pickle.dump(cat_cols, f)

In [None]:
train['target'] = train["target"].astype('int')
print('Transform all String features to category.\n')
os.makedirs('label_encoders')

for usecol in tqdm_notebook(cat_cols):
#    print(usecol)
    train[usecol] = train[usecol].astype('str')
#    test[usecol] = test[usecol].astype('str')

    #Fit LabelEncoder
    le = LabelEncoder().fit(
            np.unique(train[usecol].unique().tolist()))#+
#                      test[usecol].unique().tolist()))

    #At the end 0 will be used for null values so we start at 1 
    train[usecol] = le.transform(train[usecol])+1
#    test[usecol]  = le.transform(test[usecol])+1

    train[usecol] = train[usecol].replace(np.nan, 0).astype('int').astype('category')
#    test[usecol]  = test[usecol].replace(np.nan, 0).astype('int').astype('category')

    joblib.dump(le, f'label_encoders/{usecol}_label_encoder.pkl')

In [None]:
X = train.drop(['target'],axis = 1)
y = train['target']

In [None]:
X.shape , y.shape

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
X_train,X_test,y_train,y_test = train_test_split(X, y, test_size=0.2, random_state=75, stratify=y)

In [None]:
from lightgbm import LGBMClassifier

In [None]:
lgb = LGBMClassifier(objective= 'binary',
        metric= 'binary_logloss',
        boosting= 'dart',
        seed= 75,
        num_leaves= 100,
        learning_rate= 0.01,
        feature_fraction= 0.20,
        bagging_freq= 10,
        bagging_fraction= 0.50,
        n_jobs= -1,
        lambda_l2= 2,
        min_data_in_leaf= 40,
        n_estimators=15000)

In [None]:
lgb.fit(X_train, y_train, eval_metric='binary_logloss', eval_set=[(X_test, y_test)], early_stopping_rounds=100,
        verbose=150)

In [None]:
joblib.dump(lgb, "lgb_model_with_diff.sav")

In [None]:
del train
gc.collect()

In [None]:
from functools import partial, reduce

models_folder = '/kaggle/working/label_encoders/'

def get_sample(path):
    test = pd.read_feather(path).set_index('customer_ID')

    test = reduce_size(test)
    
    test = test[features]
    
    gc.collect()

    for usecol in tqdm_notebook(cat_cols):
        le = joblib.load(os.path.join(models_folder,f'{usecol}_label_encoder.pkl'))
        test[usecol] = test[usecol].astype('str').apply(lambda x:x.split('.')[0])
        test[usecol] = test[usecol].map(lambda s: '<unknown>' if s not in le.classes_ else s)
        le.classes_ = np.append(le.classes_, '<unknown>')
        #At the end 0 will be used for null values so we start at 1 
        test[usecol]  = le.transform(test[usecol])+1
        test[usecol] = test[usecol].replace(np.nan, 0).astype('int').astype('category')
    return test



def get_batchs(df, batch_size, keep_features):
    n_batchs = int(len(df)/batch_size)
    cid = list(df.index)
    for i in range(n_batchs):
        idx = cid[i*batch_size:(i+1)*batch_size]
        if i == n_batchs-1:
            idx = idx + cid[(i+1)*batch_size:]
        yield df.loc[idx, keep_features]
        
def predict(sample_df, model):
    y_pred = model.predict(sample_df)
    sub = pd.Series(y_pred, index=sample_df.index, name='prediction')
#    sub.to_frame().to_csv(outfile)
    return sub

In [None]:
n_batchs=50
sub_lg = pd.Series()
for path in glob.glob("test_*.ftr"):
    print(path)
    test = get_sample(path)
    samples_df = get_batchs(test,batch_size=n_batchs, keep_features=features)
    sample_sub_lg = pd.concat(map(partial(predict, model=lgb), tqdm_notebook(samples_df)))
    sub_lg = pd.concat([sub_lg, sample_sub_lg], ignore_index=True)
    del test
    gc.collect()
    
print(len(sub_lg))

sub_lg.to_frame().to_csv('submission_lgb_with_diff.csv')

In [None]:
loaded_model = joblib.load('/kaggle/working/lgb_model_with_diff.sav')

In [None]:
lgb_pred = loaded_model.predict()

In [None]:
sample_dataset = pd.read_csv('/kaggle/input/amex-default-prediction/sample_submission.csv')
output = pd.DataFrame({'customer_ID': sample_dataset.customer_ID, 'prediction': predictions})
output.to_csv('submission1.csv', index=False)