# Фичи по PTLS   

Цель: предсказать для каждого пользователя взятие/ невзятие каждого из четырех продуктов **в течение месяца после отчетной даты**, исторические данные по ним находятся в targets

In [24]:
import numpy as np

import pandas as pd
from pandas.api.types import is_float_dtype, is_integer_dtype

from collections import Counter
from sklearn.utils import resample

from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta

import math

from sentence_transformers import SentenceTransformer, util
from sklearn.metrics.pairwise import cosine_similarity

import gc
import glob
import pyarrow.parquet as pq
from tqdm import trange, tqdm

In [25]:
from sklearn.metrics.pairwise import euclidean_distances
from sklearn.decomposition import PCA
from scipy.spatial.distance import pdist, squareform

In [26]:
from typing import List, Optional, Tuple

In [27]:
import warnings

warnings.filterwarnings('ignore')
warnings.filterwarnings('ignore', category=UserWarning, module='pandas')

In [28]:
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
# найтройки
# Убираем ограничение отображемых колонок
pd.set_option("display.max_columns", None)
# Устанавливаем тему по умолчанию
sb_dark = sns.dark_palette('skyblue', 8, reverse=True) # teal
sns.set(palette=sb_dark)

In [29]:
# Включаем tqdm для pandas, чтобы можно было запускать progress_apply() вместо простого apply()
tqdm.pandas() 
pd.options.display.max_columns = None
pd.options.display.max_rows = 200

In [30]:
eps = 1e-6

In [31]:
PATH = ''
PATH_DATASET_OUTPUT = PATH + 'datasets/'
PATH_DATASET_PTLS = PATH_DATASET_OUTPUT + 'ptls/'

In [167]:
ptls_type = 'trx'
ptls_type = 'geo'
ptls_filename = f'{ptls_type}_emb_select_1_v1.parquet'
ptls_filename

'geo_emb_select_1_v1.parquet'

In [168]:
trx_emb_PTLS_df = pd.read_parquet(PATH_DATASET_PTLS + ptls_filename)
trx_emb_PTLS_df['report_next_end'] = pd.to_datetime(trx_emb_PTLS_df['report_next_end'])
# trx_emb_PTLS_df['trx_date'] = trx_emb_PTLS_df['report_next_end'] - pd.DateOffset(days=45)
trx_emb_PTLS_df = trx_emb_PTLS_df.rename(columns={'emb_trx': 'ptls_embedding'})
trx_emb_PTLS_df.shape

(396434, 3)

In [169]:
trx_emb_PTLS_df

Unnamed: 0,client_id,report_next_end,ptls_embedding
3,004c7779a0ecbcd972a24627f32dcbbd4d63c610a9ee0a...,2022-11-30,"[-0.07313454, -0.23789532, 0.07221676, -0.8108..."
20,01703640603e9dfa59cd76cc233d086ecc34d3ece32c56...,2022-11-30,"[0.03763936, -0.15345033, 0.046564437, -0.0004..."
23,0191f3d77d4c8f28ad676fe2e1fda9e3c7a9dc0351f0d0...,2022-11-30,"[0.0062002423, -0.23049307, -0.001009638, -0.3..."
29,01df36c22b9fd7e8ad8d7b1c396b2b1367a2606e5b6255...,2022-11-30,"[0.12923148, -0.14685392, 0.015514831, -0.4604..."
40,0292402a3043ce0dc6335c9461a7aa68fbc88d8a49e63d...,2022-11-30,"[0.09086008, -0.24302137, 0.047183976, 0.38375..."
...,...,...,...
788887,f556afdbb6a63a7bedab03ec5abbd3e04980ec76c60e8e...,2022-12-30,"[-0.02733832, -0.05947024, 0.046486262, -0.062..."
788918,f7811e306436328798b252bb698a0affc552304c3272fe...,2022-12-30,"[-0.0011212913, -0.253754, 0.046030834, -0.275..."
788942,f8ebd0078c17acad1d9bfc9bc87c890f742529e447c90d...,2022-12-30,"[0.075992145, 0.38542974, 0.0980213, -0.807597..."
789023,fe3f567e6cb1803ca215ebe293b650ab6ccf25561cbafa...,2022-12-30,"[0.0020023594, -0.4354954, 0.052867875, 0.0492..."


In [170]:
# %%time
# # Переформатируем данные по эмбеддингам
# emb_size = 256
# emb_cols = [f'embed_{i}' for i in range(emb_size)]
# # emb_cols = trx_emb_PTLS_df.columns[:emb_size]
# trx_emb_PTLS_df['trx_embedding'] = trx_emb_PTLS_df.apply(lambda x: x[emb_cols].values, axis=1)
# trx_emb_PTLS_df = trx_emb_PTLS_df[['client_id', 'mon', 'trx_embedding']]
# gc.collect()
# trx_emb_PTLS_df.shape

In [171]:
# # trx_emb_PTLS_df['report_next_end'] = 
# report_next_end = ['2023-01-31', '2022-11-30', '2022-12-30']
# trx_emb_PTLS_df['report_next_end'] = np.random.choice(list(report_next_end), len(trx_emb_PTLS_df))
# trx_emb_PTLS_df

In [172]:
# trx_emb_PTLS_df['ptls_embedding'].apply(lambda x: np.min(x, axis=0))

### Разбивка эмбеддинга на подгруппы и рассчет статистик отдельно для них 

In [173]:
%%time
length_embedding = 256
part_size = 16 # должно быть одно из делителей 256: 1, 2, 4, 8, 16, 32, 64, 128 и 256
count_parts_emb = math.ceil(length_embedding/part_size)
columns_part = []
for i in range(count_parts_emb):
    columns_part.append(f'{i+1}_part_emb')
len(columns_part)

def split_emb(x):
    split_embeddings = [x[i*part_size:(i+1)*part_size] for i in range(count_parts_emb)]
    return split_embeddings
trx_emb_PTLS_df[columns_part] = trx_emb_PTLS_df['ptls_embedding'].apply(lambda x: pd.Series(split_emb(x)))
trx_emb_PTLS_df.shape

CPU times: total: 19.3 s
Wall time: 19.3 s


(396434, 19)

### Рассчет статистик для эмбеддингов PTLS

In [174]:
def calc_emb_stats(data, col):
    data[f'min_{col}'] = data[col].apply(lambda x: np.min(x, axis=0))
    data[f'max_{col}'] = data[col].apply(lambda x: np.max(x, axis=0))
    data[f'sum_{col}'] = data[col].apply(lambda x: np.sum(x, axis=0))
    data[f'std_{col}'] = data[col].apply(lambda x: np.std(x, axis=0))
    data[f'mean_{col}'] = data[col].apply(lambda x: np.mean(x, axis=0))    
    data[f'median_{col}'] = data[col].apply(lambda x: np.median(x, axis=0))    
    return data

In [175]:
%%time
# Формируем фичи по таргету
begin_date = datetime(2022, 1, 1, 0, 0, 0)
start_date = datetime(2022, 1, 1, 0, 0, 0)

end_date = datetime(2023, 1, 31, 0, 0, 0)

# Бланк-датафрейм с клиентами 
# uniq_clients_df = trx_emb_PTLS_df[['client_id']].drop_duplicates()

# Итоговый датасет 
union_ptls_trx_agg_df = pd.DataFrame()

# Бежим по месяцам и расчитываем статистики для клиента берем предыдущие месяцы
for i in trange(((end_date - start_date).days//30 + 1)):
    end_date = start_date + relativedelta(months=1) - relativedelta(days=1)
    print(f'start: {start_date}, end: {end_date}')    
    report_next_end = start_date + relativedelta(months=2) - relativedelta(days=1)
    
#     select_mon_current_df = trx_emb_PTLS_df[trx_emb_PTLS_df['event_time'].between(start_date, end_date)]
    select_mon_current_df = trx_emb_PTLS_df[trx_emb_PTLS_df['report_next_end'] == report_next_end]
    # select_mon_full_df = all_dialogs_df[all_dialogs_df['event_time'].between(begin_date, end_date)]
    print(select_mon_current_df.shape)
    
    client_agg_df = select_mon_current_df[['client_id']].drop_duplicates().copy()
    client_agg_df['report_next_end'] = report_next_end
    client_agg_df = client_agg_df.set_index('client_id')    
    print(client_agg_df.shape)
    
    # Считаем статистики только за прошедший месяц
    stats_mon = calc_emb_stats(data=select_mon_current_df[['client_id', 'report_next_end', 'ptls_embedding']], col='ptls_embedding')
    stats_mon = stats_mon.set_index(['client_id', 'report_next_end'])
    # print(stats_mon.shape)
    client_agg_df = client_agg_df.merge(stats_mon.add_prefix('ptls_trx_'), left_index=True, right_index=True, how='left')
    for col_part in columns_part:
        # print(col_part)
        stats_mon = calc_emb_stats(data=select_mon_current_df[['client_id', 'report_next_end', col_part]], col=col_part)
        stats_mon = stats_mon.set_index(['client_id', 'report_next_end'])
        client_agg_df = client_agg_df.merge(stats_mon.add_prefix(f'ptls_{ptls_type}_{col_part}'), left_index=True, right_index=True, how='left')
        
    
    union_ptls_trx_agg_df = pd.concat([union_ptls_trx_agg_df, client_agg_df])
    start_date = start_date + relativedelta(months=1)
union_ptls_trx_agg_df.shape    

  0%|          | 0/14 [00:00<?, ?it/s]

start: 2022-01-01 00:00:00, end: 2022-01-31 00:00:00
(29411, 19)
(29411, 1)


  7%|▋         | 1/14 [00:19<04:10, 19.26s/it]

start: 2022-02-01 00:00:00, end: 2022-02-28 00:00:00
(35764, 19)
(35764, 1)


 14%|█▍        | 2/14 [00:43<04:22, 21.90s/it]

start: 2022-03-01 00:00:00, end: 2022-03-31 00:00:00
(33716, 19)
(33716, 1)


 21%|██▏       | 3/14 [01:05<04:05, 22.34s/it]

start: 2022-04-01 00:00:00, end: 2022-04-30 00:00:00
(29985, 19)
(29985, 1)


 29%|██▊       | 4/14 [01:26<03:35, 21.56s/it]

start: 2022-05-01 00:00:00, end: 2022-05-31 00:00:00
(29993, 19)
(29993, 1)


 36%|███▌      | 5/14 [01:46<03:09, 21.05s/it]

start: 2022-06-01 00:00:00, end: 2022-06-30 00:00:00
(28187, 19)
(28187, 1)


 43%|████▎     | 6/14 [02:05<02:42, 20.29s/it]

start: 2022-07-01 00:00:00, end: 2022-07-31 00:00:00
(30052, 19)
(30052, 1)


 50%|█████     | 7/14 [02:25<02:21, 20.27s/it]

start: 2022-08-01 00:00:00, end: 2022-08-31 00:00:00
(27366, 19)
(27366, 1)


 57%|█████▋    | 8/14 [02:43<01:58, 19.73s/it]

start: 2022-09-01 00:00:00, end: 2022-09-30 00:00:00
(25320, 19)
(25320, 1)


 64%|██████▍   | 9/14 [03:01<01:34, 18.99s/it]

start: 2022-10-01 00:00:00, end: 2022-10-31 00:00:00
(45299, 19)
(45299, 1)


 71%|███████▏  | 10/14 [03:32<01:30, 22.64s/it]

start: 2022-11-01 00:00:00, end: 2022-11-30 00:00:00
(24695, 19)
(24695, 1)


 79%|███████▊  | 11/14 [03:48<01:02, 20.83s/it]

start: 2022-12-01 00:00:00, end: 2022-12-31 00:00:00
(37681, 19)
(37681, 1)


 86%|████████▌ | 12/14 [04:14<00:44, 22.42s/it]

start: 2023-01-01 00:00:00, end: 2023-01-31 00:00:00
(0, 19)
(0, 1)


 93%|█████████▎| 13/14 [04:15<00:15, 15.70s/it]

start: 2023-02-01 00:00:00, end: 2023-02-28 00:00:00
(0, 19)
(0, 1)


100%|██████████| 14/14 [04:15<00:00, 18.24s/it]

CPU times: total: 4min 14s
Wall time: 4min 15s





(377469, 120)

In [176]:
union_ptls_trx_agg_df = union_ptls_trx_agg_df.drop(columns=['report_next_end'], errors='ignore')
union_ptls_trx_agg_df.shape

(377469, 119)

In [177]:
union_ptls_trx_agg_df.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 377469 entries, ('007dde6874ae2cf31e1fa5d1ffd21d57c11d36f29acb0bb765377cdd9716d91a', Timestamp('2022-02-28 00:00:00')) to ('ff6d8e1147bb38af8fe15649a4e749fe5537ad898a1d99f89c7f4d8755bc2e23', Timestamp('2023-01-31 00:00:00'))
Columns: 119 entries, ptls_trx_ptls_embedding to ptls_geo_16_part_embmedian_16_part_emb
dtypes: float32(102), object(17)
memory usage: 207.9+ MB


In [178]:
union_ptls_trx_agg_df

Unnamed: 0_level_0,Unnamed: 1_level_0,ptls_trx_ptls_embedding,ptls_trx_min_ptls_embedding,ptls_trx_max_ptls_embedding,ptls_trx_sum_ptls_embedding,ptls_trx_std_ptls_embedding,ptls_trx_mean_ptls_embedding,ptls_trx_median_ptls_embedding,ptls_geo_1_part_emb1_part_emb,ptls_geo_1_part_embmin_1_part_emb,ptls_geo_1_part_embmax_1_part_emb,ptls_geo_1_part_embsum_1_part_emb,ptls_geo_1_part_embstd_1_part_emb,ptls_geo_1_part_embmean_1_part_emb,ptls_geo_1_part_embmedian_1_part_emb,ptls_geo_2_part_emb2_part_emb,ptls_geo_2_part_embmin_2_part_emb,ptls_geo_2_part_embmax_2_part_emb,ptls_geo_2_part_embsum_2_part_emb,ptls_geo_2_part_embstd_2_part_emb,ptls_geo_2_part_embmean_2_part_emb,ptls_geo_2_part_embmedian_2_part_emb,ptls_geo_3_part_emb3_part_emb,ptls_geo_3_part_embmin_3_part_emb,ptls_geo_3_part_embmax_3_part_emb,ptls_geo_3_part_embsum_3_part_emb,ptls_geo_3_part_embstd_3_part_emb,ptls_geo_3_part_embmean_3_part_emb,ptls_geo_3_part_embmedian_3_part_emb,ptls_geo_4_part_emb4_part_emb,ptls_geo_4_part_embmin_4_part_emb,ptls_geo_4_part_embmax_4_part_emb,ptls_geo_4_part_embsum_4_part_emb,ptls_geo_4_part_embstd_4_part_emb,ptls_geo_4_part_embmean_4_part_emb,ptls_geo_4_part_embmedian_4_part_emb,ptls_geo_5_part_emb5_part_emb,ptls_geo_5_part_embmin_5_part_emb,ptls_geo_5_part_embmax_5_part_emb,ptls_geo_5_part_embsum_5_part_emb,ptls_geo_5_part_embstd_5_part_emb,ptls_geo_5_part_embmean_5_part_emb,ptls_geo_5_part_embmedian_5_part_emb,ptls_geo_6_part_emb6_part_emb,ptls_geo_6_part_embmin_6_part_emb,ptls_geo_6_part_embmax_6_part_emb,ptls_geo_6_part_embsum_6_part_emb,ptls_geo_6_part_embstd_6_part_emb,ptls_geo_6_part_embmean_6_part_emb,ptls_geo_6_part_embmedian_6_part_emb,ptls_geo_7_part_emb7_part_emb,ptls_geo_7_part_embmin_7_part_emb,ptls_geo_7_part_embmax_7_part_emb,ptls_geo_7_part_embsum_7_part_emb,ptls_geo_7_part_embstd_7_part_emb,ptls_geo_7_part_embmean_7_part_emb,ptls_geo_7_part_embmedian_7_part_emb,ptls_geo_8_part_emb8_part_emb,ptls_geo_8_part_embmin_8_part_emb,ptls_geo_8_part_embmax_8_part_emb,ptls_geo_8_part_embsum_8_part_emb,ptls_geo_8_part_embstd_8_part_emb,ptls_geo_8_part_embmean_8_part_emb,ptls_geo_8_part_embmedian_8_part_emb,ptls_geo_9_part_emb9_part_emb,ptls_geo_9_part_embmin_9_part_emb,ptls_geo_9_part_embmax_9_part_emb,ptls_geo_9_part_embsum_9_part_emb,ptls_geo_9_part_embstd_9_part_emb,ptls_geo_9_part_embmean_9_part_emb,ptls_geo_9_part_embmedian_9_part_emb,ptls_geo_10_part_emb10_part_emb,ptls_geo_10_part_embmin_10_part_emb,ptls_geo_10_part_embmax_10_part_emb,ptls_geo_10_part_embsum_10_part_emb,ptls_geo_10_part_embstd_10_part_emb,ptls_geo_10_part_embmean_10_part_emb,ptls_geo_10_part_embmedian_10_part_emb,ptls_geo_11_part_emb11_part_emb,ptls_geo_11_part_embmin_11_part_emb,ptls_geo_11_part_embmax_11_part_emb,ptls_geo_11_part_embsum_11_part_emb,ptls_geo_11_part_embstd_11_part_emb,ptls_geo_11_part_embmean_11_part_emb,ptls_geo_11_part_embmedian_11_part_emb,ptls_geo_12_part_emb12_part_emb,ptls_geo_12_part_embmin_12_part_emb,ptls_geo_12_part_embmax_12_part_emb,ptls_geo_12_part_embsum_12_part_emb,ptls_geo_12_part_embstd_12_part_emb,ptls_geo_12_part_embmean_12_part_emb,ptls_geo_12_part_embmedian_12_part_emb,ptls_geo_13_part_emb13_part_emb,ptls_geo_13_part_embmin_13_part_emb,ptls_geo_13_part_embmax_13_part_emb,ptls_geo_13_part_embsum_13_part_emb,ptls_geo_13_part_embstd_13_part_emb,ptls_geo_13_part_embmean_13_part_emb,ptls_geo_13_part_embmedian_13_part_emb,ptls_geo_14_part_emb14_part_emb,ptls_geo_14_part_embmin_14_part_emb,ptls_geo_14_part_embmax_14_part_emb,ptls_geo_14_part_embsum_14_part_emb,ptls_geo_14_part_embstd_14_part_emb,ptls_geo_14_part_embmean_14_part_emb,ptls_geo_14_part_embmedian_14_part_emb,ptls_geo_15_part_emb15_part_emb,ptls_geo_15_part_embmin_15_part_emb,ptls_geo_15_part_embmax_15_part_emb,ptls_geo_15_part_embsum_15_part_emb,ptls_geo_15_part_embstd_15_part_emb,ptls_geo_15_part_embmean_15_part_emb,ptls_geo_15_part_embmedian_15_part_emb,ptls_geo_16_part_emb16_part_emb,ptls_geo_16_part_embmin_16_part_emb,ptls_geo_16_part_embmax_16_part_emb,ptls_geo_16_part_embsum_16_part_emb,ptls_geo_16_part_embstd_16_part_emb,ptls_geo_16_part_embmean_16_part_emb,ptls_geo_16_part_embmedian_16_part_emb
client_id,report_next_end,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1
007dde6874ae2cf31e1fa5d1ffd21d57c11d36f29acb0bb765377cdd9716d91a,2022-02-28,"[-0.078968175, -0.23500663, 0.05919083, -0.557...",-0.998582,0.951422,3.194314,0.327440,0.012478,0.003701,"[-0.078968175, -0.23500663, 0.05919083, -0.557...",-0.575600,0.603568,0.225144,0.305752,0.014072,0.033138,"[-0.08395323, 0.02004097, -0.5303751, 0.056209...",-0.530375,0.670828,-0.058323,0.231485,-0.003645,0.004147,"[-0.65378606, -0.23558432, -0.23460373, 0.7117...",-0.997725,0.872122,-0.150418,0.440058,-0.009401,-0.009148,"[0.07507902, 0.07671329, 0.20941205, -0.125327...",-0.340870,0.575544,1.758135,0.249957,0.109883,0.062965,"[0.06761453, 0.5182285, 0.21567127, 0.00919340...",-0.044946,0.663223,2.071700,0.195187,0.129481,0.052791,"[0.12314464, 0.053006068, -0.0055607045, 0.019...",-0.998582,0.370273,-1.998964,0.347260,-0.124935,0.018606,"[-0.014437863, 0.006391916, -0.08612096, 0.044...",-0.819201,0.334298,-1.173141,0.275143,-0.073321,-0.000632,"[0.029421609, 0.41540602, -0.030727815, 0.0234...",-0.856917,0.951422,1.416456,0.396396,0.088528,0.015230,"[-0.17107807, -0.084296905, 0.41416422, -0.170...",-0.361125,0.891267,1.300018,0.291665,0.081251,0.026280,"[-0.7380189, 0.003744539, 0.85330105, -0.05281...",-0.738019,0.853301,0.224114,0.379635,0.014007,-0.012043,"[-0.0185338, -0.0057950937, 0.014312199, -0.09...",-0.246748,0.531303,0.609719,0.164229,0.038107,-0.003944,"[-0.9003833, -0.03115392, -0.07178653, -0.1663...",-0.900383,0.089142,-2.224995,0.241374,-0.139062,-0.037598,"[0.083028935, -0.053060587, -0.2066818, -0.917...",-0.917931,0.672246,0.594559,0.365712,0.037160,0.023541,"[-0.062102567, 0.045555968, 0.8421839, 0.01220...",-0.616170,0.850369,-0.269674,0.377681,-0.016855,-0.035785,"[-0.008770652, 0.14008719, -0.07696907, -0.026...",-0.217737,0.761741,0.186619,0.212362,0.011664,-0.009961,"[0.0030156493, -0.023461474, 0.50189435, 0.156...",-0.841461,0.824629,0.683366,0.445152,0.042710,0.021652
01c2e90e7b6338abdbb4c8b26cb556e07dae78ae8b3d77f6f905f1bcaae55921,2022-02-28,"[0.03373722, -0.24848263, 0.045007866, -0.2706...",-0.839533,0.906501,3.302952,0.249543,0.012902,0.000044,"[0.03373722, -0.24848263, 0.045007866, -0.2706...",-0.450704,0.291722,-0.589628,0.162404,-0.036852,-0.000935,"[-0.020161197, -0.019848729, -0.31257886, -0.0...",-0.312579,0.645651,0.718329,0.238562,0.044896,-0.012463,"[0.053617176, 0.0786512, -0.18681912, 0.695861...",-0.186819,0.906501,2.719238,0.314428,0.169952,0.066730,"[-0.03645019, 0.05147474, 0.09456701, 0.010034...",-0.049642,0.489874,0.684027,0.124719,0.042752,0.009060,"[0.03142267, -0.0039237486, 0.06458624, 0.0006...",-0.056902,0.834686,2.730680,0.275122,0.170668,0.043065,"[-0.17878541, -0.17207557, 0.034186542, 0.1244...",-0.547145,0.124469,-1.289413,0.170355,-0.080588,-0.011230,"[-0.05771265, -0.03654226, 0.030139111, 0.0578...",-0.546776,0.190653,-1.685317,0.208335,-0.105332,-0.017069,"[0.018937815, 0.05430054, 0.03698651, -0.01657...",-0.459205,0.761567,0.784791,0.276057,0.049049,0.005568,"[-0.09757768, 0.11121982, 0.0879541, -0.048529...",-0.196791,0.886488,0.798832,0.226597,0.049927,0.008901,"[-0.54513615, -0.008474815, 0.8196947, 0.01367...",-0.562561,0.819695,-0.283297,0.305752,-0.017706,-0.006663,"[0.060764544, 0.10361362, -0.0060192044, 0.045...",-0.126090,0.192693,0.004229,0.076357,0.000264,-0.010710,"[-0.83953285, 0.036267497, -0.18559283, -0.242...",-0.839533,0.128438,-1.411017,0.219689,-0.088189,-0.006977,"[0.0063496362, -0.0151401395, -0.17017402, -0....",-0.728735,0.599568,-0.862495,0.273503,-0.053906,-0.013300,"[0.0987976, 0.008978384, 0.66183525, -0.046522...",-0.373731,0.781478,0.937290,0.270227,0.058581,-0.003707,"[7.942028e-05, -0.14729646, 0.0084057, -0.0023...",-0.202878,0.571166,0.198583,0.165387,0.012411,0.004916,"[0.01831801, -0.026662963, 0.4335042, -0.02355...",-0.759608,0.503916,-0.151880,0.320165,-0.009493,-0.002618
01e393315cd55902f4d564efd65118b28458c58f16f54f137063a73f520382bb,2022-02-28,"[-0.00745101, -0.29884222, 0.011958256, -0.330...",-0.995328,0.999968,-2.036384,0.361658,-0.007955,0.006782,"[-0.00745101, -0.29884222, 0.011958256, -0.330...",-0.848705,0.686192,-0.781777,0.387595,-0.048861,0.000826,"[-0.05949812, 0.067948565, -0.57536983, 0.0124...",-0.575370,0.764938,-0.018523,0.261290,-0.001158,-0.004013,"[-0.10845571, -0.6369631, -0.07438431, 0.84519...",-0.636963,0.893518,2.802308,0.448948,0.175144,0.045546,"[-0.032011125, 0.26955482, 0.23701267, -0.1120...",-0.541791,0.670959,-0.447512,0.265757,-0.027969,-0.029064,"[-0.038519334, -0.17660686, 0.067700885, 0.013...",-0.818452,0.774713,0.595337,0.359725,0.037209,-0.018479,"[0.1808965, -0.12416727, 0.04182701, -0.036987...",-0.995328,0.272497,-1.802639,0.351831,-0.112665,0.017777,"[0.015040332, -0.10651786, 0.11305736, -0.6141...",-0.738187,0.205418,-1.344604,0.254423,-0.084038,0.012074,"[0.0137705775, 0.5047885, 0.30844963, -0.18729...",-0.869615,0.999968,0.563715,0.458955,0.035232,0.008520,"[0.44766426, -0.07459567, -0.3959787, -0.67725...",-0.677256,0.903042,0.358431,0.329420,0.022402,0.018682,"[-0.78212786, -0.1499224, 0.8805908, -0.394469...",-0.782128,0.880591,-0.543011,0.413428,-0.033938,-0.054544,"[-0.024147376, 0.014755548, 0.0037283313, -0.0...",-0.218974,0.457022,0.944778,0.152415,0.059049,0.014766,"[-0.90269905, 0.015101363, -0.042093452, -0.64...",-0.902699,0.038714,-2.701998,0.265392,-0.168875,-0.049190,"[0.095261894, 0.01708081, -0.5435379, -0.88488...",-0.906211,0.673367,-1.859112,0.372333,-0.116195,-0.013354,"[0.07054768, 0.027744822, 0.83062047, 0.002745...",-0.601972,0.910330,1.444011,0.376257,0.090251,0.043371,"[0.052008208, 0.3743539, -0.05699036, 0.015433...",-0.394699,0.736540,0.077825,0.249466,0.004864,0.001816,"[0.008399551, -0.034127474, 0.31755444, 0.2862...",-0.884461,0.900389,0.676384,0.495612,0.042274,-0.003213
0400788d62f04222bd105449f83c2c8a099583cdda753bca3d3a6b0b6340d1a4,2022-02-28,"[-0.015698446, -0.19508891, 0.08658026, -0.540...",-0.999706,0.999999,4.311945,0.401216,0.016844,-0.000388,"[-0.015698446, -0.19508891, 0.08658026, -0.540...",-0.910343,0.714369,-0.510996,0.423644,-0.031937,0.050126,"[0.044421904, 0.03886803, -0.503881, -0.006396...",-0.503881,0.946876,0.360019,0.305115,0.022501,0.033819,"[0.2816811, -0.0021547012, -0.5965723, 0.86413...",-0.596572,0.998962,2.798217,0.430506,0.174889,0.078996,"[-0.044956256, -0.45555833, -0.32434744, 0.059...",-0.918927,0.714751,-1.166980,0.413128,-0.072936,-0.054840,"[0.045549046, 0.7335343, 0.13352668, -0.018317...",-0.835938,0.865389,1.040589,0.391688,0.065037,-0.007952,"[-0.18699509, -0.066937506, 0.046717785, -0.03...",-0.999706,0.170858,-2.197459,0.334734,-0.137341,-0.017409,"[0.08227044, 0.7039277, 0.06058093, 0.02812284...",-0.935225,0.703928,-0.500433,0.394576,-0.031277,-0.003289,"[-0.0042804433, 0.2974388, 0.18737645, 0.00312...",-0.883565,0.999996,1.138793,0.456574,0.071175,0.018815,"[0.29126352, 0.01024969, 0.31712145, -0.357326...",-0.357327,0.996468,1.504141,0.311329,0.094009,0.008836,"[-0.91961586, -0.0077765924, 0.99956787, -0.13...",-0.919616,0.999568,1.717981,0.481447,0.107374,0.008091,"[-0.011440669, 0.093942896, -0.06604156, -0.30...",-0.307860,0.334715,0.493182,0.162937,0.030824,0.025882,"[-0.9539233, -0.0752309, -0.07277627, -0.26123...",-0.953923,0.542561,-0.669177,0.318834,-0.041824,-0.040040,"[0.10412365, -0.10048979, -0.3246695, -0.99729...",-0.997291,0.944337,-0.890657,0.403807,-0.055666,-0.066116,"[-0.46337733, 0.038990784, 0.79897904, 0.00317...",-0.843534,0.999999,0.145219,0.423791,0.009076,-0.015140,"[0.024322988, -0.23421429, -0.25759047, -0.107...",-0.682598,0.873547,-1.101914,0.314223,-0.068870,-0.029802,"[-0.022921573, -0.02597239, 0.6391449, -0.2892...",-0.943769,0.999739,2.151421,0.554958,0.134464,0.038800
04571400493212ac58ff9d4a2e7ed93acdd8bd4b02d06959445a7e3c88c92a8d,2022-02-28,"[-0.012629619, -0.13331479, 0.045252375, -0.06...",-0.837399,0.951435,3.452987,0.256485,0.013488,0.001016,"[-0.012629619, -0.13331479, 0.045252375, -0.06...",-0.568446,0.391153,-0.350906,0.189915,-0.021932,-0.008860,"[0.061516356, -0.04111236, -0.028563512, 0.013...",-0.259600,0.700414,0.437645,0.187385,0.027353,0.013065,"[0.056367006, 0.14611416, -0.16125858, 0.69603...",-0.224029,0.925935,2.560543,0.319499,0.160034,0.059601,"[0.0013941685, 0.17393604, -0.015348828, 0.015...",-0.273822,0.454990,0.813685,0.147393,0.050855,0.013597,"[0.026437448, 0.014086635, 0.057793688, -0.017...",-0.136273,0.845281,2.238579,0.301605,0.139911,0.028726,"[-0.019857997, 0.053536117, -0.0074773356, -0....",-0.823353,0.122256,-0.910389,0.204416,-0.056899,-0.010442,"[-0.06916669, -0.05085591, -0.06063015, 0.0323...",-0.688983,0.122389,-1.873994,0.223313,-0.117125,-0.016120,"[0.0396, -0.03198384, 0.050201934, -0.01893369...",-0.271339,0.951435,1.832811,0.319255,0.114551,0.007462,"[-0.16747014, 0.11804348, 0.023336638, -0.3761...",-0.376142,0.890795,0.424923,0.251473,0.026558,0.018337,"[-0.647787, 0.007574265, 0.8131677, -0.0192409...",-0.672942,0.813168,-1.041329,0.330514,-0.065083,-0.000354,"[-0.048865147, -0.1501554, -0.005530206, -0.28...",-0.280161,0.166754,-0.415879,0.101435,-0.025992,-0.018014,"[-0.8373985, 0.016422516, -0.036425684, -0.068...",-0.837399,0.093951,-0.965263,0.213501,-0.060329,0.014693,"[0.03633531, -0.07447385, 0.02396208, -0.69485...",-0.694859,0.561094,-0.511357,0.228168,-0.031960,-0.020853,"[-0.040191583, -0.010255968, 0.64072216, -0.04...",-0.386870,0.780235,1.085053,0.264490,0.067816,0.004773,"[-0.00047164803, -0.010215217, 0.051709827, -0...",-0.243874,0.537340,0.398804,0.146980,0.024925,0.005775,"[0.019601125, -0.00807292, 0.44589034, 0.22322...",-0.804774,0.445890,-0.269940,0.330313,-0.016871,0.004658
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
fe24a0d93b3d543e1acd1c1f39e9417c0898863e66fc103533af0bf0bfd45e96,2023-01-31,"[-0.026141044, -0.34095192, -0.0042232587, -0....",-0.930619,0.999700,-0.854123,0.338494,-0.003336,-0.009581,"[-0.026141044, -0.34095192, -0.0042232587, -0....",-0.751318,0.599595,-0.785881,0.333040,-0.049118,-0.023496,"[-0.06791069, 0.019818049, -0.026035484, 0.102...",-0.484881,0.675431,0.675432,0.232367,0.042215,-0.003143,"[0.25475413, -0.1465149, -0.30684298, 0.722778...",-0.377047,0.896824,1.933055,0.376300,0.120816,0.010364,"[-0.03094377, -0.068407536, 0.22479574, -0.028...",-0.679787,0.705205,0.591243,0.265183,0.036953,-0.003860,"[0.013256423, 0.5575335, -0.021953385, 0.03168...",-0.632002,0.678948,0.794141,0.269805,0.049634,0.027244,"[-0.34605208, 0.35815156, 0.04658412, -0.02500...",-0.930619,0.358152,-1.452051,0.304646,-0.090753,-0.009836,"[-0.024210306, 0.34990337, -0.23135547, -0.083...",-0.781395,0.349903,-1.794679,0.279431,-0.112167,-0.033242,"[0.05276252, 0.77785933, 0.06949082, 0.0213626...",-0.907375,0.999700,1.035182,0.420477,0.064699,0.028910,"[0.3344789, 0.006361162, -0.5742585, -0.577598...",-0.577598,0.906685,-0.109703,0.327157,-0.006856,0.006516,"[-0.8587306, -0.007185551, 0.93525785, -0.0758...",-0.858731,0.935258,1.143259,0.394144,0.071454,-0.006687,"[0.0023939414, -0.15960115, -0.010181029, -0.4...",-0.443948,0.025912,-1.203144,0.140746,-0.075196,-0.014711,"[-0.91947424, -0.034127485, -0.020234434, -0.3...",-0.919474,0.185311,-1.764981,0.259033,-0.110311,-0.027181,"[0.054023564, 0.0011767177, -0.102661066, -0.8...",-0.879100,0.541912,-1.601783,0.294003,-0.100111,-0.044842,"[0.43178824, 0.06254516, 0.7456638, -0.0089801...",-0.706564,0.908524,0.600563,0.382518,0.037535,-0.016291,"[0.0028196007, 0.065960646, 0.8227715, 0.07023...",-0.654566,0.822771,0.187441,0.321286,0.011715,-0.021842,"[-0.0034086935, 0.3506273, 0.6989124, -0.13715...",-0.863415,0.952902,0.897782,0.515293,0.056111,-0.015513
fe3121591756703137e1bd0da5c3496ba95d0ef9914f3b68ffccba52fc00c0f5,2023-01-31,"[-0.011553582, -0.19275495, 0.04771027, -0.133...",-0.859056,0.956089,6.748791,0.274847,0.026362,0.005905,"[-0.011553582, -0.19275495, 0.04771027, -0.133...",-0.611717,0.357384,-0.036653,0.218055,-0.002291,-0.018952,"[0.013451954, -0.0031244562, -0.13105825, -0.0...",-0.342479,0.927643,0.860453,0.273249,0.053778,0.011733,"[-0.22976455, -0.031765163, -0.12874351, 0.698...",-0.268483,0.757586,1.655247,0.293637,0.103453,0.047858,"[-0.035005275, 0.12737018, -0.19732891, 0.0144...",-0.236000,0.451523,0.659564,0.155480,0.041223,0.016035,"[0.1267742, 0.4118115, 0.13138795, 0.009904793...",-0.177741,0.795336,2.239239,0.267712,0.139952,0.034040,"[-0.18045504, -0.14551829, 0.07530061, -0.0126...",-0.770567,0.183143,-1.643533,0.252552,-0.102721,-0.014537,"[-0.016784953, -0.41120392, 0.06648369, 0.0636...",-0.673562,0.327923,-0.471037,0.216550,-0.029440,0.019973,"[0.024247479, -0.077715896, -0.019266233, 0.02...",-0.714584,0.950233,0.973067,0.353386,0.060817,0.016736,"[0.060597986, 0.0015683726, 0.34829116, -0.084...",-0.084923,0.889111,2.200524,0.254361,0.137533,0.015541,"[-0.80690056, 0.0064362125, 0.8525277, 0.00620...",-0.806901,0.852528,0.438323,0.329949,0.027395,0.006320,"[-0.03616041, 0.060922608, 0.011653408, -0.117...",-0.544266,0.132584,-0.436612,0.145643,-0.027288,-0.001042,"[-0.8590556, 0.02453019, 0.43947944, -0.053925...",-0.859056,0.439479,-0.324835,0.257441,-0.020302,0.004053,"[0.080991164, -0.054910347, -0.3801793, -0.325...",-0.380179,0.956089,-0.289556,0.289659,-0.018097,-0.025551,"[-0.31485182, 0.019330526, 0.69308484, -0.0045...",-0.379913,0.797428,0.624761,0.292336,0.039048,-0.002192,"[-0.006970822, 0.33176956, 0.10140579, -0.0587...",-0.408853,0.624521,0.344943,0.220068,0.021559,-0.008911,"[0.0043155123, -0.026175858, 0.053843252, -0.2...",-0.776237,0.687795,-0.045102,0.357682,-0.002819,-0.023775
feb2bd3d148cdf2d0a281b8f71bead7a337be941616443770125810de32c4d9e,2023-01-31,"[0.06436999, -0.035782747, 0.00076236867, -0.0...",-0.854378,0.999724,6.144876,0.250779,0.024003,0.003823,"[0.06436999, -0.035782747, 0.00076236867, -0.0...",-0.549013,0.412803,-0.170074,0.176227,-0.010630,-0.006432,"[0.015707308, -0.025389787, -0.11086635, 0.000...",-0.129290,0.687770,0.502099,0.192014,0.031381,-0.009001,"[0.16549826, 0.07320427, -0.14191534, 0.654981...",-0.141915,0.905434,2.709363,0.302267,0.169335,0.052380,"[0.027786782, 0.32171908, -0.03060863, 0.00332...",-0.053834,0.414929,0.959782,0.126801,0.059986,0.011367,"[-0.0009515966, 0.12296023, 0.04962935, -0.007...",-0.133064,0.835924,2.365230,0.275863,0.147827,0.025741,"[0.07859127, 0.13542317, 0.033360552, -0.01029...",-0.854378,0.135423,-1.043844,0.234001,-0.065240,0.004025,"[-0.052125975, -0.055178825, 0.118038796, -0.2...",-0.665001,0.209276,-1.481789,0.215582,-0.092612,-0.024955,"[0.01578425, 0.004823431, 0.16483489, -0.06642...",-0.479657,0.999724,1.555999,0.330189,0.097250,0.015593,"[0.32131422, 0.10939164, 0.052927587, -0.14518...",-0.145180,0.886796,1.286633,0.233251,0.080415,0.012931,"[-0.5326841, -0.056549665, 0.81974375, -0.0309...",-0.559249,0.819744,-0.923643,0.290058,-0.057728,-0.043336,"[0.020954886, -0.100916006, -0.013796433, 0.02...",-0.100916,0.250540,0.433142,0.088182,0.027071,0.016718,"[-0.84110135, 0.036382668, 0.02581138, -0.0160...",-0.841101,0.068426,-0.947600,0.209707,-0.059225,-0.003718,"[0.050323453, -0.07177222, 0.07126236, -0.7142...",-0.714271,0.563091,-0.306084,0.232792,-0.019130,-0.006426,"[0.13737392, -0.05428759, 0.65891427, -0.04666...",-0.361260,0.781345,1.322432,0.267477,0.082652,0.003425,"[0.031657446, 0.08300087, 0.035859343, -0.0034...",-0.257903,0.590513,0.088135,0.180159,0.005508,0.003791,"[0.016196618, 0.00072229066, 0.44681633, 0.017...",-0.780953,0.501485,-0.204906,0.326045,-0.012807,0.016658
ff6baefb213567609634414255d94e8461d1c86dde41673f2bef441fe22e0a7e,2023-01-31,"[-0.072769836, -0.4421709, -0.24522848, -0.484...",-0.964173,0.958094,-3.114336,0.384396,-0.012165,-0.000537,"[-0.072769836, -0.4421709, -0.24522848, -0.484...",-0.904047,0.958094,-1.551662,0.428928,-0.096979,-0.085776,"[-0.15580447, 0.03323787, -0.6844299, 0.235929...",-0.684430,0.734365,0.387528,0.318016,0.024221,0.010843,"[0.19090232, -0.102178164, -0.5843533, 0.91326...",-0.584353,0.951226,3.002926,0.437649,0.187683,0.062226,"[0.40324688, -0.31381252, -0.23213716, -0.0884...",-0.618826,0.707196,-0.012386,0.302388,-0.000774,-0.018040,"[0.06868061, 0.4955014, 0.107077874, 0.0179680...",-0.907473,0.908135,0.750825,0.424431,0.046927,0.056509,"[-0.6971153, -0.047665562, 0.012050878, 0.0038...",-0.964173,0.491281,-2.795194,0.376489,-0.174700,0.007436,"[-0.009772758, 0.238258, -0.07657984, 0.089271...",-0.721865,0.238258,-0.662106,0.226443,-0.041382,0.015779,"[0.024667038, 0.6206483, 0.051442407, -0.00386...",-0.867107,0.744370,0.807016,0.387394,0.050439,0.025687,"[0.27292374, -0.09956436, 0.13187295, 0.029669...",-0.572506,0.913518,1.045756,0.315339,0.065360,0.035147,"[-0.9102106, -0.03633304, 0.89060676, -0.20159...",-0.910211,0.890607,-0.491441,0.450721,-0.030715,-0.027552,"[0.023310049, 0.0837792, -0.070460275, -0.0410...",-0.297171,0.208107,-0.288871,0.121699,-0.018054,-0.023111,"[-0.9204162, 0.026397532, -0.47467637, -0.8972...",-0.920416,0.346997,-3.578868,0.360911,-0.223679,-0.054144,"[0.15483847, 0.36980852, -0.93396324, -0.53275...",-0.933963,0.717370,-1.191046,0.406364,-0.074440,-0.019479,"[-0.18799414, 0.12162062, 0.7890069, -0.001038...",-0.909335,0.928080,1.164255,0.392213,0.072766,0.024878,"[0.0012799717, -0.09621569, -0.03624855, -0.02...",-0.579253,0.635895,-0.806682,0.260551,-0.050418,-0.037082,"[-0.00013920247, -0.123322666, 0.68921417, -0....",-0.927907,0.929589,1.105615,0.533644,0.069101,-0.014230


In [179]:
%%time
# Сохраняем фичи по эмбеддингам ptls
output_filename = PATH_DATASET_OUTPUT + f'ptls_{ptls_type}_agg_{datetime.now().strftime("%d_%m_%Y__%H_%M")}.parquet'
print(output_filename)
union_ptls_trx_agg_df.to_parquet(output_filename)
union_ptls_trx_agg_df.shape

datasets/ptls_geo_agg_15_06_2024__16_36.parquet
CPU times: total: 10.1 s
Wall time: 10.2 s


(377469, 119)