In [1]:
import pandas as pd
import numpy as np
pd.set_option('display.max_columns', 50)
pd.set_option('display.max_rows', 350)
import gc, os
import re
import vaex, vaex.ml

In [3]:
# clickstream.zip -   выборка с поведенческими данными из цифровых каналов

# timestamp - дата и время совершения события
# application_id - идентификатор приложения
# client	- Идентификатор клиента 
# session_id - Идентификатор сессии
# event_type - Тип события
# event_category - Категория события
# event_name - Имя события
# event_label - Дополнительный атрибут события
# device_screen_name - Имя экрана на котором произошло событие
# timezone - Часовой пояс
# device_is_webview - Флаг того что страница открыта внутри webview
# page_urlhost - Домен страницы
# page_urlpath_full - Путь страницы
# net_connection_type - Тип подключения
# net_connection_tech - Технология подключения

# prediction_session_timestamp.csv -  выборка для построения прогноза 
# abattle_train_target.csv -  выборка для обучения 

# client - Идентификатор клиента 
# session_id - Идентификатор сессии
# timestamp - Время начала сессии
# target - Целевое действие внутри сессии, multi-class переменная

# abattle_sample_prediction.csv -  сэмпл сабмит.

In [4]:
help(vaex)

Help on package vaex:

NAME
    vaex - Vaex is a library for dealing with larger than memory DataFrames (out of core).

DESCRIPTION
    The most important class (datastructure) in vaex is the :class:`.DataFrame`. A DataFrame is obtained by either opening
    the example dataset:
    
    >>> import vaex
    >>> df = vaex.example()
    
    Or using :func:`open` to open a file.
    
    >>> df1 = vaex.open("somedata.hdf5")
    >>> df2 = vaex.open("somedata.fits")
    >>> df2 = vaex.open("somedata.arrow")
    >>> df4 = vaex.open("somedata.csv")
    
    Or connecting to a remove server:
    
    >>> df_remote = vaex.open("http://try.vaex.io/nyc_taxi_2015")
    
    
    A few strong features of vaex are:
    
     * Performance: works with huge tabular data, process over a billion (> 10\ :sup:`9`\ ) rows/second.
     * Expression system / Virtual columns: compute on the fly, without wasting ram.
     * Memory efficient: no memory copies when doing filtering/selections/subsets.
     * Vis

In [4]:
help(vaex.open)

Help on function open in module vaex:

open(path, convert=False, shuffle=False, copy_index=False, *args, **kwargs)
    Open a DataFrame from file given by path.
    
    Example:
    
    >>> df = vaex.open('sometable.hdf5')
    >>> df = vaex.open('somedata*.csv', convert='bigdata.hdf5')
    
    :param str or list path: local or absolute path to file, or glob string, or list of paths
    :param convert: convert files to an hdf5 file for optimization, can also be a path
    :param bool shuffle: shuffle converted DataFrame or not
    :param args: extra arguments for file readers that need it
    :param kwargs: extra keyword arguments
    :param bool copy_index: copy index when source is read via pandas
    :return: return a DataFrame on success, otherwise None
    :rtype: DataFrame
    
    S3 support:
    
    Vaex supports streaming in hdf5 files from Amazon AWS object storage S3.
    Files are by default cached in $HOME/.vaex/file-cache/s3 such that successive access
    is as fast a

In [3]:
help(vaex.functions)

Help on module vaex.functions in vaex:

NAME
    vaex.functions

FUNCTIONS
    add_geo_json(ds, json_or_file, column_name, longitude_expression, latitude_expresion, label=None, persist=True, overwrite=False, inplace=False, mapping=None)
    
    dt_day(x)
        Extracts the day from a datetime sample.
        
        :returns: an expression containing the day extracted from a datetime column.
        
        Example:
        
        >>> import vaex
        >>> import numpy as np
        >>> date = np.array(['2009-10-12T03:31:00', '2016-02-11T10:17:34', '2015-11-12T11:34:22'], dtype=np.datetime64)
        >>> df = vaex.from_arrays(date=date)
        >>> df
          #  date
          0  2009-10-12 03:31:00
          1  2016-02-11 10:17:34
          2  2015-11-12 11:34:22
        
        >>> df.date.dt.day
        Expression = dt_day(date)
        Length: 3 dtype: int64 (expression)
        -----------------------------------
        0  12
        1  11
        2  12
    
    dt_da

In [10]:
df = vaex.open('./alfabattle2_abattle_clickstream/part-*.parquet', 
               convert='./alfabattle2_abattle_clickstream/bigdata.hdf5')

In [11]:
df

#,timestamp,application_id,client,session_id,event_type,event_category,event_name,event_label,device_screen_name,timezone,device_is_webview,page_urlhost,page_urlpath_full,net_connection_type,net_connection_tech
0,2020-01-15 08:46:32.579000000,anketa,107efb4507525ce982e19331b36b0e2f,9a1366886da59ec85f0ef5e06f1e97f6,pv,,,,,Asia/Omsk,True,anketa.alfabank.ru,03eb3ceab6204f8b602f1863f7fbce01,,
1,2020-04-29 19:20:09.705000000,anketa,32dac9ab28a497a8061e02786bb3a95a,bdf7fc21e4e88d1b4f0838c80b21a1e3,pv,,,,,Asia/Yekaterinburg,True,anketa.alfabank.ru,03eb3ceab6204f8b602f1863f7fbce01,,
2,2020-02-21 03:21:24.058000000,anketa,489818f57087b59790435b3acf18018a,8bae6c4249db74dc0937e1ff00d61079,pv,,,,,Asia/Yakutsk,True,anketa.alfabank.ru,03eb3ceab6204f8b602f1863f7fbce01,,
3,2020-08-06 06:54:30.918000000,anketa,489818f57087b59790435b3acf18018a,3a95ba68795b745ca09ebee138b418ef,pv,,,,,Asia/Yakutsk,True,anketa.alfabank.ru,03eb3ceab6204f8b602f1863f7fbce01,,
4,2020-02-28 18:51:56.452000000,anketa,6dde48ffeb6d354628303c9d9b6721be,1db024ef087161f30a3c981cb32bb565,pv,,,,,Asia/Omsk,True,anketa.alfabank.ru,03eb3ceab6204f8b602f1863f7fbce01,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
120025281,2020-01-30 06:08:58.984000000,site,1f89fcbd636baf8b4a08f003626c1c07,fe232104d6c408463b4d1588137af8bb,se,flocktory-precheckout,system-show,e1da19a13dedeb0c5c02ef0750201fe0,,Europe/Minsk,True,travel.alfabank.ru,d612076c3948baf3fc5cb65ccd006db5,,
120025282,2020-03-10 16:15:16.416000000,site,bd5979fffbf9d2e3d66203474df97898,2045bacc141fb1786f25a192dbab8264,se,Pixel,Match,a19e4a8f8af8d3326120cd5a2b946ee9,,Europe/Minsk,True,travel.alfabank.ru,e4286cb4e67fcfd64f41b982bc3dbb8a,,
120025283,2020-01-04 13:49:12.932000000,site,8ebd42b552e51e1e2dfb2ebea56d16f7,29ec828dbf2d8fb6c2fd0b737169ef9b,se,Pixel,Match,a19e4a8f8af8d3326120cd5a2b946ee9,,Europe/Minsk,True,travel.alfabank.ru,e98a4d4a15356368e51de30169a611a8,,
120025284,2020-06-03 11:26:29.646000000,site,f73c439a6b0cfe941fcbefa8a1d94610,d33008b1774c8980641c369ac69d4639,se,JivoSite,Offline message sent,1a70145c185f60da9db8d0e9755183b4,,Asia/Karachi,True,travel.alfabank.ru,ea4353a6cbbd67ac59323abfa7a3071b,,


In [12]:
df['timestamp'] = df['timestamp'].astype('datetime64[ns]')

In [28]:
df['zone_continent'] = df.timezone.apply(lambda x: x.split('/')[0])
df['zone_town'] = df.timezone.apply(lambda x: str(x + '/').split('/')[1])

In [24]:
df['application_id'].value_counts()

mobile                                      115670971
retail_upsale_ui                              1021548
anketa                                         695988
site                                           589999
retail_cards_activation                        572583
retail_upsale_cards                            539782
retail_cards_pin_change                        236260
retail_cards_insurance_antifraud_sale          165197
retail_realty_insurance                        135070
retail_debit_card_order                        123698
retail-taxes-ui                                119653
retail-upsale-ui                                47440
retail_upsale_verification                      29904
retail-marketplace                              29798
retail_upsale_credit_holidays_ui                21136
retail_insurance_sale                           17993
retail_credit_cards_insurance_sale               3296
retail_digital_wealth_management_main_ui         3283
retail-upsale-consent-ui    

In [25]:
encoder = vaex.ml.LabelEncoder(features=['application_id'])
df = encoder.fit_transform(df);

In [26]:
encoder = vaex.ml.LabelEncoder(features=['event_type'])
df = encoder.fit_transform(df);

In [27]:
encoder = vaex.ml.LabelEncoder(features=['event_category'])
df = encoder.fit_transform(df);

In [28]:
encoder = vaex.ml.LabelEncoder(features=['event_name'])
df = encoder.fit_transform(df);

In [29]:
encoder = vaex.ml.LabelEncoder(features=['device_screen_name'])
df = encoder.fit_transform(df);

In [30]:
encoder = vaex.ml.LabelEncoder(features=['timezone'])
df = encoder.fit_transform(df);

In [31]:
# encoder = vaex.ml.LabelEncoder(features=['device_is_webview'])
# df = encoder.fit_transform(df);

In [32]:
encoder = vaex.ml.LabelEncoder(features=['page_urlhost'])
df = encoder.fit_transform(df);

In [33]:
encoder = vaex.ml.LabelEncoder(features=['page_urlpath_full'])
df = encoder.fit_transform(df);

In [34]:
encoder = vaex.ml.LabelEncoder(features=['net_connection_type'])
df = encoder.fit_transform(df);

In [35]:
encoder = vaex.ml.LabelEncoder(features=['net_connection_tech'])
df = encoder.fit_transform(df);

In [30]:
encoder = vaex.ml.LabelEncoder(features=['zone_continent'])
df = encoder.fit_transform(df);

In [59]:
encoder = vaex.ml.LabelEncoder(features=['zone_town'])
df = encoder.fit_transform(df);

In [19]:
df

#,timestamp,application_id,client,session_id,event_type,event_category,event_name,event_label,device_screen_name,timezone,device_is_webview,page_urlhost,page_urlpath_full,net_connection_type,net_connection_tech,label_encoded_application_id,label_encoded_event_type,label_encoded_event_category,label_encoded_event_name,label_encoded_device_screen_name,label_encoded_timezone,label_encoded_page_urlhost,label_encoded_page_urlpath_full,label_encoded_net_connection_type,label_encoded_net_connection_tech
0,2020-01-15 08:46:32.579000000,anketa,107efb4507525ce982e19331b36b0e2f,9a1366886da59ec85f0ef5e06f1e97f6,pv,,,,,Asia/Omsk,True,anketa.alfabank.ru,03eb3ceab6204f8b602f1863f7fbce01,,,0,2,,,,8,4,13570,,
1,2020-04-29 19:20:09.705000000,anketa,32dac9ab28a497a8061e02786bb3a95a,bdf7fc21e4e88d1b4f0838c80b21a1e3,pv,,,,,Asia/Yekaterinburg,True,anketa.alfabank.ru,03eb3ceab6204f8b602f1863f7fbce01,,,0,2,,,,2,4,13570,,
2,2020-02-21 03:21:24.058000000,anketa,489818f57087b59790435b3acf18018a,8bae6c4249db74dc0937e1ff00d61079,pv,,,,,Asia/Yakutsk,True,anketa.alfabank.ru,03eb3ceab6204f8b602f1863f7fbce01,,,0,2,,,,40,4,13570,,
3,2020-08-06 06:54:30.918000000,anketa,489818f57087b59790435b3acf18018a,3a95ba68795b745ca09ebee138b418ef,pv,,,,,Asia/Yakutsk,True,anketa.alfabank.ru,03eb3ceab6204f8b602f1863f7fbce01,,,0,2,,,,40,4,13570,,
4,2020-02-28 18:51:56.452000000,anketa,6dde48ffeb6d354628303c9d9b6721be,1db024ef087161f30a3c981cb32bb565,pv,,,,,Asia/Omsk,True,anketa.alfabank.ru,03eb3ceab6204f8b602f1863f7fbce01,,,0,2,,,,8,4,13570,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
120025281,2020-01-30 06:08:58.984000000,site,1f89fcbd636baf8b4a08f003626c1c07,fe232104d6c408463b4d1588137af8bb,se,flocktory-precheckout,system-show,e1da19a13dedeb0c5c02ef0750201fe0,,Europe/Minsk,True,travel.alfabank.ru,d612076c3948baf3fc5cb65ccd006db5,,,22,0,162,7421,,34,6,78599,,
120025282,2020-03-10 16:15:16.416000000,site,bd5979fffbf9d2e3d66203474df97898,2045bacc141fb1786f25a192dbab8264,se,Pixel,Match,a19e4a8f8af8d3326120cd5a2b946ee9,,Europe/Minsk,True,travel.alfabank.ru,e4286cb4e67fcfd64f41b982bc3dbb8a,,,22,0,158,7404,,34,6,96385,,
120025283,2020-01-04 13:49:12.932000000,site,8ebd42b552e51e1e2dfb2ebea56d16f7,29ec828dbf2d8fb6c2fd0b737169ef9b,se,Pixel,Match,a19e4a8f8af8d3326120cd5a2b946ee9,,Europe/Minsk,True,travel.alfabank.ru,e98a4d4a15356368e51de30169a611a8,,,22,0,158,7404,,34,6,13541,,
120025284,2020-06-03 11:26:29.646000000,site,f73c439a6b0cfe941fcbefa8a1d94610,d33008b1774c8980641c369ac69d4639,se,JivoSite,Offline message sent,1a70145c185f60da9db8d0e9755183b4,,Asia/Karachi,True,travel.alfabank.ru,ea4353a6cbbd67ac59323abfa7a3071b,,,22,0,197,8744,,73,6,70103,,


In [39]:
df['timestamp', 'client', 'session_id', 'label_encoded_application_id', 
                                       'label_encoded_event_type', 
                                       'label_encoded_event_category', 
                                       'label_encoded_event_name', 
                                       'label_encoded_device_screen_name', 
                                       'label_encoded_timezone', 
                                       'label_encoded_page_urlhost', 
                                       'label_encoded_page_urlpath_full', 
                                       'label_encoded_net_connection_type', 
                                       'label_encoded_net_connection_tech'
  ].export(f'./alfabattle2_abattle_clickstream/bigdata_2.hdf5', progress=True)

[########################################] 99.99% estimated time:     0.03s =  0.0m =  0.0h                                                                                                                                                                    

In [4]:
df = vaex.open('./alfabattle2_abattle_clickstream/bigdata_2.hdf5')

In [5]:
df

#,timestamp,client,session_id,label_encoded_application_id,label_encoded_event_type,label_encoded_event_category,label_encoded_event_name,label_encoded_device_screen_name,label_encoded_timezone,label_encoded_page_urlhost,label_encoded_page_urlpath_full,label_encoded_net_connection_type,label_encoded_net_connection_tech
0,2020-01-15 08:46:32.579000000,107efb4507525ce982e19331b36b0e2f,9a1366886da59ec85f0ef5e06f1e97f6,8,2,-1,-1,-1,4,10,26901,-1,-1
1,2020-04-29 19:20:09.705000000,32dac9ab28a497a8061e02786bb3a95a,bdf7fc21e4e88d1b4f0838c80b21a1e3,8,2,-1,-1,-1,5,10,26901,-1,-1
2,2020-02-21 03:21:24.058000000,489818f57087b59790435b3acf18018a,8bae6c4249db74dc0937e1ff00d61079,8,2,-1,-1,-1,8,10,26901,-1,-1
3,2020-08-06 06:54:30.918000000,489818f57087b59790435b3acf18018a,3a95ba68795b745ca09ebee138b418ef,8,2,-1,-1,-1,8,10,26901,-1,-1
4,2020-02-28 18:51:56.452000000,6dde48ffeb6d354628303c9d9b6721be,1db024ef087161f30a3c981cb32bb565,8,2,-1,-1,-1,4,10,26901,-1,-1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
120025281,2020-01-30 06:08:58.984000000,1f89fcbd636baf8b4a08f003626c1c07,fe232104d6c408463b4d1588137af8bb,7,1,250,3261,-1,15,9,13853,-1,-1
120025282,2020-03-10 16:15:16.416000000,bd5979fffbf9d2e3d66203474df97898,2045bacc141fb1786f25a192dbab8264,7,1,247,3252,-1,15,9,13880,-1,-1
120025283,2020-01-04 13:49:12.932000000,8ebd42b552e51e1e2dfb2ebea56d16f7,29ec828dbf2d8fb6c2fd0b737169ef9b,7,1,247,3252,-1,15,9,13890,-1,-1
120025284,2020-06-03 11:26:29.646000000,f73c439a6b0cfe941fcbefa8a1d94610,d33008b1774c8980641c369ac69d4639,7,1,287,3528,-1,54,9,13891,-1,-1


In [5]:
df['timestamp', 'client', 'session_id', 'device_is_webview', 
   'label_encoded_application_id', 'label_encoded_event_type', 
   'label_encoded_event_category', 'label_encoded_event_name', 
   'label_encoded_device_screen_name', 'label_encoded_timezone', 
   'label_encoded_page_urlhost', 'label_encoded_page_urlpath_full', 
   'label_encoded_net_connection_type', 'label_encoded_net_connection_tech', 
   'label_encoded_zone_continent', 
   'label_encoded_zone_town'].export_csv(f'./df_pandas_2.csv', progress=True)

[########################################] 100.00% elapsed time  :  2271.84s =  37.9m =  0.6h
 

In [None]:
df_pandas = df.to_pandas_df([#'timestamp', 
                             #'client',
                             #'session_id',
                             #'device_is_webview',
                             'label_encoded_application_id',
                             'label_encoded_event_type', 
                             'label_encoded_event_category', 
                             'label_encoded_event_name', 
                             'label_encoded_device_screen_name', 
                             'label_encoded_timezone', 
                             'label_encoded_page_urlhost', 
                             'label_encoded_page_urlpath_full', 
                             'label_encoded_net_connection_type', 
                             'label_encoded_net_connection_tech', 
                             'label_encoded_zone_continent', 
                             'label_encoded_zone_town'])

In [14]:
df[(df['client']=='90725b54ce77576883813d87749df6bd') & (df['timestamp']==np.datetime64('2020-08-01 00:02:57'))]

#,timestamp,application_id,client,session_id,event_type,event_category,event_name,event_label,device_screen_name,timezone,device_is_webview,page_urlhost,page_urlpath_full,net_connection_type,net_connection_tech,year,month,day,hour,minute,second,quarter,dayofyear,weekofyear,dayofweek,zone_continent,zone_town
0,2020-08-01 00:02:57,mobile,9589ba6ff9363c3eb5a148373e496aa7,8b94df22989193b94e6269e0b0a26059,sv,,,,UserProfileActivity,Europe/Moscow,True,,,wifi,,2020,8,1,0,2,57,3,214,31,5,Europe,Moscow
1,2020-08-01 00:02:57,mobile,9dbb596fa589dd8d3958cf99de57e93a,8a970778232ead5c0145965401c99ab6,se,Application Lifecycle,Background,,,Europe/Moscow,True,,,wifi,,2020,8,1,0,2,57,3,214,31,5,Europe,Moscow


In [19]:
df[(df['client']=='4e46e206d38dcb7af41d9ced20d50ac2')].sort('timestamp', ascending=False)

#,timestamp,application_id,client,session_id,event_type,event_category,event_name,event_label,device_screen_name,timezone,device_is_webview,page_urlhost,page_urlpath_full,net_connection_type,net_connection_tech,year,month,day,hour,minute,second,quarter,dayofyear,weekofyear,dayofweek,zone_continent,zone_town
0,2020-09-29 12:35:13,mobile,4e46e206d38dcb7af41d9ced20d50ac2,a3b3101bd7cd1b16d8b61d74d551db4a,se,Widget Dashboard,Click > Account Item,,Accounts And Cards Widget,Europe/Moscow,True,,,wifi,,2020,9,29,12,35,13,3,273,40,1,Europe,Moscow
1,2020-09-29 12:35:09,mobile,4e46e206d38dcb7af41d9ced20d50ac2,a3b3101bd7cd1b16d8b61d74d551db4a,sv,,,,MainListOnWidgetsFragment,Europe/Moscow,True,,,wifi,,2020,9,29,12,35,9,3,273,40,1,Europe,Moscow
2,2020-09-29 12:35:01,mobile,4e46e206d38dcb7af41d9ced20d50ac2,a3b3101bd7cd1b16d8b61d74d551db4a,sv,,,,SignInActivity,Europe/Moscow,True,,,wifi,,2020,9,29,12,35,1,3,273,40,1,Europe,Moscow
3,2020-09-29 12:35:00,mobile,4e46e206d38dcb7af41d9ced20d50ac2,a3b3101bd7cd1b16d8b61d74d551db4a,se,Application Lifecycle,Foreground,,,Europe/Moscow,True,,,wifi,,2020,9,29,12,35,0,3,273,40,1,Europe,Moscow
4,2020-09-28 15:47:19,mobile,4e46e206d38dcb7af41d9ced20d50ac2,59e26d7fba7292bdb4ae77bb2bd643ef,se,Application Lifecycle,Background,,,Europe/Moscow,True,,,wifi,,2020,9,28,15,47,19,3,272,40,0,Europe,Moscow
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11009,2020-01-02 22:43:37,mobile,4e46e206d38dcb7af41d9ced20d50ac2,63191c93e00ace2dfb882dca5fdbf0d1,sv,,,,MainListFragment,Europe/Kiev,True,,,wifi,,2020,1,2,22,43,37,1,2,1,3,Europe,Kiev
11010,2020-01-02 22:43:32,mobile,4e46e206d38dcb7af41d9ced20d50ac2,63191c93e00ace2dfb882dca5fdbf0d1,sv,,,,SignInActivity,Europe/Kiev,True,,,wifi,,2020,1,2,22,43,32,1,2,1,3,Europe,Kiev
11011,2020-01-02 19:27:59,mobile,4e46e206d38dcb7af41d9ced20d50ac2,cca87ff1fb46cc922cd1bbee6e47887c,sv,,,,SignInActivity,Europe/Kiev,True,,,wifi,,2020,1,2,19,27,59,1,2,1,3,Europe,Kiev
11012,2020-01-01 13:02:04,mobile,4e46e206d38dcb7af41d9ced20d50ac2,9b243e53b598e11dbdc07cbb1c67c190,sv,,,,MainListFragment,Europe/Kiev,True,,,wifi,,2020,1,1,13,2,4,1,1,1,2,Europe,Kiev


In [36]:
df[(df['client']=='73b4ee58ec16778d9e966bdc0595fcb7') & \
   (df['timestamp']<=np.datetime64('2020-06-20 17:36:50'))] \
.sort('timestamp', ascending=False).head(10)

#,timestamp,application_id,client,session_id,event_type,event_category,event_name,event_label,device_screen_name,timezone,device_is_webview,page_urlhost,page_urlpath_full,net_connection_type,net_connection_tech,year,month,day,hour,minute,second,quarter,dayofyear,weekofyear,dayofweek,zone_continent,zone_town
0,2020-06-20 17:35:58,mobile,73b4ee58ec16778d9e966bdc0595fcb7,fffed0d3e5a8aa83b17ea4f1aed68b6f,se,Application Lifecycle,Background,,,Europe/Moscow,True,,,offline,,2020,6,20,17,35,58,2,172,25,5,Europe,Moscow
1,2020-06-20 17:35:55,mobile,73b4ee58ec16778d9e966bdc0595fcb7,fffed0d3e5a8aa83b17ea4f1aed68b6f,se,Widget Dashboard,Error,4930579a044a6143726f5a2dfca83203,Widget Dashboard,Europe/Moscow,True,,,mobile,HSPA+,2020,6,20,17,35,55,2,172,25,5,Europe,Moscow
2,2020-06-20 17:35:55,mobile,73b4ee58ec16778d9e966bdc0595fcb7,fffed0d3e5a8aa83b17ea4f1aed68b6f,sv,,,,MainListOnWidgetsFragment,Europe/Moscow,True,,,mobile,HSPA+,2020,6,20,17,35,55,2,172,25,5,Europe,Moscow
3,2020-06-20 17:35:51,mobile,73b4ee58ec16778d9e966bdc0595fcb7,fffed0d3e5a8aa83b17ea4f1aed68b6f,sv,,,,SignInActivity,Europe/Moscow,True,,,mobile,HSPA+,2020,6,20,17,35,51,2,172,25,5,Europe,Moscow
4,2020-06-20 17:35:50,mobile,73b4ee58ec16778d9e966bdc0595fcb7,fffed0d3e5a8aa83b17ea4f1aed68b6f,se,Application Lifecycle,Foreground,,,Europe/Moscow,True,,,mobile,HSPA+,2020,6,20,17,35,50,2,172,25,5,Europe,Moscow
5,2020-06-20 17:07:49,mobile,73b4ee58ec16778d9e966bdc0595fcb7,a17bc0601b19342e10eb828eb6ded6e0,se,Push,receive,,,Europe/Moscow,True,,,mobile,HSPA+,2020,6,20,17,7,49,2,172,25,5,Europe,Moscow
6,2020-06-20 17:07:36,mobile,73b4ee58ec16778d9e966bdc0595fcb7,a17bc0601b19342e10eb828eb6ded6e0,se,Push,receive,,,Europe/Moscow,True,,,mobile,HSPA+,2020,6,20,17,7,36,2,172,25,5,Europe,Moscow
7,2020-06-20 17:02:18,mobile,73b4ee58ec16778d9e966bdc0595fcb7,a17bc0601b19342e10eb828eb6ded6e0,se,Application Lifecycle,Background,,,Europe/Moscow,True,,,offline,,2020,6,20,17,2,18,2,172,25,5,Europe,Moscow
8,2020-06-20 17:02:17,mobile,73b4ee58ec16778d9e966bdc0595fcb7,a17bc0601b19342e10eb828eb6ded6e0,sv,,,,MainListOnWidgetsFragment,Europe/Moscow,True,,,mobile,HSPA+,2020,6,20,17,2,17,2,172,25,5,Europe,Moscow
9,2020-06-20 17:02:16,mobile,73b4ee58ec16778d9e966bdc0595fcb7,a17bc0601b19342e10eb828eb6ded6e0,se,Application Lifecycle,Foreground,,,Europe/Moscow,True,,,mobile,HSPA+,2020,6,20,17,2,16,2,172,25,5,Europe,Moscow


In [5]:
df[(df['client']=='ffffe1150b0d2ca8005786cd35729226') & \
   (df['timestamp']==np.datetime64('2020-09-14 04:12:49'))]

#,timestamp,application_id,client,session_id,event_type,event_category,event_name,event_label,device_screen_name,timezone,device_is_webview,page_urlhost,page_urlpath_full,net_connection_type,net_connection_tech,year,month,day,hour,minute,second,quarter,dayofyear,weekofyear,dayofweek,zone_continent,zone_town
0,2020-09-14 04:12:49,mobile,ffffe1150b0d2ca8005786cd35729226,a7b95fa31477d4b27285a477f77ee593,se,Application Lifecycle,Foreground,,,Europe/Moscow,True,,,mobile,LTE,2020,9,14,4,12,49,3,258,38,0,Europe,Moscow
1,2020-09-14 04:12:49,mobile,ffffe1150b0d2ca8005786cd35729226,a7b95fa31477d4b27285a477f77ee593,sv,,,,MainListOnWidgetsFragment,Europe/Moscow,True,,,mobile,LTE,2020,9,14,4,12,49,3,258,38,0,Europe,Moscow
2,2020-09-14 04:12:49,mobile,ffffe1150b0d2ca8005786cd35729226,a7b95fa31477d4b27285a477f77ee593,sv,,,,StatementFragment,Europe/Moscow,True,,,mobile,LTE,2020,9,14,4,12,49,3,258,38,0,Europe,Moscow
