# Dask tutorial

In [2]:
# PyArrow is a dependency of Dask
# !sudo apt-get update
# !sudo apt-get install python3-dask
# !pip3 install pyarrow

import dask
import dask.dataframe as dd

# With pandas >= 2.0.0, Dask converts object columns to strings by default,
# e.g. [("i", 0.48)] becomes '[("i", 0.48)]'. Turn that conversion off so the
# nested list columns keep their original Python objects.
_ = dask.config.set(
    {"dataframe.convert-string": False}
)

Dask DataFrames are just collections of pandas DataFrames, one per partition.

In [3]:
# Location of the training Parquet data.
dataset_path = "smadex-challenge-predict-the-revenue/train/train"

# Keep rows whose `datetime` value is >= "2025-10-01-00-00" and < "2025-10-13-00-00".
filters = [
    ("datetime", ">=", "2025-10-01-00-00"),
    ("datetime", "<", "2025-10-13-00-00"),
]

ddf = dd.read_parquet(dataset_path, filters=filters)

We can operate on Dask DataFrames with (mostly) the same API as pandas DataFrames...

In [13]:
# Preview the first rows; unlike displaying `ddf` itself, this returns concrete values.
ddf.head()

Unnamed: 0,buyer_d1,buyer_d7,buyer_d14,buyer_d28,buy_d7,buy_d14,buy_d28,iap_revenue_d7,iap_revenue_d14,iap_revenue_d28,...,user_bundles_l28d,weekend_ratio,weeks_since_first_seen,wifi_ratio,whale_users_bundle_num_buys_prank,whale_users_bundle_revenue_prank,whale_users_bundle_total_num_buys,whale_users_bundle_total_revenue,row_id,datetime
0,0,1,1,1,1,1,1,2.147718,2.147718,2.147718,...,"[88981729bd5c1e5aea9ada4bce00a2531e9e98f7, 25c...",0.019802,6.0,0.913366,,,,,819ecc0e-1a97-43ed-83f6-b9ede4f7fc48,2025-10-01-00-00
1,0,0,0,0,0,0,0,0.0,0.0,0.0,...,,,,,,,,,0a7fbf18-5041-42af-bd0a-0cb6586b8598,2025-10-01-00-00
2,0,0,0,0,0,0,0,0.0,0.0,0.0,...,"[6506b7e0a24666debd08f74266800f2eb154df5a, 150...",0.399021,6.0,0.999388,,,,,fc1a2689-b136-4ffa-b23b-9d8215bd720f,2025-10-01-00-00
3,0,0,0,0,0,0,0,0.0,0.0,0.0,...,"[2b472e3dc96f1847490d7411b25e12ed417b9714, 3ba...",0.121547,6.0,1.0,,,,,0340fcc6-50bd-42ab-b9f4-4c1184b640cb,2025-10-01-00-00
4,0,0,0,0,0,0,0,0.0,0.0,0.0,...,"[1031535cf2a1315422fd05d321349bcd3c3ffc04, 478...",0.293285,6.0,0.160243,,,,,219d253f-bef4-4039-84b2-ed55f009cc43,2025-10-01-00-00


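...but a Dask DataFrame does not actually hold the data in memory. Note that getting the head of the DataFrame was computationally expensive because the data had to be loaded into memory first. Displaying the DataFrame itself only shows its structure (column names, dtypes and number of partitions):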

In [14]:
# Displaying the Dask DataFrame shows only its structure (columns, dtypes, npartitions), not values.
ddf

Unnamed: 0_level_0,buyer_d1,buyer_d7,buyer_d14,buyer_d28,buy_d7,buy_d14,buy_d28,iap_revenue_d7,iap_revenue_d14,iap_revenue_d28,registration,retention_d1_to_d7,retention_d3_to_d7,retention_d7_to_d14,retention_d1,retention_d3,retentiond7,advertiser_bundle,advertiser_category,advertiser_subcategory,advertiser_bottom_taxonomy_level,carrier,country,region,dev_make,dev_model,dev_os,dev_osv,hour,release_date,release_msrp,weekday,avg_act_days,avg_daily_sessions,avg_days_ins,avg_duration,bcat,bcat_bottom_taxonomy,bundles_cat,bundles_cat_bottom_taxonomy,bundles_ins,city_hist,country_hist,cpm,cpm_pct_rk,ctr,ctr_pct_rk,dev_language_hist,dev_osv_hist,first_request_ts,first_request_ts_bundle,first_request_ts_category_bottom_taxonomy,hour_ratio,iap_revenue_usd_bundle,iap_revenue_usd_category,iap_revenue_usd_category_bottom_taxonomy,last_buy,last_buy_ts_bundle,last_buy_ts_category,last_ins,last_install_ts_bundle,last_install_ts_category,advertiser_actions_action_count,advertiser_actions_action_last_timestamp,user_actions_bundles_action_count,user_actions_bundles_action_last_timestamp,last_advertiser_action,new_bundles,num_buys_bundle,num_buys_category,num_buys_category_bottom_taxonomy,region_hist,rev_by_adv,rwd_prank,user_bundles,user_bundles_l28d,weekend_ratio,weeks_since_first_seen,wifi_ratio,whale_users_bundle_num_buys_prank,whale_users_bundle_revenue_prank,whale_users_bundle_total_num_buys,whale_users_bundle_total_revenue,row_id,datetime
npartitions=144,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1
,int32,int32,int32,int32,int64,int64,int64,float64,float64,float64,int32,int32,int32,int32,int32,int32,int32,object,object,object,object,object,object,object,object,object,object,object,object,object,int64,int32,float64,object,float64,object,object,object,object,object,object,object,object,object,object,object,object,object,object,int64,object,object,object,object,object,object,int64,object,object,int64,object,object,object,object,object,object,object,object,object,object,object,object,object,object,object,object,float64,int32,float64,object,object,object,object,object,category[known]
,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...


We need to manually get hold of the partitions individually. We can do this easily by converting the DataFrame into a list of Dask Delayed objects.

In [7]:
# Turn the Dask DataFrame into a list of Delayed objects so partitions can be picked by index.
parts = list(ddf.to_delayed())

Then load each part separately. Just loading one partition is not _too_ slow.

In [21]:
# Materialise the first partition in memory, then preview its first rows.
part_0 = parts[0].compute()
part_0.head()

Unnamed: 0,buyer_d1,buyer_d7,buyer_d14,buyer_d28,buy_d7,buy_d14,buy_d28,iap_revenue_d7,iap_revenue_d14,iap_revenue_d28,...,user_bundles_l28d,weekend_ratio,weeks_since_first_seen,wifi_ratio,whale_users_bundle_num_buys_prank,whale_users_bundle_revenue_prank,whale_users_bundle_total_num_buys,whale_users_bundle_total_revenue,row_id,datetime
0,0,1,1,1,1,1,1,2.147718,2.147718,2.147718,...,"[88981729bd5c1e5aea9ada4bce00a2531e9e98f7, 25c...",0.019802,6.0,0.913366,,,,,819ecc0e-1a97-43ed-83f6-b9ede4f7fc48,2025-10-01-00-00
1,0,0,0,0,0,0,0,0.0,0.0,0.0,...,,,,,,,,,0a7fbf18-5041-42af-bd0a-0cb6586b8598,2025-10-01-00-00
2,0,0,0,0,0,0,0,0.0,0.0,0.0,...,"[6506b7e0a24666debd08f74266800f2eb154df5a, 150...",0.399021,6.0,0.999388,,,,,fc1a2689-b136-4ffa-b23b-9d8215bd720f,2025-10-01-00-00
3,0,0,0,0,0,0,0,0.0,0.0,0.0,...,"[2b472e3dc96f1847490d7411b25e12ed417b9714, 3ba...",0.121547,6.0,1.0,,,,,0340fcc6-50bd-42ab-b9f4-4c1184b640cb,2025-10-01-00-00
4,0,0,0,0,0,0,0,0.0,0.0,0.0,...,"[1031535cf2a1315422fd05d321349bcd3c3ffc04, 478...",0.293285,6.0,0.160243,,,,,219d253f-bef4-4039-84b2-ed55f009cc43,2025-10-01-00-00


The loaded partition is a Pandas DataFrame.

In [20]:
# Check what kind of object compute() returned for a single partition.
type(part_0)

pandas.core.frame.DataFrame

In [19]:
# Summary statistics for this single partition only, not for the whole dataset.
part_0.describe()

Unnamed: 0,buyer_d1,buyer_d7,buyer_d14,buyer_d28,buy_d7,buy_d14,buy_d28,iap_revenue_d7,iap_revenue_d14,iap_revenue_d28,...,release_msrp,weekday,avg_act_days,avg_days_ins,first_request_ts,last_buy,last_ins,weekend_ratio,weeks_since_first_seen,wifi_ratio
count,121887.0,121887.0,121887.0,121887.0,121887.0,121887.0,121887.0,121887.0,121887.0,121887.0,...,109905.0,121887.0,61107.0,7183.0,53907.0,2374.0,20522.0,63305.0,67489.0,63304.0
mean,0.031217,0.042301,0.044615,0.046601,0.095096,0.124853,0.174161,8.001834,8.308581,9.071367,...,564.108494,3.0,3.998519,5.228739,1758773000.0,1758279000.0,1758424000.0,0.329158,4.877195,0.641166
std,0.173906,0.201277,0.206458,0.210783,0.996749,1.397024,2.388029,2048.960358,2049.186514,2050.423635,...,514.951762,0.0,2.115821,5.584633,375764.1,712223.3,740638.6,0.270035,1.947677,0.39861
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,30.0,3.0,1.0,0.0,1758067000.0,1756600000.0,1756771000.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,180.0,3.0,2.0,1.0,1758499000.0,1757735000.0,1757857000.0,0.12987,4.0,0.2212
50%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,299.0,3.0,4.0,3.333333,1758499000.0,1758478000.0,1758611000.0,0.27981,6.0,0.842471
75%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,899.0,3.0,6.0,7.0,1759190000.0,1758888000.0,1759091000.0,0.458333,6.0,1.0
max,1.0,1.0,1.0,1.0,136.0,150.0,368.0,696622.14,696622.14,696622.14,...,5160.0,3.0,7.0,28.0,1759190000.0,1759104000.0,1759277000.0,1.0,6.0,1.0


In [24]:
# Boolean mask with the same shape as part_0: True wherever a value is missing.
part_0.isna()

Unnamed: 0,buyer_d1,buyer_d7,buyer_d14,buyer_d28,buy_d7,buy_d14,buy_d28,iap_revenue_d7,iap_revenue_d14,iap_revenue_d28,...,user_bundles_l28d,weekend_ratio,weeks_since_first_seen,wifi_ratio,whale_users_bundle_num_buys_prank,whale_users_bundle_revenue_prank,whale_users_bundle_total_num_buys,whale_users_bundle_total_revenue,row_id,datetime
0,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,True,True,True,True,False,False
1,False,False,False,False,False,False,False,False,False,False,...,True,True,True,True,True,True,True,True,False,False
2,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,True,True,True,True,False,False
3,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,True,True,True,True,False,False
4,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,True,True,True,True,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
121882,False,False,False,False,False,False,False,False,False,False,...,True,True,True,True,True,True,True,True,False,False
121883,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,True,True,True,True,False,False
121884,False,False,False,False,False,False,False,False,False,False,...,True,True,True,True,True,True,True,True,False,False
121885,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,True,True,True,True,False,False


## Using Dask with PyTorch

The main objective is to be able to use PyTorch's DataLoaders with our Dask DataFrame. This is difficult because PyTorch does not accept Dask DataFrames.

In [25]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils as utils

# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

The _state-of-the-art_ solution (ChatGPT's suggestion) is to create a subclass of PyTorch's `IterableDataset`, which a DataLoader can then consume.

In [26]:
class DaskIterableDataset(utils.data.IterableDataset):
    """A PyTorch IterableDataset that streams rows out of a Dask DataFrame.

    Partitions are computed one at a time, so at most one partition is held
    in memory per iterating process. Each item yielded is one row of the
    partition, as produced by ``DataFrame.iterrows()``.

    When the dataset is consumed by a ``DataLoader`` with ``num_workers > 0``,
    every worker process gets its own copy of the dataset. Without sharding,
    each worker would walk all partitions and every row would be yielded once
    per worker. Partitions are therefore split round-robin across workers.
    In single-process loading, all partitions are visited in order.

    Args:
        ddf: The Dask DataFrame to stream. It only needs to provide
            ``to_delayed()`` returning a sequence of objects whose
            ``compute()`` returns a pandas DataFrame.
    """

    def __init__(self, ddf: "dd.DataFrame"):
        super().__init__()
        self.ddf = ddf

    def __iter__(self):
        parts = list(self.ddf.to_delayed())

        # get_worker_info() is None in the main process and describes the
        # current worker inside a DataLoader worker process.
        worker = utils.data.get_worker_info()
        if worker is not None:
            # Worker k handles partitions k, k + num_workers, k + 2 * num_workers, ...
            parts = parts[worker.id::worker.num_workers]

        for part in parts:
            pdf = part.compute()
            for _, row in pdf.iterrows():
                yield row
            # Drop the reference before the next compute() call so that two
            # partitions are never alive at the same time.
            del pdf

Then iterate over it. This takes a LOT of time, so we only print one row out of every `N` to follow the progress.

In [None]:
dataset = DaskIterableDataset(ddf)

# Walk the whole dataset but only print one row out of every N,
# so the output stays readable while still showing progress.
N = 1e5
for i, row in enumerate(dataset):
    if i % N:
        continue
    print(row.values)

[0 1 1 1 1 1 1 2.147718 2.147718 2.147718 nan 1.0 1.0 1.0 1.0 0.0 1.0
 '26fc75598f3a2e63782b4166883034072c49b4aa' 'game' 'skill game' None None
 'us' None 'generic' 'android 16.0' 'android' '16.0.0' '00' '2025_july'
 nan 3 2.0 None 17.0 None list([('Game/Card', 241)])
 list([('Solitaire', 9)]) list([('Game/Card', 2)])
 list([('Solitaire', 1)])
 array(['d54f414516df4ba311d4b2a67b2d895175ea285b',
        '0de540d0711175dd3a4ffa7e7f8450abc7870328'], dtype=object)
 list([('US-CA-Modesto', 372), ('US-PA-Bovard', 2), ('US-CA-Tracy', 103), ('US-CA-Fresno', 54), ('US-PA-Youngstown', 3)])
 list([('US', 534)])
 list([('r', 324.06158940143393), ('i', 268.11353594014605)])
 list([('i', 0.8640278470866332), ('r', 0.8215736422446216)]) None None
 list([('en', 534)]) list([('15.0.0', 534)]) 1759190400.0
 list([('d026805c95a7fa27fcae2a72fd5d6d092b7653bf', 1758499200), ('43ea9ab81b256dc4c5ed3786e645bad87c3b6e93', 1759190400), ('047665c6af07408267630aecb9534676e55d4178', 1759104000), ('25cdfd75fffab2e19

[0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0
 'ff9bdd5dc427114eef1c6372526548e01a5ef45d' 'finance' 'finance'
 'investing & financial management (general)' None 'ng' None 'apple'
 'iphone 14 pro' 'ios' '17.5.1' '01' '2022_september' 1499.0 3 2.5
 list([('a5a357330e68f6583e87163b9aa063205391fa12', 1)]) nan
 list([('a5a357330e68f6583e87163b9aa063205391fa12', 2651)])
 list([('Games/Games/Adventure', 6), ('Games/Games/Casual', 143)])
 list([('Idle RPG', 6), ('Artillery Shooter', 143)])
 list([('Games/Games/Casual', 1), ('Games/Games/Adventure', 1)])
 list([('Artillery Shooter', 1), ('Idle RPG', 1)]) None None
 list([('HK', 149)])
 list([('b', 46.09741826923077), ('r', 14.619424116056797), ('i', 26.46763445660913)])
 list([('r', 0.314370777665527), ('i', 0.6973684210526315), ('b', 0.7944078947368421)])
 None None list([('zh', 149)]) list([('15.8.4', 149)]) 1758412800.0
 list([('5c0b36b58157c8185946e329ea180a435f235b4c', 1758412800), ('a5a357330e68f6583e87163b9aa063205391fa12', 1758

[0 0 0 0 0 0 0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0
 '3817dd799bec28bc68dcba7c6e6690a737af20cc' None None None
 'cloudflare warp' 'bt' 'thimphu district' 'oneplus' 'cph2487' 'android'
 '15.0.0' '06' '2023_february' 488.0 3 1.5 None nan None
 list([('Application/Tools', 20)]) list([('Cloud Storage', 20)])
 list([('Application/Tools', 1)]) list([('Cloud Storage', 1)]) None
 list([('BT-15-Thimphu', 17)]) list([('BT', 20)]) None None None None
 list([('en', 20)]) list([('15.0.0', 20)]) 1758153600.0
 list([('572f4ea557dbb90dad815064a616d7d7f76a939c', 1758153600)])
 list([('Cloud Storage', 1758153600)])
 list([(20, 0.05), (21, 0.3), (9, 0.3), (22, 0.25), (23, 0.1)]) None None
 None nan None None nan None None None None
 list([('32233e3b05ed51c8ee5deec8728fe79aa3686756', [('session', 3)])])
 list([('32233e3b05ed51c8ee5deec8728fe79aa3686756', [('session', 1757839212)])])
 None array(['572f4ea557dbb90dad815064a616d7d7f76a939c'], dtype=object)
 None None None list([('BT-34', 3), ('BT-15', 17)

[0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 1.0 1.0 1.0 1.0 1.0 1.0
 '7eaeef21e215654ea75a5ef78692d1f8ffd0ca54' 'sport betting' 'unknown' None
 None 'ru' None 'samsung' 'sm-a045f' 'android' '14.0.0' '10'
 '2022_october' 227.0 3 nan None nan None None None None None None None
 None None None None None None None nan None None None None None None nan
 None None nan None None None None None None None None None None None None
 None None None None nan nan nan None None None None
 'cc81b633-5f8a-48b4-b1a3-f67bc0bafc55' '2025-10-01-10-00']
[0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
 'db42749ef73fd488a95e37718f033eab132da07c' 'application' 'communication'
 'social messaging' 'windstream communications' 'us' 'new york' 'google'
 'pixel 8a' 'android' '15.0.0' '11' '2024_may' 559.0 3 nan None nan None
 None None None None None None None None None None None None None nan None
 None None None None None nan None None nan None None None None None None
 None None None None None None None None None None 

[0 0 0 0 0 0 0 0.0 0.0 0.0 nan 0.0 0.0 1.0 0.0 0.0 0.0
 'd5cdf45466c8edead05b468320028a02319c80cb' None None None 'home-connect'
 'za' 'gauteng' 'samsung' 'sm-a055f' 'android' '15.0.0' '13'
 '2023_october' nan 3 3.5 None nan None
 list([('Application/Communication', 8), ('Application/Video Players & Editors', 2), ('Application/Music & Audio', 14), ('Game/Trivia', 82)])
 list([('Browsers', 8), ('Video Editing', 2), ('Media Players', 14), ('Trivia', 82)])
 list([('Application/Music & Audio', 1), ('Application/Communication', 1), ('Game/Trivia', 1)])
 list([('Trivia', 1), ('Browsers', 1), ('Media Players', 1)])
 array(['cd9b2558bcf52078c64242e751017419651181d9'], dtype=object)
 list([('ZA-GP-Johannesburg', 213), ('ZA-FS-Bloemfontein', 4)])
 list([('ZA', 222)]) None None None None list([('en', 222)])
 list([('15.0.0', 222)]) 1758499200.0
 list([('f23c7f08f481fd287e18f33f2724ec7ebaea3b15', 1758499200)])
 list([('Trivia', 1758499200)])
 list([(5, 0.08290155440414508), (14, 0.0621761658031088

[0 1 1 1 1 1 1 4.99 4.99 4.99 nan 1.0 1.0 1.0 0.0 1.0 0.0
 'c05bb88558ec5eee70e3c659f7a96923f3654405' None None None 'comcast cable'
 'us' 'california' 'apple' 'iphone 15 pro max' 'ios' '18.6.2' '15'
 '2023_september' 1599.0 3 5.0
 list([('0edb6bfe7f3e97988a54674bea094645d80d95be', 1), ('3de12b33d33e8a1c783147b78952b0f482098469', 2)])
 nan
 list([('0edb6bfe7f3e97988a54674bea094645d80d95be', 78), ('3de12b33d33e8a1c783147b78952b0f482098469', 803)])
 list([('Games/Games/Casual', 78), ('Productivity/Utilities', 1), ('Games/Games/Simulation', 8), ('Games/Games/Puzzle', 6327), ('Entertainment/Games/Puzzle', 14), ('Graphics & Design/Utilities', 5), ('Utilities/Games', 3), ('Utilities/Entertainment', 2), ('Graphics & Design/Entertainment', 10)])
 list([('Cloud Storage', 1), ('Other Puzzle', 5), ('Customization & Ringtones', 20), ('Jigsaw', 7), ('Block', 6322), ('Coin Looters', 8), ('Platformer / Runner', 78), ('Drawing & Coloring', 7)])
 list([('Games/Games/Casual', 1), ('Graphics & Design/Ent

[0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
 '7eaeef21e215654ea75a5ef78692d1f8ffd0ca54' 'sport betting' 'unknown' None
 None 'ru' None 'poco' '21061110ag' 'android' '12.0.0' '17' '2021_august'
 383.0 3 nan None nan None None None None None None None None None None
 None None None None nan None None None None None None nan None None nan
 None None None None None None None None None None None None None None
 None None nan nan nan None None None None
 '57c55467-ef75-4034-927a-d8f5d84454c6' '2025-10-01-17-00']
[0 0 0 0 0 0 0 0.0 0.0 0.0 nan 0.0 0.0 0.0 0.0 0.0 0.0
 'aa0d51b5f556725ba8abf4d5bd391b7a469cdefe' 'games' 'skill games'
 'solitaire' None 'us' None 'samsung' 'sm-s928u' 'android' '16.0.0' '18'
 '2024_january' 1660.0 3 4.5
 list([('59ad4da83a2d99a5d751e07195810283738cb2d1', 1)]) nan
 list([('59ad4da83a2d99a5d751e07195810283738cb2d1', 201)])
 list([('Game/Card', 57), ('Application/Communication', 12), ('Application/Lifestyle', 96), ('Application/Weather', 80), ('Game/Word',

[0 0 0 0 0 0 0 0.0 0.0 0.0 nan 0.0 0.0 0.0 0.0 0.0 0.0
 '25904ae8f9d3b0559ced40f6c94a782a2066a7ad' 'lifestyle' 'unknown'
 'coupons & rebates (general)' None 'us' None 'apple' 'iphone 15' 'ios'
 '18.6.2' '21' '2023_september' 1099.0 3 nan None nan None None None None
 None None None None None None None None None None nan None None None None
 None None nan None None nan None None None None None None None None None
 None None None None None None None nan nan nan None None None None
 '764d1f14-1906-4664-a662-3c8fa813389b' '2025-10-01-21-00']
[0 0 0 0 0 0 0 0.0 0.0 0.0 nan nan nan nan nan nan nan
 '366cc2284511794d657dad6ad60cf15eeabeca20' 'games' 'games/puzzle'
 'match pair' None 'us' None 'samsung' 'sm-s918u' 'android' '15.0.0' '22'
 '2023_february' 1620.0 3 5.5
 list([('e00a1b664b26922002a5c021556ba5572f2681ce', 1), ('a05f13a3b46d196edb8fb2b6a5511a012dbad18f', 4), ('5ff309487d529b8d85cdd8aaa8d9c30ebbd3c732', 2)])
 nan
 list([('e00a1b664b26922002a5c021556ba5572f2681ce', 786), ('a05f13a3b4

[0 0 0 0 0 0 0 0.0 0.0 0.0 nan 1.0 1.0 0.0 0.0 1.0 0.0
 'c39b147d59d698641d4198d04ee2e3e0f78b1122' 'games' 'play2earn'
 'coupons & rebates (general)' 'spectrum' 'us' 'north carolina' 'at&t'
 'u304aa' 'android' '9.0.0' '03' '2018_october' nan 4 3.0
 list([('db42749ef73fd488a95e37718f033eab132da07c', 3)]) 9.0
 list([('db42749ef73fd488a95e37718f033eab132da07c', 1307)])
 list([('Application/Video Players & Editors', 1), ('Application/Communication', 60)])
 list([('Content Downloaders', 1), ('Social Messaging', 60)])
 list([('Application/Communication', 1)]) list([('Social Messaging', 1)])
 array(['b6362eb69435662dd1245a80900d44b631099144'], dtype=object)
 list([('US-NC-Greensboro', 201)]) list([('US', 201)]) None None None None
 list([('en', 201)]) list([('9.0.0', 201)]) nan None None
 list([(0, 0.014925373134328358), (10, 0.208955223880597), (14, 0.04975124378109453), (1, 0.03980099502487562), (9, 0.23383084577114427), (13, 0.004975124378109453), (22, 0.14925373134328357), (12, 0.00497512

[0 0 0 0 0 0 0 0.0 0.0 0.0 nan 1.0 1.0 1.0 1.0 1.0 1.0
 'a53727ee495b346f5fb76a0dce8e88f22d26721b' 'games' 'games/casual match 3'
 'match swap' None 'de' None None 'sm-s921b' 'android' None '06'
 '2024_january' 860.0 4 2.0 None nan None
 list([('Application/Music & Audio', 2), ('Application/Shopping', 3)])
 list([('Buy & Sell (General)', 3), ('Books & Comics (General)', 2)]) None
 None array(['d432d3b7f1dceb61dad95b78d0769a24b60ecaf3',
             'bedb08ebe59f96aaebee814067ffcbca34bd4962'], dtype=object)
 list([('DE-BY-Wolfsegg', 2), ('DE-RP-Hachenburg', 10), ('DE-BY-Regensburg', 3), ('DE-BY-Spalt', 1), ('DE-BY-Leinburg', 1), ('DE-NW-Cologne', 2), ('DE-BE-Berlin', 2), ('DE-NI-Wolfsburg', 4), ('DE-RP-Hattert', 6), ('DE-HE-Frankfurt am Main', 3), ('DE-BY-Grafenwohr', 4), ('DE-BB-Brandenburg', 1)])
 list([('DE', 43)])
 list([('i', 20.41313606), ('b', 0.3604625511764707), ('r', 13.454646666666667)])
 list([('b', 0.3433249009593814), ('i', 0.6245210727969349), ('r', 0.47587482219061167)])

[0 0 0 0 0 0 0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0
 'fcce26ae1abe348cb24c4f4e542957ef5d9773fd' 'social networking'
 'entertainment' None None 'bd' None 'nokia' 't20' 'android' '13.0.0' '09'
 '2013_september' nan 4 3.0
 list([('c8b4200052cb9e1625720749d2a5b9d4905661c4', 2), ('103c0b4b4eedff1d5924c4a59ee266c2be31ca55', 1)])
 nan
 list([('c8b4200052cb9e1625720749d2a5b9d4905661c4', 116), ('103c0b4b4eedff1d5924c4a59ee266c2be31ca55', 110)])
 list([('Game/Action', 7), ('Game/Sports', 7), ('Game/Casual', 4)])
 list([('.io', 1), ('Platformer / Runner', 6), ('Virtual Pet', 4), ('Realistic Sports', 7)])
 list([('Game/Sports', 1), ('Game/Action', 1)])
 list([('Realistic Sports', 1), ('Platformer / Runner', 1)])
 array(['cd9b2558bcf52078c64242e751017419651181d9',
        '3817dd799bec28bc68dcba7c6e6690a737af20cc'], dtype=object)
 list([('BD--Dhaka', 8)]) list([('BD', 21)]) None None None None
 list([('en', 21)]) list([('13.0.0', 21)]) 1759276800.0
 list([('95901e6e98f631395497da747eee5ed24c9880

[0 0 0 0 0 0 0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0
 '3817dd799bec28bc68dcba7c6e6690a737af20cc' None None None 'zenlayer' 'us'
 'california' 'samsung' 'sm-s9010' 'android' '12.0.0' '13' '2022_february'
 850.0 4 nan None nan None None None None None None None None None None
 None None None None nan None None None None None None nan None None nan
 None None None None None None None None None None None None None None
 None None nan nan nan None None None None
 '3f21691a-41e2-4628-8a6a-3737de1026b9' '2025-10-02-13-00']
[0 0 0 0 0 0 0 0.0 0.0 0.0 nan 1.0 0.0 0.0 1.0 0.0 0.0
 '001b729f996da29d01e9e3641ac7fb153e6bc98e' 'game' 'casual' 'match swap'
 None 'in' None 'iqoo' 'i2405' 'android' '15.0.0' '13' '2025_june' 538.0 4
 nan None nan None None None None None None None None None None None None
 None None nan None None None None None None nan None None nan None None
 None None None None None None None None None None None None None None nan
 nan nan None None None None '5275aab4-fc79-463c-98

[0 0 0 0 0 0 0 0.0 0.0 0.0 nan 0.0 0.0 0.0 0.0 0.0 0.0
 '3435438fda9d548afbc383169b683389cae733f1' 'games' 'skill games' 'slots'
 'spectrum business' 'us' 'california' 'apple' 'iphone 11 pro max' 'ios'
 '15.6.0' '16' '2019_september' 1499.0 4 4.0
 list([('99a648fa6efb5641d457dc54c0a67c497ce68d2a', 1)]) nan
 list([('99a648fa6efb5641d457dc54c0a67c497ce68d2a', 824)])
 list([('Games/Games/Action', 126), ('Entertainment/Photo & Video', 14)])
 list([('Platformer / Runner', 126)])
 list([('Entertainment/Photo & Video', 1), ('Games/Games/Action', 1)])
 list([('Platformer / Runner', 1)]) None
 list([('ID-JT-Jakarta', 1), ('ID-JK-Jakarta', 21)]) list([('ID', 145)])
 list([('i', 2.814114495444444), ('r', 6.9821898317499995), ('b', 0.17836251118518517)])
 list([('r', 0.6103077888703355), ('i', 0.36417130610122633), ('b', 0.2944582164524757)])
 None None list([('id', 145)]) list([('18.6.0', 1), ('18.6.2', 144)]) nan
 None None
 list([(21, 0.15384615384615385), (17, 0.06293706293706294), (18, 0.4545

[0 0 0 0 0 0 0 0.0 0.0 0.0 1.0 1.0 1.0 0.0 0.0 1.0 0.0
 'db42749ef73fd488a95e37718f033eab132da07c' 'application' 'communication'
 'social messaging' 'at&t internet' 'us' 'florida' 'samsung' 'sm-t307u'
 'android' '9.0.0' '20' '2020_april' 280.0 4 nan None nan None None None
 None None None None None None None None None None None nan None None None
 None None None nan None None nan None None None None None None None None
 None None None None None None None None nan nan nan None None None None
 '74287d44-5d36-4406-b61c-c1e7af44c2ac' '2025-10-02-20-00']
[0 0 0 0 0 0 0 0.0 0.0 0.0 nan 1.0 0.0 1.0 1.0 0.0 0.0
 '001b729f996da29d01e9e3641ac7fb153e6bc98e' 'game' 'casual' 'match swap'
 None 'tr' None 'infinix' 'x6528' 'android' '13.0.0' '21' '2019_november'
 113.0 4 5.5 None nan None
 list([('Application/Photography', 41), ('Application/Lifestyle', 4), ('Game/Puzzle', 176)])
 list([('Photo Editing', 41), ('Riddle', 27), ('Religion & Spirituality (General)', 4), ('Match Pair', 149)])
 list([('Gam

[1 1 1 1 1 1 1 0.5432643556200979 0.5432643556200979 0.5432643556200979
 nan 1.0 0.0 0.0 1.0 0.0 0.0 '1c88d1e1f99e2e2c35b97272ecf52c9a9e0a905c'
 'games' 'games/racing' 'action sandbox' 'novanet sp telecom' 'br'
 'são paulo' 'motorola' 'one fusion' 'android' '11.0.0' '23'
 '2019_september' 250.0 4 6.0
 list([('4a362b7368057cb3aeee27b06cc9621b2fe4176e', 1)]) 11.0
 list([('4a362b7368057cb3aeee27b06cc9621b2fe4176e', 36)])
 list([('Application/Social', 371), ('Application/Maps & Navigation', 2)])
 list([('Dating', 371), ('Bus & Rail Services', 2)])
 list([('Application/Social', 1)]) list([('Dating', 1)])
 array(['9a76cf31c46a6ae933a1ff369d2ed1fc36fe0a3b',
        '3a733cb011b8a90097c38782248e12b0a52838f8',
        'cd9b2558bcf52078c64242e751017419651181d9',
        '3817dd799bec28bc68dcba7c6e6690a737af20cc'], dtype=object)
 list([('BR-SP-Sao Paulo', 699)]) list([('BR', 699)]) None None None None
 list([('pt', 699)]) list([('11.0.0', 699)]) 1759104000.0
 list([('d2b686b27c116e21fc71f217d9ecf

[1 1 1 1 1 1 1 0.49 0.49 0.49 0.0 0.0 0.0 0.0 0.0 0.0 0.0
 '3b565d52f6463cc295dc9c1f82f9c2eb3aed0a38' 'application' 'dating' None
 None 'mx' None 'xiaomi' '23129ra5fl' 'android' '14.0.0' '00'
 '2023_september' nan 5 7.0 None nan None
 list([('Application/Tools', 12), ('Application/Entertainment', 1), ('Application/Video Players & Editors', 64)])
 list([('Media Players', 65), ('Customization & Ringtones', 9), ('Browsers', 1), ('Utilities (General)', 2)])
 list([('Application/Video Players & Editors', 1), ('Application/Tools', 1)])
 list([('Media Players', 1), ('Customization & Ringtones', 1)])
 array(['792d078beb1e6f63223cf8316814378e598651c5',
        '7aa10a57509765894796e3fccff8d73d75ceac35'], dtype=object)
 list([('MX-SIN-Culiacán', 112), ('MX-JAL-Culiacán', 1), ('MX-JAL-Guadalajara', 34), ('MX--Tlalpan', 7), ('MX--Culiacán', 3), ('MX--Morelos', 1), ('MX-NLE-Tlalpan', 11), ('MX-SIN-Mazatlán', 2), ('MX-TAM-Valle Hermoso', 4), ('MX-CHH-Ciudad Juárez', 1)])
 list([('MX', 369)]) list([(

[0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
 'fcce26ae1abe348cb24c4f4e542957ef5d9773fd' 'social networking'
 'entertainment' None None 'iq' None 'infinix' 'x6858' 'android' '15.0.0'
 '05' '2025_march' nan 5 3.5 None nan None
 list([('Application/Music & Audio', 1)]) list([('Media Players', 1)])
 None None
 array(['cd9b2558bcf52078c64242e751017419651181d9'], dtype=object)
 list([('IQ-NA-Najaf', 4), ('IQ-KA-Karbala', 3), ('IQ-BA-Basra', 39), ('IQ-BA-Baghdad', 13)])
 list([('IQ', 59)]) None None None None list([('ar', 59)])
 list([('15.0.0', 59)]) nan None None
 list([(0, 0.09259259259259259), (5, 0.037037037037037035), (10, 0.12962962962962962), (14, 0.1111111111111111), (20, 0.018518518518518517), (1, 0.018518518518518517), (6, 0.018518518518518517), (21, 0.07407407407407407), (13, 0.018518518518518517), (17, 0.018518518518518517), (12, 0.2037037037037037), (7, 0.018518518518518517), (18, 0.037037037037037035), (11, 0.12962962962962962), (8, 0.018518518518518517), (19, 0.0185

[0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
 '7eaeef21e215654ea75a5ef78692d1f8ffd0ca54' 'sport betting' 'unknown' None
 None 'th' None 'samsung' 'sm-a156e' 'android' '14.0.0' '09'
 '2023_december' 299.0 5 nan None nan None None None None None None None
 None None None None None None None nan None None None None None None nan
 None None nan None None None None None None None None None None None None
 None None None None nan nan nan None None None None
 '060d9b16-d149-4428-aba4-3573992226b4' '2025-10-03-09-00']
[0 0 0 0 0 0 0 0.0 0.0 0.0 nan 0.0 0.0 0.0 0.0 0.0 0.0
 '001b729f996da29d01e9e3641ac7fb153e6bc98e' 'game' 'casual' 'match swap'
 None 'in' None 'oppo' 'cph2637' 'android' '15.0.0' '10' '2024_july' 357.0
 5 nan None nan None None None None None
 array(['1704071af5bb05db376ed785633f8d8d611f6ee7'], dtype=object)
 list([('IN-DL-New Delhi', 1), ('IN-JK-Jammu', 2)]) list([('IN', 3)]) None
 None None None list([('unknown', 3)]) list([('15.0.0', 3)]) nan None None
 list([(8, 0.

[0 0 0 0 0 0 0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0
 '3714e810963165f514bbeec2cc0b54e91239314b' 'application' 'dating'
 'dating' None 'dz' None 'infinix' 'x6525d' 'android' '14.0.0' '14'
 '2023_november' 74.0 5 nan None nan None None None None None None None
 None None None None None None None nan None None None None None None nan
 None None nan None None None None None None None None None None None None
 None None None None nan nan nan None None None None
 '6652d437-5371-43d6-887c-57d747a91a28' '2025-10-03-14-00']
