# Coveo Dataset Split

In [1]:
from pathlib import Path

import pandas as pd

In [2]:
# Load the two preprocessed, tab-separated event logs: one that still contains
# pageview rows and one without them.
full_dataset_with_pageviews = pd.read_csv(
    "../coveo_dataset/preprocessed_data/coveo_preprocessed_with_pageviews.csv",
    delimiter="\t",
)
full_dataset_without_pageviews = pd.read_csv(
    "../coveo_dataset/preprocessed_data/coveo_preprocessed_without_pageviews.csv",
    delimiter="\t",
)

---
#### Fill in NaN values

In [3]:
# Fill missing values in place, column by column: a missing product_action
# becomes "view", while a missing price_bucket / category_hash is tagged "missing".
full_dataset_with_pageviews.fillna(
    value={
        "product_action": "view",
        "price_bucket": "missing",
        "category_hash": "missing",
    },
    inplace=True,
)
# Per-column flag: True would mean the column still contains missing values.
full_dataset_with_pageviews.isna().any()

session_id_hash              False
event_type                   False
product_action               False
product_sku_hash             False
server_timestamp_epoch_ms    False
hashed_url                   False
category_hash                False
price_bucket                 False
dtype: bool

In [4]:
# Same in-place fill as for the with-pageviews frame: a missing product_action
# becomes "view", a missing price_bucket / category_hash is tagged "missing".
full_dataset_without_pageviews.fillna(
    value={
        "product_action": "view",
        "price_bucket": "missing",
        "category_hash": "missing",
    },
    inplace=True,
)
# Per-column flag: True would mean the column still contains missing values.
full_dataset_without_pageviews.isna().any()

session_id_hash              False
event_type                   False
product_action               False
product_sku_hash             False
server_timestamp_epoch_ms    False
hashed_url                   False
category_hash                False
price_bucket                 False
dtype: bool

---
#### Apply minimum item feedback (drop SKUs with fewer than 5 product events) — with pageviews

In [5]:
# Summary statistics of the number of 'event_product' rows per SKU.
(
    full_dataset_with_pageviews
    .loc[full_dataset_with_pageviews['event_type'] == 'event_product']
    .groupby('product_sku_hash')
    .size()
    .describe()
)

count     57483.000000
mean        181.454186
std        1094.123722
min           1.000000
25%           1.000000
50%           5.000000
75%          74.000000
max      179981.000000
dtype: float64

In [6]:
# Item-feedback filter: drop every row whose SKU has fewer than 5
# 'event_product' interactions in the dataset.

# Number of 'event_product' rows per SKU (index: product_sku_hash).
aggregated = (
    full_dataset_with_pageviews[full_dataset_with_pageviews['event_type'] == 'event_product']
    .groupby(['product_sku_hash'])
    .size()
)
# Vectorised threshold test instead of an element-wise apply/lambda;
# True means the SKU has enough feedback to be kept.
filtered = (aggregated >= 5).rename('item_feedback_bool').reset_index()
# SKUs that fail the threshold (boolean negation rather than `== False`).
ids = filtered.loc[~filtered['item_feedback_bool'], 'product_sku_hash'].tolist()
# NOTE(review): SKUs that never occur in an 'event_product' row are absent from
# `aggregated`, so they are never listed in `ids` and their rows are kept —
# confirm this is intended for pageview rows.
full_dataset_with_pageviews = full_dataset_with_pageviews[
    ~full_dataset_with_pageviews['product_sku_hash'].isin(ids)
].copy()

In [7]:
# Re-check the per-SKU 'event_product' counts after the item-feedback filter;
# the minimum should now be at least 5.
(
    full_dataset_with_pageviews
    .loc[full_dataset_with_pageviews['event_type'] == 'event_product']
    .groupby('product_sku_hash')
    .size()
    .describe()
)

count     29511.000000
mean        351.928196
std        1507.348074
min           5.000000
25%          20.000000
50%          70.000000
75%         247.000000
max      179981.000000
dtype: float64

---
#### Apply minimum item feedback (drop SKUs with fewer than 5 product events) — without pageviews

In [8]:
# Summary statistics of the number of 'event_product' rows per SKU.
(
    full_dataset_without_pageviews
    .loc[full_dataset_without_pageviews['event_type'] == 'event_product']
    .groupby('product_sku_hash')
    .size()
    .describe()
)

count     57483.000000
mean        181.454186
std        1094.123722
min           1.000000
25%           1.000000
50%           5.000000
75%          74.000000
max      179981.000000
dtype: float64

In [9]:
# Item-feedback filter: drop every row whose SKU has fewer than 5
# 'event_product' interactions in the dataset.

# Number of 'event_product' rows per SKU (index: product_sku_hash).
aggregated_wo = (
    full_dataset_without_pageviews[full_dataset_without_pageviews['event_type'] == 'event_product']
    .groupby(['product_sku_hash'])
    .size()
)
# Vectorised threshold test instead of an element-wise apply/lambda;
# True means the SKU has enough feedback to be kept.
filtered_wo = (aggregated_wo >= 5).rename('item_feedback_bool').reset_index()
# SKUs that fail the threshold (boolean negation rather than `== False`).
ids_wo = filtered_wo.loc[~filtered_wo['item_feedback_bool'], 'product_sku_hash'].tolist()
# Keep only rows whose SKU is not in the rejected list; .copy() detaches the
# result from the original frame.
full_dataset_without_pageviews = full_dataset_without_pageviews[
    ~full_dataset_without_pageviews['product_sku_hash'].isin(ids_wo)
].copy()

In [10]:
# Re-check the per-SKU 'event_product' counts after the item-feedback filter;
# the minimum should now be at least 5.
(
    full_dataset_without_pageviews
    .loc[full_dataset_without_pageviews['event_type'] == 'event_product']
    .groupby('product_sku_hash')
    .size()
    .describe()
)

count     29511.000000
mean        351.928196
std        1507.348074
min           5.000000
25%          20.000000
50%          70.000000
75%         247.000000
max      179981.000000
dtype: float64

---
### Split with pageviews

In [11]:
# Put the whole event log into chronological order (in place) ahead of the
# time-based split.
full_dataset_with_pageviews.sort_values(by='server_timestamp_epoch_ms', inplace=True)

# Show the sorted frame.
full_dataset_with_pageviews

Unnamed: 0,session_id_hash,event_type,product_action,product_sku_hash,server_timestamp_epoch_ms,hashed_url,category_hash,price_bucket
1498902,0e8c1b8627a84cb222ea427ad48aa58a1b74ef78c4422c...,pageview,view,5f1f0285dc64c6c062c6396a638e952a23131fddc29c41...,1547528564513,5f1f0285dc64c6c062c6396a638e952a23131fddc29c41...,missing,missing
1551735,0f1416c8c68bb9209c1bbc4576386df5480e9757f55ce9...,pageview,view,41ae0f916406434064aeb03d859a7141b53d2900394dca...,1547528568784,41ae0f916406434064aeb03d859a7141b53d2900394dca...,missing,missing
5503292,35a0f82eeb7330a51b4d459fef5c0b7628122e0e4fc58a...,event_product,detail,7bbec596889630855194fc44d071539772de02c269d48d...,1547528569510,aad195d55bf22842831f5064595dd28a6b44ac8fc9ff35...,06fa312761d4b39e2f649781514ac69a4c1505c221fc46...,8.0
25311552,f6362a47909606fa3a067c11fda55b6983cc261ed4fea1...,pageview,view,fa0a7e983fcd7fd743d97e261e4f71a687faf27e371707...,1547528569729,fa0a7e983fcd7fd743d97e261e4f71a687faf27e371707...,missing,missing
4275711,29acd0097e81f31960b1e659bd0031b782b2a3599a5e69...,event_product,detail,11bb1a83b2d774e66feeabeb05ccc14e00e2cd30ec7816...,1547528572012,24507a6b857b6f6f092a6e68fc534ff9194e9e666cc9c1...,115a6a7017ee55752b8487c77dfde92b0d501d10a2e69c...,6.0
...,...,...,...,...,...,...,...,...
5292201,3395776a7edcc18f855758894902bba7ec32ed7aac5b40...,pageview,view,199df805067b2151516282e63935b19a1845b93bf2ebbb...,1555300793549,199df805067b2151516282e63935b19a1845b93bf2ebbb...,missing,missing
8552556,536078774cdcf7e65e3ede3842464a270685a839aae503...,event_product,detail,9558cec0186602c0d3eab35b9f05da2a84f82aaaaa8917...,1555300793597,f7d46429ac173df4142d791bbc4381808c3d470b56da93...,115a6a7017ee55752b8487c77dfde92b0d501d10a2e69c...,7.0
21966153,d5a4bac39f6b58a67d418d8f9d1a7ecbdd25580e2aae22...,event_product,detail,e4fd366ead2fe9e77990ac411c8ad3ffeaa232b4dd70be...,1555300794991,dfa42922e0d999a0df417eacd3f9d0d2ecd3dbaf26daa4...,115a6a7017ee55752b8487c77dfde92b0d501d10a2e69c...,5.0
1482327,0e62da9f4807fdc126c02331887479d5a56e1ab0e31a06...,event_product,detail,dece31cb7204a7c425cdb6048bdf4bb68c23914bc26f7b...,1555300797127,17ecd9ea103678c7c906005cb904c6416e9978b4d3f30d...,06fa312761d4b39e2f649781514ac69a4c1505c221fc46...,6.0


In [12]:
# Timestamp cutoffs for the with-pageviews split, compared against
# server_timestamp_epoch_ms: rows <= end_of_train form the training set, rows in
# (end_of_train, end_of_validation] the validation set, and later rows the test set.
# Read as Unix epoch milliseconds, the active values are roughly
# 2019-03-08 02:57 UTC and 2019-03-26 14:06 UTC.
# NOTE(review): how these cutoffs were derived is not shown here — TODO document.
# NOTE: the same two names are reassigned in the "Split without pageviews" section,
# so re-run this cell before re-running the with-pageviews split.
# Values recorded for the "with search clicks" variant (kept for reference, unused):
# end_of_train = 1552017853614 (with search clicks)
# end_of_validation = 1553611368379 (with search clicks)
end_of_train = 1552013836849
end_of_validation = 1553609194110

In [13]:
# Time-based split into three independent copies:
#   train      : timestamp <= end_of_train
#   validation : end_of_train < timestamp <= end_of_validation
#   test       : timestamp > end_of_validation
# NOTE(review): the masks are applied per event row, so a session whose events
# straddle a cutoff ends up divided between two splits — confirm this is intended.
ts_with_pageviews = full_dataset_with_pageviews['server_timestamp_epoch_ms']

train_with_pageviews = full_dataset_with_pageviews.loc[
    ts_with_pageviews <= end_of_train
].copy()
validation_with_pageviews = full_dataset_with_pageviews.loc[
    (ts_with_pageviews > end_of_train) & (ts_with_pageviews <= end_of_validation)
].copy()
test_with_pageviews = full_dataset_with_pageviews.loc[
    ts_with_pageviews > end_of_validation
].copy()

In [14]:
# Inspect the training split (rows with server_timestamp_epoch_ms <= end_of_train).
train_with_pageviews

Unnamed: 0,session_id_hash,event_type,product_action,product_sku_hash,server_timestamp_epoch_ms,hashed_url,category_hash,price_bucket
1498902,0e8c1b8627a84cb222ea427ad48aa58a1b74ef78c4422c...,pageview,view,5f1f0285dc64c6c062c6396a638e952a23131fddc29c41...,1547528564513,5f1f0285dc64c6c062c6396a638e952a23131fddc29c41...,missing,missing
1551735,0f1416c8c68bb9209c1bbc4576386df5480e9757f55ce9...,pageview,view,41ae0f916406434064aeb03d859a7141b53d2900394dca...,1547528568784,41ae0f916406434064aeb03d859a7141b53d2900394dca...,missing,missing
5503292,35a0f82eeb7330a51b4d459fef5c0b7628122e0e4fc58a...,event_product,detail,7bbec596889630855194fc44d071539772de02c269d48d...,1547528569510,aad195d55bf22842831f5064595dd28a6b44ac8fc9ff35...,06fa312761d4b39e2f649781514ac69a4c1505c221fc46...,8.0
25311552,f6362a47909606fa3a067c11fda55b6983cc261ed4fea1...,pageview,view,fa0a7e983fcd7fd743d97e261e4f71a687faf27e371707...,1547528569729,fa0a7e983fcd7fd743d97e261e4f71a687faf27e371707...,missing,missing
4275711,29acd0097e81f31960b1e659bd0031b782b2a3599a5e69...,event_product,detail,11bb1a83b2d774e66feeabeb05ccc14e00e2cd30ec7816...,1547528572012,24507a6b857b6f6f092a6e68fc534ff9194e9e666cc9c1...,115a6a7017ee55752b8487c77dfde92b0d501d10a2e69c...,6.0
...,...,...,...,...,...,...,...,...
2431837,17ac0e573aadaab2a7af2fd158288eb07778b21f2ea532...,pageview,view,b69d55be26c2c7a796e95fa521b2e6ad8b53300d9337c6...,1552013835243,b69d55be26c2c7a796e95fa521b2e6ad8b53300d9337c6...,missing,missing
23519326,e4c2a7d0bbc5b72873dc578b55ed451b416cacd6cb00af...,event_product,detail,8f750741be9f3d2dabb88eb9e3dd7da1938eba4afff04b...,1552013835478,b749eb68732894e3cdfa256ba67af9ae33b52c846e4409...,06fa312761d4b39e2f649781514ac69a4c1505c221fc46...,10.0
22066262,d69e5aaa98f2f755cb3e4b8d1f61a3114754cd8e6198f9...,pageview,view,fec3a78c2cb5e32bd970acd65024e4c8582d23ef34fcf2...,1552013835853,fec3a78c2cb5e32bd970acd65024e4c8582d23ef34fcf2...,missing,missing
23145537,e1210606eaff807bd001c04d28450e6bff025b67fa8915...,pageview,view,813083c9e5516304eba12c1f4a8f14843c2b98c83fc532...,1552013836618,813083c9e5516304eba12c1f4a8f14843c2b98c83fc532...,missing,missing


In [15]:
# Inspect the validation split (end_of_train < server_timestamp_epoch_ms <= end_of_validation).
validation_with_pageviews

Unnamed: 0,session_id_hash,event_type,product_action,product_sku_hash,server_timestamp_epoch_ms,hashed_url,category_hash,price_bucket
16849193,a3ec8b3310f7d6ea5df1c119692aa796f29431d866a39c...,pageview,view,95734b76e6a0633ef2ac7dd017007377a6714cf7526dec...,1552013836923,95734b76e6a0633ef2ac7dd017007377a6714cf7526dec...,missing,missing
8078166,4ebfd69ca187a52a95ddec92136bc51568fc1ff0da7e67...,event_product,add,4144366cf2f58af53edf7e2435e5d5a20d96056de55d16...,1552013837027,83b4fdad686c1be4eba335f70d23ae202b84b6153e109e...,0665a81d19c89281cc00e7f7d779ded2ed42c933838602...,1.0
11021354,6b55ae420bf449713d4ba557fd21e3048956480a0f9eee...,event_product,detail,b01f4ea3ac48620e31600a6072f0cf9ef15b66351724a0...,1552013837032,ed348618a610c5b2680f57f79ea1b302bedc499e56f967...,06fa312761d4b39e2f649781514ac69a4c1505c221fc46...,8.0
12433394,78f94e117dd35424471ff139acf45655258b06272dbb64...,event_product,detail,eb66dd2792979b76cf7753c55c390757c6cbbba4e14d74...,1552013837068,42bc734276514a9bb2e477b514e10ee787ece48e359916...,06fa312761d4b39e2f649781514ac69a4c1505c221fc46...,9.0
15264231,947c8db6c1b903a6cfde4b39efdc2b3397a0b5a0e8eb06...,pageview,view,84613655bd524ad767ed6840128e17944c67c6f71d0e85...,1552013837239,84613655bd524ad767ed6840128e17944c67c6f71d0e85...,missing,missing
...,...,...,...,...,...,...,...,...
196799,01f0409432b9603cb7b404b936faffb74ab9152031d20c...,event_product,detail,1a85b8647d7fbeb7d0ba533a3419ca9430894659cd1709...,1553609193128,17c10ae44e492ac20f89ca0d2a0928fe5b07e32cfd07f4...,06fa312761d4b39e2f649781514ac69a4c1505c221fc46...,9.0
20517760,c78f1254375c61d4a408385ff0761f1b64959c5905c7b7...,pageview,view,47a4589e7724a2dd37e772fb9344cdf487f7963a1a568c...,1553609193321,47a4589e7724a2dd37e772fb9344cdf487f7963a1a568c...,missing,missing
12653970,7b1c8e459311ce025051f3c8479cc172ae04364d4b5492...,pageview,view,433b0e71df1fe9a8d1f45647545701f6108414c40eef76...,1553609193592,433b0e71df1fe9a8d1f45647545701f6108414c40eef76...,missing,missing
2713835,1a6e02d4ce9714c7faa163b86f47a8ab540f5e644db198...,pageview,view,78151753f5796d07c69a40b2ed78edbdcffb205fcb5cc8...,1553609193884,78151753f5796d07c69a40b2ed78edbdcffb205fcb5cc8...,missing,missing


In [16]:
# Inspect the test split (rows with server_timestamp_epoch_ms > end_of_validation).
test_with_pageviews

Unnamed: 0,session_id_hash,event_type,product_action,product_sku_hash,server_timestamp_epoch_ms,hashed_url,category_hash,price_bucket
8279654,50ba1aca2ee06ba37cf89999dd25748b5c709e1e562fcb...,event_product,detail,27db9ad869d01e269a3b410c5a10c47efffa9d91f563a0...,1553609194308,defed2764f738c9c7d7d8d723744d56ba95d8fe9dfe67c...,06fa312761d4b39e2f649781514ac69a4c1505c221fc46...,10.0
4332142,2a37c6427b624b42e74734d629b5de516f5b9899394dff...,event_product,detail,2cc05ecbf2bd90ea3ae63cee1024349dce0d53d982b5ab...,1553609195242,8cb1117f5a3dc5bb294067ee0888282c4ada4a4f899eb6...,115a6a7017ee55752b8487c77dfde92b0d501d10a2e69c...,8.0
8304734,50f8c6456281cb81029f392a0cb53885dcb7e643c136be...,event_product,detail,4486575df4a2d4deb0354c47060cbd66bcb9cee319cce4...,1553609195247,4040b1d24ccf077cacdd0e92b4770096b57f3dff7a01b1...,06fa312761d4b39e2f649781514ac69a4c1505c221fc46...,10.0
25520366,f83e191f91e9a122be804bbd89f972c5a52d9cf61d1fed...,pageview,view,433b0e71df1fe9a8d1f45647545701f6108414c40eef76...,1553609195729,433b0e71df1fe9a8d1f45647545701f6108414c40eef76...,missing,missing
13740590,85a4da205673c17b7051453d3584509f12bea794e1dc3c...,event_product,detail,67c1338c253813903ceca9fdcab282fcef9552af0f1a34...,1553609196097,de14a07e5536b1301a9aa0caa95a1356805ac477e07e1f...,06fa312761d4b39e2f649781514ac69a4c1505c221fc46...,9.0
...,...,...,...,...,...,...,...,...
5292201,3395776a7edcc18f855758894902bba7ec32ed7aac5b40...,pageview,view,199df805067b2151516282e63935b19a1845b93bf2ebbb...,1555300793549,199df805067b2151516282e63935b19a1845b93bf2ebbb...,missing,missing
8552556,536078774cdcf7e65e3ede3842464a270685a839aae503...,event_product,detail,9558cec0186602c0d3eab35b9f05da2a84f82aaaaa8917...,1555300793597,f7d46429ac173df4142d791bbc4381808c3d470b56da93...,115a6a7017ee55752b8487c77dfde92b0d501d10a2e69c...,7.0
21966153,d5a4bac39f6b58a67d418d8f9d1a7ecbdd25580e2aae22...,event_product,detail,e4fd366ead2fe9e77990ac411c8ad3ffeaa232b4dd70be...,1555300794991,dfa42922e0d999a0df417eacd3f9d0d2ecd3dbaf26daa4...,115a6a7017ee55752b8487c77dfde92b0d501d10a2e69c...,5.0
1482327,0e62da9f4807fdc126c02331887479d5a56e1ab0e31a06...,event_product,detail,dece31cb7204a7c425cdb6048bdf4bb68c23914bc26f7b...,1555300797127,17ecd9ea103678c7c906005cb904c6416e9978b4d3f30d...,06fa312761d4b39e2f649781514ac69a4c1505c221fc46...,6.0


In [17]:
# Reorder each split in place: rows are grouped by session and, within a
# session, ordered by timestamp.
session_order = ['session_id_hash', 'server_timestamp_epoch_ms']
for split_frame in (train_with_pageviews, validation_with_pageviews, test_with_pageviews):
    split_frame.sort_values(by=session_order, inplace=True)

In [18]:
# Inspect the training split again, now ordered by session_id_hash and then timestamp.
train_with_pageviews

Unnamed: 0,session_id_hash,event_type,product_action,product_sku_hash,server_timestamp_epoch_ms,hashed_url,category_hash,price_bucket
14,00000277639fc5c6f816654b78bf3654ece7fd53a7338f...,pageview,view,433b0e71df1fe9a8d1f45647545701f6108414c40eef76...,1551911408318,433b0e71df1fe9a8d1f45647545701f6108414c40eef76...,missing,missing
15,00000277639fc5c6f816654b78bf3654ece7fd53a7338f...,pageview,view,41ae0f916406434064aeb03d859a7141b53d2900394dca...,1551911427489,41ae0f916406434064aeb03d859a7141b53d2900394dca...,missing,missing
16,00000277639fc5c6f816654b78bf3654ece7fd53a7338f...,pageview,view,7a2517c7a70e17741088fe49930b7252ff1b8aeb9df662...,1551911435813,7a2517c7a70e17741088fe49930b7252ff1b8aeb9df662...,missing,missing
17,00000277639fc5c6f816654b78bf3654ece7fd53a7338f...,pageview,view,41ae0f916406434064aeb03d859a7141b53d2900394dca...,1551911472125,41ae0f916406434064aeb03d859a7141b53d2900394dca...,missing,missing
18,00000277639fc5c6f816654b78bf3654ece7fd53a7338f...,pageview,view,c8f169341054e85bbb75d324eff266f718949dd6641314...,1551911499487,c8f169341054e85bbb75d324eff266f718949dd6641314...,missing,missing
...,...,...,...,...,...,...,...,...
26313717,fffffc128ba14ec4d4b2a230b4352453843b3bb59becf5...,pageview,view,189a154674efcb6ab196fd1f5341be5b3fd5cf4422bf0f...,1550718535420,189a154674efcb6ab196fd1f5341be5b3fd5cf4422bf0f...,missing,missing
26313718,fffffc128ba14ec4d4b2a230b4352453843b3bb59becf5...,event_product,detail,ea950a72ea131ef7181c7dd03f1ed77396648060c1e9cd...,1550718557956,400a4ea44e23f4cfec9e8129d84a9cb90a0cffecbb5406...,06fa312761d4b39e2f649781514ac69a4c1505c221fc46...,9.0
26313719,fffffc128ba14ec4d4b2a230b4352453843b3bb59becf5...,event_product,detail,bed8a2b601108932cbd8b3b14cde1d4919262c60b35cad...,1550718613724,0ca76955e075818c7eadaafddd0d7b565260fc83e84e06...,06fa312761d4b39e2f649781514ac69a4c1505c221fc46...,9.0
26313720,fffffc128ba14ec4d4b2a230b4352453843b3bb59becf5...,event_product,detail,2b1dd7624847cfe832fb6aaba59834f34008ed2820bca3...,1550718642814,c556502992acafffde649f4fff0748f1fcce22f21f5e02...,06fa312761d4b39e2f649781514ac69a4c1505c221fc46...,8.0


In [19]:
# Inspect the validation split again, now ordered by session_id_hash and then timestamp.
validation_with_pageviews

Unnamed: 0,session_id_hash,event_type,product_action,product_sku_hash,server_timestamp_epoch_ms,hashed_url,category_hash,price_bucket
0,00000114e1075962f022114fcfc17f2d874e694ac5d201...,event_product,detail,cf2f88cb43c1713538f7dfd2aa498a2cb9ebc0c99feeac...,1552423391039,0aa1084eddfb08e4dffbb5a2aa98a5e9679382d982dd97...,06fa312761d4b39e2f649781514ac69a4c1505c221fc46...,10.0
1,00000114e1075962f022114fcfc17f2d874e694ac5d201...,event_product,add,cf2f88cb43c1713538f7dfd2aa498a2cb9ebc0c99feeac...,1552424389158,83b4fdad686c1be4eba335f70d23ae202b84b6153e109e...,06fa312761d4b39e2f649781514ac69a4c1505c221fc46...,10.0
2,00000114e1075962f022114fcfc17f2d874e694ac5d201...,pageview,view,0ad6fab1eb3ac76010ea2fa6399a4e993b00f6501c88a2...,1552424395594,0ad6fab1eb3ac76010ea2fa6399a4e993b00f6501c88a2...,missing,missing
3,00000114e1075962f022114fcfc17f2d874e694ac5d201...,pageview,view,e93e5c83aab0987e41d8fd65a30b54d2ce87491b4a7f9b...,1552424417587,e93e5c83aab0987e41d8fd65a30b54d2ce87491b4a7f9b...,missing,missing
4,00000114e1075962f022114fcfc17f2d874e694ac5d201...,pageview,view,433b0e71df1fe9a8d1f45647545701f6108414c40eef76...,1552424698656,433b0e71df1fe9a8d1f45647545701f6108414c40eef76...,missing,missing
...,...,...,...,...,...,...,...,...
26313621,ffffc3943b2b105554b84d4d105bf6e6349ad2ba31c8d5...,event_product,detail,b39ea7776df9a6d0c51cbbed60a128abdb660e93fe9bfc...,1553548475645,fa7218cc7ed7aa0f5d1dead7d4c17cd182b21bb91467f8...,0665a81d19c89281cc00e7f7d779ded2ed42c933838602...,7.0
26313659,ffffeb3cebf9f8644b4ccfc0763945ca6e1525d0c77f51...,pageview,view,c5345319c663aa93d6ad6e57fded8e7df9a1473bffb043...,1552481698778,c5345319c663aa93d6ad6e57fded8e7df9a1473bffb043...,missing,missing
26313660,ffffeb3cebf9f8644b4ccfc0763945ca6e1525d0c77f51...,event_product,detail,5faffedaf5801444e5a0758733937c940055beb83e8958...,1552481713860,00d938dea6799202aa483c82505edb6728f8c8b2b3fed6...,115a6a7017ee55752b8487c77dfde92b0d501d10a2e69c...,5.0
26313661,ffffeb3cebf9f8644b4ccfc0763945ca6e1525d0c77f51...,pageview,view,c5345319c663aa93d6ad6e57fded8e7df9a1473bffb043...,1552481718062,c5345319c663aa93d6ad6e57fded8e7df9a1473bffb043...,missing,missing


In [20]:
# Inspect the test split again, now ordered by session_id_hash and then timestamp.
test_with_pageviews

Unnamed: 0,session_id_hash,event_type,product_action,product_sku_hash,server_timestamp_epoch_ms,hashed_url,category_hash,price_bucket
20,000009f36a40de1d557afc083dbb3fc03eef2473337bad...,pageview,view,6fa3ac8ca46b1bb29dd68834f9e135f82935b1f7fd1a54...,1554231745929,6fa3ac8ca46b1bb29dd68834f9e135f82935b1f7fd1a54...,missing,missing
21,000009f36a40de1d557afc083dbb3fc03eef2473337bad...,pageview,view,3ae3392b9a7e6e6301f15b522674e3373c583b4addee72...,1554231757614,3ae3392b9a7e6e6301f15b522674e3373c583b4addee72...,missing,missing
22,00000e812c3076d18245710a31b348d3f23314b7d0dc90...,pageview,view,377665b50a7c78dd8061c477dfd9664a3dd75d3564a390...,1553828350125,377665b50a7c78dd8061c477dfd9664a3dd75d3564a390...,missing,missing
85,00001355930ff05e66ab30bccff221c33eba90e1517397...,event_product,detail,fa95869f053efcee615f4e02bc4fa8d13843b9e80e936f...,1554738829838,00d6784809151bb66d7b8c27b915fa03d11e1ecbfeec59...,0665a81d19c89281cc00e7f7d779ded2ed42c933838602...,7.0
131,000045a5632b967071f66185b450618013825bd0550faf...,pageview,view,42c2c57d3b0928ed59f84c872e6942897199eebea9e550...,1554401673910,42c2c57d3b0928ed59f84c872e6942897199eebea9e550...,missing,missing
...,...,...,...,...,...,...,...,...
26313676,ffffec0d60277e1ff859623244115db6f3fcfbeaf56304...,event_product,detail,45b304cd266d33857dc4387933feb38380f7cbd0dd2dc6...,1553984562456,2c2cde77820698658e277bb55311ddaf046d898fb973f2...,115a6a7017ee55752b8487c77dfde92b0d501d10a2e69c...,9.0
26313677,ffffec0d60277e1ff859623244115db6f3fcfbeaf56304...,event_product,detail,a94a0a26b38fd8c7044be87fea51cf500b9fe61a0d3dea...,1553984613425,43e7432ccbf04bd9f5d1759c18118b1ae262681a9de5fb...,0665a81d19c89281cc00e7f7d779ded2ed42c933838602...,6.0
26313678,ffffec0d60277e1ff859623244115db6f3fcfbeaf56304...,event_product,detail,08657660c499dd08cc37b1a98d88d83b485c494990dd97...,1553984626977,2a34fd75429d9adc204d65746aa19165e2813f0e6563ac...,115a6a7017ee55752b8487c77dfde92b0d501d10a2e69c...,7.0
26313679,ffffec0d60277e1ff859623244115db6f3fcfbeaf56304...,event_product,detail,3c4926b7c61b4d232dc010cd6ccbd280e63ebbf543ac72...,1553984784419,12586982daef37e567dd325901ee92ee3c36fc3d01626e...,115a6a7017ee55752b8487c77dfde92b0d501d10a2e69c...,7.0


In [21]:
# Persist the three with-pageviews splits as tab-separated files, without the
# DataFrame index column.
# DataFrame.to_csv does not create missing parent directories, so make sure the
# target folder exists first (a no-op when it is already there).
split_dir_with_pageviews = Path("../coveo_dataset/preprocessed_data/split/with_pageviews")
split_dir_with_pageviews.mkdir(parents=True, exist_ok=True)

train_with_pageviews.to_csv(path_or_buf=split_dir_with_pageviews / "coveo.train.csv", sep="\t", index=False)
validation_with_pageviews.to_csv(path_or_buf=split_dir_with_pageviews / "coveo.validation.csv", sep="\t", index=False)
test_with_pageviews.to_csv(path_or_buf=split_dir_with_pageviews / "coveo.test.csv", sep="\t", index=False)

---
### Split without pageviews

In [22]:
# Put the whole event log into chronological order (in place) ahead of the
# time-based split.
full_dataset_without_pageviews.sort_values(by='server_timestamp_epoch_ms', inplace=True)

# Show the sorted frame.
full_dataset_without_pageviews

Unnamed: 0,session_id_hash,event_type,product_action,product_sku_hash,server_timestamp_epoch_ms,hashed_url,category_hash,price_bucket
2180756,35a0f82eeb7330a51b4d459fef5c0b7628122e0e4fc58a...,event_product,detail,7bbec596889630855194fc44d071539772de02c269d48d...,1547528569510,aad195d55bf22842831f5064595dd28a6b44ac8fc9ff35...,06fa312761d4b39e2f649781514ac69a4c1505c221fc46...,8.0
1694107,29acd0097e81f31960b1e659bd0031b782b2a3599a5e69...,event_product,detail,11bb1a83b2d774e66feeabeb05ccc14e00e2cd30ec7816...,1547528572012,24507a6b857b6f6f092a6e68fc534ff9194e9e666cc9c1...,115a6a7017ee55752b8487c77dfde92b0d501d10a2e69c...,6.0
7407531,b5a6027a590325f1600ae5ab167be226480365d04ec54b...,event_product,detail,d6392b60c1bc118a06cdb12caacbc45513cd5d9d9d4aef...,1547528572676,e929358d9653e608677469b4a4d2f8b8c3ee30b1c7fa4f...,06fa312761d4b39e2f649781514ac69a4c1505c221fc46...,8.0
5559051,886a72d9a9cea22f8c30afa1694f1638a8c0a582f74210...,event_product,detail,0d97581c828f05adc0026cac0b89cf010dd80f0dc8be5c...,1547528577856,f58bc02eb40c824aaa1ac35ae5ed06b6a8635aef62b587...,0665a81d19c89281cc00e7f7d779ded2ed42c933838602...,5.0
7202253,b0a5af5cf4102846dd1a2d1215c6dd094f1be4bc9f7465...,event_product,detail,5233b40ebc34ea1645c425f0fb49ae129e0d87cc6e6c2f...,1547528577889,add8d2d8f1f34f0e99901af4a5a9228fb26324f8a83e11...,06fa312761d4b39e2f649781514ac69a4c1505c221fc46...,10.0
...,...,...,...,...,...,...,...,...
1781523,2bd198560db9f63dcfc59c374585a296c5ccb56196d7d3...,event_product,detail,a3c8358d9f11dbfc1b233df6744d1413ccd897b1b31b35...,1555300783048,f2f72137fc36f10724b1620255f8be6474301841a36aa7...,06fa312761d4b39e2f649781514ac69a4c1505c221fc46...,2.0
3388622,536078774cdcf7e65e3ede3842464a270685a839aae503...,event_product,detail,9558cec0186602c0d3eab35b9f05da2a84f82aaaaa8917...,1555300793597,f7d46429ac173df4142d791bbc4381808c3d470b56da93...,115a6a7017ee55752b8487c77dfde92b0d501d10a2e69c...,7.0
8707964,d5a4bac39f6b58a67d418d8f9d1a7ecbdd25580e2aae22...,event_product,detail,e4fd366ead2fe9e77990ac411c8ad3ffeaa232b4dd70be...,1555300794991,dfa42922e0d999a0df417eacd3f9d0d2ecd3dbaf26daa4...,115a6a7017ee55752b8487c77dfde92b0d501d10a2e69c...,5.0
587104,0e62da9f4807fdc126c02331887479d5a56e1ab0e31a06...,event_product,detail,dece31cb7204a7c425cdb6048bdf4bb68c23914bc26f7b...,1555300797127,17ecd9ea103678c7c906005cb904c6416e9978b4d3f30d...,06fa312761d4b39e2f649781514ac69a4c1505c221fc46...,6.0


In [23]:
# Timestamp cutoffs for the without-pageviews split, compared against
# server_timestamp_epoch_ms: rows <= end_of_train form the training set, rows in
# (end_of_train, end_of_validation] the validation set, and later rows the test set.
# Read as Unix epoch milliseconds, the active values are roughly
# 2019-03-09 13:30 UTC and 2019-03-27 16:40 UTC.
# NOTE(review): how these cutoffs were derived is not shown here — TODO document.
# NOTE: these assignments overwrite the end_of_train / end_of_validation values
# set earlier for the with-pageviews split.
# Values recorded for the "with search clicks" variant (kept for reference, unused):
# end_of_train = 1552145314852 (with search clicks)
# end_of_validation = 1553710162865 (with search clicks)
end_of_train = 1552138259347
end_of_validation = 1553704815974

In [24]:
# Time-based split into three independent copies:
#   train      : timestamp <= end_of_train
#   validation : end_of_train < timestamp <= end_of_validation
#   test       : timestamp > end_of_validation
# NOTE(review): the masks are applied per event row, so a session whose events
# straddle a cutoff ends up divided between two splits — confirm this is intended.
ts_without_pageviews = full_dataset_without_pageviews['server_timestamp_epoch_ms']

train_without_pageviews = full_dataset_without_pageviews.loc[
    ts_without_pageviews <= end_of_train
].copy()
validation_without_pageviews = full_dataset_without_pageviews.loc[
    (ts_without_pageviews > end_of_train) & (ts_without_pageviews <= end_of_validation)
].copy()
test_without_pageviews = full_dataset_without_pageviews.loc[
    ts_without_pageviews > end_of_validation
].copy()

In [25]:
# Inspect the training split (rows with server_timestamp_epoch_ms <= end_of_train).
train_without_pageviews

Unnamed: 0,session_id_hash,event_type,product_action,product_sku_hash,server_timestamp_epoch_ms,hashed_url,category_hash,price_bucket
2180756,35a0f82eeb7330a51b4d459fef5c0b7628122e0e4fc58a...,event_product,detail,7bbec596889630855194fc44d071539772de02c269d48d...,1547528569510,aad195d55bf22842831f5064595dd28a6b44ac8fc9ff35...,06fa312761d4b39e2f649781514ac69a4c1505c221fc46...,8.0
1694107,29acd0097e81f31960b1e659bd0031b782b2a3599a5e69...,event_product,detail,11bb1a83b2d774e66feeabeb05ccc14e00e2cd30ec7816...,1547528572012,24507a6b857b6f6f092a6e68fc534ff9194e9e666cc9c1...,115a6a7017ee55752b8487c77dfde92b0d501d10a2e69c...,6.0
7407531,b5a6027a590325f1600ae5ab167be226480365d04ec54b...,event_product,detail,d6392b60c1bc118a06cdb12caacbc45513cd5d9d9d4aef...,1547528572676,e929358d9653e608677469b4a4d2f8b8c3ee30b1c7fa4f...,06fa312761d4b39e2f649781514ac69a4c1505c221fc46...,8.0
5559051,886a72d9a9cea22f8c30afa1694f1638a8c0a582f74210...,event_product,detail,0d97581c828f05adc0026cac0b89cf010dd80f0dc8be5c...,1547528577856,f58bc02eb40c824aaa1ac35ae5ed06b6a8635aef62b587...,0665a81d19c89281cc00e7f7d779ded2ed42c933838602...,5.0
7202253,b0a5af5cf4102846dd1a2d1215c6dd094f1be4bc9f7465...,event_product,detail,5233b40ebc34ea1645c425f0fb49ae129e0d87cc6e6c2f...,1547528577889,add8d2d8f1f34f0e99901af4a5a9228fb26324f8a83e11...,06fa312761d4b39e2f649781514ac69a4c1505c221fc46...,10.0
...,...,...,...,...,...,...,...,...
992817,18715314d52b739ec6437c5075121cee8b7c72c98578b4...,event_product,detail,df89342941ea45d9daf1878a3f360ae8bfa09391e33809...,1552138256918,649186f16baf1eb84b1c28c28be57f6e0d2f5725fb9184...,06fa312761d4b39e2f649781514ac69a4c1505c221fc46...,6.0
7949300,c2fc736caefc473e7d4689f37b30f30364155d64104b65...,event_product,detail,02811154912b9607cf422e76ed5cee633f7d8c4c1d4bec...,1552138257708,ea6d6b485bfdef094b9f5cd261dd6f1067be205d77d058...,115a6a7017ee55752b8487c77dfde92b0d501d10a2e69c...,6.0
7485579,b79407aae15eee7726d6d27600ba0ed64128f6e88de55c...,event_product,detail,d419b3f461bb41955a24aee7fee4b57999169757b1d617...,1552138258091,caf847e940e7ff39327422ec46aea3b1349352b874ad2d...,115a6a7017ee55752b8487c77dfde92b0d501d10a2e69c...,7.0
3353206,528297bd0d33d52cf418f598b425522a5c37573fb40c1f...,event_product,detail,4b5d783d4147cdbe045ef63e8f532f3a8553b09b63c3f9...,1552138258534,46e8e08d62ed6369906f1163f2a8d0f411047745f4ff65...,115a6a7017ee55752b8487c77dfde92b0d501d10a2e69c...,9.0


In [26]:
# Inspect the validation split (end_of_train < server_timestamp_epoch_ms <= end_of_validation).
validation_without_pageviews

Unnamed: 0,session_id_hash,event_type,product_action,product_sku_hash,server_timestamp_epoch_ms,hashed_url,category_hash,price_bucket
6556377,a0ce14650dfbe442ee24159e02f0acc877ce6412d7647e...,event_product,detail,67f660209665e6020abc1a09f450b018687797dabe76c2...,1552138259508,a0d79b7f6c3cb33e90510624fed8d8cad70c0c8c84bc08...,06fa312761d4b39e2f649781514ac69a4c1505c221fc46...,4.0
5093942,7d06a9375277e23227869c68554fe00617652c6a22e14e...,event_product,detail,931e376341cf9b625a67962109d232625128dc070f3743...,1552138261110,f66ffe8e1b948d5309ceb3dbfccfb45957e0a5fbd6422b...,0665a81d19c89281cc00e7f7d779ded2ed42c933838602...,10.0
9279571,e3b3050eb91833ec6f8407841f798c183fc26ef937cd00...,event_product,detail,94b97176cf5849ff8107fc8ff3fdaa30adb4bf377c8395...,1552138261918,36547ed1b46236af0bf19a880a8814945345af462c2f2c...,06fa312761d4b39e2f649781514ac69a4c1505c221fc46...,1.0
5794764,8e2cab9603ee80072d9470f21908dcaeda9a1e9bdd6ee3...,event_product,detail,c85d9c25a221f16b2398e880db527991603440cc5f6409...,1552138262427,22e98b31bb2937288cbc8583938da4d36d5687115e6ec8...,06fa312761d4b39e2f649781514ac69a4c1505c221fc46...,10.0
6090004,956277112895ded9908cd3f3ed9edf57f0059f6acbb83e...,event_product,detail,f65f1252af6d17eeadf3c17c3d9775388ba591853172f2...,1552138262735,86cc03a4d854f40f85270b09f69cd84014eba557d86d98...,0665a81d19c89281cc00e7f7d779ded2ed42c933838602...,10.0
...,...,...,...,...,...,...,...,...
1289428,1fb8dfa75bb6448d8f6664a30f239bd74d889c5d5dfe69...,event_product,detail,20345cab144a88e2e55f8c7278a89065fc7d622d38db7a...,1553704812650,1fa8670bbd8f5d7a3df4062b5d208042eda2f5a7d2a669...,115a6a7017ee55752b8487c77dfde92b0d501d10a2e69c...,8.0
624280,0f532496b94f8e1c8402bd2ec00669a20f51cf3e1bd863...,event_product,detail,f649ce6c601563a2150f259513f4509c913ed1c99300ce...,1553704813729,f868a0d7a8c2a935537c094a3c03570ed56316ad912018...,0665a81d19c89281cc00e7f7d779ded2ed42c933838602...,10.0
1387515,2221b30fa117f6806d898ff7b89e2b0930173b5fc0994c...,event_product,detail,096ec729a0a53c08630a8b5e91293489296552e11e58c1...,1553704814655,81044d65cb2ae1b8d12b1ba8b5aa88133d3a562de18e9c...,115a6a7017ee55752b8487c77dfde92b0d501d10a2e69c...,7.0
9950086,f42b2a38c876ba596b3b6c5de2cda3226edcfa4b07c6c8...,event_product,detail,26ce666181e06a5c1894c39e85f134c2479f7b1e5b30ca...,1553704814696,1dd852c74e1c72550fca137e4d0cc27be4b1a7eb698080...,115a6a7017ee55752b8487c77dfde92b0d501d10a2e69c...,7.0


In [27]:
# Inspect the test split (rows with server_timestamp_epoch_ms > end_of_validation).
test_without_pageviews

Unnamed: 0,session_id_hash,event_type,product_action,product_sku_hash,server_timestamp_epoch_ms,hashed_url,category_hash,price_bucket
5319428,828298e7358ec0bfff48f6a0772528a2dce4a62356d525...,event_product,detail,3a040495dba80c5edb33e2f53fbd696e7a3f77b55bd8bd...,1553704818491,c6c3a25e44b507daa270f7f9a2d30f6dc877fcec153e07...,06fa312761d4b39e2f649781514ac69a4c1505c221fc46...,6.0
2109839,33e77094bc4b480e45be3d3c5d783abe8ab1a0b1ed444c...,event_product,detail,34f0c1beded694432db96e980a792b78588a717d3f5168...,1553704818798,b840a88c503358ebbe589dc452c5129b8a9e7be9e99a96...,115a6a7017ee55752b8487c77dfde92b0d501d10a2e69c...,8.0
2067755,32e0aa11a7de716c12f0aa36e0a89e67322fca3aa5b5a7...,event_product,detail,7f8f90c5805252e5917cc613c52b1809640ec3898b13a9...,1553704819194,159d1e1885f57d962739cbc8d0d22954a42647d1c36748...,06fa312761d4b39e2f649781514ac69a4c1505c221fc46...,8.0
9842178,f18826cc5452b9fe40bd99bf452362579edb590ee962cc...,event_product,detail,c4ae07620a50c724c273b4eeb39673cea21947c22cad7f...,1553704820241,c387bb9b12b9dfe96aa2a577d3cfab32b9bf06380cf356...,06fa312761d4b39e2f649781514ac69a4c1505c221fc46...,9.0
3786250,5d18e8309637c3889ee6de74e7ed42926d7b091aa837e1...,event_product,add,663cc55681c1fa63db7655fb900464cedc2e2cb2e60399...,1553704822486,83b4fdad686c1be4eba335f70d23ae202b84b6153e109e...,06fa312761d4b39e2f649781514ac69a4c1505c221fc46...,5.0
...,...,...,...,...,...,...,...,...
1781523,2bd198560db9f63dcfc59c374585a296c5ccb56196d7d3...,event_product,detail,a3c8358d9f11dbfc1b233df6744d1413ccd897b1b31b35...,1555300783048,f2f72137fc36f10724b1620255f8be6474301841a36aa7...,06fa312761d4b39e2f649781514ac69a4c1505c221fc46...,2.0
3388622,536078774cdcf7e65e3ede3842464a270685a839aae503...,event_product,detail,9558cec0186602c0d3eab35b9f05da2a84f82aaaaa8917...,1555300793597,f7d46429ac173df4142d791bbc4381808c3d470b56da93...,115a6a7017ee55752b8487c77dfde92b0d501d10a2e69c...,7.0
8707964,d5a4bac39f6b58a67d418d8f9d1a7ecbdd25580e2aae22...,event_product,detail,e4fd366ead2fe9e77990ac411c8ad3ffeaa232b4dd70be...,1555300794991,dfa42922e0d999a0df417eacd3f9d0d2ecd3dbaf26daa4...,115a6a7017ee55752b8487c77dfde92b0d501d10a2e69c...,5.0
587104,0e62da9f4807fdc126c02331887479d5a56e1ab0e31a06...,event_product,detail,dece31cb7204a7c425cdb6048bdf4bb68c23914bc26f7b...,1555300797127,17ecd9ea103678c7c906005cb904c6416e9978b4d3f30d...,06fa312761d4b39e2f649781514ac69a4c1505c221fc46...,6.0


In [28]:
# Reorder each split in place: rows are grouped by session and, within a
# session, ordered by timestamp.
session_order = ['session_id_hash', 'server_timestamp_epoch_ms']
for split_frame in (train_without_pageviews, validation_without_pageviews, test_without_pageviews):
    split_frame.sort_values(by=session_order, inplace=True)

In [29]:
# Inspect the training split again, now ordered by session_id_hash and then timestamp.
train_without_pageviews

Unnamed: 0,session_id_hash,event_type,product_action,product_sku_hash,server_timestamp_epoch_ms,hashed_url,category_hash,price_bucket
5,000010504025397b03290c7457e0e7ef7ae93529f21eae...,event_product,detail,4d45479446a63680c0a03c1f4a31b9e9a6cba5a9dface8...,1550148170683,55c1afc839373889214e660b142b2557bb005db977bd53...,06fa312761d4b39e2f649781514ac69a4c1505c221fc46...,1.0
6,000010504025397b03290c7457e0e7ef7ae93529f21eae...,event_product,detail,84ab2546942afe5b4c02f1b594e66251d0080da319e957...,1550148176362,211f78693a94aa5d111d2fd3ea06d8b4c32e29c4125314...,06fa312761d4b39e2f649781514ac69a4c1505c221fc46...,1.0
7,000010504025397b03290c7457e0e7ef7ae93529f21eae...,event_product,detail,f0999c89e453bad5ee6900df54d6c04ea04eedd7082adb...,1550148180394,39052384e4c837793418470d982ae107acf1602d605bf6...,06fa312761d4b39e2f649781514ac69a4c1505c221fc46...,1.0
8,000010504025397b03290c7457e0e7ef7ae93529f21eae...,event_product,detail,c1b20251888221d47da74a5d8cdfb78be942eb51d35371...,1550148228570,d6d98465906b804bbc534eb4ed7506470421d4b40ebba8...,06fa312761d4b39e2f649781514ac69a4c1505c221fc46...,2.0
9,000010504025397b03290c7457e0e7ef7ae93529f21eae...,event_product,detail,e3281e3a9ec7ebb3c5483601abb856bc0ccf08dfd74da4...,1550148332921,536fd190fc1add8381dd010abbca1e208957ee0713e748...,06fa312761d4b39e2f649781514ac69a4c1505c221fc46...,2.0
...,...,...,...,...,...,...,...,...
10430526,fffffc128ba14ec4d4b2a230b4352453843b3bb59becf5...,event_product,detail,a532a9b9d11dcb3358677f9ff2d41d95f015331acbd632...,1550718210565,369bf5eafd30de4ae1eb0c8f0a55c660266dccd85f5939...,06fa312761d4b39e2f649781514ac69a4c1505c221fc46...,9.0
10430527,fffffc128ba14ec4d4b2a230b4352453843b3bb59becf5...,event_product,detail,3011fe75d8739f217851a68d4c7ab40cce7dd5031f9080...,1550718249795,53c2241664874b9e7e05515c282bc33ae6665641a985f0...,06fa312761d4b39e2f649781514ac69a4c1505c221fc46...,9.0
10430528,fffffc128ba14ec4d4b2a230b4352453843b3bb59becf5...,event_product,detail,ea950a72ea131ef7181c7dd03f1ed77396648060c1e9cd...,1550718557956,400a4ea44e23f4cfec9e8129d84a9cb90a0cffecbb5406...,06fa312761d4b39e2f649781514ac69a4c1505c221fc46...,9.0
10430529,fffffc128ba14ec4d4b2a230b4352453843b3bb59becf5...,event_product,detail,bed8a2b601108932cbd8b3b14cde1d4919262c60b35cad...,1550718613724,0ca76955e075818c7eadaafddd0d7b565260fc83e84e06...,06fa312761d4b39e2f649781514ac69a4c1505c221fc46...,9.0


In [30]:
# Preview the validation split after the in-place sort by session_id_hash and
# server_timestamp_epoch_ms; pandas elides the middle rows of the frame.
validation_without_pageviews

Unnamed: 0,session_id_hash,event_type,product_action,product_sku_hash,server_timestamp_epoch_ms,hashed_url,category_hash,price_bucket
0,00000114e1075962f022114fcfc17f2d874e694ac5d201...,event_product,detail,cf2f88cb43c1713538f7dfd2aa498a2cb9ebc0c99feeac...,1552423391039,0aa1084eddfb08e4dffbb5a2aa98a5e9679382d982dd97...,06fa312761d4b39e2f649781514ac69a4c1505c221fc46...,10.0
1,00000114e1075962f022114fcfc17f2d874e694ac5d201...,event_product,add,cf2f88cb43c1713538f7dfd2aa498a2cb9ebc0c99feeac...,1552424389158,83b4fdad686c1be4eba335f70d23ae202b84b6153e109e...,06fa312761d4b39e2f649781514ac69a4c1505c221fc46...,10.0
2,00000114e1075962f022114fcfc17f2d874e694ac5d201...,event_product,detail,4945f2fa8e87cb7501702ed3dce26253296eae7a8f670f...,1552426684381,33fb2d300f6bd71508a0cddd4ca78f0302f34637feeb54...,06fa312761d4b39e2f649781514ac69a4c1505c221fc46...,10.0
3,00000114e1075962f022114fcfc17f2d874e694ac5d201...,event_product,detail,6ff8d0f30bbe66cfec7d87fc7e22bd8b1defd47ff4aaaa...,1552426755233,f6f25d612ce6a6ae447c0a40bd344e0657c84048954f28...,06fa312761d4b39e2f649781514ac69a4c1505c221fc46...,10.0
4,00000114e1075962f022114fcfc17f2d874e694ac5d201...,event_product,detail,cf2f88cb43c1713538f7dfd2aa498a2cb9ebc0c99feeac...,1552426869735,0aa1084eddfb08e4dffbb5a2aa98a5e9679382d982dd97...,06fa312761d4b39e2f649781514ac69a4c1505c221fc46...,10.0
...,...,...,...,...,...,...,...,...
10430430,ffff6a21626693d8b10fbb157c4bb1080756abe592b6b7...,event_product,detail,b2b07ba53d4707279c1680150244e587ac113f7be2c142...,1553037343826,1cbc4a5c06f0d32db57b6ad1bec6f1394c28d56b0df7bd...,06fa312761d4b39e2f649781514ac69a4c1505c221fc46...,8.0
10430480,ffffc3943b2b105554b84d4d105bf6e6349ad2ba31c8d5...,event_product,detail,264551e2e2522aff9a5d0af4a9ae947e8da22486b96e53...,1553548438619,6ae3aefdaa469fce3f262be5cdb1b363cb29ce5e7661bf...,0665a81d19c89281cc00e7f7d779ded2ed42c933838602...,10.0
10430481,ffffc3943b2b105554b84d4d105bf6e6349ad2ba31c8d5...,event_product,detail,b39ea7776df9a6d0c51cbbed60a128abdb660e93fe9bfc...,1553548475645,fa7218cc7ed7aa0f5d1dead7d4c17cd182b21bb91467f8...,0665a81d19c89281cc00e7f7d779ded2ed42c933838602...,7.0
10430503,ffffeb3cebf9f8644b4ccfc0763945ca6e1525d0c77f51...,event_product,detail,5faffedaf5801444e5a0758733937c940055beb83e8958...,1552481713860,00d938dea6799202aa483c82505edb6728f8c8b2b3fed6...,115a6a7017ee55752b8487c77dfde92b0d501d10a2e69c...,5.0


In [31]:
# Preview the test split after the in-place sort by session_id_hash and
# server_timestamp_epoch_ms; pandas elides the middle rows of the frame.
test_without_pageviews

Unnamed: 0,session_id_hash,event_type,product_action,product_sku_hash,server_timestamp_epoch_ms,hashed_url,category_hash,price_bucket
29,00001355930ff05e66ab30bccff221c33eba90e1517397...,event_product,detail,fa95869f053efcee615f4e02bc4fa8d13843b9e80e936f...,1554738829838,00d6784809151bb66d7b8c27b915fa03d11e1ecbfeec59...,0665a81d19c89281cc00e7f7d779ded2ed42c933838602...,7.0
43,0000481159d514ba59c3ccfa77c5767c9d14ac14e99232...,event_product,detail,2c93fcdceef7261c5408351dd02ae611df0857b5dbb930...,1553886997418,1d62e75de636a20274c954ce7880c30ad58cc217cba5f6...,0665a81d19c89281cc00e7f7d779ded2ed42c933838602...,9.0
44,0000481159d514ba59c3ccfa77c5767c9d14ac14e99232...,event_product,detail,8f677c04d77b056a58603fe21228f684071eb1437134bb...,1553887032398,eb24e2d7b3fc51017a17b21ba15975fd745fd344d40b2c...,0665a81d19c89281cc00e7f7d779ded2ed42c933838602...,9.0
45,0000481159d514ba59c3ccfa77c5767c9d14ac14e99232...,event_product,detail,b86c8d628d229f3209b2ed29a98b6d006023f54bc43738...,1553887075557,69eade9f29ae34ae947f3b1fe1f910cd913cfcf6756d86...,0665a81d19c89281cc00e7f7d779ded2ed42c933838602...,8.0
46,0000481159d514ba59c3ccfa77c5767c9d14ac14e99232...,event_product,detail,dd740f4e31ccf6c4bdc0b59e2ece04bb1e6698b1fc877c...,1553887094651,67953012f510f3aba999c288d23f54535659b93b319c12...,0665a81d19c89281cc00e7f7d779ded2ed42c933838602...,8.0
...,...,...,...,...,...,...,...,...
10430511,ffffec0d60277e1ff859623244115db6f3fcfbeaf56304...,event_product,detail,45b304cd266d33857dc4387933feb38380f7cbd0dd2dc6...,1553984562456,2c2cde77820698658e277bb55311ddaf046d898fb973f2...,115a6a7017ee55752b8487c77dfde92b0d501d10a2e69c...,9.0
10430512,ffffec0d60277e1ff859623244115db6f3fcfbeaf56304...,event_product,detail,a94a0a26b38fd8c7044be87fea51cf500b9fe61a0d3dea...,1553984613425,43e7432ccbf04bd9f5d1759c18118b1ae262681a9de5fb...,0665a81d19c89281cc00e7f7d779ded2ed42c933838602...,6.0
10430513,ffffec0d60277e1ff859623244115db6f3fcfbeaf56304...,event_product,detail,08657660c499dd08cc37b1a98d88d83b485c494990dd97...,1553984626977,2a34fd75429d9adc204d65746aa19165e2813f0e6563ac...,115a6a7017ee55752b8487c77dfde92b0d501d10a2e69c...,7.0
10430514,ffffec0d60277e1ff859623244115db6f3fcfbeaf56304...,event_product,detail,3c4926b7c61b4d232dc010cd6ccbd280e63ebbf543ac72...,1553984784419,12586982daef37e567dd325901ee92ee3c36fc3d01626e...,115a6a7017ee55752b8487c77dfde92b0d501d10a2e69c...,7.0


In [32]:
# Persist each split (without pageviews) as a tab-separated file.
# index=False keeps the DataFrame index out of the written files.
split_outputs = [
    (train_without_pageviews, "../coveo_dataset/preprocessed_data/split/without_pageviews/coveo.train.csv"),
    (validation_without_pageviews, "../coveo_dataset/preprocessed_data/split/without_pageviews/coveo.validation.csv"),
    (test_without_pageviews, "../coveo_dataset/preprocessed_data/split/without_pageviews/coveo.test.csv"),
]
for split_frame, output_path in split_outputs:
    split_frame.to_csv(path_or_buf=output_path, sep="\t", index=False)