In [19]:
import os
import sys
import json
import datetime
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from tqdm import tqdm
from collections import defaultdict, Counter

%matplotlib inline

In [20]:
path_to_df = '../data/Beauty/ratings_Beauty.csv'
df = pd.read_csv(path_to_df, names=['user_id', 'item_id', 'rating', 'timestamp'])

In [21]:
df.head()

Unnamed: 0,user_id,item_id,rating,timestamp
0,A39HTATAQ9V7YF,205616461,5.0,1369699200
1,A3JM6GV9MNOF9X,558925278,3.0,1355443200
2,A1Z513UWSAAO0F,558925278,5.0,1404691200
3,A1WMRR494NWEWV,733001998,4.0,1382572800
4,A3IAAVS479H7M7,737104473,1.0,1274227200


In [22]:
df.isnull().sum()

user_id      0
item_id      0
rating       0
timestamp    0
dtype: int64

In [23]:
df.user_id.max(), df.user_id.unique().shape

('AZZZU2TD7Q3ET', (1210271,))

In [24]:
df.user_id = pd.factorize(df.user_id)[0] + 1
df.user_id.min(), df.user_id.max(), df.user_id.unique().shape

(1, 1210271, (1210271,))

In [25]:
df.item_id = pd.factorize(df.item_id)[0] + 1
df.item_id.min(), df.item_id.max(), df.item_id.unique().shape

(1, 249274, (249274,))

In [26]:
df.head()

Unnamed: 0,user_id,item_id,rating,timestamp
0,1,1,5.0,1369699200
1,2,2,3.0,1355443200
2,3,2,5.0,1404691200
3,4,3,4.0,1382572800
4,5,4,1.0,1274227200


In [9]:
sorted_timestamps = sorted(df.timestamp)
len(sorted_timestamps)

2023070

In [10]:
threshold_timestamp = sorted_timestamps[int(len(sorted_timestamps) * (1.0 - 0.2))]
test_threshold_timestamp = sorted_timestamps[int(len(sorted_timestamps) * (1.0 - 0.1))]

print(threshold_timestamp, test_threshold_timestamp)

1394323200 1400284800


In [11]:
train_data = df[df.timestamp < threshold_timestamp]
validation_data = df[(threshold_timestamp <= df.timestamp) & (df.timestamp < test_threshold_timestamp)]
test_data = df[test_threshold_timestamp <= df.timestamp]

train_data.shape, validation_data.shape, test_data.shape

((1616826, 4), (202506, 4), (203738, 4))

## Train data preprocessing

In [12]:
data = []

for _, row in tqdm(train_data.iterrows()):
    data.append({
        'user_id': int(row.user_id),
        'item_id': int(row.item_id),
        'timestamp': int(row.timestamp)
    })

print(len(data))

1616826it [01:13, 22007.36it/s]

1616826





In [13]:
user_history = defaultdict(list)
item_history = defaultdict(list)

for row in tqdm(data):
    user_raw_id = row['user_id']
    item_raw_id = row['item_id']
    interaction_timestamp = row['timestamp']
    
    user_history[user_raw_id].append({'item_id': item_raw_id, 'timestamp': interaction_timestamp})
    item_history[item_raw_id].append({'user_id': user_raw_id, 'timestamp': interaction_timestamp})


is_changed = True
threshold = 4
good_users = set()
good_items = set()


while is_changed:
    old_state = (len(good_users), len(good_items))
    
    good_users = set()
    good_items = set()

    for user_id, history in user_history.items():
        if len(history) >= threshold:
            good_users.add(user_id)

    for item_id, history in item_history.items():
        if len(history) >= threshold:
            good_items.add(item_id)
    
    user_history = {
        user_id: list(filter(lambda x: x['item_id'] in good_items, history))
        for user_id, history in user_history.items()
    }
    
    item_history = {
        item_id: list(filter(lambda x: x['user_id'] in good_users, history))
        for item_id, history in item_history.items()
    }
    
    new_state = (len(good_users), len(good_items))
    is_changed = (old_state != new_state)
    print(old_state, new_state)

100%|████████████████████████████████████████████████████████████████████| 1616826/1616826 [00:04<00:00, 384409.54it/s]


(0, 0) (62931, 68495)
(62931, 68495) (49943, 23678)
(49943, 23678) (37550, 21275)
(37550, 21275) (36157, 18294)
(36157, 18294) (34115, 17908)
(34115, 17908) (33806, 17284)
(33806, 17284) (33353, 17193)
(33353, 17193) (33283, 17072)
(33283, 17072) (33181, 17052)
(33181, 17052) (33160, 17025)
(33160, 17025) (33138, 17019)
(33138, 17019) (33136, 17007)
(33136, 17007) (33128, 17007)
(33128, 17007) (33128, 17006)
(33128, 17006) (33126, 17006)
(33126, 17006) (33126, 17006)


In [14]:
user_mapping = {}
item_mapping = {}
tmp_user_history = defaultdict(list)
tmp_item_history = defaultdict(list)

for raw_user_id, history in tqdm(user_history.items()):
    if len(history) >= threshold:
        processed_history = []

        for filtered_item_event in history:
            raw_item_id = filtered_item_event['item_id']
            item_timestamp = filtered_item_event['timestamp']

            processed_item_id = item_mapping.get(raw_item_id, len(item_mapping) + 1)
            item_mapping[raw_item_id] = processed_item_id

            processed_history.append({'item_id': processed_item_id, 'timestamp': item_timestamp})
        
        processed_user_id = user_mapping.get(raw_user_id, len(user_mapping) + 1)
        user_mapping[raw_user_id] = processed_user_id

        tmp_user_history[processed_user_id] = sorted(processed_history, key=lambda x: x['timestamp'])

    
for raw_item_id, history in tqdm(item_history.items()):
    if len(history) >= threshold:
        processed_history = []

        for filtered_user_event in history:
            raw_user_id = filtered_user_event['user_id']
            user_timestamp = filtered_user_event['timestamp']

            processed_user_id = user_mapping.get(raw_user_id, len(user_mapping) + 1)
            user_mapping[raw_user_id] = processed_user_id

            processed_history.append({'user_id': processed_user_id, 'timestamp': user_timestamp})


        processed_item_id = item_mapping.get(raw_item_id, len(item_mapping) + 1)
        item_mapping[raw_item_id] = processed_item_id

        tmp_item_history[processed_item_id] = sorted(processed_history, key=lambda x: x['timestamp'])

train_user_history = tmp_user_history
train_item_history = tmp_item_history

100%|█████████████████████████████████████████████████████████████████████| 995667/995667 [00:00<00:00, 1352809.19it/s]
100%|██████████████████████████████████████████████████████████████████████| 217231/217231 [00:00<00:00, 532429.86it/s]


In [15]:
print('Users count:', len(user_mapping))
print('Items count:', len(item_mapping))
print('Actions count:', sum(list(map(lambda x: len(x), user_history.values()))))
print('Avg train user history len:', np.mean(list(map(lambda x: len(x), train_user_history.values()))))
print('Avg train item history len:', np.mean(list(map(lambda x: len(x), train_user_history.values()))))

Users count: 33126
Items count: 17006
Actions count: 926523
Avg train user history len: 6.8857694862041905
Avg train item history len: 6.8857694862041905


In [16]:
train_data_filtered = train_data[train_data.user_id.isin(user_mapping) & train_data.item_id.isin(item_mapping)]
validation_data_filtered = validation_data[validation_data.user_id.isin(user_mapping) & validation_data.item_id.isin(item_mapping)]
test_data_filtered = test_data[test_data.user_id.isin(user_mapping) & test_data.item_id.isin(item_mapping)]

print(f'Train data. Before: {train_data.shape}. After: {train_data_filtered.shape}')
print(f'Validation data. Before: {validation_data.shape}. After: {validation_data_filtered.shape}')
print(f'Test data. Before: {test_data.shape}. After: {test_data_filtered.shape}')

Train data. Before: (1616826, 4). After: (228098, 4)
Validation data. Before: (202506, 4). After: (9622, 4)
Test data. Before: (203738, 4). After: (5104, 4)


In [21]:
# Save train data (TODO do we need to duplicate: I think yes)
with open('../data/Beauty/train_new.txt', 'w') as f:
    cnt = 0
    cnt_added = 0
    for user_id, history in train_user_history.items():
        previous_history = []
        
        for item_event in sorted(history, key=lambda x: x['timestamp']):
            if len(previous_history) + 1 >= threshold:
                f.write(' '.join([str(user_id)] + previous_history + [str(item_event['item_id'])]))
                f.write('\n')
                cnt_added += 1
            
            previous_history.append(str(item_event['item_id']))
            cnt += 1
            
    assert cnt == train_data_filtered.shape[0]
    assert cnt == cnt_added + (len(user_mapping) * (threshold - 1))

In [22]:
validation_list = []

for _, row in tqdm(validation_data_filtered.iterrows()):
    validation_list.append({
        'user_id': int(row.user_id),
        'item_id': int(row.item_id),
        'timestamp': int(row.timestamp)
    })
validation_list = sorted(validation_list, key=lambda x: x['timestamp'])
print(len(validation_list))


# Add events to `validation_user_history`
validation_user_history = defaultdict(list)
for row in tqdm(validation_list):
    user_raw_id = row['user_id']
    item_raw_id = row['item_id']
    interaction_timestamp = row['timestamp']
    validation_user_history[user_raw_id].append({'item_id': item_raw_id, 'timestamp': interaction_timestamp})

# Re-number user and item IDs
tmp_user_history = defaultdict(list)
for user_id, history in tqdm(validation_user_history.items()):
    processed_user_id = user_mapping[user_id]
    
    processed_history = []
    for item_event in history:
        item_id = item_event['item_id']
        item_timestamp = item_event['timestamp']
        processed_item_id = item_mapping[item_id]
        processed_history.append({'item_id': processed_item_id, 'timestamp': item_timestamp})
        
    tmp_user_history[processed_user_id] = sorted(processed_history, key=lambda x: x['timestamp'])
validation_user_history = tmp_user_history

9622it [00:00, 24793.39it/s]


9622


100%|██████████████████████████████████████████████████████████████████████████| 9622/9622 [00:00<00:00, 961971.57it/s]
100%|██████████████████████████████████████████████████████████████████████████| 4569/4569 [00:00<00:00, 285476.84it/s]


In [23]:
# Save validation data
with open('../data/Beauty/validation_new.txt', 'w') as f:
    cnt = 0
    for user_id, validation_history in validation_user_history.items():
        previous_history = [
            str(event['item_id']) for event in sorted(train_user_history[user_id], key=lambda x: x['timestamp'])
        ]
        assert len(previous_history) > 0
        
        for validation_item_event in  sorted(validation_history, key=lambda x: x['timestamp']):
            assert len(previous_history) + 1 >= threshold
            
            f.write(' '.join([str(user_id)] + previous_history + [str(validation_item_event['item_id'])]))
            f.write('\n')
            
            previous_history.append(str(validation_item_event['item_id']))
            cnt += 1
            
    assert cnt == validation_data_filtered.shape[0]

In [24]:
test_list = []

for _, row in tqdm(test_data_filtered.iterrows()):
    test_list.append({
        'user_id': int(row.user_id),
        'item_id': int(row.item_id),
        'timestamp': int(row.timestamp)
    })
test_list = sorted(test_list, key=lambda x: x['timestamp'])
print(len(test_list))

# Add events to `test_user_history`
test_user_history = defaultdict(list)
for row in tqdm(test_list):
    user_raw_id = row['user_id']
    item_raw_id = row['item_id']
    interaction_timestamp = row['timestamp']
    test_user_history[user_raw_id].append({'item_id': item_raw_id, 'timestamp': interaction_timestamp})

# Re-number user and item IDs
tmp_user_history = defaultdict(list)
for user_id, history in tqdm(test_user_history.items()):
    processed_user_id = user_mapping[user_id]
    
    processed_history = []
    for item_event in history:
        item_id = item_event['item_id']
        item_timestamp = item_event['timestamp']
        processed_item_id = item_mapping[item_id]
        processed_history.append({'item_id': processed_item_id, 'timestamp': item_timestamp})
        
    tmp_user_history[processed_user_id] = sorted(processed_history, key=lambda x: x['timestamp'])
test_user_history = tmp_user_history

5104it [00:00, 24651.41it/s]


5104


100%|█████████████████████████████████████████████████████████████████████████| 5104/5104 [00:00<00:00, 1020533.33it/s]
100%|██████████████████████████████████████████████████████████████████████████| 2910/2910 [00:00<00:00, 264490.10it/s]


In [27]:
# Save test data
with open('../data/Beauty/test_new.txt', 'w') as f:
    cnt = 0
    for user_id, test_history in test_user_history.items():
        train_history = [
            str(event['item_id']) for event in sorted(train_user_history[user_id], key=lambda x: x['timestamp'])
        ]
        validation_history = [
            str(event['item_id']) for event in sorted(validation_user_history[user_id], key=lambda x: x['timestamp'])
        ]
        previous_history = train_history + validation_history
        assert len(train_history) > 0
        
        for test_item_event in sorted(test_history, key=lambda x: x['timestamp']):
            f.write(' '.join([str(user_id)] + previous_history + [str(test_item_event['item_id'])]))
            f.write('\n')
            
            previous_history.append(str(test_item_event['item_id']))
            cnt += 1
            
    assert cnt == test_data_filtered.shape[0]

### All data

In [9]:
df.head()

Unnamed: 0,user_id,item_id,rating,timestamp
0,1,1,5.0,1369699200
1,2,2,3.0,1355443200
2,3,2,5.0,1404691200
3,4,3,4.0,1382572800
4,5,4,1.0,1274227200


In [10]:
data = []

for _, row in tqdm(df.iterrows()):
    data.append({
        'user_id': int(row.user_id),
        'item_id': int(row.item_id),
        'timestamp': int(row.timestamp)
    })

print(len(data))

2023070it [01:21, 24916.59it/s]

2023070





In [11]:
user_history = defaultdict(list)
item_history = defaultdict(list)

for row in tqdm(data):
    user_raw_id = row['user_id']
    item_raw_id = row['item_id']
    interaction_timestamp = row['timestamp']
    
    user_history[user_raw_id].append({'item_id': item_raw_id, 'timestamp': interaction_timestamp})
    item_history[item_raw_id].append({'user_id': user_raw_id, 'timestamp': interaction_timestamp})

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2023070/2023070 [00:04<00:00, 467192.47it/s]


In [12]:
is_changed = True
threshold = 5
good_users = set()
good_items = set()


while is_changed:
    old_state = (len(good_users), len(good_items))
    
    good_users = set()
    good_items = set()

    for user_id, history in user_history.items():
        if len(history) >= threshold:
            good_users.add(user_id)

    for item_id, history in item_history.items():
        if len(history) >= threshold:
            good_items.add(item_id)
    
    user_history = {
        user_id: list(filter(lambda x: x['item_id'] in good_items, history))
        for user_id, history in user_history.items()
    }
    
    item_history = {
        item_id: list(filter(lambda x: x['user_id'] in good_users, history))
        for item_id, history in item_history.items()
    }
    
    new_state = (len(good_users), len(good_items))
    is_changed = (old_state != new_state)
    print(old_state, new_state)

(0, 0) (52374, 67345)
(52374, 67345) (40226, 19369)
(40226, 19369) (27501, 17041)
(27501, 17041) (26116, 13727)
(26116, 13727) (23746, 13318)
(23746, 13318) (23436, 12562)
(23436, 12562) (22787, 12458)
(22787, 12458) (22705, 12247)
(22705, 12247) (22505, 12224)
(22505, 12224) (22480, 12153)
(22480, 12153) (22408, 12140)
(22408, 12140) (22401, 12116)
(22401, 12116) (22374, 12114)
(22374, 12114) (22372, 12103)
(22372, 12103) (22364, 12103)
(22364, 12103) (22364, 12101)
(22364, 12101) (22363, 12101)
(22363, 12101) (22363, 12101)


In [13]:
user_mapping = {}
item_mapping = {}
tmp_user_history = defaultdict(list)
tmp_item_history = defaultdict(list)

for user_id, history in tqdm(user_history.items()):
    processed_history = []

    for filtered_item in history:
        item_id = filtered_item['item_id']
        item_timestamp = filtered_item['timestamp']

        processed_item_id = item_mapping.get(item_id, len(item_mapping) + 1)
        item_mapping[item_id] = processed_item_id

        processed_history.append({'item_id': processed_item_id, 'timestamp': item_timestamp})
        
    if len(processed_history) >= threshold:
        processed_user_id = user_mapping.get(user_id, len(user_mapping) + 1)
        user_mapping[user_id] = processed_user_id

        tmp_user_history[processed_user_id] = sorted(processed_history, key=lambda x: x['timestamp'])

    
for item_id, history in tqdm(item_history.items()):
    processed_history = []

    for filtered_user in history:
        user_id = filtered_user['user_id']
        user_timestamp = filtered_user['timestamp']

        processed_user_id = user_mapping.get(user_id, len(user_mapping) + 1)
        user_mapping[user_id] = processed_user_id

        processed_history.append({'user_id': processed_user_id, 'timestamp': user_timestamp})

    if len(processed_history) >= threshold:
        processed_item_id = item_mapping.get(item_id, len(item_mapping) + 1)
        item_mapping[item_id] = processed_item_id

        tmp_item_history[processed_item_id] = sorted(processed_history, key=lambda x: x['timestamp'])

user_history = tmp_user_history
item_history = tmp_item_history

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1210271/1210271 [00:01<00:00, 830479.23it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 249274/249274 [00:00<00:00, 565247.80it/s]


In [14]:
print('Users count:', len(user_mapping))
print('Items count:', len(item_mapping))
print('Actions count:', sum(list(map(lambda x: len(x), user_history.values()))))
print('Avg user history len:', np.mean(list(map(lambda x: len(x), user_history.values()))))
print('Avg item history len:', np.mean(list(map(lambda x: len(x), item_history.values()))))

Users count: 22363
Items count: 12101
Actions count: 198502
Avg user history len: 8.876358270357287
Avg item history len: 16.403768283612923


In [15]:
with open('../data/Beauty/all_data.txt', 'w') as f:
    for user_id, item_history in user_history.items():
        f.write(' '.join([str(user_id)] + [
            str(item_event['item_id']) for item_event in sorted(item_history, key=lambda x: x['timestamp'])
        ]))
        f.write('\n')

In [30]:
path_to_df = '../data/Beauty/Beauty_5.json'

df = defaultdict(list)

with open(path_to_df, 'r') as f:
    for line in f.readlines():
        review = json.loads(line)
        df['user_id'].append(review['reviewerID'])
        df['item_id'].append(review['asin'])
        df['timestamp'].append(review['unixReviewTime'])

print(f'Number of events: {len(df["user_id"])}')

df = pd.DataFrame.from_dict(df)

Number of events: 198502


In [31]:
df.head()

Unnamed: 0,user_id,item_id,timestamp
0,A1YJEY40YUW4SE,7806397051,1391040000
1,A60XNB876KYML,7806397051,1397779200
2,A3G6XNM240RMWA,7806397051,1378425600
3,A1PQFP6SAJ6D80,7806397051,1386460800
4,A38FVHZTNQ271F,7806397051,1382140800


In [32]:
df.isnull().sum()

user_id      0
item_id      0
timestamp    0
dtype: int64

In [33]:
df.user_id.max(), df.user_id.unique().shape

('AZZZLM1E5JJ8C', (22363,))

In [34]:
df.user_id = pd.factorize(df.user_id)[0] + 1
df.user_id.min(), df.user_id.max(), df.user_id.unique().shape

(1, 22363, (22363,))

In [35]:
df.item_id = pd.factorize(df.item_id)[0] + 1
df.item_id.min(), df.item_id.max(), df.item_id.unique().shape

(1, 12101, (12101,))

In [36]:
df.head()

Unnamed: 0,user_id,item_id,timestamp
0,1,1,1391040000
1,2,1,1397779200
2,3,1,1378425600
3,4,1,1386460800
4,5,1,1382140800


In [37]:
df.user_id.nunique()

22363

In [38]:
df.item_id.nunique()

12101

In [39]:
df.head()

Unnamed: 0,user_id,item_id,timestamp
0,1,1,1391040000
1,2,1,1397779200
2,3,1,1378425600
3,4,1,1386460800
4,5,1,1382140800


In [40]:
data = []

for _, row in tqdm(df.iterrows()):
    data.append({
        'user_id': int(row.user_id),
        'item_id': int(row.item_id),
        'timestamp': int(row.timestamp)
    })

print(len(data))

198502it [00:07, 25402.56it/s]

198502





In [41]:
user_history = defaultdict(list)
item_history = defaultdict(list)

for row in tqdm(data):
    user_raw_id = row['user_id']
    item_raw_id = row['item_id']
    interaction_timestamp = row['timestamp']
    
    user_history[user_raw_id].append({'item_id': item_raw_id, 'timestamp': interaction_timestamp})
    item_history[item_raw_id].append({'user_id': user_raw_id, 'timestamp': interaction_timestamp})

100%|██████████████████████████████████████████████████████████████████████| 198502/198502 [00:00<00:00, 672833.04it/s]


In [42]:
is_changed = True
threshold = 5
good_users = set()
good_items = set()


while is_changed:
    old_state = (len(good_users), len(good_items))
    
    good_users = set()
    good_items = set()

    for user_id, history in user_history.items():
        if len(history) >= threshold:
            good_users.add(user_id)

    for item_id, history in item_history.items():
        if len(history) >= threshold:
            good_items.add(item_id)
    
    user_history = {
        user_id: list(filter(lambda x: x['item_id'] in good_items, history))
        for user_id, history in user_history.items()
    }
    
    item_history = {
        item_id: list(filter(lambda x: x['user_id'] in good_users, history))
        for item_id, history in item_history.items()
    }
    
    new_state = (len(good_users), len(good_items))
    is_changed = (old_state != new_state)
    print(old_state, new_state)

(0, 0) (22363, 12101)
(22363, 12101) (22363, 12101)


In [43]:
user_mapping = {}
item_mapping = {}
tmp_user_history = defaultdict(list)
tmp_item_history = defaultdict(list)

for user_id, history in tqdm(user_history.items()):
    processed_history = []

    for filtered_item in history:
        item_id = filtered_item['item_id']
        item_timestamp = filtered_item['timestamp']

        processed_item_id = item_mapping.get(item_id, len(item_mapping) + 1)
        item_mapping[item_id] = processed_item_id

        processed_history.append({'item_id': processed_item_id, 'timestamp': item_timestamp})
        
    if len(processed_history) >= threshold:
        processed_user_id = user_mapping.get(user_id, len(user_mapping) + 1)
        user_mapping[user_id] = processed_user_id

        tmp_user_history[processed_user_id] = sorted(processed_history, key=lambda x: x['timestamp'])

    
for item_id, history in tqdm(item_history.items()):
    processed_history = []

    for filtered_user in history:
        user_id = filtered_user['user_id']
        user_timestamp = filtered_user['timestamp']

        processed_user_id = user_mapping.get(user_id, len(user_mapping) + 1)
        user_mapping[user_id] = processed_user_id

        processed_history.append({'user_id': processed_user_id, 'timestamp': user_timestamp})

    if len(processed_history) >= threshold:
        processed_item_id = item_mapping.get(item_id, len(item_mapping) + 1)
        item_mapping[item_id] = processed_item_id

        tmp_item_history[processed_item_id] = sorted(processed_history, key=lambda x: x['timestamp'])

user_history = tmp_user_history
item_history = tmp_item_history

100%|█████████████████████████████████████████████████████████████████████████| 22363/22363 [00:00<00:00, 90005.73it/s]
100%|█████████████████████████████████████████████████████████████████████████| 12101/12101 [00:00<00:00, 65768.00it/s]


In [44]:
print('Users count:', len(user_mapping))
print('Items count:', len(item_mapping))
print('Actions count:', sum(list(map(lambda x: len(x), user_history.values()))))
print('Avg user history len:', np.mean(list(map(lambda x: len(x), user_history.values()))))
print('Avg item history len:', np.mean(list(map(lambda x: len(x), item_history.values()))))

Users count: 22363
Items count: 12101
Actions count: 198502
Avg user history len: 8.876358270357287
Avg item history len: 16.403768283612923


In [45]:
with open('../data/Beauty/all_data.txt', 'w') as f:
    for user_id, item_history in user_history.items():
        f.write(' '.join([str(user_id)] + [
            str(item_event['item_id']) for item_event in sorted(item_history, key=lambda x: x['timestamp'])
        ]))
        f.write('\n')