In [1]:
import pandas as pd
import json
import random

### Data Loading

In [2]:
sessions = pd.read_csv('data/sessions.csv')
sessions = sessions.sample(10000) # neem niet de hele csv maar slechts 10000 random 

In [11]:
prev_viewed = pd.read_csv('data/profiles_previously_viewed.csv', encoding = "ISO-8859-1") # zorg dat je de juiste encoding gebruikt om de data te lezen.

In [9]:
profiles = pd.read_csv('data/profiles.csv')
profiles = profiles[profiles["id"].isin(sessions["profid"])] # only take the profiles which occurred in the chosen sessions

In [10]:
products = pd.read_csv('data/products.csv', encoding = "ISO-8859-1")

### User 2 user (collaborative filtering)

In [6]:
# groepeer per segment, en verzamel similar users
similar_users = sessions.groupby('segment')['profid'].apply(list).reset_index()

In [7]:
def get_segment_for_profid(sessions, profid):
    """Deze functie get het segment voor een gegeven profile."""
    
    segment = sessions[sessions['profid']==profid]['segment']
    
    try:
        return segment.iloc[0]
    except:
        return
    
    
def get_prev_viewed_for_profid(prev_viewed, profid):
    """Deze functie get de previously viewed products voor een gegeven profile."""
    viewed_products = prev_viewed[prev_viewed['profid']==profid]
    return viewed_products


def recommend_user2user(prev_viewed, to_recommend_profile):
    """Deze functie doet een user2user recommendation."""
    
    # bepaal eerst in welk segment de user zit
    segment = get_segment_for_profid(sessions, to_recommend_profile)

    # pak vervolgens alle users in dat segment BEHALVE de user zelf
    users_in_segment = similar_users[similar_users['segment']==segment]['profid'].iloc[0]
    users_in_segment = set(users_in_segment)
    users_in_segment = users_in_segment - set([to_recommend_profile])
    users_in_segment = list(users_in_segment)

    # loop door alle users in dat segment TOTDAT er iemand is die óók products heeft bekeken.
    # wanneer je die user vindt, recommenden we simpelweg die producten.
    for u in users_in_segment:
        viewed = get_prev_viewed_for_profid(prev_viewed, u)

        if len(viewed) >= 1:
            recommendations = viewed[['propid']].reset_index(drop=True)
            recommendations.columns = ['recommended_products']
            return recommendations

In [12]:
profiles

Unnamed: 0,id,latestactivity,segment
0,5a393d68ed295900010384ca,2019-01-13 14:08:33.995000,BOUNCER
63,5a397129a825610001bbb8b5,2019-01-21 18:22:19.123000,BROWSER
283,5a3a12fced2959000104238e,2018-09-12 14:56:00.233000,BROWSER
426,5a3a2444ed29590001043354,2018-11-19 08:24:06.722000,JUDGER
544,5a3a2e20a825610001bc2bcd,2017-12-20 09:33:18.266000,
...,...,...,...
2079591,5c482b8849b5900001f411b3,2019-01-23 08:54:52.371000,
2079930,5c483e4049b5900001f42c25,2019-01-23 10:13:30.143000,
2080012,5c4734a2d8f58d0001a0f4ec,2019-01-22 15:20:40.511000,BROWSER
2080188,5c473eb9d8f58d0001a10246,,


In [13]:
to_recommend_profile = "5a393d68ed295900010384ca"
recommend_user2user(prev_viewed, to_recommend_profile)

Unnamed: 0,recommended_products
0,39408
1,35419
2,35431


### Item 2 item (content filtering)

In [14]:
def get_product_category_for_pid(products, pid):
    category = products[products['id']==pid]["category"]
    
    try:
        return category.iloc[0]
    except:
        return

In [15]:
def recommend_item2item(prev_viewed, similar_products, products, to_recommend_profile, amount=4):
    """Deze functie doet een recommendation obv item2item."""
    
    # check welke producten de user voorheen heeft bekeken
    previouly_viewed_by_user = get_prev_viewed_for_profid(prev_viewed, to_recommend_profile)

    # als er producten bekeken zijn, pak dan het eerste product.
    if len(previouly_viewed_by_user) >= 1:
        product_to_compare = previouly_viewed_by_user.iloc[0]['propid']
        
    else: # als er geen producten bekeken zijn, kunnen we geen item2item recommendation maken.
        print("De user heeft nog niks bekeken of bestaat niet.")
        return
    
    # get de category voor dat specifieke product
    category = get_product_category_for_pid(products, product_to_compare)

    # check welke andere producten er in die categorie zitten
    products_in_category = set(similar_products[similar_products['category']==category]['id'].iloc[0])
    
    # pak ALLE producten uit die category, BEHALVE de bekeken producten
    recommended_products = products_in_category - set(previouly_viewed_by_user['propid'])
    
    # recommend een x aantal random producten uit diezelfde categorie.
    try:
        recommended_products = random.sample(recommended_products, amount)
        return recommended_products
    except:
        print('Er ging iets mis.')
        return

In [16]:
# groepeer per category, en verzamel similar products
similar_products = products.groupby('category')["id"].apply(list).reset_index()


In [19]:
to_recommend_profile = "5a393d68ed295900010384ca"
recommend_item2item(prev_viewed, similar_products, products, to_recommend_profile, amount=4)

since Python 3.9 and will be removed in a subsequent version.
  recommended_products = random.sample(recommended_products, amount)


['21239', '26378', '38423-3pack', '40320']