The code in this notebook follows the blog post: https://ml-gis-service.com/index.php/2023/09/23/which-movie-should-you-recommend-next-session-based-recommendation-engine-in-python-part-1/

In [5]:
import numpy as np
from wsknn import fit
from wsknn.preprocessing.parse_static import parse_flat_file

In [2]:
# Tab-separated input file read from the ml-100k directory.
fpath = 'ml-100k/u.data'

# Parse the flat file: column 0 is used as the session id, column 1 as the
# product id and column 3 as the event time (converted to a numeric value).
ds = parse_flat_file(
    fpath,
    sep='\t',
    session_index=0,
    product_index=1,
    time_index=3,
    time_to_numeric=True,
)

In [8]:
# Print the text summary of the second element returned by parse_flat_file.
print(ds[1])

Sessions object statistics:
*Number of unique sessions: 943
*The longest event stream size per session: 737
*Period start: 1997-09-20T05:05:10.000000Z
*Period end: 1998-04-23T01:10:38.000000Z


In [26]:
def train_validate_samples(set_of_sessions, validation_fraction=0.1, seed=None):
    """Split sessions into a training mapping and a held-out validation list.

    Session keys are drawn *without* replacement, so no session can appear
    twice in the validation list and
    ``len(training_set) + len(validation_set) == len(set_of_sessions)``.

    Parameters
    ----------
    set_of_sessions : dict
        Mapping of session key -> session data.
    validation_fraction : float, default 0.1
        Share of sessions to hold out. The number of held-out sessions is
        ``int(validation_fraction * len(set_of_sessions))``.
    seed : int, optional
        When given, the split is reproducible. When omitted, NumPy's global
        random state is used (so ``np.random.seed`` still applies).

    Returns
    -------
    training_set : dict
        The input mapping without the held-out sessions.
    validation_set : list
        Session data (values of the input mapping) of the held-out sessions.

    Raises
    ------
    ValueError
        If ``validation_fraction`` is outside the ``[0, 1]`` range.
    """
    if not 0 <= validation_fraction <= 1:
        raise ValueError('validation_fraction must be between 0 and 1')

    sessions_keys = list(set_of_sessions.keys())
    n_sessions = int(validation_fraction * len(sessions_keys))

    # Nothing to hold out (empty input or a very small fraction).
    if n_sessions == 0:
        return dict(set_of_sessions), []

    rng = np.random if seed is None else np.random.RandomState(seed)

    # Sample positions instead of the keys themselves: the keys keep their
    # original Python type, and replace=False guarantees unique sessions
    # (np.random.choice samples WITH replacement by default).
    picked_positions = rng.choice(len(sessions_keys), size=n_sessions, replace=False)
    validation_keys = [sessions_keys[int(pos)] for pos in picked_positions]

    # A set gives constant-time membership checks while filtering.
    held_out = set(validation_keys)

    training_set = {
        _key: _session
        for _key, _session in set_of_sessions.items()
        if _key not in held_out
    }
    validation_set = [set_of_sessions[_key] for _key in validation_keys]

    return training_set, validation_set

In [27]:
# Split the sessions mapping held by the parsed object into a training part
# and a validation part.
training_ds, validation_ds = train_validate_samples(ds[1].session_items_actions_map)

In [25]:
# Fit the recommender on the training sessions only.
# NOTE(review): the saved execution counts show this cell was last run before
# the train/validation split cell; re-run it after the split so that `model`
# is fitted on the current `training_ds`.
# NOTE(review): the meaning of each keyword below is defined by wsknn's `fit`;
# confirm against the library documentation before tuning.
model = fit(sessions=training_ds,
            number_of_recommendations=5,
            number_of_neighbors=10,
            sampling_strategy='recent',
            sample_size=50,
            weighting_func='log',
            ranking_strategy='log',
            return_events_from_session=False,
            recommend_any=False)

In [28]:
def get_movie_name(movie_id: str, items_path: str = 'ml-100k/u.item'):
    """Look up a movie title by its id in a pipe-delimited item file.

    Each line of the file is expected to start with ``<id>|<title>|...``.

    Parameters
    ----------
    movie_id : str
        Identifier to search for. Non-string ids (e.g. ints) are converted
        with ``str()`` before comparison, so they match the text in the file.
    items_path : str, default 'ml-100k/u.item'
        Path to the item file, read with ISO-8859-1 encoding.

    Returns
    -------
    str or None
        The title stored in the second field of the first matching line, or
        ``None`` when no line has the requested id.
    """
    wanted_id = str(movie_id)

    with open(items_path, 'r', encoding="ISO-8859-1") as fin:
        for line in fin:
            # Only the first two fields matter; strip the newline so a
            # two-field line does not return a title ending in '\n'.
            fields = line.rstrip('\n').split('|', 2)
            # Skip lines without a title field instead of raising IndexError.
            if len(fields) > 1 and fields[0] == wanted_id:
                return fields[1]

    return None

In [29]:
# For the first three validation sessions, show what was watched and what the
# fitted model recommends next.
for ts in validation_ds[:3]:
    print('User watched')
    # ts[0] is iterated item by item and each entry is resolved to a title.
    print([get_movie_name(x) for x in ts[0]])
    print('Recommendations')
    recs = model.recommend(ts)
    for rec in recs:
        # rec[0] is passed to the title lookup; rec[1] is printed as the weight.
        print(f"Item: {get_movie_name(rec[0])} | weight: {rec[1]}")
    print('---')
    print()

User watched
['French Twist (Gazon maudit) (1995)', 'Sabrina (1954)', 'Brazil (1985)', 'Laura (1944)', 'Twelve Monkeys (1995)', 'Fargo (1996)', 'Smoke (1995)', 'Sunset Blvd. (1950)', 'Secrets & Lies (1996)', 'Bonnie and Clyde (1967)', 'Evita (1996)', 'Boogie Nights (1997)', 'Dial M for Murder (1954)', 'Notorious (1946)', 'Manchurian Candidate, The (1962)', 'Secret of Roan Inish, The (1994)', 'Stand by Me (1986)', 'Mother (1996)', 'Hoop Dreams (1994)', 'Unforgiven (1992)', 'Cape Fear (1991)', 'Lone Star (1996)', 'Emma (1996)', 'Get Shorty (1995)', 'House of the Spirits, The (1993)', 'Braveheart (1995)', 'Dirty Dancing (1987)', 'Liar Liar (1997)', 'M (1931)', 'Shawshank Redemption, The (1994)', 'Barcelona (1994)', '39 Steps, The (1935)', 'Shining, The (1980)', 'Sling Blade (1996)', 'Glory (1989)', 'Substance of Fire, The (1996)', "Singin' in the Rain (1952)", 'City Hall (1996)', 'Clueless (1995)', 'My Left Foot (1989)', '2001: A Space Odyssey (1968)', 'African Queen, The (1951)', 'Abyss,