### Recommendations Engine Light FM Version
This is the main file for the recommendations engine that uses LightFM to predict recommendations for the user.


Note: Try and use this on a linux environment to make sure that everything is working properly.

In [199]:
import pandas as pd
import numpy as np
from scipy import sparse
from lightfm import LightFM
from lightfm.data import  Dataset

In [200]:
# Load the course datasets
course_df = pd.read_csv('data/courses.csv')
course_df.drop(columns=['createdAt', 'updatedAt', 'deletedAt', 'description'], axis=1, inplace=True)
course_interaction_df = pd.read_csv('data/course_interactions.csv')
course_interaction_df.drop(columns=['id','createdAt', 'updatedAt', 'deletedAt'], axis=1, inplace=True)

In [201]:
# Group the course interactions by user and course and add a interactions column
course_interaction_df = course_interaction_df.groupby(['userId', 'courseId']).size().reset_index(name='interactions')
course_interaction_df

Unnamed: 0,userId,courseId,interactions
0,2,53,1
1,2,54,1
2,2,55,1
3,2,56,1
4,2,100,3
...,...,...,...
97,7,4730,1
98,7,4741,1
99,7,4764,1
100,7,4789,1


In [202]:
# Create a lightfm dataset
dataset = Dataset()

# Fit the dataset using the course_interaction dataframe
dataset.fit_partial(
    users=course_interaction_df['userId'],
    items=course_df['id'],
    item_features=course_df['name'],
    user_features=course_interaction_df['interactions']
)

In [203]:
# Check the current shape of the dataset
num_users, num_items = dataset.interactions_shape()
print('Num users: {}, num_items {}.'.format(num_users, num_items))

Num users: 6, num_items 3255.


In [204]:
(interactions, weights) = dataset.build_interactions((
    (x['userId'], x['courseId']) for _, x in course_interaction_df.iterrows()))

print(repr(interactions))

<6x3255 sparse matrix of type '<class 'numpy.int32'>'
	with 102 stored elements in COOrdinate format>


In [205]:
item_features = dataset.build_item_features(((x['id'],[x['name']]) for _, x in course_df.iterrows()))

print(repr(item_features))

<3255x6510 sparse matrix of type '<class 'numpy.float32'>'
	with 6510 stored elements in Compressed Sparse Row format>


In [206]:
user_features = dataset.build_user_features(((x['userId'],[x['interactions']]) for _, x in course_interaction_df.iterrows()))
print(repr(user_features))

<6x7 sparse matrix of type '<class 'numpy.float32'>'
	with 16 stored elements in Compressed Sparse Row format>


In [207]:
model = LightFM(loss='warp', learning_rate=0.05)
model.fit(interactions, 
          item_features=item_features, 
          user_features=user_features,
          epochs=30,)

<lightfm.lightfm.LightFM at 0x7f303e80a050>

In [208]:
n_users, n_items = interactions.shape

scores = model.predict(0, np.arange(n_items))
top_items = course_df['name'][np.argsort(-scores)]

In [209]:
# Try and predict the top 10 courses for a non-existent user
user_feature_map = {
    'interactions': 0,
}
user_feature_list = ['interactions:0']

def format_newuser_input(user_feature_map, user_feature_list):
  #user_feature_map = user_feature_map  
  num_features = len(user_feature_list)
  normalised_val = 1.0 
  target_indices = []
  for feature in user_feature_list:
    try:
        target_indices.append(user_feature_map[feature])
    except KeyError:
        print("new user feature encountered '{}'".format(feature))
        pass
  new_user_features = np.zeros(len(user_feature_map.keys()))
  for i in target_indices:
    new_user_features[i] = normalised_val
  new_user_features = sparse.csr_matrix(new_user_features)
  return(new_user_features)


In [210]:
new_user_features = format_newuser_input(user_feature_map, user_feature_list)
new_user_features.todense()

new user feature encountered 'interactions:0'


matrix([[0.]])

In [215]:
scores = model.predict(0, np.arange(n_items), user_features=new_user_features)
top_items = course_df['name'][np.argsort(-scores)]
top_items[:10]


3115       Kotlin Bootcamp for Programmers | Free Courses
3123    17 Best Courses/Resources to Learn Kotlin Prog...
3111       Top Kotlin Courses Online - Updated [May 2023]
3109                  About the course - Java Programming
3162    Learn Engineering With Online Courses, Classes...
3160               Best Courses Programs in Aviation 2023
12      Free Java Course Online for Beginners | Java P...
3104        Java Programming for Beginners | Free Courses
15      10 Best Java Courses and Certifications for Be...
3107    Java Certification Training Course in Manila, ...
Name: name, dtype: object

In [211]:
interactions.shape

(6, 3255)

#### Let's start saving the model so we can use it later on

In [212]:
import pickle

# Save the dataset
with open('data/dataset.pickle', 'wb') as f:
    pickle.dump(dataset, f, protocol=pickle.HIGHEST_PROTOCOL)
    

with open('data/model.pickle', 'wb') as f:
    pickle.dump(model, f, protocol=pickle.HIGHEST_PROTOCOL)

In [213]:
# Let's try opening the saved model and see if it works
from typing import cast
with open('data/model.pickle', 'rb') as f:
    loaded_model = cast(LightFM, pickle.load(f))
    scores = loaded_model.predict(0, np.arange(n_items))
    top_items = course_df['name'][np.argsort(-scores)]
    print(top_items.head(10))

3115       Kotlin Bootcamp for Programmers | Free Courses
3123    17 Best Courses/Resources to Learn Kotlin Prog...
3104        Java Programming for Beginners | Free Courses
3109                  About the course - Java Programming
15      10 Best Java Courses and Certifications for Be...
16           Learn Java – Free Java Courses for Beginners
3162    Learn Engineering With Online Courses, Classes...
12      Free Java Course Online for Beginners | Java P...
3155         Android Development with Kotlin Course Badge
3134          Railway Engineering, Basic Course - TBA4225
Name: name, dtype: object
