### Recommendations Engine (LightFM Version)
This is the main notebook for the recommendations engine. It uses LightFM to generate course recommendations for a user from the course-interaction data.

In [1]:
import pandas as pd
import numpy as np
from lightfm import LightFM
from lightfm.data import  Dataset



In [2]:
# Course catalogue, without the createdAt/updatedAt/deletedAt and description columns.
course_df = pd.read_csv('data/courses.csv').drop(
    columns=['createdAt', 'updatedAt', 'deletedAt', 'description']
)

# Raw interaction log, without the id and createdAt/updatedAt/deletedAt columns.
course_interaction_df = pd.read_csv('data/course_interactions.csv').drop(
    columns=['id', 'createdAt', 'updatedAt', 'deletedAt']
)


In [3]:
# Collapse the interaction log to one row per (userId, courseId) pair; the new
# 'interactions' column holds how many log rows each pair had.
# NOTE: this rebinds course_interaction_df, so re-running this cell without
# first re-running the load cell counts the already-aggregated rows instead.
pair_counts = course_interaction_df.groupby(['userId', 'courseId']).size()
course_interaction_df = pair_counts.reset_index(name='interactions')
course_interaction_df


Unnamed: 0,userId,courseId,interactions
0,2,53,1
1,2,54,1
2,2,55,1
3,2,56,1
4,2,100,3
...,...,...,...
97,7,4730,1
98,7,4741,1
99,7,4764,1
100,7,4789,1


In [4]:
# Disabled experiment, kept for reference: it builds every (userId, course id)
# combination, left-joins the observed interaction counts onto it, and fills
# the unobserved pairs with 0. Re-enabling it requires `import itertools`,
# which the import cell does not currently provide.
# test_df = pd.DataFrame(list(itertools.product(set(course_interaction_df['userId']), set(course_df['id']))), columns=['userId', 'courseId']).merge(course_interaction_df, how='left', on=['userId', 'courseId'])
# test_df.fillna(0, inplace=True)
# test_df

In [5]:
# LightFM Dataset: registers the users, items and item features that the
# interaction and feature matrices are later built from.
dataset = Dataset()

# First pass: every user and course that appears in the interaction data.
dataset.fit(
    users=course_interaction_df['userId'],
    items=course_interaction_df['courseId'],
)

# Second pass: the course catalogue's ids, plus each course name registered
# as an item feature.
dataset.fit_partial(
    items=course_df['id'],
    item_features=course_df['name'],
)

In [6]:
# Sanity check: report how many users and items the dataset has registered.
interaction_dims = dataset.interactions_shape()
num_users, num_items = interaction_dims
print('Num users: {}, num_items {}.'.format(num_users, num_items))

Num users: 6, num_items 3255.


In [7]:
# Build the sparse user x item interaction matrix (and its companion weights
# matrix) from the (userId, courseId) pairs.
# The columns are zipped directly instead of using DataFrame.iterrows(), which
# constructs a full Series for every row just to read two values from it.
# NOTE(review): the per-pair 'interactions' count is not passed as a weight
# here, so `weights` only carries LightFM's default weight -- confirm intent.
user_course_pairs = zip(
    course_interaction_df['userId'],
    course_interaction_df['courseId'],
)
(interactions, weights) = dataset.build_interactions(user_course_pairs)

print(repr(interactions))

<6x3255 sparse matrix of type '<class 'numpy.int32'>'
	with 102 stored elements in COOrdinate format>


In [8]:
# Build the sparse item-feature matrix: each course gets its own name as its
# single explicit feature.
# The id and name columns are zipped directly instead of using
# DataFrame.iterrows(), which constructs a full Series for every row.
course_name_features = (
    (course_id, [course_name])
    for course_id, course_name in zip(course_df['id'], course_df['name'])
)
item_features = dataset.build_item_features(course_name_features)

print(repr(item_features))

<3255x6510 sparse matrix of type '<class 'numpy.float32'>'
	with 6510 stored elements in Compressed Sparse Row format>


In [9]:
# Train a LightFM model with the WARP loss on the interaction matrix, using
# the course-name item features, for 10 epochs.
# A fixed random_state seeds LightFM's parameter initialisation and data
# shuffling, so Restart & Run All yields the same model each time.
RANDOM_SEED = 42
model = LightFM(loss='warp', random_state=RANDOM_SEED)
model.fit(interactions, item_features=item_features, epochs=10)

: 

: 

In [None]:
# Score every course for one user and show the ten highest-scoring course names.
n_users, n_items = interactions.shape

# LightFM's predict() works on the dataset's internal indices, not raw ids.
# NOTE(review): 5 is kept as an internal user index, exactly as before; if the
# raw userId 5 was meant, use dataset.mapping()[0][5] instead -- confirm.
user_index = 5

# The model was fitted with item_features, so the same matrix has to be given
# to predict(); without it the course-name feature embeddings are left out of
# the scores.
scores = model.predict(
    user_index,
    np.arange(n_items),
    item_features=item_features,
)

# scores[i] belongs to internal item index i. That ordering is set by the
# dataset (interaction courseIds first, then the remaining catalogue ids), not
# by the row order of course_df, so indices are translated back to raw course
# ids before the names are looked up.
_, _, item_id_map, _ = dataset.mapping()
course_id_by_index = {index: course_id for course_id, index in item_id_map.items()}
name_by_course_id = dict(zip(course_df['id'], course_df['name']))

ranked_course_ids = [course_id_by_index[i] for i in np.argsort(-scores)]
top_items = pd.Series(
    # .get() leaves a missing value for any item id absent from course_df.
    [name_by_course_id.get(course_id) for course_id in ranked_course_ids],
    index=ranked_course_ids,
    name='name',
)
top_items.head(10)