In [1]:
import pandas as pd

# Read the three pre-processed CSV tables (user features, item features and
# user-item interactions) and bind each one to its own name.
user_features, item_features, interactions = (
    pd.read_csv(csv_path)
    for csv_path in (
        "C:/Users/Alikh/recsys-project/sql/users_features.csv",
        "C:/Users/Alikh/recsys-project/sql/item_features.csv",
        "C:/Users/Alikh/recsys-project/sql/interactions.csv",
    )
)

# Preview the first rows of every table, one after another.
print(user_features.head(), item_features.head(), interactions.head(), sep="\n")

   user_id  age country  country_encoded
0        1   42      JP                0
1        2   38      JP                0
2        3   31      JP                0
3        4   20      NL                1
4        5   42      US                2
   item_id    title  genre_Action  genre_Comedy  genre_Drama  genre_Sci-Fi
0        1  Movie 1         False         False        False          True
1        2  Movie 2         False         False        False          True
2        3  Movie 3         False         False        False          True
3        4  Movie 4          True         False        False         False
4        5  Movie 5         False         False        False          True
   user_id  item_id  rating
0        7        7       4
1        4       11       3
2        1        7       1
3        6        1       2
4        5       10       5


In [2]:
# Build the user x item interaction matrix.
# LightFM expects interactions as a scipy sparse matrix.

from scipy.sparse import coo_matrix

# Map raw IDs to contiguous 0-based matrix indices. De-duplicating first keeps
# every index strictly below the matrix dimension even when a feature table
# repeats an ID (enumerating the raw column could otherwise overshoot it).
user_id_map = {
    uid: idx for idx, uid in enumerate(user_features["user_id"].drop_duplicates())
}
item_id_map = {
    iid: idx for idx, iid in enumerate(item_features["item_id"].drop_duplicates())
}

# Matrix dimensions are derived from the maps so the two can never disagree.
n_users = len(user_id_map)
n_items = len(item_id_map)

# Translate every interaction into (row, col) coordinates. IDs that are absent
# from the feature tables come back as NaN, which is not a valid matrix index.
rows = interactions["user_id"].map(user_id_map)
cols = interactions["item_id"].map(item_id_map)
ratings = interactions["rating"]

unmapped = rows.isna() | cols.isna()
if unmapped.any():
    raise ValueError(
        f"{int(unmapped.sum())} interaction(s) reference a user_id or item_id "
        "that is missing from the feature tables"
    )

# No NaN is left at this point, so the coordinates can be stored as integers.
rows = rows.astype("int64")
cols = cols.astype("int64")

# The stored values are the raw ratings. COO keeps repeated (user, item) pairs
# as separate entries; scipy sums them if the matrix is converted to CSR/CSC.
interaction_matrix = coo_matrix(
    (ratings, (rows, cols)),
    shape=(n_users, n_items)
)

print(interaction_matrix.shape)

(10, 15)


In [3]:
import numpy as np
import pandas as pd
from lightfm import LightFM
from lightfm.data import Dataset

# Small hand-written tables used to try out the LightFM Dataset API.
# Each tuple is one row; the column names are given separately.
users = pd.DataFrame(
    [
        (1, 48, "NL"),
        (2, 27, "KZ"),
        (3, 49, "KZ"),
        (4, 34, "KZ"),
        (5, 30, "US"),
    ],
    columns=["user_id", "age", "country"],
)

items = pd.DataFrame(
    [
        (1, "Movie 1", "Sci-Fi"),
        (2, "Movie 2", "Drama"),
        (3, "Movie 3", "Action"),
        (4, "Movie 4", "Sci-Fi"),
        (5, "Movie 5", "Drama"),
    ],
    columns=["item_id", "title", "genre"],
)

interactions = pd.DataFrame(
    [
        (2, 4, 2),
        (5, 2, 3),
        (3, 5, 4),
        (1, 3, 5),
        (4, 1, 2),
    ],
    columns=["user_id", "item_id", "rating"],
)

# Register every known user and item ID with a fresh Dataset object.
dataset = Dataset()
dataset.fit(users=users["user_id"], items=items["item_id"])

# Feed the (user_id, item_id, rating) rows to the dataset; it returns two
# objects, bound here to interactions_matrix and weights_matrix.
interactions_matrix, weights_matrix = dataset.build_interactions(
    [tuple(row) for row in interactions.to_numpy()]
)




In [4]:
# WARP = ranking optimization, works well for implicit feedback.
# Only interactions_matrix is passed to fit(); the ratings held in
# weights_matrix are not used for training here.
# A fixed random_state seeds the model's random number generator so that a
# re-run starts from the same state.
# NOTE(review): with num_threads > 1 the fitted values may still vary slightly
# between runs — confirm against the LightFM documentation if exact
# reproducibility matters.
model = LightFM(loss="warp", random_state=42)
model.fit(interactions_matrix, epochs=20, num_threads=2)


: 

In [1]:
import pandas as pd

# Load the interaction log and the item metadata.
interactions = pd.read_csv("C:/Users/Alikh/recsys-project/sql/interactions.csv")
items = pd.read_csv("C:/Users/Alikh/recsys-project/sql/item_features.csv")

# Popularity = number of interaction rows per item, most popular first.
# item_id is used as a secondary sort key so that equally popular items always
# come out in the same order; sorting on "count" alone leaves the relative
# order of ties unspecified.
popularity = (
    interactions.groupby("item_id")
    .size()
    .reset_index(name="count")
    .sort_values(["count", "item_id"], ascending=[False, True])
)

def recommend(user_id, n=5, exclude_seen=False):
    """Return the top-n most popular items as an (item_id, title) table.

    Reads the module-level ``popularity`` table (taking its first rows, so it
    is expected to be ordered most-popular-first), the ``items`` table for
    titles and, when ``exclude_seen`` is set, the ``interactions`` table.

    Parameters
    ----------
    user_id
        User the recommendations are for. Only consulted when
        ``exclude_seen`` is True; otherwise every user gets the same list.
    n : int, default 5
        Maximum number of items to return. Must be non-negative.
    exclude_seen : bool, default False
        When True, items that ``user_id`` already has a row for in
        ``interactions`` are removed before the top-n cut.

    Returns
    -------
    pandas.DataFrame
        Columns ``item_id`` and ``title``, in popularity order. ``title`` is
        missing (NaN) for an item that has no row in ``items``.

    Raises
    ------
    ValueError
        If ``n`` is negative.
    """
    # head() with a negative argument means "all but the last |n| rows", which
    # is never what a top-n request intends, so reject it explicitly.
    if n < 0:
        raise ValueError(f"n must be non-negative, got {n}")

    candidates = popularity
    if exclude_seen:
        seen_item_ids = interactions.loc[interactions["user_id"] == user_id, "item_id"]
        candidates = candidates[~candidates["item_id"].isin(seen_item_ids)]

    # A left merge keeps the popularity order and attaches the titles.
    recs = candidates.head(n).merge(items, on="item_id", how="left")
    return recs[["item_id", "title"]]

# Smoke test: print the five most popular items for user 1.
print(recommend(n=5, user_id=1))


   item_id    title
0        4  Movie 4
1        7  Movie 7
2        1  Movie 1
3        2  Movie 2
4        3  Movie 3
