In [None]:
!pip install lightfm

In [None]:
import numpy as np
from lightfm import LightFM
from scipy.sparse import coo_matrix
import pandas as pd

In [None]:
# Collect every restaurant id referenced under any keyword in
# train_data['np2rests'] (duplicates are kept in `listres`).
listres = [
    res
    for kw in train_data['np2rests'].keys()
    for res in train_data['np2rests'][kw].keys()
]

# Materialise both collections as lists so positions can serve as matrix
# indices. Note that despite their names, `keyword_set` and `restaurant_set`
# are lists from here on.
keyword_set = list(keyword_set)
restaurant_set = list(set(listres))

# Sizes: `restaurants` counts (keyword, restaurant) pairs including repeats,
# `num_restaurants` counts distinct restaurants.
restaurants = len(listres)
num_keywords, num_restaurants = len(keyword_set), len(restaurant_set)

In [None]:
# Extract users (`extract_users` and `train_data` come from earlier cells)
train_users, train_users2kw = extract_users(train_data['np2users'])
num_users = len(train_users)

# Map each keyword to its position in `keyword_set` once, so the loop below
# does O(1) dictionary lookups instead of scanning the list with
# `in` / `.index()` for every (user, keyword) pair.
# `setdefault` keeps the first position of a keyword, matching `list.index`.
# NOTE(review): assumes keywords are hashable (e.g. strings) - confirm.
keyword_to_idx = {}
for kw_idx, kw in enumerate(keyword_set):
    keyword_to_idx.setdefault(kw, kw_idx)

# Create user-keyword interaction matrix (sparse), built as COO triplets:
# one entry of value 1 per (user, known keyword) occurrence.
user_keyword_data = []
user_keyword_row = []
user_keyword_col = []
for user_idx, kws in enumerate(train_users2kw):
    for kw in kws:
        kw_idx = keyword_to_idx.get(kw)
        # Keywords that are not part of keyword_set are skipped.
        if kw_idx is not None:
            user_keyword_data.append(1)
            user_keyword_row.append(user_idx)
            user_keyword_col.append(kw_idx)
user_keyword_matrix = coo_matrix((user_keyword_data, (user_keyword_row, user_keyword_col)), shape=(num_users, num_keywords))

In [None]:
# Create keyword-restaurant feature matrix
# Shape is (num_keywords, num_restaurants): entry (k, r) is 1 when restaurant
# r is listed under keyword k in train_data['np2rests'].

# Precompute position lookups once; repeated `list.index()` calls inside the
# loops would rescan the whole list for every pair.
# `setdefault` keeps the first position of a value, matching `list.index`.
keyword_to_idx = {}
for idx, kw in enumerate(keyword_set):
    keyword_to_idx.setdefault(kw, idx)
restaurant_to_idx = {}
for idx, res in enumerate(restaurant_set):
    restaurant_to_idx.setdefault(res, idx)

keyword_restaurant_data = []
keyword_restaurant_row = []
keyword_restaurant_col = []
for kw in train_data['np2rests'].keys():
    # Raises KeyError if a keyword is missing from keyword_set
    # (the list-based lookup raised ValueError in the same situation).
    kw_idx = keyword_to_idx[kw]
    for res in train_data['np2rests'][kw].keys():
        res_idx = restaurant_to_idx[res]
        keyword_restaurant_data.append(1)
        keyword_restaurant_row.append(kw_idx)
        keyword_restaurant_col.append(res_idx)
keyword_restaurant_matrix = coo_matrix((keyword_restaurant_data, (keyword_restaurant_row, keyword_restaurant_col)), shape=(num_keywords, num_restaurants))

In [None]:
# Incorporate ratings from edinburgh.csv
# The file is read unconditionally; a missing file raises from pd.read_csv.
ratings_df = pd.read_csv("edinburgh.csv")

# Precompute position lookups once so each CSV row costs two O(1) dictionary
# lookups instead of linear `in` / `.index()` scans over the user and
# restaurant lists. `setdefault` keeps the first position, like `list.index`.
user_to_idx = {}
for idx, user in enumerate(train_users):
    user_to_idx.setdefault(user, idx)
restaurant_to_idx = {}
for idx, restaurant in enumerate(restaurant_set):
    restaurant_to_idx.setdefault(restaurant, idx)

# COO triplets: the rating is stored as the value at (user, restaurant).
user_restaurant_data = []
user_restaurant_row = []
user_restaurant_col = []
for _, row in ratings_df.iterrows():
    user = row['user_id']
    restaurant = row['rest_id']
    rating = row['rating']
    user_idx = user_to_idx.get(user)
    res_idx = restaurant_to_idx.get(restaurant)
    # Rows whose user or restaurant is unknown to the training data are skipped.
    if user_idx is not None and res_idx is not None:
        user_restaurant_data.append(rating)
        user_restaurant_row.append(user_idx)
        user_restaurant_col.append(res_idx)
user_restaurant_matrix = coo_matrix((user_restaurant_data, (user_restaurant_row, user_restaurant_col)), shape=(num_users, num_restaurants))

In [None]:
# Initialize LightFM model
# random_state fixes the seed used by the model so runs are repeatable.
# NOTE(review): with num_threads > 1 results may still vary slightly - confirm.
model = LightFM(loss='warp', no_components=64, learning_rate=0.05, random_state=42)

# LightFM expects item_features with one row per item and one column per
# feature, i.e. (num_restaurants, num_keywords). keyword_restaurant_matrix is
# built as (num_keywords, num_restaurants), so it has to be transposed.
restaurant_features = keyword_restaurant_matrix.T.tocsr()

# Fit the model with keyword features on both sides:
#   - user_features: which keywords each user is associated with
#   - item_features: which keywords each restaurant is listed under
# Training with keyword-based user features is what gives the model one
# embedding per keyword on the user side, which the scoring loop below needs.
# NOTE(review): LightFM's WARP loss appears to treat every positive entry as
# an implicit positive; rating magnitudes are probably not used as weights
# unless passed through `sample_weight` - confirm against the LightFM docs.
model.fit(
    interactions=user_restaurant_matrix,
    user_features=user_keyword_matrix,
    item_features=restaurant_features,
    epochs=30,
    num_threads=4,
    verbose=True
)

# Generate keyword-restaurant score matrix to replace 'a'
# Predict scores for all keyword-restaurant pairs
keyword_indices = np.arange(num_keywords)
restaurant_indices = np.arange(num_restaurants, dtype=np.int32)
# Every prediction is made for the single dummy user in row 0 of user_features.
dummy_user_ids = np.zeros(num_restaurants, dtype=np.int32)
keyword_restaurant_scores = np.zeros((num_keywords, num_restaurants))
for kw_idx in keyword_indices:
    # Create a dummy user whose only feature is this keyword
    user_features = coo_matrix(
        ([1.0], ([0], [kw_idx])),
        shape=(1, num_keywords),
        dtype=np.float32,
    )
    scores = model.predict(
        user_ids=dummy_user_ids,
        item_ids=restaurant_indices,
        item_features=restaurant_features,
        user_features=user_features
    )
    keyword_restaurant_scores[kw_idx] = scores

In [None]:
# Min-max rescale the score matrix (global min/max over all entries) so it
# lies in roughly [0, 1], for compatibility with the original binary matrix.
# The 1e-10 term guards against division by zero when all scores are equal.
score_min = keyword_restaurant_scores.min()
score_max = keyword_restaurant_scores.max()
a = (keyword_restaurant_scores - score_min) / (score_max - score_min + 1e-10)