In [None]:
from lightfm import LightFM
from lightfm.data import Dataset
from azureml.core import Workspace, Dataset as AzureDataset
import pandas as pd

In [None]:
# Azure ML workspace coordinates (the values here look like placeholders)
subscription_id = 'x'
resource_group = 'y'
workspace_name = 'z'

# Open a handle to the Azure ML workspace identified by the three values above
workspace = Workspace(
    subscription_id=subscription_id,
    resource_group=resource_group,
    workspace_name=workspace_name,
)

In [None]:
# Fetch each named dataset from the workspace as a pandas DataFrame
def _load_frame(dataset_name):
    """Look up an Azure ML dataset by name and return it as a pandas DataFrame."""
    return AzureDataset.get_by_name(workspace, name=dataset_name).to_pandas_dataframe()


usersdf = _load_frame('usersfake')
trainingsdf = _load_frame('trainings')
trainingstakendf = _load_frame('trainingtakenfake')

In [None]:
# Coerce both id columns of the interactions frame to numeric dtypes
for id_column in ("User-Id", "Training-Id"):
    trainingstakendf[id_column] = pd.to_numeric(trainingstakendf[id_column])

In [None]:
# Create an empty LightFM Dataset. User ids, item ids and user-feature labels
# are registered on it later through dataset1.fit(...), and the interaction and
# user-feature matrices are built from it afterwards.
dataset1 = Dataset()

In [None]:
# Create user features
def feature_colon_value(my_list, column_prefixes=None):
    """
    Prefix each value with the name of the column it came from.

    Values are paired positionally with the prefixes, so ``my_list`` must be
    ordered the same way as the columns. With the default prefixes::

        feature_colon_value([1, 1, 0, 'del', 'ml'])
        # -> ['ou:1', 'skills:1', 'language:0', 'grade:del', 'career interests:ml']

    Args:
        my_list: Iterable holding one value per feature column.
        column_prefixes: Optional iterable of prefixes, each including its
            trailing ':'. Defaults to the five user-feature columns
            ('ou:', 'skills:', 'language:', 'grade:', 'career interests:').

    Returns:
        A list of "prefix:value" strings. Pairing follows ``zip`` semantics:
        it stops at the shorter of the two inputs, so surplus values or
        surplus prefixes are dropped without an error.
    """
    if column_prefixes is None:
        column_prefixes = ('ou:', 'skills:', 'language:', 'grade:', 'career interests:')

    return [f"{prefix}{value}" for prefix, value in zip(column_prefixes, my_list)]

In [None]:
# Turn each user's row of attribute columns into a list of "column:value" labels
feature_columns = ["ou", 'skills', 'language', 'grade', 'career interests']
ad_subset = usersdf[feature_columns]
ad_list = list(map(list, ad_subset.values))
feature_list = list(map(feature_colon_value, ad_list))

In [None]:
# Pair every User-Id with the feature labels built for that user
user_tuple = [
    (user_id, user_labels)
    for user_id, user_labels in zip(usersdf['User-Id'], feature_list)
]

# Show the resulting (User-Id, labels) pairs
print(user_tuple)

In [None]:
# Register every known user id, item id and user-feature label with the dataset.
# The per-user label lists are flattened into one list first (duplicates kept).
all_user_feature_labels = []
for labels in feature_list:
    all_user_feature_labels.extend(labels)

dataset1.fit(
    users=usersdf['User-Id'].unique(),
    items=trainingsdf['Training-Id'].unique(),
    user_features=all_user_feature_labels,
)

In [None]:
# Build interactions matrix
# The two id columns are zipped directly instead of going through
# DataFrame.iterrows(): iterrows materialises a Series per row (slow on large
# frames) and may upcast the ids to the row's common dtype, whereas zipping the
# columns yields each (User-Id, Training-Id) pair as stored.
(interactions, weights) = dataset1.build_interactions(
    list(zip(trainingstakendf["User-Id"], trainingstakendf["Training-Id"]))
)

In [None]:
# Print the interactions matrix, then the weights matrix, in dense form
# for a visual check
for sparse_matrix in (interactions, weights):
    print(sparse_matrix.todense())

In [None]:
# Build the user-feature matrix from user_tuple, i.e. the (User-Id, feature
# labels) pairs assembled earlier. normalize=False is passed explicitly;
# presumably this keeps the raw feature weights instead of scaling each
# user's row -- confirm against the LightFM Dataset documentation.
user_features = dataset1.build_user_features(user_tuple, normalize=False)

In [None]:
# Create a WARP-loss LightFM model and train it in a single expression
# (fit() hands back the fitted model instance, which is bound to `model`).
model = LightFM(loss='warp').fit(
    interactions,
    user_features=user_features,
    epochs=30,
    num_threads=2,
)

In [None]:
# Display the user-feature matrix in dense form for inspection.
user_features.todense()

In [None]:
# Display the dimensions of the user-feature matrix
# (presumably users x user features -- confirm).
user_features.shape

In [None]:
# Retrieve the id/feature mappings from dataset1 (which must already have been
# fitted with the user and item data). The call returns four values, unpacked
# here as: user ids, user features, item ids, item features.
user_id_map, user_feature_map, item_id_map, item_feature_map = dataset1.mapping()

# Print the user-feature mapping for inspection
print(user_feature_map)

In [None]:
# Re-create and train the model that the evaluation and prediction cells below use.
# NOTE(review): this rebinds `model`, discarding the 30-epoch model trained in
# the earlier training cell -- confirm which configuration is the intended one.
# random_state pins the random initialisation and sampling so that the reported
# AUC and prediction scores are repeatable across runs.
model = LightFM(loss='warp', random_state=42)
model.fit(interactions,
          user_features=user_features,
          epochs=10)

In [None]:
from lightfm.evaluation import auc_score

# Evaluate the fitted model on the same interactions it was trained on
# (passing the same user features), then average the resulting AUC values.
per_user_auc = auc_score(model, interactions, user_features=user_features)
train_auc = per_user_auc.mean()

print('Hybrid training set AUC: %s' % train_auc)

In [None]:
import numpy as np

# Raw User-Id of the user to score (a key of user_id_map, which comes from
# dataset1.mapping()).
target_user_id = 9212216

# Translate the raw id into LightFM's internal user index.
user_x = user_id_map[target_user_id]
n_users, n_items = interactions.shape  # Shape of interactions matrix

# Score every item for this user. The model was fitted with user_features, so
# the same matrix has to be supplied at prediction time; without it LightFM
# falls back to identity features and the scores would not reflect the
# metadata features the model was trained on.
scores = model.predict(user_x, np.arange(n_items), user_features=user_features)

# Print or inspect the scores
print(scores)