<a href="https://colab.research.google.com/github/Nekhaenko/test/blob/main/rec_lightfm.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Recommendation in Python: LightFM

In [None]:
pip install lightfm



In [None]:
pip install scikit-optimize



In [None]:
# import dependent libraries
import pandas as pd
import os
import pickle
from scipy.sparse import csr_matrix
import numpy as np
# from IPython.display import display_html
# import warnings
import itertools
# import matplotlib.pyplot as plt
# from matplotlib.gridspec import GridSpec
# import seaborn as sns
# %matplotlib inline
import math
from lightfm.cross_validation import random_train_test_split
from lightfm.evaluation import auc_score, precision_at_k, recall_at_k
from lightfm import LightFM
from skopt import forest_minimize

In [None]:
# Mount Google Drive at /content/drive (Colab-only helper) so that files
# stored on the Drive can be read and written through the local filesystem.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Base directory on the mounted Drive. It ends with '/' because file names are
# appended to it directly (e.g. f'{folder}models.csv').
folder = '/content/drive/MyDrive/datasets/Mind/'

In [None]:
# Example answers of one user: wellbeing area -> numeric score.
user_scored_answers = dict(Hopeful=4, Happy=3, Coping=2)
user_scored_answers

{'Coping': 2, 'Happy': 3, 'Hopeful': 4}

In [None]:
# ------------------- build the table of questions with ratings ------------------------

In [None]:
# Question templates grouped by wellbeing level ('low' / 'medium' / 'high').
# Each entry is a (template text, number) tuple. The '<replace>' placeholder in
# a template is substituted with a lower-cased area name when the per-area
# question table is built. Only the text (first tuple element) is read when the
# templates are flattened into `df`; the number is presumably the question's
# rating (see the section header) -- TODO confirm how it is meant to be used.
# NOTE(review): the second 'medium' template starts with 'Wha best ...', which
# looks like a typo ('What'/'Who'?) -- confirm the intended wording before fixing.
# NOTE: the name `questions` is later rebound to a DataFrame, so this dict is
# only available until that cell runs.
questions = {'low': [('What ideas have you got on how to improve your wellbeing in <replace>?', 1.1),
                     ('What wellbeing goal can we set for you this week/month? How are you going to get there? How will you know you are on track to reach this goal?', 1.4),
                     ('Is there anyone else we need to talk to about this problem?', 1.8),
                     ('What is the main threat to your wellbeing in <replace>?', 2.1),
                     ('Why have you scored yourself so low in this <replace>?', 2.6),
                     ('If you had a friend that rated them similar in <replace> what strategies would you recommend?', 2.9)],
             'medium': [('What can you do to improve your score in this <replace>?', 3.1),
                        ('Wha best could help you to improve in <replace>?', 3.4),
                        ('What wellbeing goal can we set for you this week/month? How are you going to get there? How will you know you are on track to reach this goal?', 3.8)],
             'high': [('What wellbeing goal can we set for you this week/month? How are you going to get there? How will you know you are on track to reach this goal?', 4.1),
                      ('How would you like me to help you maintain in <replace>?', 4.4),
                      ('How can you maintain a high wellbeing score in <replace>?', 4.7),
                      ('Who could best help you to maintain a high wellbeing score?', 4.9)]}

# questions

In [None]:
# Flatten the per-level templates into a two-column table: one row per
# template, holding its text and the level it belongs to. The rows are
# collected first and the frame is built once, instead of growing the
# DataFrame one `.loc` assignment at a time.
cols = ['question', 'level']
df = pd.DataFrame(
    [(entry[0], level) for level, entries in questions.items() for entry in entries],
    columns=cols,
)

# df

In [None]:
# Expand the template table into one block of questions per wellbeing area:
# '<replace>' is substituted with the lower-cased area name and the area is
# stored in the 'type' column. Blocks are stacked in `priority` order with a
# fresh 0..n-1 index, so row labels identify (area, template) pairs.
# NOTE: this rebinds `questions` (until now the dict of templates) to a
# DataFrame, so `df` must already have been built from the dict.
priority = ['Hopeful', 'Happy', 'Coping', 'Accepted', 'Safe', 'Healthy']
per_area_frames = []
for pri in priority:
    area_questions = df.copy()
    # Literal (non-regex) substitution of the placeholder.
    area_questions['question'] = area_questions['question'].str.replace(
        '<replace>', pri.lower(), regex=False)
    area_questions['type'] = pri
    per_area_frames.append(area_questions)
# Concatenate once; repeated concat onto an initially empty frame is quadratic
# and relies on deprecated empty-frame dtype handling.
questions = pd.concat(per_area_frames, ignore_index=True)


In [None]:
# --------- Preprocess the user's data -----------------------

In [None]:
# Map each numeric score to a level name: >= 4 is 'high', 3 up to (but not
# including) 4 is 'medium', anything lower is 'low'. The medium band uses
# `>= 3` rather than `== 3` so that fractional scores such as 3.5 are not
# classified as 'low'; integer scores are classified exactly as before.
user_scored_level = {k: ('high' if v >= 4 else 'medium' if v >= 3 else 'low')
              for (k, v) in user_scored_answers.items()}

In [None]:
import json

In [None]:
# Persist the level-code -> level-name mapping as config.json in the dataset
# folder. `config` is built here because no other active cell defines it.
# Note that JSON object keys are always strings, so the integer codes are
# written as "1", "2", "3".
config = {'levels': {1: 'low', 2: 'medium', 3: 'high'}}
with open(f'{folder}config.json', 'w') as f:
    json.dump(config, f)

In [None]:
# Level codes (1..3) used inside the model combinations, mapped to level names.
levels = dict(enumerate(('low', 'medium', 'high'), start=1))
# Disabled in the original notebook: config = {'levels': levels}

In [None]:
# Create all model variants

In [None]:
# Three positions, each taking a level code 1..3; the Cartesian product lists
# every possible triple of codes.
all_models = [[1, 2, 3] for _ in range(3)]
model_inds = [combo for combo in itertools.product(*all_models)]
len(model_inds)

27

In [None]:
# One row per code triple, stored as a tuple in the 'combination' column.
models = pd.DataFrame(dict(combination=model_inds))

In [None]:
def check_level(x, pos):
  """Return the level name for the code found at position ``pos`` of ``x``.

  ``x`` is indexed with ``pos`` and the resulting code is looked up in the
  notebook-level ``levels`` mapping; an unknown code raises ``KeyError``.
  """
  return levels[x[pos]]

In [None]:
# Decode each position of the combination tuple into a level-name column:
# position 0 -> Coping, 1 -> Happy, 2 -> Hopeful.
for position, area in enumerate(('Coping', 'Happy', 'Hopeful')):
  models[area] = models['combination'].apply(check_level, pos=position)
# Keep the row label as an explicit column as well.
models['number'] = models.index

In [None]:
# Preview the first rows of the model table.
models.head()

Unnamed: 0,combination,Coping,Happy,Hopeful,number
0,"(1, 1, 1)",low,low,low,0
1,"(1, 1, 2)",low,low,medium,1
2,"(1, 1, 3)",low,low,high,2
3,"(1, 2, 1)",low,medium,low,3
4,"(1, 2, 2)",low,medium,medium,4


In [None]:
# Write the model table (without the index column) into the dataset folder.
models.to_csv(folder + 'models.csv', index=False)

Build the list of question combinations for recommendations

In [None]:
def remove_dupl(qq):
  """Tell whether an index triple ``qq`` should be treated as a duplicate.

  Returns True when either
    * the first index equals the second index minus the number of rows of the
      notebook-level ``df``, or
    * the second and third indices leave the same fractional part (rounded to
      5 decimals) after division by that row count.
  Otherwise returns False.

  NOTE(review): presumably this detects the same template being picked for two
  different areas -- confirm the intent; only the pairs (first, second) and
  (second, third) are compared.
  """
  quest_len = df.shape[0]
  frac_second = round(math.modf(qq[1] / quest_len)[0], 5)
  frac_third = round(math.modf(qq[2] / quest_len)[0], 5)
  first_matches_second = qq[0] == (qq[1] - quest_len)
  return bool(first_matches_second | (frac_second == frac_third))

In [None]:
def save_combinations(models, questions_df):
  """Build every question combination for every model and save them as one CSV.

  For each row of ``models`` the 'Coping', 'Happy' and 'Hopeful' levels are
  read, the matching question rows are selected from ``questions_df`` (by its
  'type' and 'level' columns), and the Cartesian product of the three index
  lists gives the combinations for that model.

  The written table has the columns:
    combinations   tuple of ``questions_df`` row labels (Coping, Happy, Hopeful)
    questions      the three question texts joined with '; '
    avg_rating     placeholder random integer in 0..4
    quest_comb_id  position of the combination within its model (restarts at 0
                   for every model)
    model          the label of the ``models`` row

  Args:
    models: DataFrame with 'Coping', 'Happy' and 'Hopeful' level columns.
    questions_df: DataFrame with 'question', 'level' and 'type' columns.

  Returns:
    None. The result is written to
    'drive/MyDrive/datasets/Mind/questions_dicts/total_quest_comb.csv'
    (relative to the current working directory); the directory is created
    if it does not exist.
  """
  quest_dict_path = 'drive/MyDrive/datasets/Mind/questions_dicts/'
  areas = ('Coping', 'Happy', 'Hopeful')
  per_model_frames = []

  for imod in models.index:
    # Row labels of the questions that match this model's level, per area.
    quest_indexs = [
        questions_df[(questions_df['type'] == area) &
                     (questions_df['level'] == models.loc[imod, area])].index.tolist()
        for area in areas
    ]
    indexes = list(itertools.product(*quest_indexs))

    questions_list = [
        '; '.join(questions_df.loc[list(idx), 'question'].tolist())
        for idx in indexes
    ]

    quest_comb = pd.DataFrame({'combinations': indexes, 'questions': questions_list})
    # Placeholder ratings. The generator is deliberately re-seeded for every
    # model (as in the original code), so each model's ratings start from the
    # same pseudo-random sequence and the written file stays unchanged.
    rng = np.random.default_rng(12345)
    quest_comb['avg_rating'] = rng.integers(low=0, high=5, size=quest_comb.shape[0])
    quest_comb['quest_comb_id'] = quest_comb.index
    quest_comb['model'] = imod
    per_model_frames.append(quest_comb)

  # Concatenate once instead of re-concatenating onto a growing frame; an empty
  # `models` table still produces an (empty) output file.
  if per_model_frames:
    total_quest_comb = pd.concat(per_model_frames, ignore_index=True)
  else:
    total_quest_comb = pd.DataFrame()

  os.makedirs(quest_dict_path, exist_ok=True)
  total_quest_comb.to_csv(f'{quest_dict_path}total_quest_comb.csv', index=False)

In [None]:
# Build the question combinations for every model and write them to
# total_quest_comb.csv.
save_combinations(models, questions)

In [None]:
# quest_indexs = []
# for i in user_scored_level:
#   level = user_scored_level[i]
#   quest_list = questions[(questions['type']==i)&
#             (questions['level']== level)]

#   questions = quest_list['question'].tolist()
#   questions = '; '.join(questions)
#   quest_list = quest_list.index.tolist()

#   quest_indexs.append(quest_list)

# quest_indexs

In [None]:
# indexes = list(itertools.product(*quest_indexs))
# len(indexes)

In [None]:
# for it in indexes:
#   if remove_dupl(it):
#     indexes.remove(it)

# len(indexes)

In [None]:
# quest_comb = {'combinations': indexes}
# quest_comb = pd.DataFrame(quest_comb)
# # add random ratings
# rng = np.random.default_rng(12345)
# rints = rng.integers(low=0, high=5, size= quest_comb.shape[0])
# quest_comb['avg_rating']= rints
# quest_comb['quest_comb_id'] = quest_comb.index
# quest_comb.head()

In [None]:
# quest_comb.to_csv(f'.csv', index=False)

In [None]:
# Create test users with ratings for question sets

In [None]:
# Hand-written test interactions: which user rated which question combination,
# and with what rating.
interactions = dict(
    user_id=[1, 2, 1, 2, 2],
    quest_comb_id=[1, 2, 3, 4, 5],
    rating=[3, 4, 0, 1, 2],
)

interactions_selected = pd.DataFrame(data=interactions)
interactions_selected

Unnamed: 0,user_id,quest_comb_id,rating
0,1,1,3
1,2,2,4
2,1,3,0
3,2,4,1
4,2,5,2


In [None]:
# Map every row label of `quest_comb` to that row's combination of questions.
# NOTE(review): no active cell visible in this notebook assigns `quest_comb`
# (it only appears in commented-out cells), so this fails on a fresh kernel --
# confirm where it is supposed to come from.
item_dict = dict(zip(quest_comb.index, quest_comb['combinations']))

# item_dict

In [None]:
# dummify categorical features
# One-hot encode the 'avg_rating' column (a 'ratings_count' column was
# considered as well but is disabled), then order the rows by 'quest_comb_id'
# and renumber them from 0.
quest_comb_metadata_transformed = (
    pd.get_dummies(quest_comb, columns=['avg_rating'])
    .sort_values('quest_comb_id')
    .reset_index(drop=True)
)
quest_comb_metadata_transformed.head()

Unnamed: 0,combinations,quest_comb_id,avg_rating_0,avg_rating_1,avg_rating_2,avg_rating_3,avg_rating_4
0,"(9, 19, 26)",0,0,0,0,1,0
1,"(9, 19, 27)",1,0,1,0,0,0
2,"(9, 19, 28)",2,0,0,0,1,0
3,"(9, 19, 29)",3,0,1,0,0,0
4,"(9, 19, 30)",4,0,1,0,0,0


In [None]:
# convert to csr matrix
# Keep only the one-hot feature columns and store them as a CSR sparse matrix.
non_feature_columns = ['quest_comb_id', 'combinations']
feature_values = quest_comb_metadata_transformed.drop(non_feature_columns, axis=1).values
quest_comb_metadata_csr = csr_matrix(feature_values)
quest_comb_metadata_csr

<72x5 sparse matrix of type '<class 'numpy.uint8'>'
	with 72 stored elements in Compressed Sparse Row format>

In [None]:
# User x combination rating matrix. Pairs without a rating become 0, so a
# missing rating and an explicit rating of 0 are indistinguishable afterwards.
user_interaction = pd.pivot_table(
    interactions_selected,
    index='user_id',
    columns='quest_comb_id',
    values='rating',
).fillna(0)

user_interaction.head()

quest_comb_id,1,2,3,4,5
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,3.0,0.0,0.0,0.0,0.0
2,0.0,4.0,0.0,1.0,2.0


In [None]:
# Map each user id to its row position in the interaction matrix.
user_id = list(user_interaction.index)
user_dict = {uid: position for position, uid in enumerate(user_id)}

user_dict

{1: 0, 2: 1}

In [None]:
# convert to csr matrix
# Store the dense user x combination ratings as a CSR sparse matrix.
dense_ratings = user_interaction.values
user_interaction_csr = csr_matrix(dense_ratings)
user_interaction_csr

<2x5 sparse matrix of type '<class 'numpy.float64'>'
	with 4 stored elements in Compressed Sparse Row format>

In [None]:
# Configure a WARP-loss LightFM model and fit it on the interaction matrix
# only (no user or item features are passed to `fit`).
model = LightFM(
    loss='warp',
    random_state=2016,
    learning_rate=0.90,
    no_components=150,
    user_alpha=0.000005,
).fit(
    user_interaction_csr,
    epochs=100,
    num_threads=16,
    verbose=False,
)

In [None]:
# Serialize the fitted model into 'model.dump' in the working directory.
with open('model.dump', mode='wb') as dump_file:
  pickle.dump(model, dump_file)

In [None]:
def sample_recommendation_user(model, interactions, user_id, user_dict, 
                               item_dict, quest_csr, threshold = 0,
                               nrec_items = 5, show = True,
                               questions_df = None):
    """Rank the items a user has not rated yet and optionally print the best ones.

    Args:
        model: object with a ``predict(user, item_ids, item_features=...)``
            method (a fitted LightFM model at the call site).
        interactions: DataFrame of ratings with users as rows and items as
            columns.
        user_id: row label of the user in ``interactions``.
        user_dict: mapping from user id to the user index passed to ``predict``.
        item_dict: mapping from item label (a column of ``interactions``) to
            the combination of question row labels it stands for.
        quest_csr: forwarded to ``model.predict`` as ``item_features``.
        threshold: items whose rating is strictly above this value count as
            already known and are never recommended.
        nrec_items: maximum number of items to recommend.
        show: when truthy, print the recommendations and their questions.
        questions_df: table with a 'question' column used for printing;
            defaults to the notebook-level ``questions`` frame.

    Returns:
        List of at most ``nrec_items`` item labels, best score first.

    NOTE(review): item ids 0..n_items-1 are passed to ``predict``, i.e. the
    column position in ``interactions`` is assumed to be the model's item
    index -- confirm this also matches the rows of ``quest_csr``.
    """
    n_items = interactions.shape[1]
    user_x = user_dict[user_id]

    # Score every item column for this user and label the scores by column.
    predicted = pd.Series(
        model.predict(user_x, np.arange(n_items), item_features=quest_csr),
        index=interactions.columns,
    )
    ranked_items = predicted.sort_values(ascending=False).index.tolist()

    # Items the user already rated above the threshold are excluded.
    user_row = interactions.loc[user_id, :]
    known_items = set(user_row[user_row > threshold].index)

    return_score_list = [item for item in ranked_items
                         if item not in known_items][:nrec_items]

    if show:
        if questions_df is None:
            # Fall back to the notebook-level question table.
            questions_df = questions
        print("User: " + str(user_id))
        print("\n Recommended Items:")
        for counter, item in enumerate(return_score_list, start=1):
            combination = item_dict[item]
            print(str(counter) + '- ' + ', '.join(str(i) for i in combination))
            for question_idx in combination:
                print(questions_df.loc[int(question_idx), 'question'])

    # The original computed this list but never returned it.
    return return_score_list

In [None]:
# Show recommendations for user 2, using the one-hot rating features as item
# features.
sample_recommendation_user(
    model,
    user_interaction,
    user_id=2,
    user_dict=user_dict,
    item_dict=item_dict,
    quest_csr=quest_comb_metadata_csr,
)

User: 2
Known Likes:
1- 9, 19, 31
2- 9, 19, 30
3- 9, 19, 28

 Recommended Items:
1- 9, 19, 27
What wellbeing goal can we set for you this week/month? How are you going to get there? How will you know you are on track to reach this goal?
What can you do to improve your score in this happy?
What wellbeing goal can we set for you this week/month? How are you going to get there? How will you know you are on track to reach this goal?
2- 9, 19, 29
What wellbeing goal can we set for you this week/month? How are you going to get there? How will you know you are on track to reach this goal?
What can you do to improve your score in this happy?
What is the main threat to your wellbeing in coping?
