In [1]:
import pandas as pd
import numpy as np
from tqdm.notebook import tqdm
import scipy
from implicit.evaluation import train_test_split
from implicit.als import AlternatingLeastSquares

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the raw tables from disk.
users = pd.read_csv('data/users.csv')
orgs = pd.read_csv('data/organisations.csv')
test_df = pd.read_csv('data/test_users.csv')
reviews = pd.read_csv('data/reviews.csv', index_col=0)

# Missing ratings are replaced by 0 and every rating is then shifted up by one.
reviews['rating'] = (reviews['rating'].fillna(0) + 1).to_numpy()

# Enrich every review with the reviewer's attributes (the users table's `city`
# becomes `user_city`) and then with the organisation's attributes (its `city`
# becomes `org_city`). Both sides of the second merge carry a `rating` column,
# so pandas suffixes them: `rating_x` is the review rating, `rating_y` the
# organisation rating.
org_columns = ['org_id', 'city', 'rubrics_id', 'features_id', 'rating', 'average_bill']
reviews = (
    reviews
    .merge(users, on='user_id')
    .rename(columns={'city': 'user_city'})
    .merge(orgs[org_columns], on='org_id')
    .rename(columns={'city': 'org_city'})
)

In [3]:
# Materialise the frame as a 2-D array once and reuse it for both lookups.
# NOTE(review): the columns are addressed by position; this assumes column 1
# holds the user ids and column 0 the organisation ids. Confirm against the
# actual column order of `reviews`.
review_values = reviews.values

# `rows` / `cols` are the sorted unique ids; `r_pos` / `c_pos` give, for every
# review, the position of its id inside `rows` / `cols`.
rows, r_pos = np.unique(review_values[:, 1], return_inverse=True)
cols, c_pos = np.unique(review_values[:, 0], return_inverse=True)

# Do not keep the (potentially large) array alive in the kernel.
del review_values

In [4]:
# User x organisation interaction matrix in CSR format. Row / column indices
# are the positions computed by np.unique; the stored values are the review
# ratings (`rating_x`, i.e. the review side of the merge with organisations).
review_ratings = reviews['rating_x'].to_numpy(dtype=np.float64)
interactions_sparse = scipy.sparse.csr_matrix((review_ratings, (r_pos, c_pos)))

In [5]:
# Keep 99% of the interactions for training and hold out the rest.
# The seed is fixed so that the split (and everything trained on it) is
# reproducible under "Restart Kernel -> Run All".
train, test_set = train_test_split(interactions_sparse, 0.99, random_state=42)

In [6]:
# ALS matrix factorisation with 512 latent factors, trained for 10 iterations
# on the training split. The factors are initialised randomly, so the seed is
# pinned to make the fitted model and the resulting recommendations repeatable.
machine = AlternatingLeastSquares(factors=512, iterations=10, random_state=42)

machine.fit(train)

100%|██████████| 10/10 [14:39<00:00, 87.97s/it]


In [7]:
# Store the matrix coordinates next to every review: `r_pos` is the row (user)
# index and `c_pos` the column (organisation) index produced by np.unique.
for position_column, positions in (('r_pos', r_pos), ('c_pos', c_pos)):
    reviews[position_column] = positions

In [9]:
# One row per test user with the user's city and matrix row index.
# The merge is a left join on the columns the two frames share, so test users
# that never appear in `reviews` are kept with missing `user_city` / `r_pos`.
test_users = pd.read_csv('data/test_users.csv', index_col=0)
user_columns = ['user_id', 'user_city', 'r_pos']
test = test_users.merge(reviews, how='left')[user_columns].drop_duplicates('user_id')

# One row per organisation seen in the reviews, with its city and matrix
# column index. Note that this rebinds `orgs`, which previously held the raw
# organisations table.
org_lookup_columns = ['org_id', 'org_city', 'c_pos']
orgs = reviews[org_lookup_columns].drop_duplicates('org_id')


In [15]:
# Build, for every test user, a list of up to 40 recommended organisation ids.
#
# `res` collects (user_id, [org_id, ...]) tuples. Users for which no
# recommendation can be produced get an empty list, so every test user appears
# exactly once in the result.
res = []
failed_users = []        # (user_id, error) for calls that raised unexpectedly
items_by_city = {}       # user_city -> c_pos list; the selection depends only on the city

for user_id, user_city, user_pos in zip(test['user_id'], test['user_city'], test['r_pos']):
    # `test` comes from a left merge, so users without any review have no
    # matrix row (`r_pos` is missing). Handle them explicitly instead of
    # relying on an exception.
    if pd.isna(user_pos):
        res.append((user_id, []))
        continue
    # A single missing value turns the whole `r_pos` column into floats;
    # convert back to an integer so it is a valid row index.
    user_pos = int(user_pos)

    # Organisations whose city differs from the user's city. Computed once per
    # distinct city instead of once per user.
    # NOTE(review): this list is passed as `items=`; presumably that restricts
    # the candidates to these organisations rather than excluding them -
    # confirm this matches the intended behaviour.
    city_key = None if pd.isna(user_city) else user_city
    if city_key not in items_by_city:
        items_by_city[city_key] = orgs[orgs.org_city != user_city].c_pos.tolist()
    other_city_items = items_by_city[city_key]

    try:
        recommend_index, _scores = machine.recommend(user_pos,
                                                     interactions_sparse[user_pos],
                                                     items=other_city_items,
                                                     filter_already_liked_items=True,
                                                     N=40)
        # Map matrix column indices back to organisation ids.
        recommended_orgs = cols[recommend_index].tolist()
    except Exception as err:
        # Stay best-effort (the user still gets an empty answer) but keep the
        # error so that failures are visible instead of silently swallowed.
        failed_users.append((user_id, repr(err)))
        recommended_orgs = []
    res.append((user_id, recommended_orgs))

if failed_users:
    print(f'{len(failed_users)} users got no recommendations because of errors; '
          f'first failure: {failed_users[0]}')

In [16]:
# Tabulate the (user_id, recommended organisation ids) pairs collected in `res`.
answer_columns = ['user_id', 'target']
answers = pd.DataFrame(data=res, columns=answer_columns)

In [17]:
# Persist the recommendations to disk.
# NOTE(review): with the default options the frame index is written as the
# first column and each `target` list is stored as its string representation -
# confirm this is the format the consumer of answers.csv expects.
answers.to_csv(path_or_buf='answers.csv')