In [26]:
from sklearn.neighbors import NearestNeighbors
from sklearn.model_selection import train_test_split as sklearn_train_test_split
from sklearn.preprocessing import LabelEncoder
from scipy.sparse import csr_matrix

import pandas as pd
import numpy as np
import zipfile
import warnings
warnings.filterwarnings('ignore')

In [27]:
# Load the two raw tables. Each CSV carries an 'Unnamed: 0' column (presumably a
# positional index written out when the file was saved - TODO confirm); it is discarded.
ratings = pd.read_csv('ratings.csv').drop(columns=['Unnamed: 0'])
providers = pd.read_csv('providers.csv').drop(columns=['Unnamed: 0'])

# Give the provider key column the same name as the item column of `ratings`.
# NOTE: str.replace rewrites the text 'index' wherever it occurs inside any column name.
providers.columns = providers.columns.str.replace('index', 'Registration number')

In [28]:
## Create csr matrix
def ratings_matrix(ratings):
    """
    Build the sparse user x item rating matrix.

    :param ratings : DataFrame with 'User_id', 'Registration number' and 'rating' columns
    :return : scipy csr_matrix with one row per distinct User_id and one column per
              distinct 'Registration number', both in the sorted order produced by
              pd.crosstab. Ratings of the same (user, item) pair are summed and
              pairs without a rating hold 0.
    """
    # The aggregation is named by string: handing the builtin `sum` to pandas is
    # deprecated (FutureWarning since pandas 2.1) even though it yields the same totals.
    table = pd.crosstab(
        index=ratings['User_id'],
        columns=ratings['Registration number'],
        values=ratings['rating'],
        aggfunc='sum',
    )
    return csr_matrix(table.fillna(0).to_numpy())

# R: sparse rating matrix built by ratings_matrix() - rows come from 'User_id',
# columns from 'Registration number', missing ratings are stored as 0.
R = ratings_matrix(ratings)

In [35]:
'''Select, for every user, the most similar users in the whole dataset; these neighbours are later compared with the active user.'''

def create_model(rating_matrix, metric, n_neighbors=20):
    """
    Fit a brute-force k-nearest-neighbours model on the rows of the rating matrix.

    :param rating_matrix : rating matrix with one row per user (nb_users, nb_items)
    :param metric : distance metric handed to NearestNeighbors, e.g. 'cosine'
    :param n_neighbors : number of neighbours kneighbors() returns per queried row
        (default 20, the value that used to be hard-coded). nearest_neighbors()
        drops the first returned column, so 20 leaves 19 usable neighbours.
    :return : the fitted NearestNeighbors model
    """
    model = NearestNeighbors(metric=metric, n_neighbors=n_neighbors, algorithm='brute')
    model.fit(rating_matrix)
    return model

def nearest_neighbors(rating_matrix, model):
    """
    Look up the nearest neighbours of every row (user) of the rating matrix.

    :param rating_matrix : rating matrix (nb_users, nb_items)
    :param model : fitted knn model exposing kneighbors()
    :return
        - similarities : one weight per neighbour, closest neighbour first. For a model
          whose `metric` is 'cosine' this is the cosine similarity (1 - distance);
          for any other metric the raw distances are returned unchanged.
        - neighbors : row indices of those neighbours in the fitted matrix
    """
    # kneighbors() reports *distances* (smaller means closer), not similarities.
    distances, neighbors = model.kneighbors(rating_matrix)

    # Drop column 0: when the fitted rows themselves are queried, the first hit is
    # normally the row itself (exact duplicate rows may take that slot instead).
    distances, neighbors = distances[:, 1:], neighbors[:, 1:]

    # Returning distances as weights would give the closest neighbour the smallest
    # influence; cosine distance converts to a similarity as 1 - distance.
    if getattr(model, 'metric', None) == 'cosine':
        return 1.0 - distances, neighbors
    return distances, neighbors

#  We will use the Cosine similarity metric.
# `similarities` and `neighbors` are module-level arrays read by find_candidate_items()
# and predict(): one row per row of R, with the first returned column already removed
# by nearest_neighbors(). `neighbors` holds row positions of R.
model = create_model(rating_matrix=R, metric='cosine')
similarities, neighbors = nearest_neighbors(R, model)

def find_candidate_items(userid):
    """
    Find items the user has not rated yet, ranked by how many ratings the user's
    nearest neighbours gave them.

    Reads the module-level `ratings` DataFrame and `neighbors` array.

    :param userid : a value of the 'User_id' column
    :return candidates : array of up to 10 'Registration number' values, the most
                         frequently rated first; empty if `userid` is not in `ratings`
    """
    # `neighbors` is indexed by matrix row and contains matrix rows, not user ids.
    # pd.crosstab lays the matrix rows out in sorted User_id order, so translate
    # explicitly; when the ids already are 0..n-1 this mapping is the identity.
    user_ids = np.sort(ratings['User_id'].dropna().unique())
    row = np.searchsorted(user_ids, userid)
    if row >= len(user_ids) or user_ids[row] != userid:
        return ratings['Registration number'].iloc[:0].to_numpy()
    neighbor_ids = user_ids[neighbors[row]]

    # Everything the neighbours have rated
    activities = ratings.loc[ratings['User_id'].isin(neighbor_ids)]

    # Sort items by frequency
    frequency = (
        activities.groupby('Registration number')['rating']
        .count()
        .reset_index(name='count')
        .sort_values(['count'], ascending=False)
    )
    Gu_items = frequency['Registration number']

    # Remove what the user already rated while keeping the frequency order. A boolean
    # mask stays correct even if the user rated the same item more than once, which
    # setdiff1d(assume_unique=True) does not guarantee.
    active_items = ratings.loc[ratings['User_id'] == userid, 'Registration number']
    not_rated_yet = ~Gu_items.isin(active_items)

    return Gu_items[not_rated_yet].to_numpy()[:10]


In [36]:
# average rating of each user: one row per User_id (columns: User_id, rating)
mean = ratings.groupby(by='User_id', as_index=False)['rating'].mean()
# attach the user's average to every one of their ratings as column 'rating_mean'
mean_ratings = pd.merge(ratings, mean, suffixes=('','_mean'), on='User_id')

# normalizing ratings: subtract the rating user's own average (mean-centering)
mean_ratings['norm_rating'] = mean_ratings['rating'] - mean_ratings['rating_mean']

# NOTE: `mean` is rebound here from the DataFrame above to a 1-D array that keeps only
# the averages (column 1), one entry per user in groupby order; predict() indexes it
# by position.
mean = mean.to_numpy()[:, 1]
# Plain 2-D array version of mean_ratings. predict() reads columns 0, 1 and 4 by
# position, which presumably are User_id, Registration number and norm_rating -
# TODO confirm that `ratings` has exactly those three columns in that order.
np_ratings = mean_ratings.to_numpy()

In [37]:
'''Rating prediction from the user neighbourhoods.
The similarity between users is obtained from the nearest_neighbors() function;
each rating is normalized by subtracting the overall average rating of the user who gave it.'''

def predict(userid, itemid):
    """
    Predict the rating of `userid` for `itemid` from the neighbours' normalized ratings:

        r_hat = mean(userid) + sum_v(w_v * norm_rating(v, itemid)) / sum_v(|w_v|)

    where v runs over the neighbours of `userid` that rated `itemid` and w_v is the
    weight stored for that neighbour in `similarities`.

    Reads the module-level `similarities`, `neighbors`, `mean` and `np_ratings`.

    :param
        - userid : user id to whom we predict (a value of the 'User_id' column)
        - itemid : item of prediction (a 'Registration number')

    :return
        - r_hat : predicted rating; the user's own mean rating when no neighbour
                  rated the item or the weighted sum is zero

    :raises IndexError : when `userid` has no row in `np_ratings`
    """
    # np_ratings is read by position: 0 = user id, 1 = item id, 4 = normalized rating.
    # This assumes `ratings` holds exactly User_id, Registration number, rating in
    # that order - TODO confirm.
    rater_ids = np_ratings[:, 0]

    # `similarities`, `neighbors` and `mean` are indexed by matrix row, not by user id.
    # Rows follow the sorted distinct user ids, so translate the id to its row first;
    # when the ids already are 0..n-1 this mapping is the identity.
    user_ids = np.unique(rater_ids)
    row = np.searchsorted(user_ids, userid)
    if row >= len(user_ids) or user_ids[row] != userid:
        raise IndexError('user id {} has no ratings'.format(userid))

    user_similarities = similarities[row]
    neighbor_ids = user_ids[neighbors[row]]

    # mean rating of the active user
    user_mean = mean[row]

    # ratings given to `itemid`; int64 on purpose because registration numbers
    # (around 1e12) do not fit into a 32-bit platform 'int'
    iratings = np_ratings[np_ratings[:, 1].astype(np.int64) == itemid]

    # keep only the ratings that come from the user's neighbours
    simus = iratings[np.isin(iratings[:, 0], neighbor_ids)]
    normalized_ratings = simus[:, 4]

    # weight of each remaining rating = weight of the neighbour who gave it
    position = {uid: k for k, uid in enumerate(neighbor_ids.tolist())}
    indexes = np.asarray([position[uid] for uid in simus[:, 0].tolist()], dtype=np.intp)
    sims = user_similarities[indexes]

    num = np.dot(normalized_ratings, sims)
    den = np.sum(np.abs(sims))

    # nothing to learn from the neighbours: fall back to the user's own average
    if num == 0 or den == 0:
        return user_mean

    # making prediction
    return user_mean + num / den


def user2userPredictions(userid, pred_path):
    """
    Predict ratings for the candidate items of one user and append them to a CSV file.

    One line '<userid>,<itemid>,<predicted rating>' is appended per candidate item.
    If the lookup or a prediction raises IndexError, the user is skipped: nothing is
    written for that user and a note is printed to stderr instead of failing silently.

    :param
        - userid : user id
        - pred_path : path of the file the predictions are appended to
    """
    try:
        # items the user has not rated yet
        candidates = find_candidate_items(userid)

        # predict for the user, item by item; lines are collected first so that a
        # failure cannot leave a partial set of predictions for this user in the file
        lines = [
            '{},{},{}\n'.format(userid, itemid, predict(userid, itemid))
            for itemid in candidates
        ]
    except IndexError as error:
        # Best effort on purpose: one bad user must not stop the whole run,
        # but the skip has to be visible.
        sys.stderr.write('\nSkipping user {}: {!r}\n'.format(userid, error))
        return

    # saving: open the file once per user rather than once per item
    if lines:
        with open(pred_path, 'a') as file:
            file.writelines(lines)
    

import sys
import os

def user2userCF(saved_predictions='predictions.csv'):
    """
    Predict for all users, even with 1 rating, and rebuild the predictions file.

    :param saved_predictions : path of the output file (default 'predictions.csv').
        An existing file is deleted first because user2userPredictions() appends.
    """
    # list of all users
    users = ratings['User_id'].unique()
    total = len(users)

    def _progress(done):
        # `done` counts finished users, so the last update reads 100.0%
        sys.stdout.write('\rRating predictions. Progress status : %.1f%%' % (100.0 * done / total))
        sys.stdout.flush()

    if os.path.exists(saved_predictions):
        os.remove(saved_predictions)

    # 1-based counter: progress is reported after each user has been processed
    for done, userid in enumerate(users, start=1):
        # making prediction
        user2userPredictions(userid, saved_predictions)
        _progress(done)

    # finish the '\r'-rewritten progress line
    if total:
        sys.stdout.write('\n')

In [38]:
# Run the whole prediction pass: predictions.csv is deleted if it exists and is then
# rebuilt user by user (progress is written to stdout).
user2userCF()

Rating predictions. Progress status : 98.5%

In [39]:
def make_Recommendation(userid):
    """
    Return the stored predictions of one user, best first, joined with provider details.

    Reads 'predictions.csv' (header-less: user id, registration number, predicted
    rating) and the module-level `providers` table. Predictions whose registration
    number is missing from `providers` are dropped by the inner join.
    """
    column_names = ['User_id', 'Registration number', 'predicted_rating']
    all_predictions = pd.read_csv('predictions.csv', sep=',', names=column_names)

    # this user's rows only, highest predicted rating on top
    ranked = (
        all_predictions.loc[all_predictions['User_id'] == userid]
        .sort_values(by=['predicted_rating'], ascending=False)
    )

    return pd.merge(ranked, providers, on='Registration number', how='inner')

In [40]:
# Top five recommendations for user 0, shown without the join key and two text columns.
k = make_Recommendation(0).drop(columns=['Registration number', 'Предмет поставки', 'Важная информация'])
k.head(5)

Unnamed: 0,User_id,predicted_rating,Регистрационный номер,Наименование,Вид деятельности/отрасль,Телефон,Сводный индикатор,"Уставный капитал, RUB",Руководитель - ФИО
0,0,1.0,1065911012133,"АИФ, ООО","Торговля оптовая пищевыми продуктами, напиткам...",+7 (34242) 74614\n+7 (424) 2274614,Низкий риск,264898.0,Нестеров Александр Валентинович
1,0,1.0,1146733016902,"ОПТИМАФУД, ООО",Торговля оптовая молочными продуктами,+7 (968) 8852221\n+7 (915) 6466973\n+7 (965) 1...,Низкий риск,160000.0,Гритусев Алексей Викторович
2,0,1.0,1137847331962,"ФРУТ ТРЕЙДЕРС, ООО",Торговля оптовая фруктами и овощами,+7 (911) 2589912\n+7 (921) 4384354\n+7 (921) 4...,Низкий риск,10000.0,Дунчева Татьяна Ивановна
3,0,1.0,1162651066951,"ТД ДЕЛИКАТЕСЫ, ООО",Торговля оптовая мясом и мясными продуктами,+7 (928) 3632005,Низкий риск,10000.0,Лысенко Владимир Валентинович
4,0,1.0,1127746011007,"СТОЛИЧНЫЕ ПОСТАВКИ, ООО","Торговля оптовая сахаром, шоколадом и сахарист...",+7 (495) 7857369\n+7 (495) 7859371\n+7 (495) 7...,Низкий риск,1200000.0,Шиян Виктор Васильевич
