
### Recommendation systems using matrix factorization, Bayesian Personalized Ranking and Alternating Least Squares optimization

In [None]:
import random
import pandas as pd
import numpy as np                               # loading ll necessary libraries 
import implicit
import scipy.sparse as sparse
from scipy.sparse.linalg import spsolve
from sklearn.preprocessing import MinMaxScaler
import keras

In [None]:
# Read the training and test interaction tables from the working directory.
df_train = pd.read_csv("train_data.csv")
df_test = pd.read_csv("test_data.csv")


Once the datasets are loaded, we extract the unique `user_id` and `item_id` values, which will later be used to build the output dataframe for this challenge.

In [None]:
# Distinct user and item identifiers seen in the training data
# (order of first appearance, as returned by Series.unique()).
users = list(df_train["user_id"].unique())
items = list(df_train["item_id"].unique())

To create the sparse matrices representing the user and item interactions, we make use of the `scipy.sparse` package.

In [None]:
 # Build the sparse rating matrices in both orientations from the training data.
# No explicit shape is passed, so scipy infers the dimensions from the id values,
# which therefore have to be non-negative integers usable as row/column indices.
user_sparse_data = sparse.csr_matrix(
    (df_train['rating'], (df_train['user_id'], df_train['item_id']))   # rows = users, cols = items
)
item_sparse_data = sparse.csr_matrix(
    (df_train['rating'], (df_train['item_id'], df_train['user_id']))   # rows = items, cols = users
)

##### Auxiliary functions

In [None]:

def get_recommendations(x, model=None):
    '''Return the recommendations (with scores) for user ``x`` over the whole item catalogue.

    Parameters
    ----------
    x : user_id of the user to score; it is passed to ``recommend`` unchanged.
    model : optional fitted recommender exposing ``recommend(userid, user_items, N=...)``,
        e.g. model_1 (ALS), model_2 (Bayesian Personalized Ranking) or
        model_3 (Logistic Matrix Factorization). When omitted, the module-level
        ``model_1`` is used, which matches the previous hard-wired behaviour.

    Returns
    -------
    Whatever ``model.recommend`` returns when asked for ``len(items)`` results.
    The callers in this notebook iterate over it as (item_id, score) pairs.

    NOTE(review): the call below passes the full user-by-item matrix, which matches the
    pre-0.5 ``implicit`` API; newer releases expect different arguments - confirm the
    pinned version before upgrading.
    '''
    if model is None:
        # Looked up at call time so the function can be defined before the model is fitted.
        model = model_1
    # N=len(items) requests a score for every item known from the training data.
    return model.recommend(x, user_sparse_data, N=len(items))


def check_list(x):
    '''
    Return the item_id values that the test dataset lists for user ``x``.

    The result is the ``.values`` array of the matching ``item_id`` column
    (empty when the user does not occur in ``df_test``).
    '''
    belongs_to_user = df_test['user_id'] == x          # boolean row mask for this user
    return df_test.loc[belongs_to_user, 'item_id'].values


def top_recommendations(x, n=10):
    '''
    Return the first ``n`` recommended items for user ``x`` that also appear in the
    user's test-set candidate list.

    Parameters
    ----------
    x : user_id to produce recommendations for.
    n : maximum number of items to return (default 10, the previous fixed value).
        Values <= 0 yield an empty list.

    Returns
    -------
    list of item ids, in the order in which ``get_recommendations`` yields them
    (expected to be best score first - verify against the model in use).
    At most ``n`` long; shorter when fewer candidates are recommended.
    '''
    if n <= 0:
        return []

    scored_items = get_recommendations(x)        # iterable of (item_id, score) pairs
    # A set gives O(1) membership checks; testing against the raw array would
    # rescan all candidates for every recommended item.
    candidates = set(check_list(x))

    top_items = []
    for item_id, _score in scored_items:
        if item_id in candidates:
            top_items.append(item_id)
            if len(top_items) >= n:              # enough items collected, stop scanning
                break
    return top_items


In [None]:
#top_recommendations(0)

#### Model 1 : Alternating Least Squares (ALS model) 

This well-known algorithm works well for implicit feedback data. We make use of the `implicit` library's built-in recommender model. Alternating Least Squares (ALS) is a matrix factorization algorithm.

In [None]:
# Build the ALS model and fit it on the alpha-scaled item-by-user rating matrix.
alpha = 37                                              # multiplier applied to every stored rating
data = (alpha * item_sparse_data).astype('double')

model_1 = implicit.als.AlternatingLeastSquares(
    factors=8,
    regularization=0.1,
    iterations=45,
)
model_1.fit(data)

HBox(children=(FloatProgress(value=0.0, max=45.0), HTML(value='')))




#### Converting the data into a CSV file for output

In [None]:
# Parallel lists: a[k] is a user_id and b[k] is one item recommended to that user.
a, b = [], []
for i in users:                                 # every user seen in the training data
    top_items = top_recommendations(i)          # that user's top recommendations
    a.extend([i] * len(top_items))              # repeat the user once per recommended item
    b.extend(top_items)


Now we create an empty dataframe and load the list values into the `user_id` and `item_id` columns.

In [None]:
column_names = ["user_id", "item_id"]
# Assemble the output frame straight from the two parallel lists (one row per pair).
df = pd.DataFrame({"user_id": a, "item_id": b}, columns=column_names)
                       

In [None]:
# Write the recommendations to disk without the dataframe index column.
df.to_csv(
    'sample_29.csv',
    index=False,
)

#### Model 2 : Bayesian Personalized Ranking 

This well-known algorithm works well for implicit feedback data. We again make use of the `implicit` library's built-in recommender model.

In [None]:
# Build the Bayesian Personalized Ranking model and fit it on the alpha-scaled
# item-by-user rating matrix.
alpha = 37                                              # multiplier applied to every stored rating
data = (alpha * item_sparse_data).astype('double')

model_2 = implicit.bpr.BayesianPersonalizedRanking(
    factors=8,
    regularization=0.1,
    iterations=50,
)
model_2.fit(data)

HBox(children=(FloatProgress(value=0.0, max=50.0), HTML(value='')))




In [None]:
# Sample check: print the BPR model's default-length recommendation list
# (items with their scores) for a single user.
user_id = 0
recommended_2 = model_2.recommend(user_id, user_sparse_data)
print(recommended_2)

[(1378, 0.19128324), (1589, 0.18065464), (748, 0.1680759), (127, 0.1592245), (1680, 0.1575718), (1223, 0.15491632), (154, 0.15366045), (196, 0.15098137), (136, 0.15050168), (94, 0.14832266)]


#### Model 3 : Logistic Matrix Factorization
We again make use of the `implicit` library's built-in recommender model, which learns a probability distribution over whether a user likes an item or not. In our case we have no '0' values, so there is always some uncertainty.

In [None]:
# Build the Logistic Matrix Factorization model and fit it on the alpha-scaled
# item-by-user rating matrix.
alpha = 40                                              # multiplier applied to every stored rating
data = (alpha * item_sparse_data).astype('double')

model_3 = implicit.lmf.LogisticMatrixFactorization(
    factors=20,
    regularization=0.1,
    iterations=50,
)
model_3.fit(data)

HBox(children=(FloatProgress(value=0.0, max=50.0), HTML(value='')))




In [None]:
# Sample check: print the LMF model's default-length recommendation list
# (items with their scores) for a single user.
user_id = 0
recommended_3 = model_3.recommend(user_id, user_sparse_data)
print(recommended_3)

[(305, 13.4774685), (440, 12.796539), (256, 12.093139), (959, 11.131193), (151, 10.837425), (989, 10.773423), (1378, 10.5489235), (170, 10.455078), (1615, 10.4301195), (533, 10.392322)]
