In [17]:
import pandas
import numpy as np
import time
import warnings
warnings.filterwarnings('ignore')

class UserHistory():
    """Item-based recommender driven by a course co-occurrence matrix.

    The model is built from a table of (user, course) interactions. For
    every pair of courses it stores the Jaccard index of the two sets of
    users who took them (``|intersection| / |union|``). Recommendations
    for a set of courses are the other courses with the highest average
    score against that set.

    Call ``create`` first; every other method reads the state it sets.
    """

    def __init__(self):
        # Interaction table handed to create().
        self.train_data = None
        # Column names (in train_data) holding the user and the course.
        self.user_id = None
        self.item_id = None
        # Square course-by-course similarity matrix, indexed like all_courses.
        self.cooccurence_matrix = None
        # Unique courses in train_data; position == row/column in the matrix.
        self.all_courses = None

    def get_user_items(self, user):
        """Return the list of unique courses recorded for ``user``."""
        user_data = self.train_data[self.train_data[self.user_id] == user]
        return list(user_data[self.item_id].unique())

    def get_item_users(self, item):
        """Return the set of unique users recorded for course ``item``."""
        item_data = self.train_data[self.train_data[self.item_id] == item]
        return set(item_data[self.user_id].unique())

    def get_all_items_train_data(self):
        """Return the list of unique courses in the training data."""
        return list(self.train_data[self.item_id].unique())

    def construct_cooccurence_matrix(self):
        """Build the symmetric course-by-course Jaccard similarity matrix.

        Returns:
            An ``np.matrix`` of floats with shape (n_courses, n_courses).
            Entry ``[i, j]`` is ``|users_i & users_j| / |users_i | users_j|``,
            or 0 when the two courses share no user.
        """
        n_courses = len(self.all_courses)

        # Reuse the existing helper instead of repeating its filter here.
        users_courses = [self.get_item_users(course) for course in self.all_courses]

        # np.matrix is kept so the attribute type seen by callers is unchanged.
        cooccurence_matrix = np.matrix(np.zeros(shape=(n_courses, n_courses)), float)

        # The measure is symmetric: compute the upper triangle and mirror it.
        # Cells for disjoint courses keep their initial 0.
        for i in range(n_courses):
            for j in range(i, n_courses):
                shared = len(users_courses[i] & users_courses[j])
                if shared:
                    # |A | B| == |A| + |B| - |A & B|, no need to build the union.
                    combined = len(users_courses[i]) + len(users_courses[j]) - shared
                    score = float(shared) / float(combined)
                    cooccurence_matrix[i, j] = score
                    cooccurence_matrix[j, i] = score
        return cooccurence_matrix

    def generate_top_recommendations(self, user_courses, top_n=10):
        """Rank courses by average similarity to ``user_courses``.

        Args:
            user_courses: Courses already taken. Each must be present in
                ``self.all_courses``.
            top_n: Maximum number of recommendations to return.

        Returns:
            Up to ``top_n`` courses, best first, never including a course
            from ``user_courses``. Ties are broken by higher matrix index
            first. Returns ``[]`` when ``user_courses`` is empty.

        Raises:
            ValueError: If a course is not in ``self.all_courses``.
        """
        user_courses = list(user_courses)
        # With no courses the average below would be 0/0; nothing to rank.
        if not user_courses or top_n <= 0:
            return []

        courses_taken_index = [self.all_courses.index(course) for course in user_courses]

        # np.asarray makes the arithmetic identical for np.matrix and ndarray.
        similarity = np.asarray(self.cooccurence_matrix, dtype=float)
        user_sim_scores = similarity[courses_taken_index].sum(axis=0) / float(len(courses_taken_index))

        # Highest score first; (score, index) tuples keep the index alongside.
        ranked = sorted(
            ((score, index) for index, score in enumerate(user_sim_scores.tolist())),
            reverse=True,
        )

        # all_courses holds unique values, so excluding by index is the same
        # as excluding by course value.
        already_taken = set(courses_taken_index)
        result = []
        for score, index in ranked:
            if len(result) >= top_n:
                break
            if np.isnan(score) or index in already_taken:
                continue
            result.append(self.all_courses[index])
        return result

    ############################## create the object

    def create(self, train_data, code=True):
        """Fit the model on ``train_data`` and print the elapsed time.

        Args:
            train_data: DataFrame with a ``'user'`` column and a course column.
            code: If true the course column is ``'course_code'``, otherwise
                ``'course_name'``.
        """
        start = time.time()
        self.train_data = train_data
        self.user_id = 'user'
        if code:
            self.item_id = 'course_code'
        else:
            self.item_id = 'course_name'
        self.all_courses = self.get_all_items_train_data()
        self.cooccurence_matrix = self.construct_cooccurence_matrix()
        print ('model created. Time spent in seconds:', time.time()-start)

    ##############################  functions to call

    def user_recommender(self, user, top_n=10):
        """Recommend up to ``top_n`` courses for ``user`` from their history."""
        user_courses = self.get_user_items(user)
        return self.generate_top_recommendations(user_courses, top_n=top_n)

    def course_recommender(self, item_list, top_n=10):
        """Recommend up to ``top_n`` courses similar to those in ``item_list``."""
        return self.generate_top_recommendations(item_list, top_n=top_n)

    def prelogin_recommender(self, output_path='course_recommeder.csv', top_n=10):
        """Build the per-course recommendation table and optionally save it.

        Args:
            output_path: CSV file to write. Pass ``None`` to skip writing.
            top_n: Number of ``rankN`` columns to produce.

        Returns:
            DataFrame with one row per course: a ``'course'`` column followed
            by ``rank1`` .. ``rank<top_n>``. Ranks with no recommendation
            (fewer than ``top_n + 1`` courses in the data) are left empty.
        """
        start = time.time()
        columns = ['course'] + ['rank%d' % rank for rank in range(1, top_n + 1)]

        # Collect plain rows and build the frame once; assigning rows one at
        # a time with .loc re-allocates the frame on every iteration.
        rows = []
        for course in self.all_courses:
            recommendations = self.course_recommender([course], top_n=top_n)
            # Pad short lists so every row matches the column count.
            padding = [None] * (top_n - len(recommendations))
            rows.append([course] + recommendations + padding)
        result_df = pandas.DataFrame(rows, columns=columns)

        if output_path is not None:
            result_df.to_csv(output_path)
            print('result saved to csv. Time spent in seconds::',time.time()-start)
        return result_df

In [18]:
import random

# Reproducible toy interaction log: 100 distinct (user, course) pairs drawn
# from 31 user ids (0..30) and 14 single-letter course codes.
random.seed(1)
string = 'ABCDEFGHIJKLMN'
result = []
seen_pairs = set()
while len(result) < 100:
    # Draw the user before the course on every attempt (duplicates included)
    # so the seeded sequence is consumed in the same order.
    user = random.randint(0, 30)
    course = string[random.randint(0, 13)]
    if (user, course) in seen_pairs:
        continue
    seen_pairs.add((user, course))
    result.append([user, course])
data = pandas.DataFrame(result, columns=['user', 'course_code'])

In [19]:
# Display the generated (user, course_code) interaction table.
data

Unnamed: 0,user,course_code
0,4,J
1,27,M
2,24,B
3,8,B
4,15,M
5,14,H
6,20,G
7,25,D
8,3,H
9,0,N


In [20]:
# Fit the recommender on the synthetic interactions (course column: 'course_code').
model = UserHistory()
model.create(train_data=data)

# One row per course with its ranked recommendations; also written to CSV.
result = model.prelogin_recommender()

model created. Time spent in seconds: 0.029773950576782227
result saved to csv. Time spent in seconds:: 0.07537221908569336


In [21]:
# Display the per-course recommendation table returned by prelogin_recommender().
result

Unnamed: 0,course,rank1,rank2,rank3,rank4,rank5,rank6,rank7,rank8,rank9,rank10
0,J,D,I,N,A,M,B,K,E,F,G
1,M,A,N,D,J,L,H,I,C,K,E
2,B,K,F,J,L,A,H,I,C,E,N
3,H,L,I,F,N,M,B,C,K,E,D
4,G,K,I,E,A,L,N,C,F,J,M
5,D,N,J,M,A,I,C,H,E,K,F
6,N,D,I,A,M,J,K,E,G,H,C
7,A,M,I,K,N,E,G,D,J,B,F
8,L,H,F,C,G,M,B,J,A,N,E
9,F,L,H,B,G,J,A,N,M,C,E
