In [2]:
import pandas as pd 
import numpy as np
import math

def get_semtype(n):
    """Classify semester number ``n`` by parity.

    Returns the string 'Odd' when ``n % 2 == 1`` and 'Even' otherwise.
    """
    return 'Odd' if n % 2 == 1 else 'Even'
    
def get_year(n):
    """Map semester number ``n`` to its year, counting two semesters per year.

    Semesters 1-2 give 1, semesters 3-4 give 2, and so on.
    """
    half_of_sem = n / 2
    return math.ceil(half_of_sem)

def add_dicts(dict1, dict2):
    """Accumulate ``dict2`` into ``dict1`` key by key and return ``dict1``.

    A key that is new to ``dict1`` is copied over; a key that already exists
    has the incoming value added to it with ``+=``. ``dict1`` is modified in
    place, ``dict2`` is left untouched.
    """
    for key, value in dict2.items():
        if key not in dict1:
            dict1[key] = value
            continue
        dict1[key] += value
    return dict1

def get_ratings(code, n, df):
    """Ask the user to rate ``n`` random courses and collect per-tag scores.

    Parameters
    ----------
    code : str
        Pattern looked for in the 'Course Code' column (e.g. 'HS', 'ME', 'CS');
        only matching rows of ``df`` can be picked.
    n : int
        Number of courses to sample and to ask about.
    df : pandas.DataFrame
        Pool of courses. Must provide the columns 'Course Code',
        'Course Name' and 'tags' (a comma-separated string per row).

    Returns
    -------
    dict
        Maps each tag of the accepted courses to a score. Tags that are 2 or 3
        characters long are treated as branch codes (the same rule
        ``open_electives`` uses) and hold the average rating of the accepted
        courses; every other tag holds the sum of the ratings it received.

    Raises
    ------
    ValueError
        If fewer than ``n`` rows match ``code`` (raised by ``sample``) or the
        user types something that is not an integer.
    """
    tag_scores = {}  # tag -> accumulated rating
    rated = 0        # number of courses that were actually counted
    # DataFrame.sample(n) returns n random rows of the filtered data.
    df1 = df[df['Course Code'].str.contains(code)].sample(n)
    for i in range(n):
        # The last three characters of the tag string are cut off for display
        # (presumably the ",XX" branch code at its end -- TODO confirm).
        print("please rate",df1['Course Name'].iloc[i],"taught by",df1['tags'].iloc[i][:-3], "on a scale 1-5")
        rating = int(input())
        # The real average rating of a course is unknown, so 3.0 is assumed;
        # a course rated below it is ignored completely.
        if rating < 3.0:
            continue
        rated += 1
        for atag in df1['tags'].iloc[i].split(","):
            tag_scores[atag] = tag_scores.get(atag, 0) + rating
    if rated:
        # Average the branch-code tags over the courses that were counted.
        # Dividing by ``n`` (or skipping the division once a course was
        # ignored) is only correct for n <= 2.
        for atag in tag_scores:
            if len(atag) in (2, 3):
                tag_scores[atag] = tag_scores[atag] / rated
    return tag_scores
def completed_courses(sem, branch):
    """Return the core courses of semesters 1 .. ``sem - 1`` for ``branch``.

    Semester 1 is always included, even when ``sem`` is 1 or 2; in that case
    the semester-1 frame is returned as is. Otherwise the per-semester frames
    are stacked into one DataFrame with a fresh 0..N-1 index.

    ``pd.concat`` is used because ``DataFrame.append`` was removed in
    pandas 2.0; concatenating once also avoids copying the growing frame on
    every iteration.
    """
    frames = [course_core(1, branch)]
    frames.extend(course_core(i, branch) for i in range(2, sem))
    if len(frames) == 1:
        return frames[0]
    return pd.concat(frames, ignore_index=True)
   

def course_core(sem, branch):
    """Select the compulsory courses of semester ``sem`` for ``branch``.

    Rows of the module-level ``core_courses`` frame are kept when their
    'Year' equals the semester's year (compared as a string), their semester
    column (the header carries a trailing newline) equals 'Odd'/'Even', and
    their 'Branch' is either missing or contains ``branch``.
    """
    year_label = str(get_year(sem))
    parity = get_semtype(sem)
    in_year = core_courses['Year'] == year_label
    in_semester = core_courses['Semester\n'] == parity
    no_branch_given = core_courses['Branch'].isnull()
    for_this_branch = core_courses['Branch'].str.contains(branch)
    return core_courses[in_year & in_semester & (no_branch_given | for_this_branch)]
def convert_ratings(rating_dict):
    """Re-centre the ratings on their mean and keep only the non-negative ones.

    Every rater has a personal bias: one person rates everything low, another
    rates everything high, yet both may have the same relative preferences.
    The mean of the given ratings is taken as that bias. Each value is
    replaced by its deviation from the mean, and entries whose deviation is
    negative are removed.

    ``rating_dict`` is modified in place and also returned.
    """
    bias = np.mean(list(rating_dict.values()))
    # Iterate over a snapshot of the keys because entries are deleted on the way.
    for key in list(rating_dict):
        deviation = rating_dict[key] - bias
        if deviation < 0:
            del rating_dict[key]  # rated below the rater's own average
        else:
            rating_dict[key] = deviation
    return rating_dict
def open_electives(sem, branch, n):
    """Collect the user's ratings and turn them into scored elective proposals.

    The user rates completed core courses: two from ``branch``, two 'ES', and
    one each from 'PH', 'MA' and 'CH'. The resulting tag scores are split
    into branch scores (tags of length 2 or 3) and professor scores (all other
    tags), each group is re-centred with ``convert_ratings``, and every
    unordered pair of remaining branches (a branch paired with itself
    included) is scored with the sum of its two branch scores. The pair scores
    and professor scores are handed to ``predicted_results``, whose return
    value is returned unchanged.

    ``n`` is meant to be the number of open electives to choose; it is not
    used yet, the pairing below is fixed at two.
    """
    completed = completed_courses(sem, branch)  # courses finished before this semester

    # Ask for the ratings; the order of the questions is part of the behaviour.
    collected = {}  # branch tags and professor tags mixed in one dict
    for code, how_many in ((branch, 2), ('ES', 2), ('PH', 1), ('MA', 1), ('CH', 1)):
        add_dicts(collected, get_ratings(code, how_many, completed))
    print("my dict", collected)

    # Split into {branch: average branch rating} and {professor: rating}.
    branch_ratings = {}
    prof_ratings = {}
    for tag, score in collected.items():
        bucket = branch_ratings if len(tag) in (2, 3) else prof_ratings
        bucket[tag] = score

    # Re-centre both groups so that only above-average entries survive.
    branch_ratings = convert_ratings(branch_ratings)
    prof_ratings = convert_ratings(prof_ratings)

    # Recommending two courses from the single best branch would be fragile:
    # the rated courses are random and there are few tags. Instead every
    # combination of two branches is scored, so that the best set for the
    # user is among the proposals. Each unordered pair appears once, in the
    # order in which the branches are first seen.
    pair_scores = {}
    branches = list(branch_ratings.items())
    for position, (first, first_score) in enumerate(branches):
        for second, second_score in branches[position:]:
            pair_scores[str(first) + "," + str(second)] = first_score + second_score
    return predicted_results(pair_scores, prof_ratings)
    
def convert_to_perc(my_list):
    """Convert the values of ``my_list`` to percentages of their total, in place.

    Parameters
    ----------
    my_list : list of numbers
        Values to rescale; the list object itself is updated and returned.

    Returns
    -------
    list
        ``my_list``, now holding ``value / total * 100`` for every entry.
        An empty list is returned unchanged. When the total is zero there is
        no preference to express, so every entry gets the same share
        (``100 / len(my_list)``) instead of raising ZeroDivisionError or
        producing NaN.
    """
    if not my_list:
        return my_list
    total = sum(my_list)
    if total == 0:
        equal_share = 100 / len(my_list)
        my_list[:] = [equal_share] * len(my_list)
    else:
        my_list[:] = [(value / total) * 100 for value in my_list]
    return my_list

def predicted_results(temp_dict, prof_dict):
    """Pick elective courses for every branch pair and score each proposal.

    Parameters
    ----------
    temp_dict : dict
        Maps a comma-joined branch pair, e.g. 'ES,EE' or 'ES,ES', to the
        rating of that pair.
    prof_dict : dict
        Maps a professor tag to the rating attributed to that professor.

    Returns
    -------
    tuple
        ``(df_list, r_list)``. DataFrames cannot be dict keys, so two
        parallel lists are used: ``df_list[i]`` holds the courses proposed for
        the i-th pair and ``r_list[i]`` its score after ``convert_to_perc``.

    Notes
    -----
    Courses are drawn from the module-level ``elective_courses`` frame with
    ``DataFrame.sample``, so results vary from run to run, and ``sample``
    raises ValueError when more rows are requested than are available.
    ``pd.concat`` replaces ``DataFrame.append``, which was removed in
    pandas 2.0.
    """
    df_list = []  # proposed courses, one DataFrame per pair
    r_list = []   # score of the pair at the same index
    for key, value in temp_dict.items():
        # Count how often each branch occurs in the pair,
        # e.g. 'ES,EE' -> {'ES': 1, 'EE': 1} and 'ES,ES' -> {'ES': 2}.
        unique, cts = np.unique(np.array(key.split(',')), return_counts=True)
        temp_dict2 = dict(zip(unique, cts))
        r = 0  # score accumulated for this pair
        t = 0  # 0 while temp_df has not been created yet for this pair
        for abranch, count in temp_dict2.items():
            # flag becomes 1 once a liked professor offers a course in this
            # branch; if it stays 0 all courses are drawn at random below.
            flag = 0
            r += (value * count)
            for prof, prof_rating in prof_dict.items():
                # How many electives of this branch carry the professor's tag?
                no_of_profcrs = elective_courses[elective_courses['Course Code'].str.contains(abranch)]["tags"].str.contains(prof).sum()
                if no_of_profcrs > 0 and count > 0:
                    flag = 1
                    r += prof_rating  # the professor's rating counts too
                    prof_courses = elective_courses[elective_courses['Course Code'].str.contains(abranch) & elective_courses["tags"].str.contains(prof)]
                    if no_of_profcrs > count:
                        # More courses on offer than needed: draw just enough.
                        picked = prof_courses.sample(count)
                        count = 0  # nothing more is needed from this branch
                    else:
                        # Not more than needed: take all of them.
                        picked = prof_courses.sample(no_of_profcrs)
                        count -= no_of_profcrs
                    if t == 0:
                        temp_df = picked
                        t += 1
                    else:
                        temp_df = pd.concat([temp_df, picked])

            # Course codes already chosen for this pair (empty if none yet).
            if t == 0:
                my_list = []
            else:
                my_list = get_coursecode(temp_df)
            # Fill the remaining slots with random courses of the branch,
            # redrawing until the draw does not repeat a chosen course.
            # NOTE(review): this retries forever when every possible draw of
            # ``count`` courses overlaps the chosen ones -- confirm the data
            # always leaves room.
            while True:
                if (count != 0) and (my_list != []):
                    a_df = elective_courses[elective_courses['Course Code'].str.contains(abranch)].sample(count)
                    temp_l = get_coursecode(a_df)
                    # Keep the explicit comparison: any result other than
                    # False (None included) must count as an overlap.
                    if check_intersection(temp_l, my_list) == False:
                        temp_df = pd.concat([temp_df, a_df])
                        count = 0  # all courses of this branch are chosen
                else:
                    break

            if flag == 0:
                # No liked professor teaches here: draw everything at random.
                if t == 0:
                    temp_df = elective_courses[elective_courses['Course Code'].str.contains(abranch)].sample(count)
                else:
                    temp_df = pd.concat([temp_df, elective_courses[elective_courses['Course Code'].str.contains(abranch)].sample(count)])
            t += 1
        # Record the proposal and its score for this pair.
        df_list.append(temp_df)
        r_list.append(r)
    r_list = convert_to_perc(r_list)  # convert to %
    return (df_list, r_list)

def get_coursecode(df):
    """Return the 'Course Code' value of every row of ``df`` as a list, in row order."""
    return [df['Course Code'].iloc[row] for row in range(len(df.index))]

def check_intersection(lst1, lst2):
    """Return True when ``lst1`` and ``lst2`` share at least one element, else False.

    The overlapping case used to fall through a bare ``True`` expression and
    return None; it now returns a real boolean. Callers that compare the
    result with ``== False`` behave exactly as before.
    """
    return bool(set(lst1) & set(lst2))
def hs_elective(df, n):
    """Return ``df`` with ``n`` randomly chosen HS electives added at the end.

    The HS courses are the rows of the module-level ``elective_courses``
    frame whose 'Course Code' contains "HS"; ``sample`` raises ValueError if
    fewer than ``n`` exist. ``df`` itself is not modified. ``pd.concat`` is
    used because ``DataFrame.append`` was removed in pandas 2.0.
    """
    hs_course = elective_courses[elective_courses['Course Code'].str.contains("HS")].sample(n)
    return pd.concat([df, hs_course])

Masterdata = pd.read_csv('/home/shivasankaran/datasets/courses3.csv')
# Split the data: rows whose 'Branch' is 'ELE' are electives, all others are core courses.
core_courses = Masterdata[Masterdata['Branch'] !='ELE']
elective_courses = Masterdata[Masterdata['Branch'] == 'ELE']
user_display_columns = Masterdata.columns.tolist()
display_columns = ['Course Code', 'Course Name','Name of the Instructor'] # columns shown for the recommended courses
user_Sem = int(input('Enter you current semester No:'))
user_Branch = input('Enter your branch (CL,CE,CS,MSE,EE,ME):')
recommended_courses = course_core(user_Sem,user_Branch) # initially holds only the core courses of that semester
if user_Sem == 1:
    print('NOTE: Only one of HS101,HS102,HS103,HS104 will be allocated ')
if user_Sem>4: # from semester 5 onwards an HS elective is added
    print('NOTE:HS course is chosen in random')
    recommended_courses = hs_elective(recommended_courses,1)
if user_Sem>6:    # from semester 7 onwards open electives are added
    # open_electives returns two parallel lists: the candidate course sets and their scores in percent.
    open_ele_courses,ratings = open_electives(user_Sem,user_Branch,2)
    for candidate, rating in zip(open_ele_courses, ratings):
        print("the predicted courses\n")
        # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
        print(pd.concat([recommended_courses, candidate])[display_columns])
        print("percentage that the user likes:",rating,'%\n')
else:
    print(recommended_courses[display_columns])

Enter you current semester No:7
Enter your branch (CL,CE,CS,MSE,EE,ME):CS
NOTE:HS course is chosen in random
please rate Introduction to Data Science taught by Anirban Dasgupta on a scale 1-5
3
please rate Compilers taught by Bireswar Das on a scale 1-5
3
please rate Computer Organisation and Architecture taught by Joycee Mekie on a scale 1-5
4
please rate Introduction to Computing taught by Krishna Prasad on a scale 1-5
4
please rate Physics (In two sections) taught by Baradhwaj Coleppa, Vinod Chandra on a scale 1-5
4
please rate Mathematics II taught by V. D. Sharma on a scale 1-5
4
please rate Chemistry Laboratory taught by Sudhanshu Sharma,Bhaskar Datta on a scale 1-5
2
my dict {'Anirban Dasgupta': 3, 'CS': 3.0, 'Bireswar Das': 3, 'Joycee Mekie': 4, 'ES': 4.0, 'Krishna Prasad': 4, 'Baradhwaj Coleppa': 4, ' Vinod Chandra': 4, 'PH': 4.0, 'V. D. Sharma': 4, 'MA': 4.0}
the predicted courses

    Course Code                 Course Name  Name of the Instructor
132      HS 112  Urdu Poetr