In [1]:
import pandas as analytics
import numpy as maths
import warnings
import time
warnings.filterwarnings("ignore")

In [2]:
# Load the rating history and remember whose it is. Only the first user_id in
# the file is kept (presumably the file holds a single user's ratings -- verify),
# after which the bookkeeping columns are discarded.
df_rated_movies = analytics.read_csv('rated_movies.csv')
user_id = df_rated_movies['user_id'].iloc[0]
df_rated_movies = df_rated_movies.drop(columns = ['user_id', 'timestamp'])
df_rated_movies

Unnamed: 0,movie_id,movie_title,rating,Action,Adventure,Animation,Children's,Comedy,Crime,Documentary,...,Fantasy,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,1,Toy Story (1995),1,0,0,1,1,1,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2,GoldenEye (1995),1,1,1,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
2,3,Four Rooms (1995),1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
3,4,Get Shorty (1995),0,1,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
4,7,Twelve Monkeys (1995),1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
535,1480,Herbie Rides Again (1974),0,0,1,0,1,1,0,0,...,0,0,0,0,0,0,0,0,0,0
536,1490,Fausto (1993),0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
537,1518,Losing Isaiah (1995),1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
538,1521,Mr. Wonderful (1993),0,0,0,0,0,1,0,0,...,0,0,0,0,0,1,0,0,0,0


In [3]:
# Every column other than the id, title and rating is a genre indicator.
genres = df_rated_movies.drop(columns = ['movie_id', 'movie_title', 'rating']).columns.tolist()

# Rank genres by how many of the rated movies carry them and keep the top two.
genre_totals = df_rated_movies[genres].sum().sort_values(ascending = False)
selected_genres = genre_totals.index[:2].tolist()
print("Most Popular user #",user_id," are ",selected_genres,"\n")

# Fold membership in the two selected genres into a single label:
# 2 * (first genre flag) + (second genre flag).
df_rated_movies['class'] = (
    2 * df_rated_movies[selected_genres[0]] + df_rated_movies[selected_genres[1]]
)

# One row per class label, showing which combination of the two genres it encodes.
class_mapping = (
    df_rated_movies[selected_genres + ['class']]
    .drop_duplicates()
    .sort_values('class')
    .set_index('class')
)
classes = class_mapping.index.tolist()

# From here on only the id, the rating and the class label are needed.
selected_columns = ['movie_id','rating','class']
df_rated_movies = df_rated_movies[selected_columns]
df_rated_movies

Most Popular user # 450  are  ['Drama', 'Comedy'] 



Unnamed: 0,movie_id,rating,class
0,1,1,1
1,2,1,0
2,3,1,0
3,4,0,3
4,7,1,2
...,...,...,...
535,1480,0,1
536,1490,0,1
537,1518,1,2
538,1521,0,1


In [4]:
# Size of each recommendation list: generate() returns N candidate movies and
# every enumerated state vector sums to N.
N = 3

In [5]:
# Movies the user rated positively (rating > 0), as an independent copy so later
# changes to df_rated_movies do not leak into it.
df_interested = df_rated_movies.loc[df_rated_movies['rating'] > 0].copy()
df_interested

Unnamed: 0,movie_id,rating,class
0,1,1,1
1,2,1,0
2,3,1,0
4,7,1,2
5,10,1,2
...,...,...,...
529,1425,1,3
530,1435,1,1
532,1444,1,1
533,1446,1,1


In [6]:
def feedback(P,feedback_values, t):
    """Record whether the recommendation list ``P`` contains a liked movie.

    This is a simplified feedback signal: 1 if at least one row of ``P`` has a
    positive ``rating``, otherwise 0.

    Parameters
    ----------
    P : DataFrame
        Recommended movies; must have a ``rating`` column.
    feedback_values : list
        Feedback history. It is modified in place.
    t : int
        Position at which the new value is inserted; entries already at or
        after ``t`` are shifted one place to the right.

    Returns
    -------
    list
        The same ``feedback_values`` object that was passed in.
    """
    liked_any = (P['rating'] > 0).any()
    feedback_values.insert(t, 1 if liked_any else 0)
    return feedback_values

def generate(state,old_movies):
    """Sample a recommendation list of ``N`` movies for the given state.

    Movies listed in ``old_movies`` (the ids returned by the previous call) are
    excluded from sampling. The ``mask`` column of ``df_rated_movies`` is
    rebuilt from ``old_movies`` on every call: 1 = eligible, 0 = excluded.
    The previous implementation tried to maintain the mask with chained
    indexing (``df[cond]['mask'] = ...``), which assigns to a temporary copy
    and therefore never changed anything.

    Parameters
    ----------
    state : Series
        Indexed by class label. For each label, ``int(state[label]) + 1``
        eligible movies of that class are sampled.
    old_movies : list
        Movie ids to exclude from this draw.

    Returns
    -------
    (DataFrame, list)
        ``df_candidates`` -- ``N`` rows of ``df_rated_movies`` without the
        ``mask`` column; ``movie_ids`` -- every id sampled in this call (this
        can be longer than ``N``), to be passed back as ``old_movies``.

    Notes
    -----
    ``DataFrame.sample`` raises ``ValueError`` if a class has fewer eligible
    movies than requested.
    """
    # Derive eligibility purely from the argument so repeated "what-if" calls
    # with the same old_movies cannot shrink the pool.
    df_rated_movies['mask'] = (~df_rated_movies['movie_id'].isin(old_movies)).astype(int)
    available = df_rated_movies[df_rated_movies['mask'] == 1]

    movie_ids = []
    for _class in state.index :
        in_class = available[available['class'] == _class]
        picked = in_class.sample(n = int(state[_class]) + 1)
        movie_ids = movie_ids + picked['movie_id'].to_list()

    # Top up with arbitrary eligible movies if the per-class draws fell short.
    if len(movie_ids) < N :
        diff = N - len(movie_ids)
        leftovers = available[~available['movie_id'].isin(movie_ids)]
        movie_ids = movie_ids + leftovers.sample(n = diff)['movie_id'].tolist()

    df_candidates = available[available['movie_id'].isin(movie_ids)]

    if maths.random.random() > 0.08 :
        # exploitation: keep the first N candidates in frame order
        df_candidates = df_candidates.iloc[:N]
    else :
        # exploration: keep a random N of the candidates
        df_candidates = df_candidates.sample(n = N)

    df_candidates = df_candidates.drop('mask',axis = 1)
    return df_candidates,movie_ids
    

def find_rewards(values,rewards, feedback_value, t=None):
    """Append the discounted reward for one step and the running total.

    Parameters
    ----------
    values : list
        Discounted per-step rewards so far; extended in place.
    rewards : list
        Running totals so far; extended in place.
    feedback_value : number
        Feedback for this step; a positive value earns 1, anything else -0.2.
    t : int, optional
        Step index used as the discount exponent. Defaults to ``len(values)``,
        i.e. the number of steps already recorded, so the function no longer
        depends on a module-level ``t`` being defined and in sync.

    Returns
    -------
    (list, list)
        The same ``values`` and ``rewards`` objects that were passed in.
    """
    gamma = 0.2
    if t is None :
        t = len(values)
    value = 1 if feedback_value > 0 else -0.2
    values.append(value * gamma ** t)
    # NOTE(review): the slice stops before the entry just appended, so the
    # running total lags one step behind -- confirm this is intended.
    rewards.append(sum(values[:t]))
    return values, rewards
    

def transition(P,old_state):
    """Blend the classes of the liked movies in ``P`` into the previous state.

    The new state is an exponentially smoothed mix: ``alpha`` times the
    per-class count of liked movies in ``P`` plus ``1 - alpha`` times
    ``old_state``, rescaled to sum to ``N`` and rounded.

    Parameters
    ----------
    P : DataFrame
        Last recommendation list; needs ``rating`` and ``class`` columns.
    old_state : Series
        Previous state, indexed by class label.

    Returns
    -------
    Series
        The rounded new state, or ``old_state`` unchanged when the blend is
        empty or rounding leaves fewer than ``N`` items.
    """
    alpha = 0.6
    liked_counts = P.loc[P['rating'] > 0, 'class'].value_counts()
    # A plain `+` yields NaN for any class missing from one side, which the old
    # `.fillna(0)` then turned into 0 -- discarding the old_state share for
    # every class without a liked movie. fill_value=0 keeps that share.
    blended = (liked_counts * alpha).add(old_state * (1 - alpha), fill_value = 0)

    total = blended.sum()
    if total <= 0 :
        # nothing to normalise by; keep the previous state
        return old_state

    next_state = (blended / total * N).round()
    if next_state.sum() < N :
        next_state = old_state

    return next_state

def restart():
    """Reset the simulation and produce the first recommendation list.

    Resets the module-level step counter ``t`` to 0 (previously ``t = 0`` here
    only created a local, leaving the counter used by the simulation loops
    untouched), marks every movie in ``df_rated_movies`` as eligible, and
    draws an initial list from the class distribution of the liked movies.

    Returns
    -------
    tuple
        ``(rewards, values, feedback_values, old_movies, states,
        initial_state, P)`` where ``feedback_values`` starts as ``[0]``,
        ``states`` holds only ``initial_state`` and ``P`` is the first
        recommendation list.
    """
    global t
    t = 0
    df_rated_movies['mask'] = 1
    rewards = []
    values = []
    feedback_values = [0]
    old_movies = []
    states = []

    # State: expected number of items per class in a list of N, taken from the
    # class proportions of the liked movies (not necessarily whole numbers).
    initial_state = df_interested['class'].value_counts(normalize = True)*N
    # Action: the concrete list of movies drawn for that state.
    P , old_movies = generate(initial_state, old_movies)
    states.append(initial_state)

    values , rewards = find_rewards(values , rewards, feedback_values[t])
    return rewards, values, feedback_values, old_movies, states, initial_state, P

In [7]:
# Enumerate every candidate state: four per-class counts, each in 0..N, that
# add up to N. Four cross joins of 0..N give all combinations; the sum filter
# keeps the valid ones.
df_raw = analytics.DataFrame(data = [maths.arange(0,N+1)]).T
df_merge = analytics.merge(df_raw, df_raw, how = 'cross',suffixes=('_1','_2'))
df_merge = analytics.merge(df_merge, df_raw, how = 'cross',suffixes=('_x','_y'))
df_merge = analytics.merge(df_merge, df_raw, how = 'cross')
df_merge['sum'] = df_merge.sum(axis = 1)
df_all_states = df_merge[df_merge['sum'] == N].drop('sum',axis = 1).reset_index(drop = True)
# Label the columns 0..k-1 from the actual column count. The previous
# arange(N+1) only matched the four columns because N happens to be 3.
df_all_states.columns = list(range(df_all_states.shape[1]))
all_states = [df_all_states.iloc[i].astype('float64') for i in range(len(df_all_states))]

# Policy Iteration

In [8]:
# Fresh run: zero the step counter, clear the recommendation log and reset the
# simulator before the policy loop below.
t = 0
recommendations = []
(rewards, values, feedback_values,
 old_movies, states, initial_state, P) = restart()

print(initial_state)

print("Rewards :", rewards[-1])

class
2    1.293651
0    0.825397
1    0.746032
3    0.134921
Name: proportion, dtype: float64
Rewards : 0


In [9]:
# Simulate with the smoothed-transition policy: each step derives the next
# state from the previous list, draws a new list, and scores it.
while t <= len(df_interested) :
    t = t + 1
    state = transition(P,states[t-1])
    states.append(state)
    P , old_movies = generate(state,old_movies)
    # Record feedback once per step, for the list just generated. The earlier
    # extra call on the previous list only left a stale entry behind index t.
    feedback_values = feedback(P,feedback_values,t)
    recommendations.append(P)
    values , rewards = find_rewards(values , rewards, feedback_values[t])


# Keep this run's results under their own names before the next experiment.
states_policy = states
rewards_policy = rewards
recommendations_policy = recommendations

print("Rewards :",rewards[-1])

Rewards : 0.049615999999999987


# Value Iteration

In [10]:
# Start the second experiment from a clean simulator state.
t = 0
(rewards, values, feedback_values,
 old_movies, states, initial_state, P) = restart()
print(initial_state)
recommendations = list()

print("Rewards :", rewards[-1])

class
2    1.293651
0    0.825397
1    0.746032
3    0.134921
Name: proportion, dtype: float64
Rewards : 0


In [11]:
# Greedy search over all enumerated states: at each step, try every state,
# score the list it would produce, and commit to the best-scoring state.
gamma = 0.2   # discount factor used to score candidate states
while t <= len(df_interested) :
    t = t + 1
    accumulate_temp_rewards = []

    for temp_state in all_states :
        temp_P , temp_old_movies = generate(temp_state,old_movies)
        # Score the hypothetical list on a scratch copy so trial feedback is
        # never written into the real feedback history.
        temp_feedback_values = feedback(temp_P, list(feedback_values), t)
        immediate = 1 if temp_feedback_values[t] > 0 else -0.2
        accumulate_temp_rewards.append(immediate * gamma ** t)

    # argmax returns the first best entry, so ties go to the earliest state.
    state = all_states[maths.argmax(accumulate_temp_rewards)]
    states.append(state)
    P , old_movies = generate(state,old_movies)
    # Record feedback once per step, for the list actually recommended.
    feedback_values = feedback(P,feedback_values,t)
    recommendations.append(P)
    values , rewards = find_rewards(values , rewards, feedback_values[t])

# Keep this run's results under their own names.
states_values = states
rewards_values = rewards
recommendations_values = recommendations

print("Rewards :",rewards[-1])

Rewards : 0.04999999901695999
