In [1]:
import pandas as analytics
import numpy as maths
import warnings
import time
warnings.filterwarnings("ignore")

In [2]:
df_rated_movies = analytics.read_csv('rated_movies.csv')
user_id = df_rated_movies['user_id'].unique()[0]
df_rated_movies = df_rated_movies.drop(['user_id','timestamp'],axis = 1)
df_rated_movies

Unnamed: 0,movie_id,movie_title,rating,Action,Adventure,Animation,Children's,Comedy,Crime,Documentary,...,Fantasy,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,1,Toy Story (1995),1,0,0,1,1,1,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2,GoldenEye (1995),1,1,1,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
2,3,Four Rooms (1995),1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
3,4,Get Shorty (1995),0,1,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
4,7,Twelve Monkeys (1995),1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
535,1480,Herbie Rides Again (1974),0,0,1,0,1,1,0,0,...,0,0,0,0,0,0,0,0,0,0
536,1490,Fausto (1993),0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
537,1518,Losing Isaiah (1995),1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
538,1521,Mr. Wonderful (1993),0,0,0,0,0,1,0,0,...,0,0,0,0,0,1,0,0,0,0


In [3]:
genres = list(df_rated_movies.drop(['movie_id','movie_title','rating'],axis = 1).columns)
selected_genres = df_rated_movies[genres].sum().sort_values(ascending = False).reset_index(drop = False).iloc[:2]['index'].tolist()
print("Most Popular user #",user_id," are ",selected_genres,"\n")

df_rated_movies['class'] = df_rated_movies[selected_genres[0]] * 2 + df_rated_movies[selected_genres[1]]
class_mapping = df_rated_movies[selected_genres + ['class']].drop_duplicates().sort_values('class').set_index('class')    # mapping of drama ,comedy movie to respective class
classes = class_mapping.index.tolist()

selected_columns = ['movie_id','rating','class']
df_rated_movies = df_rated_movies[selected_columns]
df_rated_movies

Most Popular user # 450  are  ['Drama', 'Comedy'] 



Unnamed: 0,movie_id,rating,class
0,1,1,1
1,2,1,0
2,3,1,0
3,4,0,3
4,7,1,2
...,...,...,...
535,1480,0,1
536,1490,0,1
537,1518,1,2
538,1521,0,1


In [17]:
N = 3

In [18]:
df_interested = df_rated_movies.copy()
df_interested['F'] = df_interested['rating'].apply(lambda x : 1 if x > 0 else 0)
df_interested = df_interested[df_interested['F'] > 0].drop('F',axis=1)
df_interested

Unnamed: 0,movie_id,rating,class,mask
0,1,1,1,1
1,2,1,0,1
2,3,1,0,1
4,7,1,2,1
5,10,1,2,1
...,...,...,...,...
529,1425,1,3,1
530,1435,1,1,1
532,1444,1,1,1
533,1446,1,1,1


In [19]:

def feedback(P,feedback_values, t):                      # instead of actual fut definition, its changed a bit, i.e. if in the selected list there is a movie user likes then it is feedback is 1 else 0
    value = 0
    if (P['rating'] > 0).any() :
        value = 1
    feedback_values.insert(t,value)
    return feedback_values
    

In [20]:
def generate(state,old_movies): 
    movie_ids = []
    for _class in state.index : 
        df_temp = df_rated_movies[(df_rated_movies['mask'] == 1)][df_rated_movies['class'] == _class].sample(n = int(state[_class])+1)
        movie_ids = movie_ids + df_temp['movie_id'].to_list()
        
    if len(movie_ids) < N : 
        diff = N - len(movie_ids)
        additional_ids = df_rated_movies[df_rated_movies['mask'] == 1][~df_rated_movies['movie_id'].isin(movie_ids)].sample(n = diff)['movie_id'].tolist()
        movie_ids = movie_ids + additional_ids
    df_candidates = df_rated_movies[df_rated_movies['movie_id'].isin(movie_ids)]
   
    if maths.random.random() > 0.08 :                        # exploitation
        df_candidates = df_candidates.iloc[:N]
    else : df_candidates = df_candidates.sample(n = N)      # exploration
    df_candidates = df_candidates.drop('mask',axis = 1) 
    df_interested[df_interested['movie_id'].isin(movie_ids)]['mask'] = 0
    df_rated_movies[df_rated_movies['movie_id'].isin(old_movies)]['mask'] = 1
    return df_candidates,movie_ids
    

In [21]:
def find_rewards(values,rewards, feedback_value):
    gamma = 0.2
    if feedback_value > 0 : value = 1 
    else : value = -0.2  
    values.append(value*gamma**t)
    rewards.append(sum(values[:t]))
    return values, rewards
    

In [22]:
def transition(P,old_state):                              #equivalent to the RNN function. So it is the most complex and challenging function
    alpha = 0.6
    next_state = (P[P['rating'] > 0]['class'].value_counts()*alpha + old_state * (1-alpha)).fillna(0)
    next_state = next_state / next_state.sum()
    next_state = next_state * N
    if next_state.sum() < N :
        next_state = old_state
    # print(next_state)
    
    return next_state

In [23]:
df_rated_movies.groupby('class').agg({'rating':lambda x:x.sum()})

Unnamed: 0_level_0,rating
class,Unnamed: 1_level_1
0,128
1,103
2,215
3,21


In [None]:
def restart():
    t = 0 
    df_rated_movies['mask'] = 1
    rewards = []
    values = []
    feedback_values = [0]
    old_movies = []
    states = []
    
    initial_state = df_interested['class'].value_counts(normalize = True)*N   # State Space ( discrete :-) ) [Out of 10 items , how many belong to each class is each state. That is four non-negative integers add upto 10]. 286 ways are there.
    P , old_movies = generate(initial_state, old_movies)   # Action Space ( discrete :-) )
    states.append(initial_state)
    
    values , rewards = find_rewards(values , rewards, feedback_values[t])
    return rewards, values, feedback_values, old_movies, states, initial_state, P

In [103]:
rewards, values, feedback_values, old_movies, states, initial_state, P = rewards, values, feedback_values, old_movies, states, initial_state, P
print(initial_state)

print("Rewards :",rewards[-1])

class
2    1.293651
0    0.825397
1    0.746032
3    0.134921
Name: proportion, dtype: float64
Rewards : 0


# Value Iteration

In [11]:
def transition(P,old_state):
    next_state = P[P['rating'] > 0]['class'].value_counts(normalize = True)
    next_state = next_state * N
    if next_state.sum() < N : next_state = old_state
    
    return next_state

# Policy Iteration

In [26]:
while t <= len(df_interested) :
    t = t + 1
    feedback_values = feedback(P,feedback_values,t)
    state = transition(P,states[t-1])
    states.append(state)
    P , old_movies = generate(state,old_movies) 
    feedback_values = feedback(P,feedback_values,t)
    
    values , rewards = find_rewards(values , rewards, feedback_values[t])


print("Rewards :",rewards[-1])
# print(state)

Rewards : 0.04999999999999999


In [14]:
states

[class
 2    4.312169
 0    2.751323
 1    2.486772
 3    0.449735
 Name: proportion, dtype: float64,
 class
 0    3.364842
 1    1.849988
 2    4.785171
 3    0.000000
 dtype: float64,
 class
 0    3.364842
 1    1.849988
 2    4.785171
 3    0.000000
 dtype: float64,
 class
 0    3.985039
 1    2.063825
 2    3.951136
 3    0.000000
 dtype: float64,
 class
 0    4.139043
 1    1.738451
 2    3.390798
 3    0.731707
 dtype: float64,
 class
 0    4.139043
 1    1.738451
 2    3.390798
 3    0.731707
 dtype: float64,
 class
 0    4.214168
 1    1.579732
 2    3.117463
 3    1.088638
 dtype: float64,
 class
 0    0.000000
 1    4.281246
 2    5.718754
 3    0.000000
 dtype: float64,
 class
 0    0.638298
 1    3.736700
 2    5.625002
 3    0.000000
 dtype: float64,
 class
 0    0.909914
 1    2.866681
 2    5.585107
 3    0.638298
 dtype: float64,
 class
 0    0.000000
 1    2.229026
 2    6.679454
 3    1.091520
 dtype: float64,
 class
 0    0.681818
 1    1.695012
 2    6.445206
 3    

In [15]:
sum(feedback_values)/len(feedback_values)

0.9986824769433466

In [16]:
rewards

[0,
 -0.2,
 0.0,
 0.04000000000000001,
 0.04800000000000001,
 0.049600000000000005,
 0.049920000000000006,
 0.04998400000000001,
 0.04999680000000001,
 0.04999936000000001,
 0.04999987200000001,
 0.04999997440000001,
 0.04999999488000001,
 0.04999999897600001,
 0.049999999795200006,
 0.049999999959040003,
 0.049999999991808,
 0.0499999999983616,
 0.04999999999967232,
 0.049999999999934465,
 0.049999999999986895,
 0.04999999999999738,
 0.049999999999999475,
 0.04999999999999989,
 0.049999999999999975,
 0.04999999999999999,
 0.04999999999999999,
 0.04999999999999999,
 0.04999999999999999,
 0.04999999999999999,
 0.04999999999999999,
 0.04999999999999999,
 0.04999999999999999,
 0.04999999999999999,
 0.04999999999999999,
 0.04999999999999999,
 0.04999999999999999,
 0.04999999999999999,
 0.04999999999999999,
 0.04999999999999999,
 0.04999999999999999,
 0.04999999999999999,
 0.04999999999999999,
 0.04999999999999999,
 0.04999999999999999,
 0.04999999999999999,
 0.04999999999999999,
 0.0499999