In [1]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.io import loadmat
import pandas as pd

In [2]:
# Load Y and R

mat = loadmat("ex8_songs.mat")

Y = mat["Y"]    # 1682 X 943 matrix, containing ratings (1-5) of 1682 songs on 943 user

R = mat["R"]   # 1682 X 943 matrix, R(i,j) = 1 if and only if user j give rating to song i

# Confirm the shapes of Y and R

print(Y.shape)
print(R.shape)

(1682, 943)
(1682, 943)


In [3]:
def  cofiCostFunc(params, Y, R, num_users, num_songs, num_features, Lambda):
    """
    Returns the cost and gradient for the collaborative filtering problem
    
    Lambda - regularization parameter
    
    """
        
    # Unfold the params
    X = params[:num_songs*num_features].reshape(num_songs,num_features)
    Theta = params[num_songs*num_features:].reshape(num_users,num_features)
    
    predictions =  X @ Theta.T
    err = (predictions - Y)
    J = 1/2 * np.sum((err**2) * R)
    
    #compute regularized cost function
    reg_X =  Lambda/2 * np.sum(Theta**2)
    reg_Theta = Lambda/2 *np.sum(X**2)
    reg_J = J + reg_X + reg_Theta
    
    # Compute gradient
    X_grad = err*R @ Theta
    Theta_grad = (err*R).T @ X
    grad = np.append(X_grad.flatten(),Theta_grad.flatten())
    
    # Compute regularized gradient
    reg_X_grad = X_grad + Lambda*X
    reg_Theta_grad = Theta_grad + Lambda*Theta
    reg_grad = np.append(reg_X_grad.flatten(),reg_Theta_grad.flatten())
    
    return J, grad, reg_J, reg_grad

In [5]:
#Load X and Theta from ex8_songParams.mat and print their dimensions. 

mat2 = loadmat("ex8_songParams.mat")

X = mat2["X"]  #  (num_songs X num_features)

Theta = mat2["Theta"] #  (num_users X num_features) 

#How many features have the songs ?
print(X.shape[1])
print(X.shape)
print(Theta.shape)

10
(1682, 10)
(943, 10)


In [35]:
# load the songs titles list
songsList = open("top10s.csv","r",encoding = "ISO-8859-1").read().split("\n")[:-1]

#How many songs are collected ?
num_songs = len(songsList)
# see the songs list
songsList = songsList[1:]
songsList

['1,"Hey, Soul Sister",',
 '2,Love The Way You Lie,',
 '3,TiK ToK,',
 '4,Bad Romance,',
 '5,Just the Way You Are,',
 '6,Baby,',
 '7,Dynamite,',
 '8,Secrets,',
 '9,Empire State of Mind (Part II) Broken Down,',
 '10,Only Girl (In The World),',
 "11,Club Can't Handle Me (feat. David Guetta),",
 '12,Marry You,',
 '13,Cooler Than Me - Single Mix,',
 '14,Telephone,',
 '15,Like A G6,',
 '16,OMG (feat. will.i.am),',
 '17,Eenie Meenie,',
 '18,The Time (Dirty Bit),',
 '19,Alejandro,',
 '20,Your Love Is My Drug,',
 '21,Meet Me Halfway,',
 '22,Whataya Want from Me,',
 '23,Take It Off,',
 '24,Misery,',
 '25,All The Right Moves,',
 '26,Animal,',
 '27,Naturally,',
 '28,I Like It,',
 '29,Teenage Dream,',
 '30,California Gurls,',
 '31,3,',
 '32,My First Kiss - feat. Ke$ha,',
 '33,Blah Blah Blah (feat. 3OH!3),',
 '34,Imma Be,',
 '35,Try Sleeping with a Broken Heart,',
 '36,Sexy Bitch (feat. Akon),',
 '37,Bound To You - Burlesque Original Motion Picture Soundtrack,',
 '38,If I Had You,',
 '39,Rock That B

In [16]:
# Initialize my ratings
my_ratings = np.zeros((num_songs,1))

# Create own ratings
my_ratings[0] = 4 
my_ratings[52] = 2
my_ratings[12] = 4
my_ratings[98]= 5
my_ratings[123] = 2
my_ratings[64]= 1
my_ratings[149]= 5
my_ratings[248] = 3
my_ratings[82]= 4
my_ratings[225] = 5
my_ratings[354]= 5

print("New user ratings:\n")
for i in range(len(my_ratings)):
    if my_ratings[i]>0:
        print("Rated",int(my_ratings[i]),"for ",songsList[i])

Y = np.hstack((my_ratings,Y))
R = np.hstack((my_ratings!=0,R))

#Confirm that the number of users increased by one
Y.shape

New user ratings:

Rated 4 for  1,"Hey, Soul Sister",
Rated 4 for  13,Cooler Than Me - Single Mix,
Rated 2 for  53,Someone Like You,
Rated 1 for  65,Marry You,
Rated 4 for  83,Judas,
Rated 5 for  99,Raise Your Glass,
Rated 2 for  124,Boyfriend,
Rated 5 for  150,Impossible,
Rated 5 for  226,Latch,
Rated 3 for  249,Shot Me Down (feat. Skylar Grey) - Radio Edit,
Rated 5 for  355,Time of Our Lives,


(1682, 944)

In [17]:
def normalizeRatings(Y, R):
    """
    normalized Y so that each movie has a rating of 0 on average, and returns the 
    mean rating in Ymean.
    """
  
    m = Y.shape[0]  # number of songs
    n = Y.shape[1] # number of users
    
    #Inicialize Ymean and Ynorm as column vectors of 0 with m elements 
    Ymean = np.zeros((m,1))
    Ynorm = np.zeros((m,n))
    
    for i in range(m):
        Ymean[i] = np.sum(Y[i,:])/np.count_nonzero(R[i,:])
        Ynorm[i,R[i,:]==1] = Y[i,R[i,:]==1] - Ymean[i]
        
    return Ynorm, Ymean

In [18]:
def gradientDescent(initial_parameters,Y,R,num_users,num_songs,num_features,alpha,num_iters,Lambda):
    """
    alpha - learning rate
    Optimize X and Theta
    """
    # unfold the initial parameters (consult function cofiCostFunc)
    X = initial_parameters[:num_songs*num_features].reshape(num_songs,num_features)
    Theta = initial_parameters[num_songs*num_features:].reshape(num_users,num_features)
    
    J_history =[]
    
    for i in range(num_iters):
        #Append into a single vector params X and Theta (see the code above)
        params = np.append(X.flatten(),Theta.flatten())
        cost, grad = cofiCostFunc(params, Y, R, num_users, num_songs, num_features, Lambda)[2:]
        
        # unfold grad
        X_grad = grad[:num_songs*num_features].reshape(num_songs,num_features)
        Theta_grad = grad[num_songs*num_features:].reshape(num_users,num_features)
        
    #Update the trainable parameters X and Theta with the classical gradient descent method
        X = X - (alpha * X_grad)
        
        Theta = Theta - (alpha * Theta_grad)
        
        J_history.append(cost)
    
    #Append into a single vector paramsFinal the updated X and Theta
    paramsFinal = np.append(X.flatten(),Theta.flatten())
    return paramsFinal , J_history

In [19]:
# Normalize Ratings
Ynorm, Ymean = normalizeRatings(Y, R)
# number of users
num_users = Ynorm.shape[1]

# number of songs
num_songs = Ynorm.shape[0]

num_features = 10

# Generate randomly initial values for the matrices X and Theta 
#(use for example np.random.randn())
X = np.random.randn(num_songs*num_features)
Theta = np.random.randn(num_users*num_features)

#Append into a single vector params X and Theta 
initial_parameters = np.append(X.flatten(),Theta.flatten())

Lambda = 10
num_iters=400
alpha=0.001

# Optimize parameters using Gradient Descent, use the normalized Ynorm
paramsFinal, J_history = gradientDescent(initial_parameters,Y,R,num_users,num_songs,num_features,alpha,num_iters,Lambda)

In [40]:
#unfold paramsFinal (consult function cofiCostFunc)
X = paramsFinal[:num_songs*num_features].reshape(num_songs,num_features)
Theta = paramsFinal[num_songs*num_features:].reshape(num_users,num_features)

# Predict all ratings of num_users for num_songs
p = X @ Theta.T

# Extract from p only the recomendations for the added user (the first one)
# Reshape because of rank one problem 

my_scores = p[:,0]

my_predictions = my_scores + Ymean


df = pd.DataFrame(np.column_stack((my_predictions[1:],np.array(songsList))))

df.sort_values(by=[0],ascending=False,inplace=True)
df.reset_index(drop=True,inplace=True)

#print(df[:10][:])
#Extract only the top 10 recommented songs for the added user 
#df = pd.DataFrame(np.column_stack((p[:,:10] + Ymean,np.array(songsList))))

df.sort_values(by=[0],ascending=False,inplace=True)
#df.reset_index(drop=True,inplace=True)
print(df.shape)
print(df[:10][:])

(1681, 1683)
                0                  1                  2                 3     \
0  8.456325232829032  7.599675282339669  7.392680027790799  7.76093308277415   
1  8.456325232829032  7.599675282339669  7.392680027790799  7.76093308277415   
2  8.456325232829032  7.599675282339669  7.392680027790799  7.76093308277415   
3  8.456325232829032  7.599675282339669  7.392680027790799  7.76093308277415   
4  8.456325232829032  7.599675282339669  7.392680027790799  7.76093308277415   
5  8.456325232829032  7.599675282339669  7.392680027790799  7.76093308277415   
6  8.456325232829032  7.599675282339669  7.392680027790799  7.76093308277415   
7  8.456325232829032  7.599675282339669  7.392680027790799  7.76093308277415   
8  8.456325232829032  7.599675282339669  7.392680027790799  7.76093308277415   
9  8.456325232829032  7.599675282339669  7.392680027790799  7.76093308277415   

                4                   5                  6                 7     \
0  7.582280730663502  7.4