In [None]:
# Minibatch Gradient Descent

import numpy as np
import pandas as pd

# Import files
filename = 'data/train.csv'
data = pd.read_csv(filename)

n_movies = 10000
n_users = 10000

# Unknown parameters of the model: rating ~ mu + a[userId] + b[movieId]
a = np.zeros(n_users)  # User variations
b = np.zeros(n_movies) # Movie variations
mu = np.mean(data['rating'])

# Gradient Descent Hyperparameters
n_epochs = 10
batch_size = 1024
eta = 0.001
predictions = np.zeros(len(data))
# Always at least one batch, so data shorter than batch_size is still used
# instead of producing an empty boundary list.
n_batches = max(1, len(data) // batch_size)

# Plain arrays of the three columns, so each batch is a cheap positional
# slice rather than a row-by-row iterrows() pass.
# Ids are truncated to integers because they index directly into a and b.
user_ids = data['userId'].to_numpy().astype(np.int64)
movie_ids = data['movieId'].to_numpy().astype(np.int64)
ratings = data['rating'].to_numpy(dtype=float)

# Positional batch boundaries of the form
# [0, batch_size, 2*batch_size, ..., len(data)].
# Batch k covers rows indices[k] (inclusive) to indices[k+1] (exclusive);
# the last batch absorbs the remainder when len(data) is not a multiple
# of batch_size.
indices = [batch_size * x for x in range(n_batches)]
indices.append(len(data))

# Number of batches between progress reports (about every 10% of an epoch).
# Clamped to 1 so the modulo below never divides by zero when n_batches < 10.
report_every = max(1, n_batches // 10)

# Gradient Descent Begins
for epoch in range(n_epochs):

    # Display for every epoch - epoch number and mean absolute error of the
    # current parameters over the whole data set
    print('Epoch-',(epoch+1),'/',n_epochs)
    err = np.mean(np.abs(ratings - (mu + a[user_ids] + b[movie_ids])))
    print('Error =',err)

    # For every mini-batch
    for batch_no in range(n_batches):

        # Display after every 10% completion
        if (batch_no+1) % report_every == 0:
            print('Completetion - ', np.ceil((batch_no+1)/n_batches*100), '\b%')
            # Mean absolute error over the rows predicted so far; a row
            # counts as predicted once its entry in predictions is non-zero.
            seen = predictions != 0
            print(np.mean(abs(predictions[seen] - data.loc[seen, 'rating'])))

        # Select mini-batch
        start, stop = indices[batch_no], indices[batch_no+1]
        batch_users = user_ids[start:stop]
        batch_movies = movie_ids[start:stop]

        # Predictions for the selected mini-batch using the current a and b
        predictions[start:stop] = mu + a[batch_users] + b[batch_movies]

        # Updating mu, a and b using the predictions on the current mini-batch.
        # Every row contributes eta * (rating - prediction). np.add.at
        # accumulates the contributions of ids that repeat within the batch,
        # which a plain fancy-indexed "+=" would silently drop.
        step = eta * (ratings[start:stop] - predictions[start:stop])
        mu = mu + step.sum()
        np.add.at(a, batch_users, step)
        np.add.at(b, batch_movies, step)

In [None]:
# Stochastic Gradient Descent

import numpy as np
import pandas as pd

# Import files
filename = 'data/train.csv'
data = pd.read_csv(filename)

n_movies = 10000
n_users = 10000

# Unknown parameters of the model: rating ~ mu + a[userId] + b[movieId]
a = np.zeros(n_users)  # User variations
b = np.zeros(n_movies) # Movie variations
mu = np.mean(data['rating'])

# Gradient Descent Hyperparameters
n_epochs = 10
batch_size = 1  # one row per update step
eta = 0.001
predictions = np.zeros(len(data))
# Always at least one batch, so an empty boundary list is never produced.
n_batches = max(1, len(data) // batch_size)

# Plain arrays of the three columns, so each batch is a cheap positional
# slice rather than a row-by-row iterrows() pass.
# Ids are truncated to integers because they index directly into a and b.
user_ids = data['userId'].to_numpy().astype(np.int64)
movie_ids = data['movieId'].to_numpy().astype(np.int64)
ratings = data['rating'].to_numpy(dtype=float)

# Positional batch boundaries of the form
# [0, batch_size, 2*batch_size, ..., len(data)].
# Batch k covers rows indices[k] (inclusive) to indices[k+1] (exclusive),
# so with batch_size = 1 every batch holds exactly one row.
indices = [batch_size * x for x in range(n_batches)]
indices.append(len(data))

# Number of batches between progress reports (about every 10% of an epoch).
# Clamped to 1 so the modulo below never divides by zero when n_batches < 10.
report_every = max(1, n_batches // 10)

# Gradient Descent Begins
for epoch in range(n_epochs):

    # Display for every epoch - epoch number and mean absolute error of the
    # current parameters over the whole data set
    print('Epoch-',(epoch+1),'/',n_epochs)
    err = np.mean(np.abs(ratings - (mu + a[user_ids] + b[movie_ids])))
    print('Error =',err)

    # For every batch (a single row here)
    for batch_no in range(n_batches):

        # Display after every 10% completion
        if (batch_no+1) % report_every == 0:
            print('Completetion - ', np.ceil((batch_no+1)/n_batches*100), '\b%')
            # Mean absolute error over the rows predicted so far; a row
            # counts as predicted once its entry in predictions is non-zero.
            seen = predictions != 0
            print(np.mean(abs(predictions[seen] - data.loc[seen, 'rating'])))

        # Select batch
        start, stop = indices[batch_no], indices[batch_no+1]
        batch_users = user_ids[start:stop]
        batch_movies = movie_ids[start:stop]

        # Predictions for the selected batch using the current a and b
        predictions[start:stop] = mu + a[batch_users] + b[batch_movies]

        # Updating mu, a and b using the predictions on the current batch.
        # Every row contributes eta * (rating - prediction). np.add.at
        # accumulates the contributions of ids that repeat within a batch,
        # which a plain fancy-indexed "+=" would silently drop.
        step = eta * (ratings[start:stop] - predictions[start:stop])
        mu = mu + step.sum()
        np.add.at(a, batch_users, step)
        np.add.at(b, batch_movies, step)

In [None]:
# Batch Gradient Descent

import numpy as np
import pandas as pd

# Import files
filename = 'data/train.csv'
data = pd.read_csv(filename)

n_movies = 10000
n_users = 10000

# Unknown parameters of the model: rating ~ mu + a[userId] + b[movieId]
a = np.zeros(n_users)  # User variations
b = np.zeros(n_movies) # Movie variations
mu = np.mean(data['rating'])

# Gradient Descent Hyperparameters
n_epochs = 10
batch_size = len(data)  # the whole data set is a single batch
eta = 0.001
predictions = np.zeros(len(data))
# Always at least one batch; the inner max() keeps the division defined
# when the file holds no rows.
n_batches = max(1, len(data) // max(1, batch_size))

# Plain arrays of the three columns, so each batch is a cheap positional
# slice rather than a row-by-row iterrows() pass.
# Ids are truncated to integers because they index directly into a and b.
user_ids = data['userId'].to_numpy().astype(np.int64)
movie_ids = data['movieId'].to_numpy().astype(np.int64)
ratings = data['rating'].to_numpy(dtype=float)

# Positional batch boundaries of the form
# [0, batch_size, 2*batch_size, ..., len(data)].
# Batch k covers rows indices[k] (inclusive) to indices[k+1] (exclusive).
indices = [batch_size * x for x in range(n_batches)]
indices.append(len(data))

# Number of batches between progress reports (about every 10% of an epoch).
# Clamped to 1: with a single batch, n_batches // 10 is 0 and using it as a
# modulus would raise ZeroDivisionError.
report_every = max(1, n_batches // 10)

# Gradient Descent Begins
for epoch in range(n_epochs):

    # Display for every epoch - epoch number and mean absolute error of the
    # current parameters over the whole data set
    print('Epoch-',(epoch+1),'/',n_epochs)
    err = np.mean(np.abs(ratings - (mu + a[user_ids] + b[movie_ids])))
    print('Error =',err)

    # For every batch
    for batch_no in range(n_batches):

        # Display after every 10% completion
        if (batch_no+1) % report_every == 0:
            print('Completetion - ', np.ceil((batch_no+1)/n_batches*100), '\b%')
            # Mean absolute error over the rows predicted so far; a row
            # counts as predicted once its entry in predictions is non-zero
            # (nothing is predicted yet on the very first report).
            seen = predictions != 0
            print(np.mean(abs(predictions[seen] - data.loc[seen, 'rating'])))

        # Select batch
        start, stop = indices[batch_no], indices[batch_no+1]
        batch_users = user_ids[start:stop]
        batch_movies = movie_ids[start:stop]

        # Predictions for the selected batch using the current a and b
        predictions[start:stop] = mu + a[batch_users] + b[batch_movies]

        # Updating mu, a and b using the predictions on the current batch.
        # Every row contributes eta * (rating - prediction). np.add.at
        # accumulates the contributions of ids that repeat within the batch,
        # which a plain fancy-indexed "+=" would silently drop.
        step = eta * (ratings[start:stop] - predictions[start:stop])
        mu = mu + step.sum()
        np.add.at(a, batch_users, step)
        np.add.at(b, batch_movies, step)