In [27]:
import numpy as np

def confusion_matrix(y_true, y_pred,normalize=None):
    """Computes the confusion matrix from predictions and labels.

    The matrix columns represent the real labels and the rows represent the
    prediction labels. The confusion matrix is always a 2-D array of shape `[n_labels, n_labels]`,
    where `n_labels` is the number of valid labels for a given classification task. Both
    prediction and labels must be 1-D arrays of the same shape in order for this
    function to work.

    Parameters:
        y_true: 1-D array of real labels for the classification task.
        y_pred: 1-D array of predictions for a given classification.
        normalize: One of ['true', 'pred', 'all', None], corresponding to column sum, row sum, matrix sum, or no
                   normalization.

    Returns:
        A 2-D array with shape `[n_labels, n_labels]` representing the confusion
        matrix, where `n` is the number of possible labels in the classification
        task.
    """
    x = y_true
    y = y_pred
    N_ele = np.unique(x)
    N_Len = len(N_ele)
    cm = np.zeros((N_Len,1))
    
    for i in range(N_Len):
        temp = y[x==N_ele[i]]
        emt_hist = np.zeros((N_Len,1))
        Hist = np.asarray(np.unique(temp, return_counts=True)).T
        temp_hist_r1 = Hist[:,0].reshape((len(Hist[:,0]), 1))
        temp_hist_r2 = Hist[:,1].reshape((len(Hist[:,0]), 1))
        emt_hist[temp_hist_r1[:,0]] = temp_hist_r2
        emt_hist = emt_hist.reshape((len(emt_hist), 1))
        cm = np.hstack((cm, emt_hist))

    cm = np.delete(cm, 0, axis=1)
    cm = np.transpose(cm)

    if normalize not in ['true', 'pred', 'all', None]:
        raise ValueError("normalize must be one of {'true', 'pred', 'all', None}")

    if normalize == 'true':
        cm = cm / cm.sum(axis=1, keepdims=True)
    elif normalize == 'pred':
        cm = cm / cm.sum(axis=0, keepdims=True)
    elif normalize == 'all':
        cm = cm / cm.sum()
        # TODO (TASK 1)

    return cm


def precision(y_true, y_pred):
    cm = confusion_matrix(y_true, y_pred,normalize='pred')
    p = cm.diagonal()
    return p
    # TODO (TASK 2)


def recall(y_true, y_pred):
    cm = confusion_matrix(y_true, y_pred,normalize='true')
    r = cm.diagonal()
    return r
    # TODO (TASK 2)


def false_alarm_rate(y_true, y_pred):
    cm = confusion_matrix(y_true, y_pred)
    
    FP = cm.sum(axis=0) - np.diag(cm)  
    #FN = cm.sum(axis=1) - np.diag(cm)
    TP = np.diag(cm)
    #TN = cm.sum() - (FP + FN + TP)
    
    FAR = FP/(TP+FP)
    return FAR



In [28]:
# %load datasets.py
import os
import ssl
import wget
import zipfile

import numpy as np
import pandas as pd


def download_and_prepare(name, path):
    if name == "movielens-small":
        print(f"Preparing dataset {name}...")
        # Check if data has been extracted and if not download extract it
        if (os.path.exists(os.path.join(path, "ml-latest-small"))):
            print(f"Dataset {name} already extracted.")
        else:
            print(f"Downloading dataset {name}...")
            ssl._create_default_https_context = ssl._create_unverified_context
            url = "https://files.grouplens.org/datasets/movielens/ml-latest-small.zip"
            wget.download(url, path)
            print(f"Extracting dataset {name}...")
            with zipfile.ZipFile(os.path.join(path, "ml-latest-small.zip"), 'r') as zip_ref:
                zip_ref.extractall(path)

        # Read dataset with pandas
        ratings = pd.read_csv(os.path.join(path, 'ml-latest-small', 'ratings.csv'))
        print(f"{len(ratings)} entries read.")
        r_matrix = ratings.pivot(index='userId', columns='movieId', values='rating').fillna(0)

        return np.array(r_matrix) # for performance reasons we only take every 2nd element along each axis

    else:
        raise ValueError



In [29]:
X = download_and_prepare('movielens-small', 'D:/SEM 2/DPR/Programming/')

Preparing dataset movielens-small...
Dataset movielens-small already extracted.
100836 entries read.


In [30]:
# %load recommender_system.py
import numpy as np

class MatrixFactorization(object):
    """Matrix factorization for movie recommendations.

    Parameters:
        R (ndarray): ratings matrix (0 for no ratings, 1-5 if a rating exists)
        factors (int): number of factors for matrix factorization
        steps (int): number of steps to perform during training
        lr (float): learning rate
    """
    def __init__(self, R, factors=5, steps=1000, lr=1e-4):
        self.R = R
        self.factors = factors
        self.steps = steps
        self.lr = lr

        # Generate mask for known entries (non-zero elements), split the mask into a train and test mask
        self.mask = np.ones(R.shape)# TODO (TASK 1)
        self.split = # TODO (TASK 1)
        self.mask_train = # TODO (TASK 1)
        self.mask_test = # TODO (TASK 1)
        print(f"Known entries: {self.mask.sum()}, {self.mask_train.sum()} used for training and {self.mask_test.sum()} used for testing.")

        # Initialize low-rank user and movie matrix uniformly between 0 and 1
        self.U = np.random.rand(self.R.shape[0], self.factors).astype(dtype='float32')
        self.V = np.random.rand(self.R.shape[1], self.factors).astype(dtype='float32')

        # Compute total amount of parameters that have to be estimated
        total_parameters = self.U.reshape(-1).size + self.V.reshape(-1).size
        print(f"User matrix shape: {self.U.shape}, movie matrix shape: {self.V.shape}, total parameters: {total_parameters}")

    def gradient_user_matrix(self, error):
        return # TODO (TASK 2)

    def gradient_movie_matrix(self, error):
        return # TODO (TASK 2)

    def update_user_matrix(self, u_grad):
        self.U = # TODO (TASK 3)

    def update_movie_matrix(self, v_grad):
        self.V = # TODO (TASK 3)

    def rmse(self, split='all'):
        if split == 'train':
            rmse = np.sqrt(np.sum(self.mask_train * (self.R - np.matmul(self.U, self.V.T)) ** 2) / np.sum(self.mask_train))
        elif split == 'test':
            rmse = np.sqrt(np.sum(self.mask_test * (self.R - np.matmul(self.U, self.V.T)) ** 2) / np.sum(self.mask_test))
        else:
            rmse = np.sqrt(np.sum(self.mask * (self.R - np.matmul(self.U, self.V.T)) ** 2) / np.sum(self.mask))
        return rmse

    def fit(self):
        for i in range(self.steps):

            if i % 100 == 0:
                print(f"Step {i}/{self.steps}, RMSE (train): {self.rmse('train'):.4f}, RMSE (test): {self.rmse('test'):.4f}")

            error = # TODO (TASK 2) # Compute the error outside the gradient computation, so we don't have to do it twice
            u_grad = self.gradient_user_matrix(error)
            v_grad = self.gradient_movie_matrix(error)
            self.update_user_matrix(u_grad)
            self.update_movie_matrix(v_grad)

        print(f"Step {self.steps}/{self.steps}, RMSE (train): {self.rmse('train'):.4f}, RMSE (test): {self.rmse('test'):.4f}")

        return np.matmul(self.U, self.V.T)


SyntaxError: invalid syntax (4041716692.py, line 21)

In [1]:

import numpy as np

from metrics import confusion_matrix, precision, recall, false_alarm_rate
from datasets import download_and_prepare
from recommender_system import MatrixFactorization


def main():
    np.random.seed(42)
    np.set_printoptions(precision=2, floatmode='fixed')

    # Part I
    print("------------------------------------------------")
    print("Part I - Confusion matrix")
    print("------------------------------------------------")

    y_true = np.random.randint(0, 2, 20)
    y_pred = np.random.randint(0, 2, 20)

    print("Unnormalized confusion matrix:")
    cm = confusion_matrix(y_true, y_pred)
    print(cm)
    print("Matrix sum normalization:")
    cm = confusion_matrix(y_true, y_pred, normalize='all')
    print(cm)
    print("Row sum normalization:")
    cm = confusion_matrix(y_true, y_pred, normalize='pred')
    print(cm)
    print("Column sum normalization:")
    cm = confusion_matrix(y_true, y_pred, normalize='true')
    print(cm)
    print(f"Precision: {precision(y_true, y_pred):.2f}, recall: {recall(y_true, y_pred):.2f}"
          f", false alarm rate: {false_alarm_rate(y_true, y_pred):.2f}")

'''    # Part II
    print("------------------------------------------------")
    print("Part II - Movie Recommender System")
    print("------------------------------------------------")

    X = download_and_prepare('movielens-small', '../datasets')
    matrixFactor = MatrixFactorization(X)
    r_hat = matrixFactor.fit()

if __name__ == "__main__":
    main()
'''

SyntaxError: invalid syntax (recommender_system.py, line 20)

In [4]:
import numpy as np

from metrics import confusion_matrix, precision, recall, false_alarm_rate



def main():
    np.random.seed(42)
    np.set_printoptions(precision=2, floatmode='fixed')

    # Part I
    print("------------------------------------------------")
    print("Part I - Confusion matrix")
    print("------------------------------------------------")

    y_true = np.random.randint(0, 2, 20)
    y_pred = np.random.randint(0, 2, 20)

    print("Unnormalized confusion matrix:")
    cm = confusion_matrix(y_true, y_pred)
    print(cm)
    print("Matrix sum normalization:")
    cm = confusion_matrix(y_true, y_pred, normalize='all')
    print(cm)
    print("Row sum normalization:")
    cm = confusion_matrix(y_true, y_pred, normalize='pred')
    print(cm)
    print("Column sum normalization:")
    cm = confusion_matrix(y_true, y_pred, normalize='true')
    print(cm)
    print('Precision:' , precision(y_true, y_pred), '\nRecall:',recall(y_true, y_pred), '\nFalse alarm rate:', false_alarm_rate(y_true, y_pred))

In [5]:
if __name__ == "__main__":
    main()

------------------------------------------------
Part I - Confusion matrix
------------------------------------------------
Unnormalized confusion matrix:
[[4.00 9.00]
 [3.00 4.00]]
Matrix sum normalization:
[[0.20 0.45]
 [0.15 0.20]]
Row sum normalization:
[[0.57 0.69]
 [0.43 0.31]]
Column sum normalization:
[[0.31 0.69]
 [0.43 0.57]]
Precision: [0.57 0.31] 
Recall: [0.31 0.57] 
False alarm rate: [0.43 0.69]
