Завдання 1: Реалізація SVD

In [10]:
import numpy as np

def compute_svd(A):
    """Compute a full singular value decomposition ``A = U @ Sigma @ V^T``.

    The right singular vectors are the eigenvectors of the symmetric matrix
    ``A^T A``; the singular values are the square roots of its eigenvalues.
    The left singular vectors are then derived from the right ones as
    ``u_i = A v_i / sigma_i``.  Deriving U from V (instead of running a
    second, independent eigendecomposition of ``A A^T``) is what guarantees
    that the signs and bases of U and V are consistent, so that the product
    really reproduces ``A``.

    Parameters
    ----------
    A : array_like, shape (m, n)
        Real-valued 2-D matrix.

    Returns
    -------
    U : ndarray, shape (m, m)
        Orthogonal matrix whose columns are left singular vectors.
    Sigma : ndarray, shape (m, n)
        Rectangular diagonal matrix with the singular values in
        non-increasing order on the main diagonal.
    Vt : ndarray, shape (n, n)
        Transpose of the orthogonal matrix of right singular vectors.
    SVD_reconstructed : ndarray, shape (m, n)
        The product ``U @ Sigma @ Vt``.

    Raises
    ------
    ValueError
        If ``A`` is not two-dimensional or is complex-valued.
    """
    A = np.asarray(A)
    if A.ndim != 2:
        raise ValueError("compute_svd expects a 2-D matrix")
    if np.iscomplexobj(A):
        raise ValueError("compute_svd supports real-valued matrices only")
    A = A.astype(float, copy=False)

    n_rows, n_cols = A.shape
    k = min(n_rows, n_cols)

    # Step 1: eigendecomposition of A^T A.  The matrix is symmetric, so use
    # eigh: it guarantees real eigenvalues and orthonormal eigenvectors,
    # which the general-purpose eig does not.
    eigenvalues, V = np.linalg.eigh(A.T @ A)

    # Sort eigenvalues and corresponding eigenvectors in descending order.
    # Round-off can make zero eigenvalues slightly negative; clip them so
    # the square root below never produces NaN.
    order = np.argsort(eigenvalues)[::-1]
    eigenvalues = np.clip(eigenvalues[order], 0.0, None)
    V = V[:, order]

    # Step 2: singular values.  Eigenvalues below the numerical resolution
    # of the decomposition are treated as exact zeros (rank deficiency).
    top = eigenvalues[:k]
    tol = max(n_rows, n_cols) * np.finfo(float).eps * top.max(initial=0.0)
    rank = int(np.count_nonzero(top > tol))  # `top` is sorted: a prefix
    singular_values = np.zeros(k)
    singular_values[:rank] = np.sqrt(top[:rank])

    # Sigma is a rectangular diagonal matrix holding the singular values.
    Sigma = np.zeros((n_rows, n_cols))
    diag = np.arange(k)
    Sigma[diag, diag] = singular_values

    # Step 3: left singular vectors, tied to V via u_i = A v_i / sigma_i.
    U = np.eye(n_rows)
    if rank > 0:
        U_r = (A @ V[:, :rank]) / singular_values[:rank]
        if rank < n_rows:
            # Columns that belong to zero singular values are not
            # determined by A; complete U_r to an orthonormal basis.
            Q, _ = np.linalg.qr(U_r, mode="complete")
            U = np.hstack([U_r, Q[:, rank:]])
        else:
            U = U_r

    # Step 4: compute the product U * Sigma * V^T
    SVD_reconstructed = U @ Sigma @ V.T

    return U, Sigma, V.T, SVD_reconstructed

# Test the function on a small fixed 2x3 matrix (not random).
A = np.array([[1, 0, 0], [0, 2, 3]])
# NOTE: despite its name, this variable holds the whole tuple returned by
# compute_svd: (U, Sigma, V^T, reconstructed matrix).
SVD_reconstructed = compute_svd(A)

# Bare expression: as the last statement of a notebook cell it displays the tuple.
SVD_reconstructed


(array([[0., 1.],
        [1., 0.]]),
 array([[3.60555128, 0.        , 0.        ],
        [0.        , 1.        , 0.        ]]),
 array([[ 0.        ,  0.5547002 ,  0.83205029],
        [ 1.        ,  0.        ,  0.        ],
        [ 0.        ,  0.83205029, -0.5547002 ]]),
 array([[1., 0., 0.],
        [0., 2., 3.]]))

In [21]:
import pandas as pd

# Load the ratings table (presumably the MovieLens "latest-small" dataset,
# judging by the path) and reshape it into a user x movie matrix.
file_path = 'ml-latest-small/ratings.csv'
df = pd.read_csv(file_path)

# One row per userId, one column per movieId, cells hold the rating;
# user/movie pairs without a rating become NaN.  Then keep only rows with
# at least 160 non-missing ratings and, among those rows, only columns
# with at least 100 non-missing ratings.
ratings_matrix = (
    df.pivot(index='userId', columns='movieId', values='rating')
    .dropna(axis=0, thresh=160)
    .dropna(axis=1, thresh=100)
)

# Bare expression: as the last statement of a notebook cell it displays the frame.
ratings_matrix


movieId,1,260,296,318,356,480,589,593,608,780,...,1198,1210,1265,1270,1580,2028,2571,2762,2858,2959
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,5.0,3.0,,4.0,4.0,,4.0,5.0,3.0,...,5.0,5.0,4.0,5.0,3.0,4.0,5.0,,5.0,5.0
4,,5.0,1.0,,,,,5.0,5.0,,...,3.0,,4.0,,3.0,,1.0,4.0,5.0,2.0
6,,,2.0,5.0,5.0,5.0,3.0,4.0,3.0,5.0,...,,,,,,,,,,
18,3.5,4.0,4.0,5.0,4.5,3.5,4.5,4.5,4.5,4.0,...,4.0,4.5,,4.0,3.5,4.0,4.5,4.5,,4.5
19,4.0,4.0,,,2.0,2.0,5.0,,,,...,5.0,3.0,4.0,4.0,2.0,,4.0,4.0,4.0,5.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
605,4.0,5.0,2.0,,3.0,3.0,3.5,,,3.5,...,,5.0,,4.0,,,,,,
606,2.5,4.5,5.0,3.5,4.0,2.5,3.5,4.5,,2.5,...,3.5,4.5,,3.5,2.5,4.0,5.0,4.0,4.5,5.0
607,4.0,3.0,3.0,5.0,,4.0,4.0,5.0,2.0,4.0,...,,3.0,3.0,3.0,3.0,5.0,5.0,5.0,3.0,
608,2.5,3.5,5.0,4.5,3.0,3.0,3.0,4.0,3.0,3.0,...,,4.0,3.5,2.0,3.5,4.5,5.0,4.5,5.0,5.0
