In [8]:
import os
import shutil

repo_dir = "Movie-Recommender-System"

if os.path.exists(repo_dir):
    print(f"{repo_dir} already exists. Removing it...\n")
    !rm -r {repo_dir}

# Clone the repository from GitHub
!git clone https://github.com/Goshmar/Movie-Recommender-System

Movie-Recommender-System already exists. Removing it...

Cloning into 'Movie-Recommender-System'...
remote: Enumerating objects: 26, done.[K
remote: Counting objects: 100% (26/26), done.[K
remote: Compressing objects: 100% (23/23), done.[K
remote: Total 26 (delta 3), reused 0 (delta 0), pack-reused 0[K
Receiving objects: 100% (26/26), 6.67 MiB | 11.15 MiB/s, done.
Resolving deltas: 100% (3/3), done.


In [9]:
import requests
import zipfile
import pandas as pd
import numpy as np

# Paths. The archive ships inside the cloned repository; the extraction target
# is spelled out as an absolute directory so that the unpacked files land where
# the loading cell reads them from (/content/ml-100k/...), regardless of the
# kernel's current working directory.
zip_file_path = "/content/Movie-Recommender-System/data/raw/ml-100k.zip"
extract_dir = "/content"

# Unpack every member of the archive; the context manager closes the file
# handle even if extraction fails part-way.
with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
    zip_ref.extractall(extract_dir)

# ZIP cleaning up: the archive is no longer needed once unpacked.
# Re-running this cell alone will fail here-after because the file is gone;
# re-run the clone cell first to restore it.
os.remove(zip_file_path)

In [10]:
# Load the input tables from disk (nothing is downloaded here).

# Column names applied to u.item, which is read as a pipe-separated,
# latin-1 encoded file with the names supplied explicitly.
item_info_columns = [
    'movie_id', 'movie_title', 'release_date', 'video_release_date', 'IMDB_URL',
    'unknown', 'Action', 'Adventure', 'Animation', 'Children', 'Comedy',
    'Crime', 'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror',
    'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western',
]

# Matrices stored in the repository's interim data folder.
# NOTE(review): read_csv is called without index_col; if these CSVs were
# written together with their index, it comes back as an extra leading
# column -- confirm against the files.
rating_matrix = pd.read_csv(
    "/content/Movie-Recommender-System/data/interim/rating_matrix.csv"
)
feature_matrix = pd.read_csv(
    "/content/Movie-Recommender-System/data/interim/feature_matrix.csv"
)

# Per-movie metadata extracted from the ml-100k archive.
item_info = pd.read_csv(
    '/content/ml-100k/u.item',
    sep='|',
    encoding='latin-1',
    names=item_info_columns,
)

In [13]:
from scipy.sparse.linalg import spsolve_triangular
from scipy.sparse import csr_matrix

# Number of leading singular vectors kept from each decomposition.
n_components = 50

# Thin SVD of the rating matrix; keep the first n_components right singular
# vectors, stored as columns of v.
u, s, vt = np.linalg.svd(rating_matrix.values, full_matrices=False)
v = vt[:n_components].T

# Similarity matrix sized by the number of feature_matrix columns: identity,
# plus a single symmetric off-diagonal pair (last <-> third-from-last) set to d.
d = 0.5  # off-diagonal similarity factor
n_features = feature_matrix.shape[1]
item_similarity_matrix = np.eye(n_features)
item_similarity_matrix[[-1, -3], [-3, -1]] = d

# Lower-triangular Cholesky factor, i.e. item_similarity_matrix == L @ L.T
L = np.linalg.cholesky(item_similarity_matrix)

# Thin SVD of the feature matrix multiplied by L. v2 is first bound to the
# V^T factor returned by svd and then rebound to its truncated transpose.
weighted_features = np.dot(feature_matrix.values, L)
u2, s2, v2 = np.linalg.svd(weighted_features, full_matrices=False)
v2 = v2[:n_components].T

In [15]:
# Hybrid-model scoring.
# Solve L.T @ rv = v2 by back-substitution (L.T is upper triangular).
upper_factor = csr_matrix(L.T)
rv = spsolve_triangular(upper_factor, v2, lower=False)

# Project the features through L and v2, then map back through rv.
projected = np.dot(feature_matrix.values, np.dot(L, v2))
hybrid_scores = projected.dot(rv.T)

# Rank the first row of the score matrix in descending order and keep the
# five best positions.
# NOTE(review): hybrid_scores has one column per feature_matrix column, so the
# positions selected here index feature columns; below they are used as row
# positions in item_info -- confirm this is the intended lookup.
top_n_hybrid = np.argsort(-hybrid_scores)[0][:5]

# One record per selected position, looked up positionally in item_info.
recommendations_data = [
    {
        'movie_id': item_info.iloc[idx]['movie_id'],
        'movie_title': item_info.iloc[idx]['movie_title'],
    }
    for idx in top_n_hybrid
]

# Tabulate the records and display them as the cell's output.
recommendations_df = pd.DataFrame(recommendations_data)
recommendations_df

Unnamed: 0,movie_id,movie_title
0,19,Antonia's Line (1995)
1,8,Babe (1995)
2,5,Copycat (1995)
3,1,Toy Story (1995)
4,16,French Twist (Gazon maudit) (1995)
