In [7]:
import pandas as pd

# Column names for the tab-separated ratings file; they are passed via
# `names`, so pandas does not take a header row from the file.
ratings_cols = ["user_id", "movie_id", "rating", "timestamp"]
ratings = pd.read_csv("u.data", sep="\t", names=ratings_cols)

# Column names for the item file, consumed by the cell that loads 'u.item'.
movies_cols = [
    # descriptive fields
    "movie_id", "title", "release_date", "video_release_date", "IMDb_URL",
    # genre-named columns
    "unknown", "Action", "Adventure", "Animation", "Children's", "Comedy",
    "Crime", "Documentary", "Drama", "Fantasy", "Film-Noir", "Horror",
    "Musical", "Mystery", "Romance", "Sci-Fi", "Thriller", "War", "Western",
]

In [8]:
# Load only the first two columns (movie_id and title) of the pipe-delimited
# item file; the remaining names in movies_cols label columns that are skipped.
movies = pd.read_csv(
    "u.item",
    sep="|",
    names=movies_cols,
    usecols=["movie_id", "title"],
    encoding="latin-1",
)

In [9]:
# One row per user_id, one column per movie_id, cell = rating
# (pivot_table's default aggregation, the mean, applies to any repeated pair).
# Cells with no rating are filled with 0.
user_item_matrix = (
    ratings
    .pivot_table(values="rating", index="user_id", columns="movie_id")
    .fillna(0)
)

In [10]:
from sklearn.metrics.pairwise import cosine_similarity

# Pairwise cosine similarity between the rows of the user-item matrix.
user_similarity = cosine_similarity(user_item_matrix)

# Wrap the square result in a DataFrame labelled by the matrix's row index
# on both axes, so similarities can be looked up by label.
user_similarity_df = pd.DataFrame(
    data=user_similarity,
    index=user_item_matrix.index,
    columns=user_item_matrix.index,
)

In [11]:
import numpy as np
from scipy.sparse.linalg import svds

# Rank-50 truncated-SVD reconstruction of the row-centred rating matrix
R = user_item_matrix.values

# Per-row mean taken over every column of R.
# NOTE(review): if unrated cells in user_item_matrix are zero-filled, these
# means include those zeros — confirm that is intended.
user_ratings_mean = R.mean(axis=1)
R_demeaned = R - user_ratings_mean[:, np.newaxis]

# svds returns the singular values as a 1-D array; expand them into a
# diagonal matrix so the three factors can be multiplied back together.
U, sigma, Vt = svds(R_demeaned, k=50)
sigma = np.diag(sigma)

# Rebuild the low-rank approximation and add each row's mean back on.
predicted_ratings = U @ sigma @ Vt + user_ratings_mean[:, np.newaxis]

In [12]:
import pickle

def _save_pickle(obj, filename):
    """Serialize `obj` with pickle into `filename`, opened in binary write mode."""
    with open(filename, 'wb') as fh:
        pickle.dump(obj, fh)


# Write each artifact to its own pickle file, in the same order as before.
_save_pickle(user_item_matrix, 'user_item_matrix.pkl')
_save_pickle(user_similarity_df, 'user_similarity.pkl')
_save_pickle(predicted_ratings, 'predicted_svd.pkl')
_save_pickle(movies, 'movies.pkl')

print("✅ All data saved as pickle files.")

✅ All data saved as pickle files.
