<a href="https://colab.research.google.com/github/T4690/DECISION-TREE-IMPLEMENTATION/blob/main/Task_4_(RECOMMENDATION_SYSTEM).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
# prompt: BUILD A recommendation system using collaborative filtering or matrix factorization techniques

# Correct the typo in the curl command to download the dataset
!curl http://files.grouplens.org/datasets/movielens/ml-latest-small.zip -o ml-latest.small.zip

# Unzip the downloaded file
!unzip -o ml-latest.small.zip # Use -o to overwrite if the directory exists

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import numpy as np

# Read the ratings and the movie metadata from the directory created by the
# unzip step above (paths are relative to the working directory).
ratings_df = pd.read_csv('ml-latest-small/ratings.csv')
movies_df = pd.read_csv('ml-latest-small/movies.csv')

# Attach the movie columns to every rating row via the shared movieId key.
movie_ratings = ratings_df.merge(movies_df, on='movieId')

# Build the user x title ratings matrix and replace missing ratings with 0.
# NOTE(review): pivot_table aggregates rows that share a (userId, title) pair
# (mean by default), so distinct movieIds with the same title end up in one
# column -- confirm this is acceptable for the dataset.
user_movie_matrix = (
    movie_ratings
    .pivot_table(index='userId', columns='title', values='rating')
    .fillna(0)
)

# Split data into training and testing sets (optional, for evaluation)
# For simplicity, we will use the whole matrix for demonstration of factorization
# train_data, test_data = train_test_split(ratings_df, test_size=0.2, random_state=42)

# Implement Matrix Factorization using Singular Value Decomposition (SVD)
# For a large matrix, consider using libraries like surprise or tensorflow_recommenders

try:
    from scipy.sparse.linalg import svds
    # Rank of the truncated factorization (number of latent factors kept).
    n_components = 50

    # Truncated SVD of the ratings matrix; svds is given the underlying
    # NumPy array rather than the DataFrame.
    U, singular_values, Vt = svds(user_movie_matrix.values, k=n_components)

    # svds returns the singular values as a 1-D array; expand them into a
    # diagonal matrix so the factors can be multiplied as U . sigma . Vt.
    sigma = np.diag(singular_values)

    # Reconstruct the original matrix (or approximate it)
    # predicted_ratings = np.dot(np.dot(U, sigma), Vt) # This would be the full matrix

    # Function to predict rating for a specific user and movie
    def predict_rating(user_id, movie_title, U, sigma, Vt, user_index_map, movie_title_map):
        """Predict one user's rating for one movie from the SVD factors.

        Args:
            user_id: Key looked up in ``user_index_map`` to find the row of ``U``.
            movie_title: Key looked up in ``movie_title_map`` to find the column of ``Vt``.
            U: Left factor matrix; row ``i`` holds the latent vector of user ``i``.
            sigma: Square matrix multiplied between ``U`` and ``Vt``.
            Vt: Right factor matrix; column ``j`` holds the latent vector of movie ``j``.
            user_index_map: Mapping from user id to row position in ``U``.
            movie_title_map: Mapping from movie title to column position in ``Vt``.

        Returns:
            The scalar ``U[row] . (sigma . Vt[:, col])``, or ``None`` when a
            ``KeyError`` is raised (e.g. the user or the movie is not in its map).
        """
        try:
            row = user_index_map[user_id]
            col = movie_title_map[movie_title]
            # Scale the movie's latent vector first, then project the user onto it.
            scaled_movie_vector = np.dot(sigma, Vt[:, col])
            return np.dot(U[row, :], scaled_movie_vector)
        except KeyError:
            # Unknown user or movie: signal "no prediction" instead of raising.
            return None

    # Lookup tables translating a user id / movie title into its row / column
    # position in user_movie_matrix (and therefore in U / Vt).
    user_index_map = {
        label: position for position, label in enumerate(user_movie_matrix.index)
    }
    movie_title_map = {
        label: position for position, label in enumerate(user_movie_matrix.columns)
    }

    # Function to get recommendations for a user
    def recommend_movies(user_id, user_movie_matrix, U, sigma, Vt, user_index_map, movie_title_map, num_recommendations=10):
        """Rank the movies a user has not rated by their predicted rating.

        All candidate movies are scored with a single matrix product
        (``U[user] . sigma . Vt[:, candidates]``) instead of one Python-level
        prediction call per title.

        Args:
            user_id: Id of the user; must be a key of ``user_index_map`` and a
                label of ``user_movie_matrix``'s index.
            user_movie_matrix: DataFrame of ratings (rows: users, columns: movie
                titles) in which 0 marks "not rated".
            U: Left factor matrix; row ``i`` is the latent vector of user ``i``.
            sigma: Square matrix multiplied between ``U`` and ``Vt``.
            Vt: Right factor matrix; column ``j`` is the latent vector of movie ``j``.
            user_index_map: Mapping from user id to row position in ``U``.
            movie_title_map: Mapping from movie title to column position in ``Vt``.
            num_recommendations: Upper bound used to slice the ranked list.

        Returns:
            A list of ``(title, predicted_rating)`` tuples sorted by predicted
            rating, highest first (ties keep column order), or the string
            ``"User not found in the training data."`` when ``user_id`` is not
            in ``user_index_map``.
        """
        if user_id not in user_index_map:
            return "User not found in the training data."

        user_idx = user_index_map[user_id]

        # A stored rating of 0 means the user has not rated that title.
        user_ratings = user_movie_matrix.loc[user_id]
        unrated_titles = user_ratings[user_ratings == 0].index

        # Titles without a column in Vt cannot be scored and are skipped.
        candidates = [title for title in unrated_titles if title in movie_title_map]
        if not candidates:
            return []
        columns = [movie_title_map[title] for title in candidates]

        # The user's scaled latent vector is the same for every movie, so it is
        # computed once and applied to all candidate columns together.
        user_factors = np.dot(U[user_idx, :], sigma)
        scores = np.dot(user_factors, Vt[:, columns])

        # Stable sort on the negated scores: descending order, ties in input order.
        order = np.argsort(-scores, kind="stable")
        ranked = [(candidates[i], scores[i]) for i in order]

        return ranked[:num_recommendations]

    # Demo: produce and print the top recommendations for userId 1.
    user_id_to_recommend = 1
    recommendations = recommend_movies(
        user_id=user_id_to_recommend,
        user_movie_matrix=user_movie_matrix,
        U=U,
        sigma=sigma,
        Vt=Vt,
        user_index_map=user_index_map,
        movie_title_map=movie_title_map,
    )

    print(f"Recommendations for User {user_id_to_recommend}:")
    if not isinstance(recommendations, list):
        # recommend_movies returns a message string when the user is unknown.
        print(recommendations)
    else:
        # One line per (title, predicted rating) pair, rating shown to 2 decimals.
        for movie, rating in recommendations:
            print(f"- {movie}: {rating:.2f}")


except ImportError:
    # Reached when the scipy import at the top of the try body fails.
    print("Scipy not found. Please install it: !pip install scipy")
    print("Matrix factorization using SVD requires scipy.")
except Exception as e:
    # Catch-all for any other failure in the try body (factorization, function
    # definitions or the example run). Only the message is printed, so no
    # traceback is shown and the cell still finishes without raising.
    print(f"An error occurred during SVD or recommendation: {e}")

# Note: For a production system, consider using libraries like Surprise or implementing
# more robust matrix factorization techniques like ALS (Alternating Least Squares).
# Also, handling cold-start users/items would be necessary.

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  955k  100  955k    0     0  1289k      0 --:--:-- --:--:-- --:--:-- 1289k
Archive:  ml-latest.small.zip
   creating: ml-latest-small/
  inflating: ml-latest-small/links.csv  
  inflating: ml-latest-small/tags.csv  
  inflating: ml-latest-small/ratings.csv  
  inflating: ml-latest-small/README.txt  
  inflating: ml-latest-small/movies.csv  
Recommendations for User 1:
- Die Hard (1988): 4.01
- Godfather: Part II, The (1974): 3.30
- Jaws (1975): 3.30
- Breakfast Club, The (1985): 2.87
- Godfather, The (1972): 2.86
- Stand by Me (1986): 2.79
- Christmas Story, A (1983): 2.60
- Lady and the Tramp (1955): 2.46
- Snatch (2000): 2.40
- Little Mermaid, The (1989): 2.37
