Project Name: Recent_Hindi_Movies_Recommendation_System

Industry - Codsoft

Contribution - Individual

Member Name - Adhiraj Karjee

Project Summary: This project builds a simple movie recommendation system using item-based collaborative filtering on user ratings. Given a dataset of user ratings for movies, the system finds the movie a user rated highest and recommends the movies whose rating patterns across users are most similar to it.



In [29]:
import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors

# Sample Hindi movie dataset.
# `ratings_data` is column-oriented: position i across the three lists is one
# rating event, i.e. user `userId[i]` gave movie `movieId[i]` the score
# `rating[i]`.
ratings_data = {
    "userId": [1, 2, 1, 3, 4, 2, 5, 3, 6, 4],
    "movieId": [101, 101, 102, 103, 102, 103, 104, 105, 101, 105],
    "rating": [4.5, 3.0, 4.0, 5.0, 4.5, 4.8, 5.0, 3.5, 4.2, 4.0],
}

# Movie ID -> display title. The genre is written in parentheses as part of
# the title string itself; there is no separate genre column.
movies_data = {
    "movieId": [101, 102, 103, 104, 105],
    "title": [
        "Pushpa:The Rule-Part2(Action)",
        "Singham Again(Action)",
        "Kalki 2898  AD(Science-fiction action film)",
        "Bhool Bhulaiya 3(Horror Thriller)",
        "Srikanth(Motivation)",
    ],
}

# Wrap the raw dictionaries in DataFrames (one column per dictionary key).
ratings = pd.DataFrame(ratings_data)
movies = pd.DataFrame(movies_data)

# Create sparse matrix
def create_matrix(df):
    """Build a sparse movie-by-user rating matrix plus ID/index lookup tables.

    Args:
        df: DataFrame with "userId", "movieId" and "rating" columns, one row
            per rating.

    Returns:
        A 5-tuple ``(X, user_mapper, movie_mapper, user_inv_mapper,
        movie_inv_mapper)`` where

        * ``X`` is a ``csr_matrix`` of shape ``(n_movies, n_users)``; rows are
          movies and columns are users.
        * ``user_mapper`` / ``movie_mapper`` map an ID to its column / row
          position in ``X``.
        * ``user_inv_mapper`` / ``movie_inv_mapper`` map a position back to
          the ID.

    Note:
        Per the SciPy documentation, duplicate ``(row, col)`` entries are
        summed when the matrix is built, so repeated (user, movie) pairs in
        ``df`` would add their ratings together.
    """
    # np.unique returns the distinct IDs in ascending order; the "inverse"
    # array gives, for every row of df, the position of its ID in that order.
    distinct_users, user_index = np.unique(df["userId"], return_inverse=True)
    distinct_movies, movie_index = np.unique(df["movieId"], return_inverse=True)

    n_users = len(distinct_users)
    n_movies = len(distinct_movies)

    # Forward lookups: ID -> position.
    user_mapper = {uid: pos for pos, uid in enumerate(distinct_users)}
    movie_mapper = {mid: pos for pos, mid in enumerate(distinct_movies)}

    # Reverse lookups: position -> ID.
    user_inv_mapper = dict(enumerate(distinct_users))
    movie_inv_mapper = dict(enumerate(distinct_movies))

    # Movies index the rows so that each row is one movie's rating vector.
    X = csr_matrix(
        (df["rating"], (movie_index, user_index)),
        shape=(n_movies, n_users),
    )

    return X, user_mapper, movie_mapper, user_inv_mapper, movie_inv_mapper

# Build the movie-by-user rating matrix and the ID <-> index lookup tables
# from the sample ratings. These are module-level names.
X, user_mapper, movie_mapper, user_inv_mapper, movie_inv_mapper = create_matrix(ratings)

# Define the find_similar_movies function
def find_similar_movies(movie_id, X, k, metric="cosine", show_distance=False):
    """Return the IDs of up to ``k`` movies most similar to ``movie_id``.

    Similarity is a brute-force nearest-neighbour search over the rows of
    ``X`` (one row per movie). Uses the module-level ``movie_mapper`` and
    ``movie_inv_mapper`` to translate between movie IDs and row positions.

    Args:
        movie_id: ID of the query movie.
        X: Sparse rating matrix whose rows are movies.
        k: Number of neighbours wanted. Fewer are returned when ``X`` has
            fewer than ``k + 1`` rows.
        metric: Distance metric forwarded to ``NearestNeighbors``.
        show_distance: Accepted for backward compatibility. The return value
            is always the list of IDs only; distances are not returned.

    Returns:
        A list of movie IDs ordered from nearest to farthest, never
        containing ``movie_id`` itself. An empty list is returned (after
        printing a message) when the movie is unknown or the search fails.
    """
    # Keep the lookup in its own narrow try so that only a genuinely unknown
    # ID is reported as "not found".
    try:
        movie_ind = movie_mapper[movie_id]
    except (KeyError, TypeError):
        print(f"Movie with ID {movie_id} not found.")
        return []

    try:
        # Ask for one extra neighbour because the query movie is itself a row
        # of the fitted data, but never for more rows than X has: scikit-learn
        # rejects n_neighbors greater than the number of fitted samples.
        n_neighbors = min(k + 1, X.shape[0])
        if n_neighbors < 1:
            return []

        kNN = NearestNeighbors(
            n_neighbors=n_neighbors, algorithm="brute", metric=metric
        )
        kNN.fit(X)

        movie_vec = X[movie_ind].reshape(1, -1)
        indices = kNN.kneighbors(movie_vec, return_distance=False)

        # Drop the query movie by identity rather than by position: when
        # another movie has an identical rating vector the query is not
        # guaranteed to come back first.
        neighbour_ids = [
            movie_inv_mapper[idx] for idx in indices[0] if idx != movie_ind
        ]

        # If the query was not among the results (ties), trim the extra one.
        return neighbour_ids[:k]
    except Exception as e:
        # Best-effort contract: report the problem and return no neighbours
        # instead of propagating to the caller.
        print(f"An error occurred: {e}")
        return []

# Lookup table: movie ID -> title, built from the two columns of `movies`.
movie_titles = dict(
    movies[["movieId", "title"]].itertuples(index=False, name=None)
)

# Recommend movies for a user
def recommend_movies_for_user(user_id, X, user_mapper, movie_mapper, movie_inv_mapper, k=10):
    """Print movies similar to the one ``user_id`` rated highest.

    Reads the module-level ``ratings`` DataFrame and ``movie_titles`` dict and
    delegates the similarity search to ``find_similar_movies``.

    Args:
        user_id: ID of the user to recommend for.
        X: Movie-by-user rating matrix passed through to
            ``find_similar_movies``.
        user_mapper: Not used by this function.
        movie_mapper: Not used by this function.
        movie_inv_mapper: Not used by this function.
        k: Number of similar movies to request.

    Returns:
        None. Results (or a "does not exist" message) are printed.
    """
    user_ratings = ratings.loc[ratings["userId"] == user_id]

    # Guard clause: nothing to base a recommendation on.
    if user_ratings.empty:
        print(f"User with ID {user_id} does not exist.")
        return

    # Seed movie = the user's top-rated one; on ties the first such row wins.
    best_score = max(user_ratings["rating"])
    is_best = user_ratings["rating"] == best_score
    movie_id = user_ratings.loc[is_best, "movieId"].iloc[0]

    similar_ids = find_similar_movies(movie_id, X, k)
    movie_title = movie_titles.get(movie_id, "Movie not found")

    print(f"Since you watched {movie_title}, you might also like:")
    for similar_id in similar_ids:
        similar_title = movie_titles.get(similar_id, "Movie not found")
        print(similar_title)

# Test the recommendation system: request 3 recommendations (k=3) for user 1.
# The function prints its results and returns nothing.
user_id = 1
recommend_movies_for_user(user_id, X, user_mapper, movie_mapper, movie_inv_mapper, k=3)



Since you watched Pushpa:The Rule-Part2(Action), you might also like:
Singham Again(Action)
Kalki 2898  AD(Science-fiction action film)
Bhool Bhulaiya 3(Horror Thriller)
