Movie Recommendation system using any algorithm
------------------------------------------------
Team Members:
Nithesh B  1NT20IS101
Ganapathy  1NT20IS054
Vedanth    1NT20IS185
Shubham    1NT20IS159

In [20]:
#INPUT THE MOVIE NAME
input_movie_title = "iron man"
print("Movie name :",input_movie_title)

Movie name : iron man


User Based Recommendation
used algorithm : KNN

In [21]:
import pandas as pd
from sklearn.neighbors import NearestNeighbors

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics.pairwise import cosine_similarity

# Load datasets
movies_df = pd.read_csv('movies.csv')
ratings_df = pd.read_csv('ratings.csv')

Content Based Recommendation
used algorithm : Random Forest

In [22]:
merged_df = pd.merge(ratings_df, movies_df, on='movieId')

# Create a user-item matrix with movie ratings
user_movie_matrix = merged_df.pivot_table(index='userId', columns='title', values='rating', fill_value=0)

# Transpose the user-item matrix to have movies as rows and users as columns
movie_user_matrix = user_movie_matrix.T

# Apply k-Nearest Neighbors clustering on the transposed user-item matrix
num_neighbors = 10  # You can experiment with the number of neighbors
knn_model = NearestNeighbors(n_neighbors=num_neighbors, metric='cosine')
knn_model.fit(movie_user_matrix)

# Function to recommend movies based on input movie
def recommend_movies(movie_title, num_recommendations=5):
    # Make the input case-insensitive and partial string matching
    input_movie_title_lower = movie_title.lower()
    filtered_movies = movies_df[movies_df['title'].str.lower().str.contains(input_movie_title_lower)]

    if filtered_movies.empty:
        return []

    # Print input movie name, genres, and rating
    input_movie = filtered_movies.iloc[0]
    print(f"Input Movie - '{input_movie['title']}':")
    print(f"Genres: {input_movie['genres']}")
    print(f"Rating: {merged_df.loc[merged_df['title'] == input_movie['title'], 'rating'].values[0]}")

    # Find the neighbors of the input movie in the transposed user-item matrix
    input_movie_index = movie_user_matrix.index.get_loc(input_movie['title'])
    _, neighbor_indices = knn_model.kneighbors(movie_user_matrix.iloc[input_movie_index, :].values.reshape(1, -1))

    # Exclude the input movie from recommendations
    neighbor_indices = neighbor_indices[:, 1:]

    # Get top N recommended movies
    top_recommendations = []
    for i in range(num_recommendations):
        movie_index = neighbor_indices[0, i]
        recommended_movie = movie_user_matrix.index[movie_index]
        top_recommendations.append(recommended_movie)

        # Print genres and rating for the recommended movie
        genres = movies_df[movies_df['title'] == recommended_movie]['genres'].values[0]
        rating = merged_df.loc[merged_df['title'] == recommended_movie, 'rating'].values[0]
        print(f"{i + 1}. {recommended_movie}   - Genres: {genres}   -Rating: {rating}")

    return top_recommendations

# input_movie_title = "city hall"
print(f"\nTop 5 movie recommendation : User Based Recommendation")
recommendations = recommend_movies(input_movie_title)



Top 5 movie recommendation : User Based Recommendation
Input Movie - 'Iron Man (2008)':
Genres: Action|Adventure|Sci-Fi
Rating: 2.0
1. Dark Knight, The (2008)   - Genres: Action|Crime|Drama|IMAX   -Rating: 4.5
2. WALL·E (2008)   - Genres: Adventure|Animation|Children|Romance|Sci-Fi   -Rating: 3.5
3. Avengers, The (2012)   - Genres: Action|Adventure|Sci-Fi|IMAX   -Rating: 2.0
4. Iron Man 2 (2010)   - Genres: Action|Adventure|Sci-Fi|Thriller|IMAX   -Rating: 4.0
5. Avatar (2009)   - Genres: Action|Adventure|Sci-Fi|IMAX   -Rating: 2.5


In [23]:
# Merge datasets
merged_df = pd.merge(ratings_df, movies_df, on='movieId')

# Create a user-item matrix with movie genres and ratings
user_movie_matrix = merged_df.pivot_table(index='userId', columns='title', values='rating', fill_value=0)

# TF-IDF Vectorization of movie genres
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf_vectorizer.fit_transform(movies_df['genres'])

# Apply Random Forest clustering on TF-IDF matrix
num_clusters = 10  # You can experiment with the number of clusters
rf_classifier = RandomForestClassifier(n_estimators=num_clusters, random_state=42)
rf_classifier.fit(tfidf_matrix, movies_df['title'])

# Add cluster labels to the movies dataframe
movies_df['cluster'] = rf_classifier.predict(tfidf_matrix)

# Function to recommend movies based on input movie
def recommend_movies(movie_title, num_recommendations=5):
    # Make the input case-insensitive and partial string matching
    input_movie_title_lower = movie_title.lower()
    filtered_movies = movies_df[movies_df['title'].str.lower().str.contains(input_movie_title_lower)]

    if filtered_movies.empty:
        return []

    # Find the cluster of the input movie
    input_movie_cluster = filtered_movies['cluster'].values[0]

    # Filter movies in the same cluster as the input movie
    cluster_movies = movies_df[movies_df['cluster'] == input_movie_cluster]

    # Calculate cosine similarity between the input movie and others in the cluster
    similarity_scores = cosine_similarity(tfidf_matrix[cluster_movies.index], tfidf_matrix[movies_df.index])

    # Get movie indices sorted by similarity score
    similar_movies_indices = similarity_scores.argsort()[:, ::-1]

    # Exclude the input movie from recommendations
    similar_movies_indices = similar_movies_indices[:, 1:]

    # Get top N recommended movies
    top_recommendations = []
    for i in range(num_recommendations):
        movie_index = similar_movies_indices[0, i]
        top_recommendations.append(movies_df.iloc[movie_index])

    return top_recommendations

# Example usage
# input_movie_title = "city hall"
input_movie = movies_df[movies_df['title'].str.lower().str.contains(input_movie_title.lower())].iloc[0]

recommendations = recommend_movies(input_movie_title)
print(f"Input Movie - '{input_movie['title']}':")
print(f"Genres: {input_movie['genres']}")
print(f"Rating: {user_movie_matrix.loc[:, input_movie['title']].iloc[0]}")  # Using user-item matrix for rating
print("\nTop 5 recommended movies : Content Based Recommendation")
for i, movie in enumerate(recommendations, 1):
    print(f"{i}. {movie['title']}   -Genres: {movie['genres']}    - Rating: {user_movie_matrix.loc[:, movie['title']].iloc[0]}")

Input Movie - 'Iron Man (2008)':
Genres: Action|Adventure|Sci-Fi
Rating: 0.0

Top 5 recommended movies : Content Based Recommendation
1. Black Panther (2017)   -Genres: Action|Adventure|Sci-Fi    - Rating: 0.0
2. X-Men: Days of Future Past (2014)   -Genres: Action|Adventure|Sci-Fi    - Rating: 0.0
3. Returner (Ritaanaa) (2002)   -Genres: Action|Adventure|Sci-Fi    - Rating: 0.0
4. Transformers: Age of Extinction (2014)   -Genres: Action|Adventure|Sci-Fi    - Rating: 0.0
5. Waterworld (1995)   -Genres: Action|Adventure|Sci-Fi    - Rating: 0.0
