In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

# Step 1: Load dataset (MovieLens 100k dataset)
ratings = pd.read_csv('/kaggle/input/movielens-100k/ml-100k/u.data', sep='\t', names=['userId', 'movieId', 'rating', 'timestamp'])
movies = pd.read_csv('/kaggle/input/movielens-100k/ml-100k/u.item', sep='|', encoding='latin-1', names=[
    'movieId', 'title', 'release_date', 'video_release_date', 'IMDb_URL',
    'unknown', 'Action', 'Adventure', 'Animation', "Children's", 'Comedy',
    'Crime', 'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror',
    'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western'
])


# Step 2: Merge datasets
movie_data = pd.merge(ratings, movies[['movieId', 'title']], on='movieId')

# Step 3: Preprocess data
# Create a pivot table for collaborative filtering
user_movie_matrix = movie_data.pivot_table(index='userId', columns='title', values='rating')

# Fill NaN values with 0 (or you could use other imputation techniques)
user_movie_matrix.fillna(0, inplace=True)

# Step 4: Implement content-based recommendation system
# Combine genres into a single string for TF-IDF
movies['genres'] = movies[['Action', 'Adventure', 'Animation', "Children's", 'Comedy',
                           'Crime', 'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror',
                           'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western']].apply(lambda x: ' '.join(x.index[x == 1]), axis=1)

# Generate a TF-IDF matrix for movie genres
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(movies['genres'].fillna(''))

# Compute cosine similarity between movies
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

# Create a reverse mapping of movie titles to indices
movie_indices = pd.Series(movies.index, index=movies['title']).drop_duplicates()

def recommend_movies_content_based(title, cosine_sim=cosine_sim):
    # Get the index of the movie that matches the title
    idx = movie_indices[title]

    # Get the pairwise similarity scores of all movies with that movie
    sim_scores = list(enumerate(cosine_sim[idx]))

    # Sort the movies based on the similarity scores
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

    # Get the scores of the 10 most similar movies
    sim_scores = sim_scores[1:11]

    # Get the movie indices
    movie_indices_list = [i[0] for i in sim_scores]

    # Return the top 10 most similar movies
    return movies['title'].iloc[movie_indices_list]

# Step 5: Collaborative Filtering using user similarity
from sklearn.metrics.pairwise import cosine_similarity

# Compute cosine similarity between users
user_similarity = cosine_similarity(user_movie_matrix)

# Predict ratings
def predict_ratings(user_id, user_similarity=user_similarity):
    # Get user's similarity scores
    user_sim_scores = user_similarity[user_id - 1]  # Adjust for zero-based index

    # Weighted average of ratings by similar users
    weighted_ratings = user_sim_scores.dot(user_movie_matrix.fillna(0)) / user_sim_scores.sum()

    # Convert to DataFrame
    recommendations = pd.DataFrame(weighted_ratings, index=user_movie_matrix.columns, columns=['predicted_rating'])

    # Sort recommendations by predicted rating
    return recommendations.sort_values(by='predicted_rating', ascending=False).head(10)

# Test recommendations
print("Content-based Recommendations:")
print(recommend_movies_content_based('Toy Story (1995)'))

print("Collaborative Filtering Recommendations:")
print(predict_ratings(1))


Content-based Recommendations:
421                Aladdin and the King of Thieves (1996)
101                                Aristocats, The (1970)
403                                      Pinocchio (1940)
624                        Sword in the Stone, The (1963)
945                         Fox and the Hound, The (1981)
968           Winnie the Pooh and the Blustery Day (1968)
1065                                         Balto (1995)
1077                              Oliver & Company (1988)
1408                            Swan Princess, The (1994)
1411    Land Before Time III: The Time of the Great Gi...
Name: title, dtype: object
Collaborative Filtering Recommendations:
                                  predicted_rating
title                                             
Star Wars (1977)                          3.508474
Return of the Jedi (1983)                 2.859657
Fargo (1996)                              2.857738
Raiders of the Lost Ark (1981)            2.831511
Silence of the 

In [4]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

# Load dataset
ratings = pd.read_csv('/kaggle/input/movielens-100k/ml-100k/u.data', sep='\t', names=['userId', 'movieId', 'rating', 'timestamp'])
movies = pd.read_csv('/kaggle/input/movielens-100k/ml-100k/u.item', sep='|', encoding='latin-1', names=[
    'movieId', 'title', 'release_date', 'video_release_date', 'IMDb_URL',
    'unknown', 'Action', 'Adventure', 'Animation', "Children's", 'Comedy',
    'Crime', 'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror',
    'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western'
])

# Merge datasets
ratings_movies = pd.merge(ratings, movies, on='movieId')

# Preprocess data for collaborative filtering
user_movie_matrix = ratings_movies.pivot_table(index='userId', columns='title', values='rating')
user_movie_matrix.fillna(0, inplace=True)

# Preprocess genres for content-based filtering
genre_columns = [
    'Action', 'Adventure', 'Animation', "Children's", 'Comedy', 'Crime',
    'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror', 'Musical',
    'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western'
]

# Create 'genres' column as a concatenation of genre names where the value is 1
movies['genres'] = movies[genre_columns].apply(
    lambda row: ' '.join(col for col, val in zip(genre_columns, row) if val == 1),
    axis=1
)

# Content-based recommendations setup
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(movies['genres'].fillna(''))
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

movie_indices = pd.Series(movies.index, index=movies['title']).drop_duplicates()

# Functions

# Content-based recommendation
def recommend_movies_by_title(title, cosine_sim=cosine_sim):
    try:
        idx = movie_indices[title]
        sim_scores = list(enumerate(cosine_sim[idx]))
        sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
        sim_scores = sim_scores[1:11]  # Exclude the movie itself
        movie_indices_list = [i[0] for i in sim_scores]
        return movies['title'].iloc[movie_indices_list].tolist()
    except KeyError:
        return ["Movie not found in the dataset."]

# Collaborative filtering recommendation
def recommend_movies_for_users(movie_name, user_movie_matrix=user_movie_matrix, num_users=3):
    try:
        # Find users who rated the given movie
        movie_ratings = user_movie_matrix[movie_name]
        top_users = movie_ratings[movie_ratings > 3.5].index  # Users with high ratings for the movie
        
        recommendations = []
        for user in top_users[:num_users]:  # Limit to top `num_users`
            user_ratings = user_movie_matrix.loc[user]
            # Get top-rated movies by the user
            user_recommendations = user_ratings[user_ratings > 4].sort_values(ascending=False).index.tolist()
            recommendations.extend(user_recommendations)
        
        # Remove the input movie and return unique recommendations
        return list(set(recommendations) - {movie_name})
    except KeyError:
        return ["Movie not found in the dataset."]

# Interactive Script
while True:
    print("\n--- Movie Recommendation System ---")
    print("1. Get Content-Based Recommendations")
    print("2. Get Collaborative Recommendations")
    print("3. Exit")
    
    choice = input("Enter your choice (1/2/3): ")
    
    if choice == "1":
        movie_name = input("Enter a movie name: ")
        recommendations = recommend_movies_by_title(movie_name)
        print("\nContent-Based Recommendations:")
        for rec in recommendations:
            print(f"- {rec}")
    
    elif choice == "2":
        movie_name = input("Enter a movie name: ")
        recommendations = recommend_movies_for_users(movie_name)
        print("\nCollaborative Filtering Recommendations:")
        for rec in recommendations:
            print(f"- {rec}")
    
    elif choice == "3":
        print("Goodbye!")
        break
    
    else:
        print("Invalid choice. Please try again.")



--- Movie Recommendation System ---
1. Get Content-Based Recommendations
2. Get Collaborative Recommendations
3. Exit


Enter your choice (1/2/3):  2
Enter a movie name:  Toy Story



Collaborative Filtering Recommendations:
- Movie not found in the dataset.

--- Movie Recommendation System ---
1. Get Content-Based Recommendations
2. Get Collaborative Recommendations
3. Exit


Enter your choice (1/2/3):  1
Enter a movie name:  Nadja (1994)



Content-Based Recommendations:
- Dead Man Walking (1995)
- Mr. Holland's Opus (1995)
- White Balloon, The (1995)
- Antonia's Line (1995)
- Belle de jour (1967)
- Nadja (1994)
- Exotica (1994)
- Madness of King George, The (1994)
- Priest (1994)
- Quiz Show (1994)

--- Movie Recommendation System ---
1. Get Content-Based Recommendations
2. Get Collaborative Recommendations
3. Exit


Enter your choice (1/2/3):  2
Enter a movie name:  Desperado (1995)



Collaborative Filtering Recommendations:
- Some Like It Hot (1959)
- Alien (1979)
- Raiders of the Lost Ark (1981)
- Jean de Florette (1986)
- Menace II Society (1993)
- Graduate, The (1967)
- Big Sleep, The (1946)
- Cinema Paradiso (1988)
- Blue Angel, The (Blaue Engel, Der) (1930)
- Casablanca (1942)
- Rosewood (1997)
- Local Hero (1983)
- Bonnie and Clyde (1967)
- 2001: A Space Odyssey (1968)
- When Harry Met Sally... (1989)
- Secret of Roan Inish, The (1994)
- Sting, The (1973)
- Swingers (1996)
- Dead Poets Society (1989)
- Searching for Bobby Fischer (1993)
- To Kill a Mockingbird (1962)
- Stand by Me (1986)
- Fargo (1996)
- Mystery Science Theater 3000: The Movie (1996)
- Vertigo (1958)
- Magnificent Seven, The (1954)
- Get Shorty (1995)
- Murder, My Sweet (1944)
- Big Night (1996)
- Monty Python's Life of Brian (1979)
- Contact (1997)
- Clerks (1994)
- Citizen Kane (1941)
- Godfather, The (1972)
- GoodFellas (1990)
- Man Who Would Be King, The (1975)
- Leaving Las Vegas (1995)

Enter your choice (1/2/3):  3


Goodbye!
