In [1]:
import pandas as pd

In [2]:
# Read the MovieLens movie catalogue and the user ratings from their CSV files
movies, ratings = (
    pd.read_csv(csv_path)
    for csv_path in (
        'ml-latest-small/ml-latest-small/movies.csv',
        'ml-latest-small/ml-latest-small/ratings.csv',
    )
)

In [3]:
# Show the first five rows of each frame, movies first and ratings second
print(movies.head(), ratings.head(), sep='\n')

   movieId                               title  \
0        1                    Toy Story (1995)   
1        2                      Jumanji (1995)   
2        3             Grumpier Old Men (1995)   
3        4            Waiting to Exhale (1995)   
4        5  Father of the Bride Part II (1995)   

                                        genres  
0  Adventure|Animation|Children|Comedy|Fantasy  
1                   Adventure|Children|Fantasy  
2                               Comedy|Romance  
3                         Comedy|Drama|Romance  
4                                       Comedy  
   userId  movieId  rating  timestamp
0       1        1     4.0  964982703
1       1        3     4.0  964981247
2       1        6     4.0  964982224
3       1       47     5.0  964983815
4       1       50     5.0  964982931


In [4]:
# Attach each movie's title and genres to its ratings through the shared movieId key
data = ratings.merge(movies, on='movieId')

In [5]:
# Keep only the user, title, rating and genres columns (movieId and timestamp
# are dropped). The explicit .copy() makes `data` an independent frame instead
# of a slice of the merged result, so assigning to its columns later does not
# raise SettingWithCopyWarning.
data = data.loc[:, ['userId', 'title', 'rating', 'genres']].copy()

In [6]:
# Convert the pipe-delimited genres string of every row into a list of genres.
# .assign() builds a new frame instead of writing into a slice of the merged
# frame, which avoids SettingWithCopyWarning. Values that are not strings are
# passed through untouched, so re-running this cell keeps the lists instead of
# turning them into NaN (which a second .str.split would do).
data = data.assign(
    genres=data['genres'].map(
        lambda genres: genres.split('|') if isinstance(genres, str) else genres
    )
)

In [7]:
# Build the user-by-title ratings matrix: one row per user, one column per
# movie title, each cell holding the mean rating that user gave that title
# (NaN where the user has not rated it).
user_item_matrix = pd.pivot_table(
    data,
    values='rating',
    index='userId',
    columns='title',
    aggfunc='mean',
)

In [8]:
from surprise import Dataset, Reader
from surprise import SVD
from surprise.model_selection import train_test_split
from surprise import accuracy
from surprise import dump

ModuleNotFoundError: No module named 'surprise'

In [None]:
# Load the dataset into Surprise.
# MovieLens ratings run from 0.5 to 5.0 stars in half-star steps, so the rating
# scale must start at 0.5; with a lower bound of 1, Surprise would clip every
# estimate below 1 up to 1.
# Movie titles are used as the raw item ids here, so predictions must later be
# requested by title as well.
reader = Reader(rating_scale=(0.5, 5.0))
data_surprise = Dataset.load_from_df(data[['userId', 'title', 'rating']], reader)

In [None]:
# Hold out 20% of the ratings as a test set. The fixed random_state makes the
# shuffle, and therefore the split, identical on every run of the notebook.
trainset, testset = train_test_split(data_surprise, test_size=0.2, random_state=42)

In [None]:
# Build and fit the SVD matrix-factorisation model on the training split.
# SVD initialises its factors randomly, so the seed is fixed to make the fitted
# model (and the pickled file produced from it) reproducible across runs.
model = SVD(random_state=42)
model.fit(trainset)

In [None]:
# Persist the fitted model to disk so it can be loaded again later
dump.dump(file_name='model.pkl', algo=model)

In [None]:
def get_recommendations(title, user_rating, top_n=5):
    """Recommend the best-rated movies that share a genre with ``title``.

    Reads the module-level ``movies`` (movieId, title, pipe-delimited genres)
    and ``ratings`` (movieId, rating) frames.

    Args:
        title: Exact title of the reference movie, as it appears in ``movies``.
        user_rating: Rating the user gave the reference movie. Accepted so that
            existing callers keep working; it is not used in the ranking.
        top_n: Maximum number of titles to return (defaults to 5).

    Returns:
        A list of up to ``top_n`` titles ordered by descending mean rating.
        The reference movie itself is never included. An empty list is
        returned when ``title`` is not present in ``movies``.
    """
    # Look the reference movie up; an unknown title yields no recommendations
    # instead of an IndexError.
    reference_genres = movies.loc[movies['title'] == title, 'genres']
    if reference_genres.empty:
        return []
    movie_genres = set(reference_genres.iloc[0].split('|'))

    # Compare whole genre tokens rather than substrings of the raw string, and
    # tolerate rows whose genres value is missing.
    shares_genre = movies['genres'].str.split('|').apply(
        lambda genres: isinstance(genres, list) and not movie_genres.isdisjoint(genres)
    )
    # Recommending the movie the user just picked back to them is useless.
    candidates = movies[shares_genre & (movies['title'] != title)]

    # Rank the candidates by their mean rating over all users.
    mean_ratings = ratings.groupby('movieId')['rating'].mean().reset_index()
    top_movies = (
        candidates
        .merge(mean_ratings, on='movieId')
        .sort_values(by='rating', ascending=False)
        .head(top_n)
    )

    return top_movies['title'].tolist()


In [None]:
import streamlit as st
from surprise import dump

# Load the model
# dump.load returns a (predictions, algo) pair; only the fitted algorithm is used.
# NOTE(review): dump.load unpickles the file, so only load a model.pkl that was
# produced by this notebook.
model = dump.load('model.pkl')[1]

st.title("Movie Recommendation System")

# User input
user_id = st.number_input("Enter your User ID:", min_value=1, max_value=1000, value=1)
movie_title = st.selectbox("Select a movie:", movies['title'].tolist())
user_rating = st.slider("Rate the movie (1-5):", min_value=1, max_value=5, value=3)

if st.button("Get Recommendations"):
    # Collaborative-filtering estimate. The model was trained with movie titles
    # as its item ids, so the title (not the movieId) has to be passed here;
    # passing a movieId makes every item look unknown to the model.
    prediction = model.predict(user_id, movie_title, r_ui=user_rating)
    st.write(f"Predicted rating for {movie_title}: {prediction.est:.2f}")

    # Get content-based recommendations
    recommendations = get_recommendations(movie_title, user_rating)

    st.write("Top 5 Recommendations:")
    for rec in recommendations:
        st.write(rec)