In [1]:
import pandas as pd
import requests
from PIL import Image

### Helper Functions

In [2]:
def get_displayed_movies():
    return movies.head(50)

def get_recommended_movies(new_user_ratings):
    return movies.head(10)

def get_popular_movies(genre: str):
    if genre == genres[1]:
        return movies.head(10)
    else: 
        return movies[10:20]

def get_movie_image_url(movie_id):
    img = Image.open(f"movies_folder/{movie_id}.jpg")
    return img

# Function to get top movies for the selected genre
def get_top_movies(genre, movie_stats, movies, top_n=10):
    # Filter movies by the selected genre
    genre_movies = movies[movies['Genres'].str.contains(genre)]
    # Join with the movie stats
    genre_movies_stats = genre_movies.join(movie_stats, on='MovieID')
    # Filter movies with more than a threshold of ratings to avoid movies with few high ratings
    popular_movies = genre_movies_stats[genre_movies_stats['count'] > 100]
    # Get the top N movies by average rating
    top_movies = popular_movies.sort_values(by='mean', ascending=False).head(top_n)
    return top_movies

def rating_to_stars(rating):
    full_stars = int(rating)
    half_star = "⭐" if rating - full_stars >= 0.5 else ""
    return '⭐' * full_stars + half_star

# System 1

Here we select the top 10 movies of each genre based on a weighted average of the ratings via:

(mean_ratings * count) / (count + median)

where:
- mean_ratings is the mean of the ratings for the movie
- count is the number of reviews for this movie
- median is the median number of reviews for movies in this genre

This weighted rating is how we deal with movies that receive only one 5-point review be considered highly rated and such.

(notes: the get_top_movies function was altered from the one above used in streamlit in order for it to work in the notebook)

In [3]:
import pandas as pd

def get_top_movies(genre, movie_stats, movies):
    genre_movies = movies[movies['Genres'].str.contains(genre)]
    merged_data = pd.merge(genre_movies, movie_stats, left_on='MovieID', right_index=True)
    top_movies = merged_data.sort_values(by='weighted_mean', ascending=False).head(10)
    return top_movies

movies_columns = ['MovieID', 'Title', 'Genres']
movies = pd.read_csv('data/movies.dat', sep='::', engine='python', names=movies_columns, encoding='ISO-8859-1')

ratings_columns = ['UserID', 'MovieID', 'Rating', 'Timestamp']
ratings = pd.read_csv('data/ratings.dat', sep='::', engine='python', names=ratings_columns, encoding='ISO-8859-1')

genre = 'Sci-Fi'  # Replace this with your preferred genre

# Load and preprocess the data
data = pd.merge(ratings, movies, on='MovieID')
movie_stats = data.groupby('MovieID').agg({'Rating': ['mean', 'count']})
movie_stats.columns = ['mean', 'count']
genre_median_ratings = data.groupby('Genres')['Rating'].count().median()
movie_stats['weighted_mean'] = (
    (movie_stats['mean'] * movie_stats['count']) / (movie_stats['count'] + genre_median_ratings)
)

top_movies = get_top_movies(genre, movie_stats, movies)

print(f"Top 10 Highly-Rated Movies in {genre}")
for _, row in top_movies.iterrows():
    movie_id = row['MovieID']
    title = row['Title']
    rating = row['weighted_mean']
    image_url = get_movie_image_url(movie_id)

    print(f"{title}")
    print(f"Rating: {rating_to_stars(rating)} ({rating:.1f})")
    print(image_url)
    print("\n")




Top 10 Highly-Rated Movies in Sci-Fi
Star Wars: Episode IV - A New Hope (1977)
Rating: ⭐⭐⭐ (3.4)
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=185x278 at 0x12712E750>


Star Wars: Episode V - The Empire Strikes Back (1980)
Rating: ⭐⭐⭐ (3.2)
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=185x278 at 0x12712FA90>


Matrix, The (1999)
Rating: ⭐⭐⭐ (3.1)
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=185x278 at 0x12712E510>


Star Wars: Episode VI - Return of the Jedi (1983)
Rating: ⭐⭐⭐ (3.0)
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=185x278 at 0x12712F950>


Terminator 2: Judgment Day (1991)
Rating: ⭐⭐⭐ (3.0)
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=185x278 at 0x12712F810>


Back to the Future (1985)
Rating: ⭐⭐⭐ (2.9)
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=185x278 at 0x12712DED0>


Terminator, The (1984)
Rating: ⭐⭐⭐ (2.8)
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=185x278 at 0x12712E750>


Alien (1979)
Rating: ⭐

# System 2