In [31]:
import pandas as pd
import numpy as np
import re

In [32]:
# Load the two input tables from CSV files in the current working directory.
# Later cells rely on `movies` providing movieId/title/genres and on
# `ratings` providing movieId/rating.
movies = pd.read_csv("movies.csv")
ratings = pd.read_csv("ratings.csv")

In [33]:
# Attach movie metadata to every rating row (inner join on the shared key).
merged_df = ratings.merge(movies, how="inner", on="movieId")

In [34]:
# Mean rating per movie, as a two-column frame: movieId, avg_rating.
avg_ratings = (
    merged_df
    .groupby("movieId")["rating"]
    .mean()
    .rename("avg_rating")  # name the Series so reset_index() yields the final column name
    .reset_index()
)
avg_ratings

Unnamed: 0,movieId,avg_rating
0,1,3.920930
1,2,3.431818
2,3,3.259615
3,4,2.357143
4,5,3.071429
...,...,...
9719,193581,4.000000
9720,193583,3.500000
9721,193585,3.500000
9722,193587,3.500000


In [35]:
# Left join keeps every movie; movies without any rating get NaN in avg_rating.
movies_with_ratings = movies.merge(avg_ratings, how="left", on="movieId")

In [36]:
# Recommendation criteria, passed to filter_movies() further down.
# Lowest acceptable mean rating (compared with >=).
min_avg_rating = 4.0
# A movie qualifies if its genres string contains at least one of these.
preferred_genres = ["Romance"]
# Earliest acceptable release year (compared with >=).
min_release_year = 2000

In [37]:
# Four-digit year in parentheses, e.g. "Bossa Nova (2000)" -> "2000".
_YEAR_IN_TITLE = re.compile(r"\((\d{4})\)")


def _extract_year(title):
    """Return the first parenthesised four-digit year in `title`, or NaN if absent."""
    match = _YEAR_IN_TITLE.search(title) if isinstance(title, str) else None
    return int(match.group(1)) if match else np.nan


def filter_movies(df, min_rating, genres, year):
    """Select movies by average rating, genre and release year.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns "movieId", "title", "avg_rating" and
        "genres". It is not modified.
    min_rating : float
        Keep rows whose "avg_rating" is >= this value. Rows with a missing
        rating are dropped.
    genres : str or iterable of str
        Keep rows whose "genres" string contains at least one of these
        (substring match). A bare string is treated as a single genre.
    year : int
        Keep rows whose release year, parsed from the title, is >= this
        value. Rows whose title has no parenthesised year are dropped.

    Returns
    -------
    pandas.DataFrame
        Columns "movieId", "title", "avg_rating", "genres", "year", with the
        original index labels. "year" is always int64.
    """
    # A bare string would otherwise be iterated character by character,
    # matching almost every genres value.
    wanted = [genres] if isinstance(genres, str) else list(genres)

    rated = df[df["avg_rating"] >= min_rating]

    # Explicit boolean ndarray: stays a valid mask when `rated` is empty
    # and treats missing (non-string) genres as "no match" instead of raising.
    genre_mask = np.array(
        [isinstance(g, str) and any(name in g for name in wanted) for g in rated["genres"]],
        dtype=bool,
    )
    matched = rated.loc[genre_mask]

    # One regex search per title; NaN marks titles without a year.
    years = np.array([_extract_year(t) for t in matched["title"]], dtype=float)
    recent = years >= year  # NaN compares False, so year-less titles drop out

    # No NaN survives the filter, so the column can be a true integer dtype
    # regardless of whether some input titles lacked a year.
    result = matched.loc[recent].assign(year=years[recent].astype("int64"))
    return result[["movieId", "title", "avg_rating", "genres", "year"]]

In [38]:
# Apply the criteria configured above to the rated movie catalogue.
recommended_movies = filter_movies(
    df=movies_with_ratings,
    min_rating=min_avg_rating,
    genres=preferred_genres,
    year=min_release_year,
)

In [39]:
# Heading followed by the first ten recommendations, as plain text.
print("Recommended Movies:", recommended_movies.head(10), sep="\n")

Recommended Movies:
      movieId                                              title  avg_rating  \
2506     3353                         Closer You Get, The (2000)    4.500000   
2665     3567                                  Bossa Nova (2000)    5.000000   
2901     3888                               Skipped Parts (2000)    4.000000   
2971     3983                         You Can Count on Me (2000)    4.166667   
3087     4144      In the Mood For Love (Fa yeung nin wa) (2000)    4.214286   
3089     4147                    Nico and Dani (Krámpack) (2000)    4.000000   
3097     4157                          Price of Milk, The (2000)    4.500000   
3100     4160  Widow of St. Pierre, The (Veuve de Saint-Pierr...    4.125000   
3195     4307                       Fast Food, Fast Women (2000)    4.500000   
3215     4342                                    Big Eden (2000)    4.500000   

                            genres  year  
2506                Comedy|Romance  2000  
2665         