# Movie Recommender System

In [None]:
import os
import re

import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

Reading the CSV files and building a pivoted movie-by-user ratings DataFrame.

In [None]:
# Movie metadata (movieId -> title lookups are done against this frame later on).
movies_name = pd.read_csv('ml-latest-small/movies.csv')

# Ratings reshaped so each row is a movieId and each column a userId;
# cells hold the rating, NaN where that user did not rate that movie.
df = pd.read_csv('ml-latest-small/ratings.csv').pivot(
    index='movieId',
    columns='userId',
    values='rating',
)

# Zero-filled copy of the matrix; 0 stands in for "no rating".
movies_ids = df.fillna(0)
movies_ids

Retrieving the top 10 movies by average rating, considering only movies that have at least 50 ratings.

In [None]:
# Per-movie mean across every user column of the zero-filled matrix.
# NOTE(review): movies_ids was built with fillna(0), so unrated entries count as 0 here.
# The value is therefore (sum of ratings) / (number of users), not the mean of the ratings
# actually given, and it favours frequently rated movies. Averaging the un-filled pivot
# (`df`) would give the true mean, but it would also change the top-10 ordering that the
# hard-coded poster list below appears to rely on, so confirm before changing.
average_ratings = movies_ids.mean(axis=1) 
# Number of non-zero cells per movie, used as the rating count
# (assumes 0 never occurs as a real rating, TODO confirm).
counts = (movies_ids != 0).sum(axis=1)

# One row per movieId with its score and rating count.
ratings_summary = pd.DataFrame({
    'movieId': average_ratings.index,
    'average_rating': average_ratings.values,
    'num_ratings': counts.values
})

# Attach the movie metadata columns (merge on movieId), keep movies with >= 50 ratings,
# then take the 10 highest-scoring rows.
movies_with_ratings = ratings_summary.merge(movies_name, on='movieId')
movies_with_ratings_filtered = movies_with_ratings[movies_with_ratings['num_ratings'] >= 50]
top10_movies = movies_with_ratings_filtered.sort_values('average_rating', ascending=False).head(10)

top10_titles = top10_movies['title'].tolist()

# Poster URLs are hard-coded to avoid calling the poster API for the top-10 list.
# NOTE(review): presumably listed in the same order as top10_titles; nothing enforces this,
# so the list goes stale if the data or the ranking above changes. Verify after any edit.

top10_posters = [
    'https://m.media-amazon.com/images/M/MV5BMDAyY2FhYjctNDc5OS00MDNlLThiMGUtY2UxYWVkNGY2ZjljXkEyXkFqcGc@._V1_FMjpg_UX1000_.jpg',
    'https://m.media-amazon.com/images/M/MV5BNDYwNzVjMTItZmU5YS00YjQ5LTljYjgtMjY2NDVmYWMyNWFmXkEyXkFqcGc@._V1_FMjpg_UX1000_.jpg',
    'https://m.media-amazon.com/images/M/MV5BYTViYTE3ZGQtNDBlMC00ZTAyLTkyODMtZGRiZDg0MjA2YThkXkEyXkFqcGc@._V1_.jpg',
    'https://m.media-amazon.com/images/M/MV5BN2NmN2VhMTQtMDNiOS00NDlhLTliMjgtODE2ZTY0ODQyNDRhXkEyXkFqcGc@._V1_.jpg',
    'https://m.media-amazon.com/images/M/MV5BNDdhOGJhYzctYzYwZC00YmI2LWI0MjctYjg4ODdlMDExYjBlXkEyXkFqcGc@._V1_FMjpg_UX1000_.jpg',
    'https://m.media-amazon.com/images/M/MV5BZTQ2MDhmMWMtZjk4Ni00ZDM1LWFjNGEtYzhkNWRmMjk1NzI0XkEyXkFqcGc@._V1_.jpg',
    'https://m.media-amazon.com/images/M/MV5BNGMxZDBhNGQtYTZlNi00N2UzLWI4NDEtNmUzNWM2NTdmZDA0XkEyXkFqcGc@._V1_.jpg',
    'https://m.media-amazon.com/images/M/MV5BOTgyOGQ1NDItNGU3Ny00MjU3LTg2YWEtNmEyYjBiMjI1Y2M5XkEyXkFqcGc@._V1_FMjpg_UX1000_.jpg',
    'https://m.media-amazon.com/images/M/MV5BNjM1ZDQxYWUtMzQyZS00MTE1LWJmZGYtNGUyNTdlYjM3ZmVmXkEyXkFqcGc@._V1_.jpg',
    'https://i.pinimg.com/736x/51/ba/64/51ba64b2e61f820e0e86bdd2f4c6e92c.jpg'
]

# Display the ranked titles as the cell output.
top10_titles


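Computing the pairwise cosine similarity between the rating vectors of all movies. Cosine similarity measures the angle between two vectors (higher means more similar); it is not a Euclidean distance.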

In [None]:
# Pairwise cosine similarity between the rows of movies_ids (one row per movieId, holding
# zero-filled ratings per userId). Entry [i, j] compares the movies at row positions i and j,
# so positions must be mapped back to ids through movies_ids.index.
similarity_score = cosine_similarity(movies_ids)

Getting the 5 movies that have the highest cosine similarity to a given movie.

In [None]:
def recommendedMoviesList(title, top_n=5):
  """
  Return the titles of the movies most similar to `title`.

  Similarity is read from the precomputed `similarity_score` matrix, whose rows and
  columns follow the row order of `movies_ids`.

  Parameters
  ----------
  title : str
      Exact title as it appears in the 'title' column of `movies_name`.
  top_n : int, optional
      Number of recommendations to return (default 5).

  Returns
  -------
  list of str
      Up to `top_n` titles, most similar first. The queried movie itself is never included.

  Raises
  ------
  IndexError
      If `title` is not in `movies_name`, or the movie has no row in `movies_ids`
      (i.e. it has no ratings).
  ValueError
      If `top_n` is negative.
  """
  if top_n < 0:
    raise ValueError(f"top_n must be non-negative, got {top_n}")

  title_matches = movies_name.loc[movies_name['title'] == title, 'movieId']
  if title_matches.empty:
    raise IndexError(f"title {title!r} not found in movies_name")
  mId = title_matches.iloc[0]

  row_positions = np.flatnonzero(movies_ids.index == mId)
  if row_positions.size == 0:
    raise IndexError(f"movie {title!r} has no row in the ratings matrix")
  index = row_positions[0]

  # Stable descending order, so ties keep their original row order.
  scores = np.asarray(similarity_score[index])
  ranked = np.argsort(-scores, kind='stable')

  # Drop the query movie by position instead of assuming it sorts first: another movie
  # with an identical rating vector also scores 1.0 and may precede it in the ordering.
  ranked = ranked[ranked != index][:top_n]

  suggested_movies = []
  for idx in ranked:
    movie_id = movies_ids.index[idx]
    movie_name = movies_name.loc[movies_name['movieId'] == movie_id, 'title'].iloc[0]
    suggested_movies.append(movie_name)
  return suggested_movies

# Example call; the cell output is the list of recommended titles for this movie.
recommendedMoviesList('Avengers: Age of Ultron (2015)')

Using the `requests` module to make GET requests to the OMDb API for the poster URLs.

In [None]:
import requests

# Prefer the OMDB_API_KEY environment variable so the key does not have to live in the notebook.
# NOTE(review): the literal fallback keeps the notebook runnable without any setup, but it is
# a committed credential. Rotate it and remove the fallback before sharing this file.
API_KEY = os.environ.get("OMDB_API_KEY") or "cd6aff26"

def get_movie_data(title, year=None, timeout=10):
    """
    Query the OMDb API by title, optionally narrowed by release year.

    Parameters
    ----------
    title : str
        Movie title, sent as the ``t`` query parameter.
    year : str or int, optional
        Release year, sent as the ``y`` query parameter when truthy.
    timeout : float, optional
        Seconds to wait for the server before ``requests`` raises a timeout
        error (default 10).

    Returns
    -------
    The decoded JSON body of the response.
    """
    # HTTPS so the API key in the query string is not sent in clear text.
    base_url = "https://www.omdbapi.com/"
    params = {
        "apikey": API_KEY,
        "t": title,
    }
    if year:
        params["y"] = year

    # requests applies no timeout by default; without one a stalled connection
    # would block the cell indefinitely.
    response = requests.get(base_url, params=params, timeout=timeout)
    return response.json()

def get_poster_url(title, year=None):
    """
    Return the poster URL for a movie, or None when OMDb has no poster for it.

    Parameters
    ----------
    title : str
        Movie title passed through to ``get_movie_data``.
    year : str or int, optional
        Release year passed through to ``get_movie_data``.

    Returns
    -------
    str or None
        The "Poster" value of the response when the lookup succeeded and a poster
        is present; None when the lookup failed, the field is missing or empty,
        or it holds the "N/A" placeholder.
    """
    data = get_movie_data(title, year)
    # .get() avoids a KeyError when a successful response has no "Poster" field at all.
    poster = data.get("Poster")
    if data.get("Response") == "True" and poster and poster != "N/A":
        return poster
    return None


Testing the API call for all recommended movies.

In [None]:
# Trailing "(YYYY)" or "(YYYY-YYYY)" / "(YYYY–YYYY)"; only the first year is captured.
_YEAR_SUFFIX = re.compile(r'\s*\((\d{4})(?:[-\u2013]\d{4})?\)\s*$')
# Titles stored with the leading article moved to the end, e.g. "Avengers, The".
_TRAILING_ARTICLE = re.compile(r'^(.*), (The|An|A)$')


def _split_title_year(movie):
  """Split a 'Title (YYYY)' string into (title, year); year is None without a trailing year."""
  year_match = _YEAR_SUFFIX.search(movie)
  if year_match:
    title = movie[:year_match.start()].strip()
    year = year_match.group(1)
  else:
    # Parentheses that do not hold a year (e.g. an alternative title) stay in the title.
    title = movie
    year = None

  # Put a trailing article back in front ("Avengers, The" -> "The Avengers") so the
  # title is in natural order for the OMDb lookup.
  article_match = _TRAILING_ARTICLE.match(title)
  if article_match:
    title = f"{article_match.group(2)} {article_match.group(1)}"
  return title, year


poster_urls = []
showMovies = recommendedMoviesList("Avengers, The (2012)")
for movie in showMovies:
  title_part, year_part = _split_title_year(movie)

  # get_poster_url returns None when no poster is available; keep it so that
  # poster_urls stays aligned with showMovies.
  poster_url = get_poster_url(title_part, year_part)
  poster_urls.append(poster_url)

poster_urls
