In [1]:
# Data source: TMDB movie metadata dataset on Kaggle — https://www.kaggle.com/datasets/tmdb/tmdb-movie-metadata

In [2]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler

In [3]:
# Load the TMDB credits table and preview its first rows.
credits_csv_path = '../input/tmdb-movie-metadata/tmdb_5000_credits.csv'
credits = pd.read_csv(credits_csv_path)
credits.head(5)

In [4]:
# Load the TMDB movies table and preview its first rows.
movies_df = pd.read_csv(
    "../input/tmdb-movie-metadata/tmdb_5000_movies.csv"
)
movies_df.head(5)

In [5]:
# (rows, columns) of each raw table, one tuple per line: credits first, then movies.
for frame in (credits, movies_df):
    print(frame.shape)

In [6]:
# Column labels of each raw table: credits first, then movies.
print(credits.columns, movies_df.columns, sep='\n')

In [7]:
# Rename the credits key so both tables share the join column 'id'.
# Only the column is renamed; the row labels are left untouched because a
# column-based merge ignores the input indexes and builds a fresh one.
credits_column_renamed = credits.rename(columns={'movie_id': 'id'})

# Inner join (pandas default): keeps the movies that appear in both tables.
movies_df_merge = movies_df.merge(credits_column_renamed, on='id')
movies_df_merge.head()

In [8]:
# Drop the columns this section does not work with, then preview the result.
unused_columns = ['homepage', 'title_x', 'title_y', 'status', 'production_countries']
movies_cleaned_df = movies_df_merge.drop(unused_columns, axis=1)
movies_cleaned_df.head(5)

In [9]:
# Summarize the cleaned frame: column dtypes, non-null counts and memory usage.
movies_cleaned_df.info()

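![Weighted rating formula](http://trailerpark.weebly.com/uploads/8/8/5/5/8855465/7628808.png?371)

This is the formula we will be using in this method. As implemented in the cells below:

$$\text{weighted average} = \frac{R \cdot v + C \cdot m}{v + m}$$

where $R$ is a movie's average vote, $v$ is its vote count, $C$ is the mean of the average vote across all movies, and $m$ is the 70th percentile of the vote counts.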

In [10]:
# Inputs of the weighted-average formula, all taken from the cleaned frame.
R = movies_cleaned_df['vote_average']  # per-movie average vote
v = movies_cleaned_df['vote_count']    # per-movie number of votes
C = R.mean()                           # mean of the average vote over all movies
m = v.quantile(0.70)                   # 70th percentile of the vote counts

In [11]:
# Show the global mean vote C and the vote-count threshold m computed in the previous cell.
print(C, m)

In [12]:
# Blend each movie's own rating R (weight v) with the global mean C (weight m).
weighted_sum = (R * v) + (C * m)
total_weight = v + m
movies_cleaned_df['weighted_average'] = weighted_sum / total_weight

In [13]:
# Preview the frame now that it carries the 'weighted_average' column.
movies_cleaned_df.head(5)

In [14]:
# Rank every movie by its weighted average, best first. A single sort is
# enough: the same ranking feeds both the chart and the table below.
movie_sorted_ranking = movies_cleaned_df.sort_values('weighted_average', ascending=False)
weight_average = movie_sorted_ranking  # alias kept so cells that refer to this name keep working
top_weighted = movie_sorted_ranking.head(10)

# Horizontal bar chart of the ten best-ranked titles; only those ten rows are handed to seaborn.
fig, axis1 = plt.subplots(figsize=(12, 6))
sns.barplot(data=top_weighted, x='weighted_average', y='original_title', ax=axis1)
axis1.set_xlim(4, 10)
axis1.set_title('Best Movies by average votes', weight='bold')
axis1.set_xlabel('Weighted Average Score', weight='bold')
axis1.set_ylabel('Movie Title', weight='bold')

# bbox_inches='tight' grows the saved canvas to fit the long movie titles on
# the y-axis, which the default figure margins would otherwise cut off.
fig.savefig('best_movies.png', bbox_inches='tight')

# Table view of the same top ten, with the raw inputs next to the blended score.
movie_sorted_ranking[['original_title', 'vote_count', 'vote_average', 'weighted_average', 'popularity']].head(10)

In [15]:
# Redraw the top-ten chart on its own (no table output).
# `movie_sorted_ranking` is already ordered by weighted_average (descending)
# when it is created, so it is reused instead of being sorted again; matplotlib
# and seaborn are already imported earlier in the notebook.
weight_average = movie_sorted_ranking
top_weighted = weight_average.head(10)

fig, axis1 = plt.subplots(figsize=(12, 6))
sns.barplot(data=top_weighted, x='weighted_average', y='original_title', ax=axis1)
axis1.set_xlim(4, 10)
axis1.set_title('Best Movies by average votes', weight='bold')
axis1.set_xlabel('Weighted Average Score', weight='bold')
axis1.set_ylabel('Movie Title', weight='bold')

# bbox_inches='tight' keeps the long y-axis titles from being clipped in the saved file.
fig.savefig('best_movies.png', bbox_inches='tight')

In [16]:
# Re-rank the movies by the 'popularity' column, highest first.
popularity = movie_sorted_ranking.sort_values('popularity', ascending=False)
top_popular = popularity.head(10)

# Horizontal bar chart of the ten most popular titles; only those rows go to seaborn.
fig, ax = plt.subplots(figsize=(12, 6))
sns.barplot(data=top_popular, x='popularity', y='original_title', ax=ax)
ax.set_title('Most Popular by Votes', weight='bold')
ax.set_xlabel('Score of Popularity', weight='bold')
ax.set_ylabel('Movie Title', weight='bold')

# bbox_inches='tight' keeps the long y-axis titles from being clipped in the saved file.
fig.savefig('best_popular_movies.png', bbox_inches='tight')

In [17]:
# Preview the five most popular movies with all of their columns.
popularity.head(5)

**Recommendation based on the scaled weighted average and popularity scores (each weighted 50%)**

In [18]:
# Rescale both ranking signals to the [0, 1] range (MinMaxScaler's default)
# so they can be blended on a common scale.
scaling = MinMaxScaler()
movie_scaled_df = scaling.fit_transform(movies_cleaned_df[['weighted_average', 'popularity']])

# fit_transform returns a bare array with no row labels. Reattach the source
# frame's index so that writing these columns back into `movies_cleaned_df`
# lines up row-for-row even if that frame does not have a default 0..n-1 index.
movie_normalized_df = pd.DataFrame(
    movie_scaled_df,
    columns=['weighted_average', 'popularity'],
    index=movies_cleaned_df.index,
)

In [19]:
# Preview the two rescaled columns.
movie_normalized_df.head(5)

In [20]:
# Copy the rescaled values into the main frame under their own column names
# (pandas aligns each assigned column on the row index).
movies_cleaned_df['normalized_weight_average'] = movie_normalized_df['weighted_average']
movies_cleaned_df['normalized_popularity'] = movie_normalized_df['popularity']

In [21]:
# Preview the frame now that it carries the two normalized columns.
movies_cleaned_df.head(5)

In [22]:
# Blend the two normalized signals with equal (50/50) weight.
normalized_rating = movies_cleaned_df['normalized_weight_average']
normalized_popularity = movies_cleaned_df['normalized_popularity']
movies_cleaned_df['score'] = normalized_rating * 0.5 + normalized_popularity * 0.5

# Rank by the blended score and show the twenty best movies.
movies_scored_df = movies_cleaned_df.sort_values(by='score', ascending=False)
preview_columns = ['original_title', 'normalized_weight_average', 'normalized_popularity', 'score']
movies_scored_df[preview_columns].head(20)

In [23]:
# Rank the movies by the blended score, best first.
scored_df = movies_cleaned_df.sort_values('score', ascending=False)
top_scored = scored_df.head(10)

# Horizontal bar chart of the ten best blended scores; only those rows go to seaborn.
fig, ax = plt.subplots(figsize=(16, 6))
sns.barplot(data=top_scored, x='score', y='original_title', palette='deep', ax=ax)
ax.set_title('Best Rated & Most Popular Blend', weight='bold')
ax.set_xlabel('Score', weight='bold')
ax.set_ylabel('Movie Title', weight='bold')

# bbox_inches='tight' keeps the long y-axis titles from being clipped in the saved file.
fig.savefig('scored_movies.png', bbox_inches='tight')