# Fandango Project

**- Objetivo:** Determinar si las calificaciones que Fandango asignaba a las películas durante 2015 estaban manipuladas para vender más boletos de cine.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Parte 1: Analizar el puntaje de Fandango vs el puntaje de los usuarios

In [None]:
# Load the Fandango scrape; the path is relative to the notebook's working directory.
fandango = pd.read_csv('fandango_scrape.csv')

In [None]:
# Preview the first rows of the Fandango data.
fandango.head()

In [None]:
# Column dtypes and non-null counts.
fandango.info()

In [None]:
# Summary statistics of the numeric columns, rounded to two decimals.
fandango.describe().round(2)

In [None]:
# Explore how a film's popularity (vote count) relates to its rating.
fig, ax = plt.subplots(figsize=(10, 4), dpi=200)
sns.scatterplot(data=fandango, x='RATING', y='VOTES', ax=ax)

In [None]:
# Pairwise correlation of the numeric columns only. The frame also holds the
# string column FILM, which makes DataFrame.corr() raise on pandas >= 2.0
# unless numeric_only=True is passed.
fandango.corr(numeric_only=True).round(2)

In [None]:
# Create a new column from FILM: keep the text after the last '('.
# The trailing ')' is still attached at this point.
fandango['release_year'] = fandango['FILM'].str.rsplit('(', n=1).str[-1]

In [None]:
# Drop the trailing ')' left by the previous step. Values that no longer
# contain ')' are returned unchanged, so re-running this cell is harmless
# (a bare split(')')[-2] raises IndexError on the second run).
fandango['release_year'] = fandango['release_year'].apply(
    lambda year: year.split(')')[-2] if ')' in year else year
)

In [None]:
# Display the frame, now including the release_year column.
fandango

In [None]:
# Number of films per extracted release year.
fandango['release_year'].value_counts()

In [None]:
# Bar chart of the film count per release year.
sns.countplot(data=fandango, x='release_year')

In [None]:
# The ten films with the highest vote counts.
fandango.nlargest(10,'VOTES')

In [None]:
# How many films have zero votes.
(fandango['VOTES'] == 0).sum()

In [None]:
# Keep only the films that received at least one vote.
# .copy() makes `reviewed` an independent frame, so adding columns to it
# later does not trigger SettingWithCopyWarning.
reviewed = fandango[fandango['VOTES'] > 0].copy()

In [None]:
# Display the filtered frame (films with VOTES > 0).
reviewed

In [None]:
# Overlay the density of the RATING column with that of the STARS column.
fig, ax = plt.subplots(figsize=(10, 4), dpi=200)
for column, legend_label in (('RATING', 'TRUE RATING'), ('STARS', 'STARS DISPLAYED')):
    sns.kdeplot(data=reviewed, x=column, clip=[0, 5], fill=True, label=legend_label, ax=ax)
# Place the legend outside the axes, to the right.
ax.legend(loc=(1.05, 0.5))

In [None]:
# Add STARS_DIFF: the gap between the displayed stars and the users' rating,
# rounded to two decimals. assign() returns a new frame instead of writing
# into a slice of `fandango`, which avoids SettingWithCopyWarning and keeps
# the cell safe to re-run.
reviewed = reviewed.assign(
    STARS_DIFF=(reviewed['STARS'] - reviewed['RATING']).round(2)
)
reviewed

In [None]:
# Count of films for each distinct STARS_DIFF value.
fig, ax = plt.subplots(figsize=(10, 4), dpi=200)
sns.countplot(data=reviewed, x='STARS_DIFF', ax=ax)

In [None]:
# Films whose displayed stars exceed the user rating by exactly one star.
reviewed[reviewed['STARS_DIFF'] == 1]

# Parte 2: Comparamos los puntajes de Fandango con otros sitios

In [None]:
# Load the scores from the other sites; the path is relative to the notebook's working directory.
all_sites = pd.read_csv('all_sites_scores.csv')

In [None]:
# Preview the first rows of the multi-site scores.
all_sites.head()

In [None]:
# Column dtypes and non-null counts.
all_sites.info()

In [None]:
# Summary statistics of the numeric columns, rounded to two decimals.
all_sites.describe().round(2)

### Rotten Tomatoes

In [None]:
# Rotten Tomatoes critic score against Rotten Tomatoes user score.
fig, ax = plt.subplots(figsize=(10, 4), dpi=200)
sns.scatterplot(data=all_sites, x='RottenTomatoes', y='RottenTomatoes_User', ax=ax)

In [None]:
# Critic score minus user score: positive means critics scored the film higher.
all_sites['Rotten_Diff'] = all_sites['RottenTomatoes'].sub(all_sites['RottenTomatoes_User'])

In [None]:
# Mean absolute gap between critic and user scores.
all_sites['Rotten_Diff'].abs().mean()

In [None]:
# Distribution of the signed critic-minus-user difference.
fig, ax = plt.subplots(figsize=(10, 4), dpi=200)
sns.histplot(data=all_sites, x='Rotten_Diff', kde=True, bins=20, ax=ax)
ax.set_title('RT Critics Score minus RT User Score')

In [None]:
# Distribution of the absolute critic/user difference.
abs_rotten_diff = all_sites['Rotten_Diff'].abs()
fig, ax = plt.subplots(figsize=(10, 4), dpi=200)
sns.histplot(x=abs_rotten_diff, kde=True, bins=18, ax=ax)
ax.set_title('Abs Difference  between RT Critics Score and RT User Score')

In [None]:
# The five films with the lowest Rotten_Diff, i.e. users scored them
# furthest above the critics.
all_sites.nsmallest(5, 'Rotten_Diff')[['FILM', 'Rotten_Diff']]

In [None]:
# The five films with the highest Rotten_Diff, i.e. critics scored them
# furthest above the users.
all_sites.nlargest(5, 'Rotten_Diff')[['FILM', 'Rotten_Diff']]

### Metacritic

In [None]:
# Metacritic critic score against Metacritic user score, with fixed axis limits.
fig, ax = plt.subplots(figsize=(10, 4), dpi=200)
sns.scatterplot(data=all_sites, x='Metacritic', y='Metacritic_User', ax=ax)
ax.set_xlim(0, 100)
ax.set_ylim(0, 10)

In [None]:
# Metacritic user vote count against IMDB user vote count.
fig, ax = plt.subplots(figsize=(10, 4), dpi=200)
sns.scatterplot(
    data=all_sites,
    x='Metacritic_user_vote_count',
    y='IMDB_user_vote_count',
    ax=ax,
)

### IMDB

In [None]:
# The film with the highest IMDB user vote count.
all_sites.nlargest(1,'IMDB_user_vote_count')

In [None]:
# The film with the highest Metacritic user vote count.
all_sites.nlargest(1,'Metacritic_user_vote_count')

# Puntajes de Fandango vs otras webs

In [None]:
# Inner join on FILM: keep only the films present in both data sets.
df = fandango.merge(all_sites, on='FILM', how='inner')

In [None]:
# Column dtypes and non-null counts of the merged frame.
df.info()

In [None]:
# Preview the first rows of the merged frame.
df.head()

In [None]:
# Normalize the scores: divide each source column by a fixed factor and
# round to one decimal.
# NOTE(review): presumably this maps every site onto a 0-5 scale (0-100
# scores / 20, 0-10 scores / 2) -- confirm against the source data.
norm_spec = {
    'RT_Norm': ('RottenTomatoes', 20),
    'RTU_Norm': ('RottenTomatoes_User', 20),
    'Meta_Norm': ('Metacritic', 20),
    'Meta_U_Norm': ('Metacritic_User', 2),
    'IMDB_Norm': ('IMDB', 2),
}
for norm_col, (source_col, divisor) in norm_spec.items():
    df[norm_col] = (df[source_col] / divisor).round(1)

In [None]:
# Preview the merged frame with the normalized columns added.
df.head()

In [None]:
# Keep only the Fandango columns and the normalized scores.
# .copy() makes norm_scores an independent frame, so adding a column to it
# later does not trigger SettingWithCopyWarning.
norm_scores = df[
    ['STARS', 'RATING', 'RT_Norm', 'RTU_Norm', 'Meta_Norm', 'Meta_U_Norm', 'IMDB_Norm']
].copy()

In [None]:
# Preview the first rows of the normalized scores.
norm_scores.head()

# Comparamos los puntajes entre todos

In [None]:
def move_legend(ax, new_loc, **kws):
    """Re-create the legend of ``ax`` at a new location.

    The handles, labels and title of the legend currently attached to ``ax``
    are read and passed to ``ax.legend`` again with ``loc=new_loc``.

    Parameters
    ----------
    ax : object with a ``legend_`` attribute and a ``legend`` method
        The axes whose existing legend is re-created.
    new_loc : str or tuple
        Forwarded to ``ax.legend`` as ``loc``.
    **kws
        Extra keyword arguments forwarded unchanged to ``ax.legend``.
    """
    old_legend = ax.legend_
    # Matplotlib 3.7 renamed Legend.legendHandles to legend_handles and the old
    # attribute was later removed; try the new name first, then the old one.
    handles = getattr(old_legend, 'legend_handles', None)
    if handles is None:
        handles = old_legend.legendHandles
    labels = [t.get_text() for t in old_legend.get_texts()]
    title = old_legend.get_title().get_text()
    ax.legend(handles, labels, loc=new_loc, title=title, **kws)

In [None]:
# Density of every normalized score column on a single axes.
fig, ax = plt.subplots(dpi=200, figsize=(10, 4))
sns.kdeplot(norm_scores, ax=ax, fill=True, clip=[0, 5])
# Re-create the legend in the upper-left corner.
move_legend(ax, 'upper left')

In [None]:
# Density of the RT_Norm column against the STARS column.
# dpi is 200 like every other figure in this notebook; the former value of
# 2000 requested a 20000 x 8000 pixel canvas.
fig, ax = plt.subplots(figsize=(10, 4), dpi=200)
sns.kdeplot(data=norm_scores[['RT_Norm', 'STARS']], clip=[0, 5], fill=True, ax=ax)
move_legend(ax, "upper left")

In [None]:
# Histogram of every normalized score column, 50 bins.
fig, ax = plt.subplots(figsize=(10, 4), dpi=200)
sns.histplot(data=norm_scores, bins=50, ax=ax)

# ¿Cómo están puntuadas las peores películas en las demás plataformas?

In [None]:
# Cluster the films (rows) by their scores; columns keep their original order.
# clustermap is a figure-level function that builds its own figure, so a
# preceding plt.figure(...) call only produced an extra, empty figure.
sns.clustermap(data=norm_scores, col_cluster=False)

### ¡Claramente Fandango está puntuando las películas muy por encima de lo que hacen otras páginas web!

In [None]:
# Attach the film titles. assign() builds a new frame instead of writing into
# a slice of `df`, which avoids SettingWithCopyWarning.
norm_scores = norm_scores.assign(FILM=df['FILM'])

In [None]:
# The ten films with the lowest RT_Norm value.
norm_scores.nsmallest(10,'RT_Norm')

In [None]:
# Score densities restricted to the ten films with the lowest RT_Norm value.
worst_by_rt = norm_scores.nsmallest(10, 'RT_Norm')
fig, ax = plt.subplots(figsize=(10, 4), dpi=200)
sns.kdeplot(data=worst_by_rt, clip=[0, 5], fill=True, ax=ax)
ax.set_title("Rating for RT Critic's 10 Worst Reviewed Films")

### Revisamos la película "Taken 3"

In [None]:
# Row at integer position 25 of norm_scores.
# NOTE(review): presumably this is "Taken 3", per the heading above; the
# position depends on the row order of the merge -- confirm, or look the
# film up by its FILM value instead.
norm_scores.iloc[25]

In [None]:
# Mean of five hard-coded scores.
# NOTE(review): presumably the five non-Fandango normalized scores of the row
# shown above -- confirm the values against that output.
(0.4+2.3+1.3+2.3+3.0)/5