In [6]:
import pandas as pd
from pathlib import Path
import ast
import datetime

In [2]:

def is_popular(tmdb_count, imdb_votes, threshold=10000):
    """Tell whether a title counts as popular based on its vote counts.

    A title is popular when it has strictly more than ``threshold`` votes on
    at least one of the two platforms.

    Args:
        tmdb_count: Number of votes on TMDB.
        imdb_votes: Number of votes on IMDb (-1 is used elsewhere in this
            notebook as the "missing" sentinel and never qualifies).
        threshold: Minimum number of votes (exclusive) required on either
            platform. Defaults to 10000, the cut-off previously hard-coded.

    Returns:
        True if either count exceeds ``threshold``, otherwise False.
    """
    return (tmdb_count > threshold) or (imdb_votes > threshold)

In [3]:
# Project root, resolved relative to the directory the notebook is run from
# (two levels above the current working directory).
BASE_DIR = Path.cwd().parent.parent
# When this code runs as a .py script (where __file__ exists) use instead:
# BASE_DIR = Path(__file__).resolve().parent.parent.parent
SILVER_DIR = BASE_DIR / "data" / "2_silver"

# TMDB files
# Join with pathlib's "/" operator instead of a hard-coded "\\" so the path
# resolves on any operating system, not only on Windows.
movies_path = SILVER_DIR / "base_movies_and_shows.csv"

# The file is ';'-separated and its first column holds the row index.
all_data = pd.read_csv(movies_path, sep=';', index_col=0)

In [4]:
# Cast imdb_count to integer. Raw values may be strings with thousands
# separators (e.g. "3,042,120"); missing values become the sentinel -1.
# Going through pd.to_numeric also copes with the column having been parsed as
# float (where str(value) looks like "28269.0" and a direct int cast fails)
# and with this cell being re-run on already-converted integers.
votes_raw = all_data['imdb_count']
votes_text = votes_raw.astype(str).str.replace(",", "", regex=False)
all_data['imdb_count'] = (
    pd.to_numeric(votes_text.where(votes_raw.notna()))
    .fillna(-1)
    .astype(int)
)

# Replace nulls in ratings with the sentinel -1
all_data['imdb_rating'] = all_data['imdb_rating'].fillna(-1)
all_data['tmdb_rating'] = all_data['tmdb_rating'].fillna(-1)

# Replace nulls in release_date with a sentinel date
all_data['release_date'] = all_data['release_date'].fillna('1111-11-11')


def _to_spanish_date(value):
    """Convert a 'YYYY-MM-DD' date string to Spanish format 'dd/MM/YYYY'.

    Values that are already in 'dd/MM/YYYY' form are validated and returned
    unchanged, so re-running this cell does not raise on converted dates.
    Any other format raises ValueError.
    """
    try:
        parsed = datetime.datetime.strptime(value, '%Y-%m-%d')
    except ValueError:
        # Already converted by a previous run of this cell (or invalid, in
        # which case this second parse raises).
        parsed = datetime.datetime.strptime(value, '%d/%m/%Y')
    return parsed.strftime('%d/%m/%Y')


# Convert string dates (YYYY-MM-dd) to Spanish format (dd/MM/YYYY)
all_data['release_date'] = all_data['release_date'].map(_to_spanish_date)

# Create new column 'is_popular' based on the number of votes.
# Iterating over the two columns avoids the much slower row-wise
# DataFrame.apply(axis=1), which builds a Series object for every row.
all_data['is_popular'] = [
    is_popular(tmdb_votes, imdb_votes)
    for tmdb_votes, imdb_votes in zip(all_data['tmdb_count'], all_data['imdb_count'])
]

# For simplicity, pick only records with 2 ratings available.
# .copy() detaches the result from the unfiltered frame so later column
# assignments (including a re-run of this cell) do not raise
# SettingWithCopyWarning.
has_both_ratings = (all_data['imdb_rating'] != -1) & (all_data['tmdb_rating'] != -1)
all_data = all_data[has_both_ratings].copy()

# TODO: merge data and watch providers


In [5]:
# Display the current contents of all_data; pandas truncates the rendering of
# long frames to the first and last rows.
all_data

Unnamed: 0,type,id,title,overview,release_date,genre,tmdb_rating,tmdb_count,imdb_rating,imdb_count,is_popular
0,movie,278,Cadena perpetua,"Acusado del asesinato de su mujer, Andrew Dufr...",23/09/1994,"['Crimen', 'Drama']",8.700,28269,9.3,3042120,True
1,movie,238,El padrino,"Don Vito Corleone, conocido dentro de los círc...",14/03/1972,"['Crimen', 'Drama']",8.687,21424,9.2,2121667,True
2,movie,240,El Padrino Parte II,Continuación de la saga de los Corleone con do...,20/12/1974,"['Crimen', 'Drama']",8.571,12945,9.0,1424468,True
3,movie,424,La lista de Schindler,"Oskar Schindler, un hombre de enorme astucia y...",15/12/1993,"['Drama', 'Historia', 'Bélica']",8.565,16418,9.0,1517035,True
4,movie,389,12 hombres sin piedad,Tras escuchar todos los testimonios y valorar ...,10/04/1957,['Drama'],8.548,9116,9.0,920922,True
...,...,...,...,...,...,...,...,...,...,...,...
10933,show,1877,Phineas y Ferb,Phineas y Ferb son dos hermanastros que viven ...,17/08/2007,"['Animación', 'Comedia', 'Familia', 'Sci-Fi & ...",7.882,897,8.1,60263,True
10934,show,36983,Rosario + Vampire,"Aono Tsukune es un estudiante de 15 años, el c...",03/01/2008,"['Animación', 'Comedia', 'Sci-Fi & Fantasy']",7.881,269,6.8,3855,False
10935,show,124800,Amor y Muerte,La verdadera historia de Candy y Pat Montgomer...,27/04/2023,"['Crimen', 'Drama']",7.879,368,7.5,42919,True
10936,show,68507,La materia oscura,Lyra es una huérfana que vive en un universo p...,03/11/2019,"['Drama', 'Sci-Fi & Fantasy']",7.874,1724,7.8,90305,True


In [12]:
# Show only the titles flagged as popular.
# NOTE(review): the saved output of this cell contains a row with
# imdb_rating == -1, which the "2 ratings available" filter in the cleaning
# cell would have removed -- the output looks stale; restart the kernel and
# run all cells to refresh it.
all_data.loc[all_data['is_popular']]

Unnamed: 0,type,id,title,overview,release_date,genre,tmdb_rating,tmdb_count,imdb_rating,imdb_count,is_popular
0,movie,278,Cadena perpetua,"Acusado del asesinato de su mujer, Andrew Dufr...",23/09/1994,"['Crimen', 'Drama']",8.700,28269,9.3,3042120,True
1,movie,238,El padrino,"Don Vito Corleone, conocido dentro de los círc...",14/03/1972,"['Crimen', 'Drama']",8.687,21424,9.2,2121667,True
2,movie,240,El Padrino Parte II,Continuación de la saga de los Corleone con do...,20/12/1974,"['Crimen', 'Drama']",8.571,12945,9.0,1424468,True
3,movie,424,La lista de Schindler,"Oskar Schindler, un hombre de enorme astucia y...",15/12/1993,"['Drama', 'Historia', 'Bélica']",8.565,16418,9.0,1517035,True
4,movie,389,12 hombres sin piedad,Tras escuchar todos los testimonios y valorar ...,10/04/1957,['Drama'],8.548,9116,9.0,920922,True
...,...,...,...,...,...,...,...,...,...,...,...
10935,show,124800,Amor y Muerte,La verdadera historia de Candy y Pat Montgomer...,27/04/2023,"['Crimen', 'Drama']",7.879,368,7.5,42919,True
10936,show,68507,La materia oscura,Lyra es una huérfana que vive en un universo p...,03/11/2019,"['Drama', 'Sci-Fi & Fantasy']",7.874,1724,7.8,90305,True
10937,show,1447,Psych,Shawn es un joven muy distinto a los demás. De...,07/07/2006,"['Comedia', 'Crimen', 'Drama', 'Misterio']",7.874,856,8.4,116003,True
10946,show,93405,El juego del calamar,Cientos de personas con problemas de dinero ac...,17/09/2021,"['Action & Adventure', 'Drama', 'Misterio']",7.862,15772,-1.0,-1,True
