In [1]:
# Imports to clean the data
#import polars as pl
import pandas as pd
import numpy as np

Cargar los dataframes

In [49]:

amazon_df = pd.read_csv('./Datasets/amazon_prime_titles-score.csv')
disney_plus_df = pd.read_csv('./Datasets/disney_plus_titles-score.csv')
netflix_title_df = pd.read_csv('./Datasets/netflix_titles-score.csv')
hulu_df = pd.read_csv('./Datasets/hulu_titles-score (2).csv')


amazon_df.sort_values(by=['release_year', 'duration_type'])['title']


Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description,score
5547,s5548,Movie,"""Mixed Up""",Nishi Chawla,"UDAY KRISHNA, BETHANY RISHELL, LUCY BOND, SANJ...",,,2020,16+,106 min,"Drama, Romance","""Mixed Up"" examines casual factors that make u...",30
5977,s5978,TV Show,"""The Paramedic Angel""",,"Nate Reidnauer, Nikki Hrichak, Nina Randazzo, ...",,,2021,ALL,1 Season,Drama,The tragedy of a loving family man and paramed...,100
7556,s7557,Movie,#Home,Rojin Thomas,"Indrans, Sreenath Bhasi, Vijay Babu, Manju Pil...",,,2021,13+,161 min,Drama,"Home is about the humble, technology-challenge...",4
7555,s7556,Movie,#Home (4K UHD),Rojin Thomas,"Indrans, Sreenath Bhasi, Vijay Babu, Manju Pil...",,,2021,13+,161 min,Drama,"Home is about the humble, technology-challenge...",62
5606,s5607,TV Show,#IGotThis,,,,,2020,TV-PG,1 Season,Drama,Meet remarkable people who have discovered way...,53
...,...,...,...,...,...,...,...,...,...,...,...,...,...
6065,s6066,Movie,Zomblies,David M Reynolds,"David M Reynolds, Christopher Dane",,,2010,18+,47 min,Horror,When rookie zombie hunters send out a distress...,60
17,s18,TV Show,Zoo Babies,,Narrator - Gillian Barlett,,,2008,ALL,1 Season,"Kids, Special Interest",A heart warming and inspiring series that welc...,30
16,s17,Movie,Zoombies,Glenn Miller,"Marcus Anderson, Kaiwi Lyman, Andrew Asper",,,2016,13+,87 min,"Horror, Science Fiction",When a strange virus quickly spreads through a...,92
18,s19,TV Show,Zoë Coombs Marr: Bossy Bottom,,Zoë Coombs Marr,,,2020,18+,1 Season,"Comedy, Talk Show and Variety",Zoë Coombs Marr has been on hiatus. Sort of. F...,49


Los valores nulos del campo rating deberán reemplazarse por el string “**`G`**”

In [None]:
def fill_na(df):
    """
    Rellena valores NaN en la columna 'rating' con el valor 'G'
    
    Argumentos:
    df -- DataFrame de entrada
    
    Devuelve:
    df -- DataFrame con valores NaN en la columna 'rating' rellenados con 'G'
    """
    df['rating'].fillna(value='G', inplace=True)
    return df

fill_na(amazon_df)
fill_na(disney_plus_df)
fill_na(netflix_title_df)
fill_na(hulu_df)

Crear columna *id* y ponerla en su lugar

In [None]:
def generate_id(df):
    """
    Genera un ID combinando la primera letra del título y el número de show_id
    
    Argumentos:
    df -- DataFrame de entrada
    
    Devuelve:
    str -- ID generado
    """
    plataform_name = df['title']
    show_id = df['show_id']
    return plataform_name[0] + str(show_id)

def create_id_column(df):
    """
    Crea una columna 'id' en el DataFrame que contiene los IDs generados
    
    Argumentos:
    df -- DataFrame de entrada
    
    Devuelve:
    df -- DataFrame con una columna 'id' que contiene los IDs generados
    """
    df['id'] = df.apply(generate_id, axis=1)
    pop_id = df.pop('id')
    df.insert(0, 'id', pop_id)
    return df

create_id_column(amazon_df)
create_id_column(disney_plus_df)
create_id_column(hulu_df)
create_id_column(netflix_title_df)

De haber fechas, deberán tener el formato **`AAAA-mm-dd`**

In [None]:
def format_dates(df):
    """
    Formatea las fechas en el formato 'YYYY-MM-DD'
    
    Argumentos:
    df -- DataFrame de entrada
    
    Devuelve:
    df -- DataFrame con fechas formateadas en el formato 'YYYY-MM-DD'
    """   
    df['date_added'] = pd.to_datetime(df['date_added']).dt.strftime('%Y-%m-%d')
    return df

format_dates(amazon_df)
format_dates(hulu_df)
format_dates(disney_plus_df)
format_dates(netflix_title_df)



Los campos de texto deberán estar en **minúsculas**, sin excepciones

In [None]:
def lower_case(df):
    """Convierte todos los valores en las columnas de tipo 'object' de un DataFrame de Pandas a minúsculas.
    
    Argumentos:
    df(pandas.DataFrame): DataFrame a ser modificado.
    
    Devuelve:
    pandas.DataFrame: DataFrame con todos los valores en las columnas de tipo 'object' convertidos a minúsculas.
    """
    string_columns = [column for column in df.columns if df[column].dtype == 'object']
    df[string_columns] = df.loc[:,string_columns].apply(lambda x: x.str.lower())
    return df
lower_case(amazon_df)
lower_case(hulu_df)
lower_case(disney_plus_df)
lower_case(netflix_title_df)



El campo ***duration*** debe convertirse en dos campos: **`duration_int`** y **`duration_type`**. El primero será un integer y el segundo un string indicando la unidad de medición de duración: min (minutos) o season (temporadas)

In [None]:
def duration_distrib(df):
    """
    Divide la columna 'duration' en dos columnas 'duration_int' y 'duration_type'
    
    Argumentos:
    df -- DataFrame de entrada
    
    Devuelve:
    df -- DataFrame con dos columnas adicionales 'duration_int' y 'duration_type'
    """

    df[['duration_int', 'duration_type']] = df['duration'].str.split(expand=True)
    df['duration_int'] = df['duration_int'].astype(float).astype('Int64')
    df.drop(columns='duration', inplace=True)
    return df

duration_distrib(amazon_df)
duration_distrib(disney_plus_df)
duration_distrib(hulu_df)
duration_distrib(netflix_title_df)

##### DEVELOPING AN API TO LOADING THOSE DATAFRAMES

In [None]:
# Imports for the api 
from fastapi import FastAPI
from fastapi.responses import JSONResponse


In [None]:

df_dict = dict(amazon_df=amazon_df, disney_plus=disney_plus_df,
               netflix=netflix_title_df, hulu=hulu_df)

app = FastAPI()


@app.get('/{df}')
def get_dataframe(df_name: str):
    if df_name not in df_dict:
        return JSONResponse(content={"error": "DataFrame not found"})
    df = df_dict[df_name]
    return JSONResponse(content=df.to_json(orient='index'))