#### Import the modules that we will use in our API functions

In [2]:
import numpy as np
import pandas as pd
import random

#### Load clean dataset

In [33]:
clean_dataset = pd.read_csv("../datasets/clean_movies_dataset.csv")

#### Take a look at the clean dataset

In [4]:
clean_dataset.head(3)

Unnamed: 0,id,belongs_to_collection,genres,original_language,overview,popularity,production_companies,production_countries,release_date,release_year,...,release_weekday,revenue,budget,return,runtime,spoken_languages,status,tagline,title,vote_average
0,862,Toy Story Collection,"['Animation', 'Comedy', 'Family']",en,"Led by Woody, Andy's toys live happily in his ...",21.946943,['Pixar Animation Studios'],['United States of America'],1995-10-30,1995,...,lunes,373554033.0,30000000.0,12.451801,81.0,['English'],Released,,Toy Story,7.7
1,8844,,"['Adventure', 'Fantasy', 'Family']",en,When siblings Judy and Peter discover an encha...,17.015539,"['TriStar Pictures', 'Teitler Film', 'Intersco...",['United States of America'],1995-12-15,1995,...,viernes,262797249.0,65000000.0,4.043035,104.0,"['English', 'Français']",Released,Roll the dice and unleash the excitement!,Jumanji,6.9
2,15602,Grumpy Old Men Collection,"['Romance', 'Comedy']",en,A family wedding reignites the ancient feud be...,11.7129,"['Warner Bros.', 'Lancaster Gate']",['United States of America'],1995-12-22,1995,...,viernes,0.0,0.0,0.0,101.0,['English'],Released,Still Yelling. Still Fighting. Still Ready for...,Grumpier Old Men,6.5


In [5]:
clean_dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 45376 entries, 0 to 45375
Data columns (total 21 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   id                     45376 non-null  int64  
 1   belongs_to_collection  4488 non-null   object 
 2   genres                 42992 non-null  object 
 3   original_language      45365 non-null  object 
 4   overview               44435 non-null  object 
 5   popularity             45376 non-null  float64
 6   production_companies   33580 non-null  object 
 7   production_countries   39165 non-null  object 
 8   release_date           45376 non-null  object 
 9   release_year           45376 non-null  int64  
 10  release_month          45376 non-null  object 
 11  release_weekday        45376 non-null  object 
 12  revenue                45376 non-null  float64
 13  budget                 45376 non-null  float64
 14  return                 45376 non-null  float64
 15  ru

#### Create function that return the historical movies per month

In [71]:
def peliculas_mes(mes):
    
    months = list(clean_dataset["release_month"].unique())
    mes = mes.lower()
    if mes in months:
        month_movies = clean_dataset.loc[clean_dataset["release_month"] == mes].count()
        return {"mes" : mes, "cantidad" : month_movies["release_month"]}
    else: return "Ingrese un mes válido"

#### Test function

In [74]:
month = list(clean_dataset["release_month"].unique())
random_month = random.choice(month)
random_month
print(peliculas_mes(random_month))
print(peliculas_mes("asjnfca"))
print(peliculas_mes("DICiemBre"))

{'mes': 'febrero', 'cantidad': 3032}
Ingrese un mes válido
{'mes': 'diciembre', 'cantidad': 3786}


#### Create function that return the historical movies per day

In [76]:
def peliculas_dia(dia):
    
    days = list(clean_dataset["release_weekday"].unique())
    dia = dia.lower()
    if dia in days:
        weekday_movies = clean_dataset.loc[clean_dataset["release_weekday"] == dia].count()
        return {"dia" : dia, "cantidad" : weekday_movies["release_weekday"]}
    else: return "Ingrese un dia válido"

#### Test function

In [79]:
weekday = list(clean_dataset["release_weekday"].unique())
random_weekday = random.choice(weekday)
random_weekday
print(peliculas_dia(random_weekday))
print(peliculas_dia("aasndj"))
print(peliculas_dia("MARTEs"))

{'dia': 'sábado', 'cantidad': 5151}
Ingrese un dia válido
{'dia': 'martes', 'cantidad': 4641}


#### Create a function that return the number of movies, total and average earnings per franchise

In [96]:
def franquicia(franquicia):
    
    franchise = clean_dataset["belongs_to_collection"].unique()
    franchise = list(franchise)
    if franquicia in franchise:
        franchise = clean_dataset.loc[clean_dataset["belongs_to_collection"] == franquicia]
        count = franchise["belongs_to_collection"].count()
        total_earning = franchise["revenue"].sum()
        earning_mean = franchise["revenue"].mean()
        return {"franquicia" : franquicia, "cantidad" : count, "ganancia_total" : round(total_earning, 2), "ganancia_promedio" : round(earning_mean, 2)}
    
    else: return "Ingrese una franquicia válida"

#### Test function

In [123]:
franchise = clean_dataset["belongs_to_collection"].unique()
franchise = list(franchise)
random_franchise = random.choice(franchise)
random_franchise
print(franquicia(random_franchise))
print(franquicia("dkmvk"))

{'franquicia': 'Subspecies Collection', 'cantidad': 5, 'ganancia_total': 0.0, 'ganancia_promedio': 0.0}
Ingrese una franquicia válida


#### Create a function that return the movies per country

In [51]:
def peliculas_pais(pais):
    
    clean_dataset["production_countries"] = clean_dataset["production_countries"].fillna("")
    country = clean_dataset.loc[clean_dataset["production_countries"].str.contains(pais)]
    count = country["production_countries"].count()
    if count == 0:
        return "Ingrese un país válido"
    else: return {"pais" : pais, "cantidad" : count}

#### Test function

In [52]:
clean_dataset["production_countries"]

0                             ['United States of America']
1                             ['United States of America']
2                             ['United States of America']
3                             ['United States of America']
4                             ['United States of America']
                               ...                        
45371    ['Canada', 'Germany', 'United Kingdom', 'Unite...
45372                                      ['Philippines']
45373                         ['United States of America']
45374                                           ['Russia']
45375                                   ['United Kingdom']
Name: production_countries, Length: 45376, dtype: object

In [57]:
print(peliculas_pais("United Kingdom"))
print(peliculas_pais("Canada"))
print(peliculas_pais("Germany"))
print(peliculas_pais("Philippines"))
print(peliculas_pais("Russia"))
print(peliculas_pais("United States of America"))
print(peliculas_pais("vasijn"))

{'pais': 'United Kingdom', 'cantidad': 4091}
{'pais': 'Canada', 'cantidad': 1765}
{'pais': 'Germany', 'cantidad': 2260}
{'pais': 'Philippines', 'cantidad': 83}
{'pais': 'Russia', 'cantidad': 911}
{'pais': 'United States of America', 'cantidad': 21147}
Ingrese un país válido


#### Create a function that return the total earnings and movies per company

In [62]:
def productoras(productora):
    
    clean_dataset["production_companies"] = clean_dataset["production_companies"].fillna("")
    company = clean_dataset.loc[clean_dataset["production_companies"].str.contains(productora)]
    count = company["production_companies"].count()
    total_earning = company["revenue"].sum()
    if count == 0:
        return "Ingrese una productora válida"
    else: return {"productora" : productora, "ganancia_total" : round(total_earning, 2), "cantidad" : count}

#### Test function

In [60]:
clean_dataset["production_companies"]

0                              ['Pixar Animation Studios']
1        ['TriStar Pictures', 'Teitler Film', 'Intersco...
2                       ['Warner Bros.', 'Lancaster Gate']
3               ['Twentieth Century Fox Film Corporation']
4         ['Sandollar Productions', 'Touchstone Pictures']
                               ...                        
45371    ['Westdeutscher Rundfunk (WDR)', 'Working Titl...
45372                                      ['Sine Olivia']
45373                          ['American World Pictures']
45374                                        ['Yermoliev']
45375                                                  NaN
Name: production_companies, Length: 45376, dtype: object

In [65]:
print(productoras("Pixar Animation Studios"))
print(productoras("TriStar Pictures"))
print(productoras("Teitler Film"))
print(productoras("Warner Bros."))
print(productoras("Lancaster Gate"))
print(productoras("Twentieth Century Fox Film Corporation"))
print(productoras("sdgsd"))

{'productora': 'Pixar Animation Studios', 'ganancia_total': 11188533734.0, 'cantidad': 52}
{'productora': 'TriStar Pictures', 'ganancia_total': 8022366764.0, 'cantidad': 197}
{'productora': 'Teitler Film', 'ganancia_total': 327118750.0, 'cantidad': 2}
{'productora': 'Warner Bros.', 'ganancia_total': 67156995259.0, 'cantidad': 1374}
{'productora': 'Lancaster Gate', 'ganancia_total': 0.0, 'cantidad': 2}
{'productora': 'Twentieth Century Fox Film Corporation', 'ganancia_total': 47687746332.0, 'cantidad': 836}
Ingrese una productora válida


#### Create a function that return the investment, earning, return and release year per movie

In [86]:
def retorno(pelicula):
    
    titles = list(clean_dataset["title"].unique())
    if pelicula in titles:
        movie = clean_dataset.loc[clean_dataset["title"] == pelicula]
        movie = movie.head(1)
        investment = movie["budget"].iloc[0]
        earning = movie["revenue"].iloc[0]
        returns = movie["return"].iloc[0]
        realese_year = movie["release_year"].iloc[0]
        return {"pelicula" : pelicula, "inversion" : round(investment, 2), "ganacia" : round(earning, 2), "retorno" : round(returns, 2), "anio" : realese_year}
    else: return "Ingrese un película válida"

#### Test function

In [90]:
movie = clean_dataset["title"].unique()
movie = list(movie)
random_movie = random.choice(movie)
random_movie
print(retorno(random_movie))
print(retorno("asdno"))

{'pelicula': 'Manson, My Name Is Evil', 'inversion': 0.0, 'ganacia': 0.0, 'retorno': 0.0, 'anio': 2009}
Ingrese un película válida
