---
### This notebook tests every inner function before it is called from the API

---

---
### Import pandas and load data 

---

In [1]:
import pandas as pd

# Load the consolidated catalogue and discard the 'Unnamed: 0' column
# (presumably an index column written out when the CSV was saved).
df = pd.read_csv('./data/all_together_with_score.csv').drop(columns=['Unnamed: 0'])
print("df shape --> ",df.shape)

df shape -->  (22998, 16)


In [2]:
# Preview the first five rows of the loaded catalogue.
df.head(n=5)

Unnamed: 0,id,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description,duration_int,duration_type,score
0,as1,s1,movie,the grand seduction,don mckellar,"brendan gleeson, taylor kitsch, gordon pinsent",canada,2021/03/30,2014,g,113 min,"comedy, drama",a small fishing village must procure a local d...,113,min,3.467131
1,as2,s2,movie,take care good night,girish joshi,"mahesh manjrekar, abhay mahajan, sachin khedekar",india,2021/03/30,2018,13+,110 min,"drama, international",a metro family decides to fight a cyber crimin...,110,min,3.548682
2,as3,s3,movie,secrets of deception,josh webber,"tom sizemore, lorenzo lamas, robert lasardo, r...",united states,2021/03/30,2017,g,74 min,"action, drama, suspense",after a man discovers his wife is cheating on ...,74,min,3.5
3,as4,s4,movie,pink: staying true,sonia anderson,"interviews with: pink, adele, beyoncé, britney...",united states,2021/03/30,2014,g,69 min,documentary,"pink breaks the mold once again, bringing her ...",69,min,3.538055
4,as5,s5,movie,monster maker,giles foster,"harry dean stanton, kieran o'brien, george cos...",united kingdom,2021/03/30,1989,g,45 min,"drama, fantasy",teenage matt banting wants to work with a famo...,45,min,3.478992


---
### get_max_duration function

---

In [3]:

def get_max_duration_inner(df:pd.DataFrame, year=None, platform=None, duration_type=None ):
    """
        Return the title with the longest duration, optionally restricted by
        release year, platform and duration type.

        Args:
            df: catalogue with the 'id', 'title', 'release_year', 'duration_int'
                and 'duration_type' columns.
            year: keep only rows whose 'release_year' equals this value; None
                disables the filter.
            platform: platform name; only its first letter (lower-cased) is compared
                with the first character of 'id'. None or '' disables the filter.
            duration_type: keep only rows whose 'duration_type' equals this value
                (lower-cased). None or '' disables the filter.

        Returns:
            The 'title' of the row with the largest 'duration_int' (the first such
            row on ties), or None when no row with a known duration matches.

        Note:
            'duration_int' values are compared as raw numbers, so without a
            duration_type filter rows of different duration types compete directly.
    """
    mask = pd.Series(True, index=df.index)

    # Compare against None explicitly: a plain truthiness test would silently
    # drop the filter for a falsy year such as 0.
    if year is not None:
        mask &= df['release_year'] == year
    if platform:
        # na=False keeps the mask strictly boolean when an id is missing.
        mask &= df['id'].str.startswith(platform.lower()[0], na=False)
    if duration_type:
        mask &= df['duration_type'] == duration_type.lower()

    # Rows without a duration cannot be ranked, so they are excluded up front.
    candidates = df.loc[mask, ['title', 'duration_int']].dropna(subset=['duration_int'])
    if candidates.empty:
        return None

    # argmax is a single linear pass and always picks the first row on ties,
    # unlike sorting the whole frame just to read its first row.
    longest_pos = candidates['duration_int'].argmax()
    return candidates['title'].iloc[longest_pos]

---
Call without filters (or params)

---

In [4]:
# Baseline call: every optional filter is left as None.
my_year, my_platform, my_duration_type = None, None, None
res = get_max_duration_inner(
    df,
    year=my_year,
    platform=my_platform,
    duration_type=my_duration_type,
)
print("movie --> ", res)

movie -->  soothing surf at del norte for sleep black screen


---
call with year filter only

---

In [6]:
# Only the release-year filter is active.
my_year, my_platform, my_duration_type = 2019, None, None
res = get_max_duration_inner(
    df,
    year=my_year,
    platform=my_platform,
    duration_type=my_duration_type,
)
print("movie --> ", res)

movie -->  box fan medium  8 hours for sleep


---
call with platform filter

---

In [7]:
# Only the platform filter is active.
my_year, my_platform, my_duration_type = None, "Netflix", None
res = get_max_duration_inner(
    df,
    year=my_year,
    platform=my_platform,
    duration_type=my_duration_type,
)
print("movie --> ", res)

movie -->  black mirror: bandersnatch


---
call with duration_type filter

---

In [8]:
# Only the duration-type filter is active.
my_year, my_platform, my_duration_type = None, None, "seasons"
res = get_max_duration_inner(
    df,
    year=my_year,
    platform=my_platform,
    duration_type=my_duration_type,
)
print("movie --> ", res)

movie -->  survivor


---
call with all filters

---

In [9]:
# All three filters combined.
my_year, my_platform, my_duration_type = 2020, "Netflix", "seasons"
res = get_max_duration_inner(
    df,
    year=my_year,
    platform=my_platform,
    duration_type=my_duration_type,
)
print("movie --> ", res)

movie -->  grey's anatomy


---
### get_score_count_inner

---


In [10]:
def get_score_count_inner(df:pd.DataFrame, platform, scored, year):
    """
        Count the titles of one platform, released in a given year, whose score
        is strictly greater than a threshold.

        Args:
            df: catalogue with the 'id', 'release_year' and 'score' columns.
            platform: platform name; only its first letter (lower-cased) is compared
                with the first character of 'id'.
            scored: score threshold (exclusive lower bound).
            year: release year, compared for equality with 'release_year'.

        Returns:
            The number of matching rows as an int.
    """
    first_letter = platform.lower()[0]

    on_platform = df['id'].str.startswith(first_letter)
    in_year = df['release_year'] == year
    above_threshold = df['score'] > scored

    # Summing a boolean mask counts its True entries.
    return int((on_platform & in_year & above_threshold).sum())

---
call function with values in dataframe

---

In [12]:
# Parameters that match rows in the dataframe.
my_year, my_score, my_platform = 2020, 3.6, "netflix"
get_score_count_inner(df, platform=my_platform, scored=my_score, year=my_year)

71

---
Call the function with values that match no rows (expected result: 0)

---

In [13]:
# Same query for the year 2023.
my_year, my_score, my_platform = 2023, 3.6, "netflix"
get_score_count_inner(df, platform=my_platform, scored=my_score, year=my_year)

0

---

### get_count_platform_inner

---


In [14]:

def get_count_platform_inner(df:pd.DataFrame, platform):
    """
        Count the titles that belong to one platform.

        Args:
            df: catalogue with an 'id' column.
            platform: platform name; only its first letter (lower-cased) is compared
                with the first character of 'id'.

        Returns:
            The number of rows whose 'id' starts with that letter.
    """
    ids = df['id']
    first_letter = platform.lower()[0]
    return len(ids[ids.str.startswith(first_letter)])

---
call for each platform

---

In [17]:
# One count per platform, computed in the order Netflix, Amazon, Disney, Hulu.
n_netflix, n_amazon, n_disney, n_hulu = (
    get_count_platform_inner(df, name)
    for name in ("Netflix", "Amazon", "Disney", "Hulu")
)

print("amount of movies in disney platform --> ", n_disney)
print("amount of movies in amazon platform --> ", n_amazon)
print("amount of movies in netflix platform --> ", n_netflix)
print("amount of movies in hulu platform --> ", n_hulu)
print("Total --> ", sum((n_netflix, n_amazon, n_disney, n_hulu)))

amount of movies in disney platform -->  1450
amount of movies in amazon platform -->  9668
amount of movies in netflix platform -->  8807
amount of movies in hulu platform -->  3073
Total -->  22998


---
### get_actor_inner

---


In [27]:
import pandas as pd


def get_max_duration_inner(df:pd.DataFrame, year=None, platform=None, duration_type=None ):
    """
        Return the title with the longest duration, optionally restricted by
        release year, platform and duration type.

        Args:
            df: catalogue with the 'id', 'title', 'release_year', 'duration_int'
                and 'duration_type' columns.
            year: keep only rows whose 'release_year' equals this value; None
                disables the filter.
            platform: platform name; only its first letter (lower-cased) is compared
                with the first character of 'id'. None or '' disables the filter.
            duration_type: keep only rows whose 'duration_type' equals this value
                (lower-cased). None or '' disables the filter.

        Returns:
            The 'title' of the row with the largest 'duration_int' (the first such
            row on ties), or None when no row with a known duration matches.

        Note:
            'duration_int' values are compared as raw numbers, so without a
            duration_type filter rows of different duration types compete directly.
    """
    # NOTE(review): a function with this name is also defined in an earlier cell;
    # after a top-to-bottom run this later definition is the one in effect.
    mask = pd.Series(True, index=df.index)

    # Compare against None explicitly: a plain truthiness test would silently
    # drop the filter for a falsy year such as 0.
    if year is not None:
        mask &= df['release_year'] == year
    if platform:
        # na=False keeps the mask strictly boolean when an id is missing.
        mask &= df['id'].str.startswith(platform.lower()[0], na=False)
    if duration_type:
        mask &= df['duration_type'] == duration_type.lower()

    # Rows without a duration cannot be ranked, so they are excluded up front.
    candidates = df.loc[mask, ['title', 'duration_int']].dropna(subset=['duration_int'])
    if candidates.empty:
        return None

    # argmax is a single linear pass and always picks the first row on ties,
    # unlike sorting the whole frame just to read its first row.
    longest_pos = candidates['duration_int'].argmax()
    return candidates['title'].iloc[longest_pos]



def get_score_count_inner(df:pd.DataFrame, platform, scored, year):
    """
        Count the titles of one platform, released in a given year, whose score
        is strictly greater than a threshold.

        Args:
            df: catalogue with the 'id', 'release_year' and 'score' columns.
            platform: platform name; only its first letter (lower-cased) is compared
                with the first character of 'id'.
            scored: score threshold (exclusive lower bound).
            year: release year, compared for equality with 'release_year'.

        Returns:
            The number of matching rows as an int.
    """
    # NOTE(review): a function with this name is also defined in an earlier cell;
    # after a top-to-bottom run this later definition is the one in effect.
    first_letter = platform.lower()[0]

    on_platform = df['id'].str.startswith(first_letter)
    in_year = df['release_year'] == year
    above_threshold = df['score'] > scored

    # Summing a boolean mask counts its True entries.
    return int((on_platform & in_year & above_threshold).sum())



def get_count_platform_inner(df:pd.DataFrame, platform):
    """
        Count the titles that belong to one platform.

        Args:
            df: catalogue with an 'id' column.
            platform: platform name; only its first letter (lower-cased) is compared
                with the first character of 'id'.

        Returns:
            The number of rows whose 'id' starts with that letter.
    """
    # NOTE(review): a function with this name is also defined in an earlier cell;
    # after a top-to-bottom run this later definition is the one in effect.
    ids = df['id']
    first_letter = platform.lower()[0]
    return len(ids[ids.str.startswith(first_letter)])


def get_actor_inner(df:pd.DataFrame, platform, year):
    """
        Return the actor who appears most often in the titles of a given
        platform and release year.

        Args:
            df: catalogue with the 'id', 'release_year' and 'cast' columns; 'cast'
                holds actor names separated by ', '.
            platform: platform name; only its first letter (lower-cased) is compared
                with the first character of 'id'.
            year: release year, compared for equality with 'release_year'.

        Returns:
            The most frequent name among the matching 'cast' entries, or None when
            no matching title has a known cast.
    """
    on_platform = df['id'].str.startswith(platform.lower()[0])
    in_year = df['release_year'] == year
    cast = df.loc[on_platform & in_year, 'cast']

    # Drop missing casts and the "unknown cast" placeholder BEFORE testing for
    # emptiness: a platform/year whose titles all lack a known cast must yield
    # None instead of failing on an empty count.
    cast = cast[cast.notna() & (cast != "unknown cast")]
    if cast.empty:
        return None

    # One row per actor, then pick the name with the highest number of occurrences.
    actors = cast.str.split(', ').explode()
    return actors.value_counts().idxmax()

---
call with correct parameters

---

In [28]:
# Platform/year combination that exists in the dataframe.
my_platform, my_year = "Netflix", 2019
a = get_actor_inner(df, platform=my_platform, year=my_year)
a

'vincent tong'

---
Call with wrong parameters

---

In [29]:
# Same platform with the year 2030.
my_platform, my_year = "Netflix", 2030
get_actor_inner(df, platform=my_platform, year=my_year)