In [1]:
# Libraries
import requests
import pandas as pd
# Show every row when a DataFrame is displayed (None disables truncation).
pd.options.display.max_rows = None

## Obtener datos de la API

In [2]:
# Episodes endpoint queried by this notebook.
url = "https://breakingbadapi.com/api/episodes"
# Without a timeout, requests waits indefinitely when the server never
# answers, which would hang the kernel; give up after 10 seconds instead.
response = requests.get(url, timeout=10)

In [3]:
# Report success only for HTTP 200; otherwise show the status code received.
request_message = (
    "¡ La requests se ha realizado con éxito !"
    if response.status_code == 200
    else f"Error: {response.status_code}"
)
print(request_message)

¡ La requests se ha realizado con éxito !


In [4]:
# Decode the JSON body of the response into Python objects.
dataset = response.json()
# Normalize the decoded records into a flat DataFrame.
df = pd.json_normalize(data=dataset)
# Preview the first three rows.
df.head(n=3)

Unnamed: 0,episode_id,title,season,air_date,characters,episode,series
0,1,Pilot,1,01-20-2008,"[Walter White, Jesse Pinkman, Skyler White, Ha...",1,Breaking Bad
1,2,Cat's in the Bag...,1,01-27-2008,"[Walter White, Jesse Pinkman, Skyler White, Wa...",2,Breaking Bad
2,3,...And the Bag's in the River,1,02-10-2008,"[Walter White, Jesse Pinkman, Skyler White, Ha...",3,Breaking Bad


## Limpieza de datos

In [5]:
# Unpack the (rows, columns) tuple once and report both dimensions.
n_rows, n_cols = df.shape
print("Filas del dataset:", n_rows)
print("Columnas del dataset:", n_cols)

Filas del dataset: 102
Columnas del dataset: 7


In [6]:
# Show the column dtypes before the conversion.
print("Los tipos de datos del dataset original son: \n")
print(df.dtypes)

# Target dtype per column: integer season/episode numbers, datetime air date.
target_dtypes = {
    'season': 'int',
    'episode': 'int',
    'air_date': 'datetime64[ns]',
}
df = df.astype(target_dtypes)

# Show the column dtypes again after the conversion.
print("Los tipos de datos del dataset modificado son: \n")
print(df.dtypes)

Los tipos de datos del dataset original son: 

episode_id     int64
title         object
season        object
air_date      object
characters    object
episode       object
series        object
dtype: object
Los tipos de datos del dataset modificado son: 

episode_id             int64
title                 object
season                 int64
air_date      datetime64[ns]
characters            object
episode                int64
series                object
dtype: object


In [7]:
# Count missing values per column (isna is the canonical name of isnull).
df.isna().sum()

episode_id    0
title         0
season        0
air_date      0
characters    0
episode       0
series        0
dtype: int64

In [8]:
# List the distinct values of the 'series' column.
df.loc[:, 'series'].unique()

array(['Breaking Bad', 'Better Call Saul'], dtype=object)

In [9]:
# Keep only the rows whose 'series' value is exactly 'Breaking Bad'.
df_bb = df.loc[df['series'].eq('Breaking Bad')]
print("En este caso solo analizaremos la serie Breaking bad")

En este caso solo analizaremos la serie Breaking bad


In [10]:
# Report the dimensions of the Breaking Bad subset.
bb_rows, bb_cols = df_bb.shape
print("-------- Dataset Breaking Bad --------")
print("Filas del dataset:", bb_rows)
print("Columnas del dataset:", bb_cols)

-------- Dataset Breaking Bad --------
Filas del dataset: 62
Columnas del dataset: 7


## Resumen

In [11]:
# Summary card for the series: air-date range, season/episode counts and cast.
print("Titulo: Breaking Bad \n")

# Earliest and latest air dates, formatted as month-day-year.
start = df_bb['air_date'].min().strftime('%m-%d-%Y')
print(f"Primera emisión: {start} \n")

end = df_bb['air_date'].max().strftime('%m-%d-%Y')
print(f"Última emisión: {end} \n")

# Number of distinct season values.
count_season = df_bb['season'].nunique()
print(f"Numero de temporadas: {count_season} \n")

# Number of non-null episode entries.
count_episodes = df_bb['episode'].count()
print(f"Numero de episodios: {count_episodes} \n")

# Unique character names in order of first appearance. dict.fromkeys
# de-duplicates while keeping insertion order, replacing the repeated
# `value in characters` list scans. The loop variable is no longer named
# `list`: at cell level that rebound the builtin for every later cell.
# Unpacking with [*...] also avoids calling `list` in a kernel where an
# earlier run already clobbered it.
characters = [*dict.fromkeys(
    name for cast in df_bb['characters'] for name in cast
)]
print(f"Reparto: {characters}")

Titulo: Breaking Bad 

Primera emisión: 01-20-2008 

Última emisión: 09-29-2013 

Numero de temporadas: 5 

Numero de episodios: 62 

Reparto: ['Walter White', 'Jesse Pinkman', 'Skyler White', 'Hank Schrader', 'Marie Schrader', 'Walter White Jr.', 'Krazy-8', 'Bogdan Wolynetz', 'Gretchen Schwartz', 'Ken Wins', 'Elliott Schwarts', 'Gretchen Swartz', 'Badger', 'Tuco Salamanca', 'Hector Salamanca', 'Jane Margolis', 'Tortuga', 'Ted Beneke', 'Saul Goodman', 'Gustavo Fring', 'Donald Margolis', 'Combo', 'Mike Ehrmantraut', 'The cousins', 'Gale Boetticher', 'a fly', 'Andrea Cantillo', 'Victor', 'White White Jr.', 'Don Eladio', 'Lydia Rodarte-Quayle', 'Todd Alquist', 'Skinny Pete', 'Jack Welker', 'Steve Gomez', 'Eliott Schwartz']


## Personajes más frecuentes según temporada

In [12]:
def temporada(num, data=None):
    """Count how many episodes of one season each character appears in.

    Parameters
    ----------
    num : int
        Season number, compared for equality against the ``season`` column.
    data : pandas.DataFrame, optional
        Episode table with a ``season`` column and a ``characters`` column
        whose cells are iterables of character names. When omitted, the
        notebook-level ``df_bb`` frame is used, as before.

    Returns
    -------
    pandas.DataFrame
        Columns ``Personaje`` (character name) and ``Apariciones`` (count),
        sorted by ``Apariciones`` in descending order. Empty, with the same
        two columns, when no row matches ``num``.
    """
    if data is None:
        # Backward-compatible default: the frame defined earlier in the notebook.
        data = df_bb

    season_episodes = data[data['season'] == num]

    # Tally names in order of first appearance. The loop variable is `cast`
    # rather than `list` so the builtin is not shadowed.
    appearances = {}
    for cast in season_episodes['characters']:
        for name in cast:
            appearances[name] = appearances.get(name, 0) + 1

    # A comprehension (not list(...)) keeps this working even in a kernel
    # where the name `list` has been rebound by another cell.
    rows = [[name, count] for name, count in appearances.items()]
    return pd.DataFrame(rows, columns=['Personaje', 'Apariciones']).sort_values(
        by='Apariciones', ascending=False
    )


   
# Iterate over the season numbers actually present in the data rather than
# range(1, count_season + 1), which silently assumes the seasons are numbered
# 1..N with no gaps.
for season_number in sorted(df_bb['season'].unique()):
    print(f"---------Temporada {season_number}----------")
    display(temporada(season_number))

---------Temporada 1----------


Unnamed: 0,Personaje,Apariciones
0,Walter White,7
1,Jesse Pinkman,7
2,Skyler White,7
5,Walter White Jr.,7
3,Hank Schrader,6
4,Marie Schrader,6
6,Krazy-8,3
13,Tuco Salamanca,2
7,Bogdan Wolynetz,1
8,Gretchen Schwartz,1


---------Temporada 2----------


Unnamed: 0,Personaje,Apariciones
0,Walter White,13
2,Skyler White,13
1,Jesse Pinkman,13
4,Marie Schrader,11
5,Walter White Jr.,11
3,Hank Schrader,10
8,Jane Margolis,8
12,Ted Beneke,4
13,Saul Goodman,4
15,Donald Margolis,3


---------Temporada 3----------


Unnamed: 0,Personaje,Apariciones
0,Walter White,13
1,Jesse Pinkman,13
2,Skyler White,12
6,Gustavo Fring,11
3,Hank Schrader,11
5,Walter White Jr.,11
8,Saul Goodman,10
4,Marie Schrader,10
9,Mike Ehrmantraut,5
11,Ted Beneke,4


---------Temporada 4----------


Unnamed: 0,Personaje,Apariciones
0,Walter White,13
2,Skyler White,13
1,Jesse Pinkman,13
6,Gustavo Fring,12
3,Hank Schrader,11
4,Marie Schrader,11
7,Saul Goodman,10
8,Mike Ehrmantraut,10
5,Walter White Jr.,10
11,Andrea Cantillo,4


---------Temporada 5----------


Unnamed: 0,Personaje,Apariciones
0,Walter White,16
1,Jesse Pinkman,16
2,Skyler White,16
9,Marie Schrader,13
3,Hank Schrader,13
4,Walter White Jr.,12
5,Saul Goodman,12
12,Todd Alquist,11
8,Lydia Rodarte-Quayle,9
6,Mike Ehrmantraut,8
