In [1]:
#Import Necessary Packages
import requests
import pandas as pd
#import Beautiful soup to help make sense of the html tags
from bs4 import BeautifulSoup
import re

In [2]:
#Pull all Characters into df_Charactersrm
# The API paginates its results and reports the URL of the following page in
# info.next (null on the last page). Following that link, instead of looping
# over a fixed number of pages, keeps the pull complete when characters are added.
character_pages = []
url = 'https://rickandmortyapi.com/api/character/?page=1'
while url:
    response = requests.get(url, timeout=30)
    # Fail loudly on an HTTP error rather than trying to normalise an error payload.
    response.raise_for_status()
    payload = response.json()
    character_pages.append(pd.json_normalize(payload, record_path=['results']))
    url = payload['info']['next']
# Concatenate once (not once per page); ignore_index gives a unique RangeIndex
# instead of repeating each page's own row labels.
df_Charactersrm = pd.concat(character_pages, ignore_index=True)
    


In [3]:
#Pull all Locations into df_Locationsrm
# Walk the API's pagination through info.next (null on the last page) rather than
# a fixed page count, so newly added locations are not silently left out.
location_pages = []
url = 'https://rickandmortyapi.com/api/location/?page=1'
while url:
    response = requests.get(url, timeout=30)
    # Fail loudly on an HTTP error rather than trying to normalise an error payload.
    response.raise_for_status()
    payload = response.json()
    location_pages.append(pd.json_normalize(payload, record_path=['results']))
    url = payload['info']['next']
# Concatenate once (not once per page); ignore_index gives a unique RangeIndex
# instead of repeating each page's own row labels.
df_Locationsrm = pd.concat(location_pages, ignore_index=True)

In [4]:
#Pull all Episodes into df_Episodesrm
# Walk the API's pagination through info.next (null on the last page) rather than
# a fixed page count, so newly aired episodes are not silently left out.
episode_pages = []
url = 'https://rickandmortyapi.com/api/episode/?page=1'
while url:
    response = requests.get(url, timeout=30)
    # Fail loudly on an HTTP error rather than trying to normalise an error payload.
    response.raise_for_status()
    payload = response.json()
    episode_pages.append(pd.json_normalize(payload, record_path=['results']))
    url = payload['info']['next']
# Concatenate once (not once per page); ignore_index gives a unique RangeIndex
# instead of repeating each page's own row labels.
df_Episodesrm = pd.concat(episode_pages, ignore_index=True)

In [5]:
#Confirm Everything is There: print the (rows, columns) of each pulled frame,
#in the order characters, locations, episodes
for pulled_frame in (df_Charactersrm, df_Locationsrm, df_Episodesrm):
    print(pulled_frame.shape)

(671, 14)
(108, 7)
(41, 7)


In [6]:
#Duplicate each character row once per entry in its 'episode' column, then renumber the rows
df_Charactersrm = df_Charactersrm.explode('episode').reset_index(drop=True)
#Re-order the columns so that 'name' comes last
cols = df_Charactersrm.columns.tolist()
cols.remove('name')
cols.append('name')
df_Charactersrm = df_Charactersrm.loc[:, cols]

In [7]:
# Shape after the explode: rows were duplicated per entry in each character's 'episode' list.
df_Charactersrm.shape

(1038, 14)

In [8]:
#Lets Do Some DF Cleanup pre Join

#Columns that are not needed once the frames are joined
character_unused = ['created', 'location.url', 'url', 'origin.url']
location_unused = ['id', 'created', 'residents', 'url']
episode_unused = ['id', 'created', 'characters']

#New column names for the character and episode frames
character_names = {
    "status": "Character_Status",
    "species": "Character_Species",
    "type": "Character_type",
    "image": "Character_image",
    "episode": "Episode_URL",
    "origin.name": "Origin_Location_Name",
    "location.name": "Current_Location_Name",
    "name": "Character_Name",
}
episode_names = {
    "url": "Episode_URL",
    "name": "episode_name",
    "episode": "episode_Number",
}
#The location frame is joined twice (current location and origin), so it gets two naming schemes
current_location_names = {
    "name": "Current_Location_Name",
    "type": "Current_Location_type",
    "dimension": "Current Dimension",
}
origin_location_names = {
    "name": "Origin_Location_Name",
    "type": "Origin_Location_type",
    "dimension": "Origin Dimension",
}

#Drop Unnecessary Columns
df_Charactersrm = df_Charactersrm.drop(columns=character_unused)
df_Locationsrm = df_Locationsrm.drop(columns=location_unused)
df_Episodesrm = df_Episodesrm.drop(columns=episode_unused)

#Rename Columns
df_Charactersrm = df_Charactersrm.rename(columns=character_names)
df_Episodesrm = df_Episodesrm.rename(columns=episode_names)
#Make 2 dfs for Location, one for each join
df_Locationsrm_Current = df_Locationsrm.rename(columns=current_location_names)
df_Locationsrm_Origin = df_Locationsrm.rename(columns=origin_location_names)


In [9]:
#Build df_rm by left-joining three lookup frames onto the character rows, in order:
#  1. episodes          (on Episode_URL)            -> episode information for each character row
#  2. current locations (on Current_Location_Name)  -> current location information
#  3. origin locations  (on Origin_Location_Name)   -> origin location information
#The shape is printed after every join.
lookup_joins = (
    (df_Episodesrm, 'Episode_URL'),
    (df_Locationsrm_Current, 'Current_Location_Name'),
    (df_Locationsrm_Origin, 'Origin_Location_Name'),
)
df_rm = df_Charactersrm
for lookup_frame, join_key in lookup_joins:
    df_rm = df_rm.merge(lookup_frame, how='left', on=join_key)
    print(df_rm.shape)

(1038, 13)
(1038, 15)
(1038, 17)


In [10]:
#Episode_URL was the key for the episode join above; remove it from the combined frame
df_rm = df_rm.drop('Episode_URL', axis=1)


In [11]:
#Retrieve the HTML data from the site and store in an object
URL = 'https://www.imdb.com/list/ls022236589/'
# The timeout stops the cell hanging on a stalled connection, and raise_for_status()
# fails here on an HTTP error instead of parsing an error page as if it were the list.
page = requests.get(URL, timeout=30)
page.raise_for_status()
#Apply Beautiful soup html parser
soup = BeautifulSoup(page.content, 'html.parser')

In [12]:
#Pull the specific element you need: the node whose id is 'main'.
#find() returns None if the page has no such node, in which case the following find_all call fails.
results = soup.find(id='main')

In [13]:
# Collect every <div> with class 'lister-item-content' inside `results`;
# the loops that follow read the runtime, rank, rating, description and title from each one.
Rick_elems = results.find_all('div', class_='lister-item-content')

In [14]:
#Now lets pull the descriptive class names and print them for each list entry
for Rick_elem in Rick_elems:
    RunTime_elem = Rick_elem.find('span', class_='runtime')
    Rank_elem = Rick_elem.find('span', class_='lister-item-index unbold text-primary')
    Rating_elem = Rick_elem.find('span', class_='ipl-rating-star__rating')
    Description_elem = Rick_elem.find('p', class_='')
    Title_elem = Rick_elem.select_one("a:nth-of-type(2)")
    # find()/select_one() return None when nothing matches. Skip entries missing any
    # field instead of raising AttributeError on .text; this mirrors the guard used
    # when the same fields are collected into df_IMDB.
    if None in (RunTime_elem, Rank_elem, Rating_elem, Description_elem, Title_elem):
        continue
    print(RunTime_elem.text.strip())
    print(Rank_elem.text.strip())
    print(Rating_elem.text.strip())
    print(Description_elem.text.strip())
    print(Title_elem.text.strip())
    print()

22 min
1.
9.6
The Smith house is locked down after parasites threaten to take over the world by multiplying through flashbacks.
Total Rickall

23 min
2.
9.3
Rick turns himself into a pickle while Beth, Summer, and Morty go to family therapy.
Pickle Rick

23 min
3.
8.9
Rick and Morty try to save a gas life form while Jerry resides in a daycare made for Jerrys.
Mortynight Run

22 min
4.
9.5
Rick & Morty go on a simple exchange as Rick tells Morty if something goes wrong jump into the same vat of acid as he does.
The Vat of Acid Episode

22 min
5.
9.6
Rick, still in galactic prison, puts an intricate escape plan into action. Back on Earth, which is now under federation control, Morty and Summer have an argument about their grandpa.
The Rickshank Rickdemption

22 min
6.
9.1
After Rick gives Morty a love potion for a school dance, things quickly spiral out of control when the serum splices with the flu - causing it to spread. Rick and Morty scramble to cure the crisis, making matters worse 

In [15]:
#Now lets turn this into a dataframe so we can use it for analysis
#df_IMDB starts out as a plain list with one dict per list entry
df_IMDB = []
for list_item in Rick_elems:
    # Look up the tag behind each output column
    tags_by_column = {
        'Run_Time': list_item.find('span', class_='runtime'),
        'IMDB_Rank': list_item.find('span', class_='lister-item-index unbold text-primary'),
        'IMDB_Rating': list_item.find('span', class_='ipl-rating-star__rating'),
        'IMDB_Description': list_item.find('p', class_=''),
        'episode_name': list_item.select_one("a:nth-of-type(2)"),
    }
    # Entries missing any of the five tags are left out entirely
    if any(tag is None for tag in tags_by_column.values()):
        continue
    df_IMDB.append({column: tag.text.strip() for column, tag in tags_by_column.items()})
pd.DataFrame(df_IMDB)

Unnamed: 0,Run_Time,IMDB_Rank,IMDB_Rating,IMDB_Description,episode_name
0,22 min,1.0,9.6,The Smith house is locked down after parasites...,Total Rickall
1,23 min,2.0,9.3,"Rick turns himself into a pickle while Beth, S...",Pickle Rick
2,23 min,3.0,8.9,Rick and Morty try to save a gas life form whi...,Mortynight Run
3,22 min,4.0,9.5,Rick & Morty go on a simple exchange as Rick t...,The Vat of Acid Episode
4,22 min,5.0,9.6,"Rick, still in galactic prison, puts an intric...",The Rickshank Rickdemption
5,22 min,6.0,9.1,After Rick gives Morty a love potion for a sch...,Rick Potion #9
6,22 min,7.0,8.9,"Following a stressful adventure, Rick and Mort...",Rest and Ricklaxation
7,22 min,8.0,9.2,"An adventure with an invisibility belt, but a ...",Star Mort Rickturn of the Jerri
8,22 min,9.0,8.9,"Rick reveals to Morty his ""Morty's mind blower...",Morty's Mind Blowers
9,22 min,10.0,9.8,Whilst Rick and Morty go off to adventure in A...,The Ricklantis Mixup


In [16]:
#Now we need to join df_rm (our data from the Rick and Morty API) with the data we just pulled from IMDB
#lets make df_IMDB into a dataframe
df_IMDB = pd.DataFrame(df_IMDB)

#The two sources have no shared unique identifier and spell some episode names differently,
#so bring episode_name to a common form on both sides before joining:
#strip apostrophes, colons and commas, then title-case
for frame in (df_rm, df_IMDB):
    cleaned_names = frame['episode_name']
    for unwanted in ("'", ":", ","):
        cleaned_names = cleaned_names.str.replace(unwanted, "")
    frame['episode_name'] = cleaned_names.str.title()

#Join df_rm with df_IMDB to get the information from IMDB into the rest of our table
df_rm = df_rm.merge(df_IMDB, how='left', on='episode_name')
df_rm

Unnamed: 0,id,Character_Status,Character_Species,Character_type,gender,Character_image,Origin_Location_Name,Current_Location_Name,Character_Name,episode_name,air_date,episode_Number,Current_Location_type,Current Dimension,Origin_Location_type,Origin Dimension,Run_Time,IMDB_Rank,IMDB_Rating,IMDB_Description
0,1,Alive,Human,,Male,https://rickandmortyapi.com/api/character/avat...,Earth (C-137),Earth (Replacement Dimension),Rick Sanchez,Pilot,"December 2, 2013",S01E01,Planet,Replacement Dimension,Planet,Dimension C-137,22 min,32.,8,A strangely eccentric genius scientist and inv...
1,1,Alive,Human,,Male,https://rickandmortyapi.com/api/character/avat...,Earth (C-137),Earth (Replacement Dimension),Rick Sanchez,Lawnmower Dog,"December 9, 2013",S01E02,Planet,Replacement Dimension,Planet,Dimension C-137,22 min,11.,8.8,Rick helps Jerry out with the dog.
2,1,Alive,Human,,Male,https://rickandmortyapi.com/api/character/avat...,Earth (C-137),Earth (Replacement Dimension),Rick Sanchez,Anatomy Park,"December 16, 2013",S01E03,Planet,Replacement Dimension,Planet,Dimension C-137,22 min,27.,8.4,"It's Christmas. Rick shrinks Morty, injecting ..."
3,1,Alive,Human,,Male,https://rickandmortyapi.com/api/character/avat...,Earth (C-137),Earth (Replacement Dimension),Rick Sanchez,M. Night Shaym-Aliens!,"January 13, 2014",S01E04,Planet,Replacement Dimension,Planet,Dimension C-137,22 min,18.,8.7,Rick and Morty try to get to the bottom of a m...
4,1,Alive,Human,,Male,https://rickandmortyapi.com/api/character/avat...,Earth (C-137),Earth (Replacement Dimension),Rick Sanchez,Meeseeks And Destroy,"January 20, 2014",S01E05,Planet,Replacement Dimension,Planet,Dimension C-137,22 min,24.,9,Rick provides the family with a solution to th...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1033,667,Alive,Human,,Female,https://rickandmortyapi.com/api/character/avat...,Earth (Replacement Dimension),Earth (Replacement Dimension),Defiance Beth,Star Mort Rickturn Of The Jerri,"May 31, 2020",S04E10,Planet,Replacement Dimension,Planet,Replacement Dimension,22 min,8.,9.2,"An adventure with an invisibility belt, but a ..."
1034,668,Alive,Alien,Cat-Person,Female,https://rickandmortyapi.com/api/character/avat...,Planet Squanch,Defiance's Ship,Defiance Squanchette,Star Mort Rickturn Of The Jerri,"May 31, 2020",S04E10,Spacecraft,Replacement Dimension,Planet,Replacement Dimension,22 min,8.,9.2,"An adventure with an invisibility belt, but a ..."
1035,669,Alive,Alien,,Male,https://rickandmortyapi.com/api/character/avat...,unknown,Defiance's Base,Defiance Doctor,Star Mort Rickturn Of The Jerri,"May 31, 2020",S04E10,Base,Replacement Dimension,,,22 min,8.,9.2,"An adventure with an invisibility belt, but a ..."
1036,670,Dead,Alien,Gromflomite,Male,https://rickandmortyapi.com/api/character/avat...,Gromflom Prime,NX-5 Planet Remover,New Improved Galactic Federation Guard,Star Mort Rickturn Of The Jerri,"May 31, 2020",S04E10,Death Star,Replacement Dimension,Planet,Replacement Dimension,22 min,8.,9.2,"An adventure with an invisibility belt, but a ..."


In [17]:
# Save the combined table to disk. With to_csv's defaults the DataFrame index is written too,
# as an unnamed first column.
df_rm.to_csv('Rick_And_Morty_Combined.csv')

In [18]:
# Inspect the IMDB table; episode_name is now the normalised form (punctuation stripped, title-cased) used as the merge key.
df_IMDB

Unnamed: 0,Run_Time,IMDB_Rank,IMDB_Rating,IMDB_Description,episode_name
0,22 min,1.0,9.6,The Smith house is locked down after parasites...,Total Rickall
1,23 min,2.0,9.3,"Rick turns himself into a pickle while Beth, S...",Pickle Rick
2,23 min,3.0,8.9,Rick and Morty try to save a gas life form whi...,Mortynight Run
3,22 min,4.0,9.5,Rick & Morty go on a simple exchange as Rick t...,The Vat Of Acid Episode
4,22 min,5.0,9.6,"Rick, still in galactic prison, puts an intric...",The Rickshank Rickdemption
5,22 min,6.0,9.1,After Rick gives Morty a love potion for a sch...,Rick Potion #9
6,22 min,7.0,8.9,"Following a stressful adventure, Rick and Mort...",Rest And Ricklaxation
7,22 min,8.0,9.2,"An adventure with an invisibility belt, but a ...",Star Mort Rickturn Of The Jerri
8,22 min,9.0,8.9,"Rick reveals to Morty his ""Morty's mind blower...",Mortys Mind Blowers
9,22 min,10.0,9.8,Whilst Rick and Morty go off to adventure in A...,The Ricklantis Mixup
