In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup

In [2]:
# Wikipedia page that lists every Game of Thrones episode.
url = 'https://en.wikipedia.org/wiki/List_of_Game_of_Thrones_episodes'
# A timeout keeps the cell from hanging forever on a stalled connection.
r = requests.get(url, timeout=30)
# Fail here on an HTTP error instead of parsing an error page further down.
r.raise_for_status()

In [3]:
# Parse the downloaded page text with Python's built-in HTML parser.
html_soup = BeautifulSoup(
    markup=r.text,
    features='html.parser',
)

In [4]:
# All <tr> rows of the first table on the page carrying the "wikitable" class.
series_overview_table = html_soup.find('table', class_="wikitable").find_all('tr')

# Build the column names from the first header row. The header cell at
# position 2 is replaced by the <th> cells of the second row (its sub-headers).
headers = []
top_header_cells = series_overview_table[0].find_all('th')
for position, top_cell in enumerate(top_header_cells):
    if position != 2:
        headers.append(top_cell.text)
        continue
    for sub_cell in series_overview_table[1].find_all('th'):
        headers.append(sub_cell.text)

# Data rows start at the third <tr>; the first cell of each row is skipped.
# NOTE(review): the skipped cell is presumably a decorative colour swatch - confirm.
row_values = []
for table_row in series_overview_table[2:]:
    data_cells = table_row.find_all(['th', 'td'])[1:]
    cleaned_cells = []
    for position, cell in enumerate(data_cells):
        cell_text = cell.text
        if position in (2, 3):
            # Replace non-breaking spaces with ordinary spaces.
            cell_text = cell_text.replace('\xa0', ' ')
        elif position == 4:
            # Keep only the text before the first '[' (drops bracketed suffixes).
            cell_text = cell_text.split('[')[0]
        cleaned_cells.append(cell_text)
    row_values.append(cleaned_cells)

season_overview_df = pd.DataFrame(data=row_values, columns=headers)
season_overview_df

Unnamed: 0,Season,Episodes,First aired,Last aired,Avg. U.S. viewers(millions)
0,1,10,"April 17, 2011 (2011-04-17)","June 19, 2011 (2011-06-19)",2.52
1,2,10,"April 1, 2012 (2012-04-01)","June 3, 2012 (2012-06-03)",3.8
2,3,10,"March 31, 2013 (2013-03-31)","June 9, 2013 (2013-06-09)",4.97
3,4,10,"April 6, 2014 (2014-04-06)","June 15, 2014 (2014-06-15)",6.84
4,5,10,"April 12, 2015 (2015-04-12)","June 14, 2015 (2015-06-14)",6.88
5,6,10,"April 24, 2016 (2016-04-24)","June 26, 2016 (2016-06-26)",7.69
6,7,7,"July 16, 2017 (2017-07-16)","August 27, 2017 (2017-08-27)",10.26
7,8,6,"April 14, 2019 (2019-04-14)","May 19, 2019 (2019-05-19)",11.99


In [5]:
# Every table with the "wikiepisodetable" class except the last one on the page.
season_tables = html_soup.find_all('table', class_='wikiepisodetable')[:-1]

# Column names come from the first row of the first season table. The cell at
# position 5 is cut at the first hair-space character (U+200A); the rest are
# used as-is.
headers_soup = season_tables[0].find('tr').find_all('th')
season_headers = [
    cell.text.split('\u200a')[0] if position == 5 else cell.text
    for position, cell in enumerate(headers_soup)
]

season_headers

['No.overall',
 'No. inseason',
 'Title',
 'Directed by',
 'Written by',
 'Original air date',
 'U.S. viewers(millions)']

In [6]:
# One list per season table; each holds one list of cell texts per table row
# (the first <tr> of every table, its header row, is skipped).
all_seasons_rows = []
for season_table in season_tables:
    episode_rows = []
    for table_row in season_table.find_all('tr')[1:]:
        episode_cells = []
        for position, cell in enumerate(table_row.find_all(['th', 'td'])):
            cell_text = cell.text
            if position == 2:
                # Strip the double quotes wrapped around the text.
                cell_text = cell_text.strip('"')
            elif position == 5:
                # Replace non-breaking spaces with ordinary spaces.
                cell_text = cell_text.replace('\xa0', ' ')
            elif position == 6:
                # Keep only the text before the first '[' (drops bracketed suffixes).
                cell_text = cell_text.split('[')[0]
            episode_cells.append(cell_text)
        episode_rows.append(episode_cells)
    all_seasons_rows.append(episode_rows)

In [7]:
# Season 1: rows scraped from the first season table
season1_df = pd.DataFrame(
    data=all_seasons_rows[0],
    columns=season_headers,
)
season1_df

Unnamed: 0,No.overall,No. inseason,Title,Directed by,Written by,Original air date,U.S. viewers(millions)
0,1,1,Winter Is Coming,Tim Van Patten,David Benioff & D. B. Weiss,"April 17, 2011 (2011-04-17)",2.22
1,2,2,The Kingsroad,Tim Van Patten,David Benioff & D. B. Weiss,"April 24, 2011 (2011-04-24)",2.2
2,3,3,Lord Snow,Brian Kirk,David Benioff & D. B. Weiss,"May 1, 2011 (2011-05-01)",2.44
3,4,4,"Cripples, Bastards, and Broken Things",Brian Kirk,Bryan Cogman,"May 8, 2011 (2011-05-08)",2.45
4,5,5,The Wolf and the Lion,Brian Kirk,David Benioff & D. B. Weiss,"May 15, 2011 (2011-05-15)",2.58
5,6,6,A Golden Crown,Daniel Minahan,Story by : David Benioff & D. B. WeissTeleplay...,"May 22, 2011 (2011-05-22)",2.44
6,7,7,You Win or You Die,Daniel Minahan,David Benioff & D. B. Weiss,"May 29, 2011 (2011-05-29)",2.4
7,8,8,The Pointy End,Daniel Minahan,George R. R. Martin,"June 5, 2011 (2011-06-05)",2.72
8,9,9,Baelor,Alan Taylor,David Benioff & D. B. Weiss,"June 12, 2011 (2011-06-12)",2.66
9,10,10,Fire and Blood,Alan Taylor,David Benioff & D. B. Weiss,"June 19, 2011 (2011-06-19)",3.04


In [8]:
# Season 2: rows scraped from the second season table
season2_df = pd.DataFrame(
    data=all_seasons_rows[1],
    columns=season_headers,
)
season2_df

Unnamed: 0,No.overall,No. inseason,Title,Directed by,Written by,Original air date,U.S. viewers(millions)
0,11,1,The North Remembers,Alan Taylor,David Benioff & D. B. Weiss,"April 1, 2012 (2012-04-01)",3.86
1,12,2,The Night Lands,Alan Taylor,David Benioff & D. B. Weiss,"April 8, 2012 (2012-04-08)",3.76
2,13,3,What Is Dead May Never Die,Alik Sakharov,Bryan Cogman,"April 15, 2012 (2012-04-15)",3.77
3,14,4,Garden of Bones,David Petrarca,Vanessa Taylor,"April 22, 2012 (2012-04-22)",3.65
4,15,5,The Ghost of Harrenhal,David Petrarca,David Benioff & D. B. Weiss,"April 29, 2012 (2012-04-29)",3.9
5,16,6,The Old Gods and the New,David Nutter,Vanessa Taylor,"May 6, 2012 (2012-05-06)",3.88
6,17,7,A Man Without Honor,David Nutter,David Benioff & D. B. Weiss,"May 13, 2012 (2012-05-13)",3.69
7,18,8,The Prince of Winterfell,Alan Taylor,David Benioff & D. B. Weiss,"May 20, 2012 (2012-05-20)",3.86
8,19,9,Blackwater,Neil Marshall,George R. R. Martin,"May 27, 2012 (2012-05-27)",3.38
9,20,10,Valar Morghulis,Alan Taylor,David Benioff & D. B. Weiss,"June 3, 2012 (2012-06-03)",4.2


In [9]:
# Season 3: rows scraped from the third season table
season3_df = pd.DataFrame(
    data=all_seasons_rows[2],
    columns=season_headers,
)
season3_df

Unnamed: 0,No.overall,No. inseason,Title,Directed by,Written by,Original air date,U.S. viewers(millions)
0,21,1,Valar Dohaeris,Daniel Minahan,David Benioff & D. B. Weiss,"March 31, 2013 (2013-03-31)",4.37
1,22,2,"Dark Wings, Dark Words",Daniel Minahan,Vanessa Taylor,"April 7, 2013 (2013-04-07)",4.27
2,23,3,Walk of Punishment,David Benioff,David Benioff & D. B. Weiss,"April 14, 2013 (2013-04-14)",4.72
3,24,4,And Now His Watch Is Ended,Alex Graves,David Benioff & D. B. Weiss,"April 21, 2013 (2013-04-21)",4.87
4,25,5,Kissed by Fire,Alex Graves,Bryan Cogman,"April 28, 2013 (2013-04-28)",5.35
5,26,6,The Climb,Alik Sakharov,David Benioff & D. B. Weiss,"May 5, 2013 (2013-05-05)",5.5
6,27,7,The Bear and the Maiden Fair,Michelle MacLaren,George R. R. Martin,"May 12, 2013 (2013-05-12)",4.84
7,28,8,Second Sons,Michelle MacLaren,David Benioff & D. B. Weiss,"May 19, 2013 (2013-05-19)",5.13
8,29,9,The Rains of Castamere,David Nutter,David Benioff & D. B. Weiss,"June 2, 2013 (2013-06-02)",5.22
9,30,10,Mhysa,David Nutter,David Benioff & D. B. Weiss,"June 9, 2013 (2013-06-09)",5.39


In [10]:
# Season 4: rows scraped from the fourth season table
season4_df = pd.DataFrame(
    data=all_seasons_rows[3],
    columns=season_headers,
)
season4_df

Unnamed: 0,No.overall,No. inseason,Title,Directed by,Written by,Original air date,U.S. viewers(millions)
0,31,1,Two Swords,D. B. Weiss,David Benioff & D. B. Weiss,"April 6, 2014 (2014-04-06)",6.64
1,32,2,The Lion and the Rose,Alex Graves,George R. R. Martin,"April 13, 2014 (2014-04-13)",6.31
2,33,3,Breaker of Chains,Alex Graves,David Benioff & D. B. Weiss,"April 20, 2014 (2014-04-20)",6.59
3,34,4,Oathkeeper,Michelle MacLaren,Bryan Cogman,"April 27, 2014 (2014-04-27)",6.95
4,35,5,First of His Name,Michelle MacLaren,David Benioff & D. B. Weiss,"May 4, 2014 (2014-05-04)",7.16
5,36,6,The Laws of Gods and Men,Alik Sakharov,Bryan Cogman,"May 11, 2014 (2014-05-11)",6.4
6,37,7,Mockingbird,Alik Sakharov,David Benioff & D. B. Weiss,"May 18, 2014 (2014-05-18)",7.2
7,38,8,The Mountain and the Viper,Alex Graves,David Benioff & D. B. Weiss,"June 1, 2014 (2014-06-01)",7.17
8,39,9,The Watchers on the Wall,Neil Marshall,David Benioff & D. B. Weiss,"June 8, 2014 (2014-06-08)",6.95
9,40,10,The Children,Alex Graves,David Benioff & D. B. Weiss,"June 15, 2014 (2014-06-15)",7.09


In [11]:
# Season 5: rows scraped from the fifth season table
season5_df = pd.DataFrame(
    data=all_seasons_rows[4],
    columns=season_headers,
)
season5_df

Unnamed: 0,No.overall,No. inseason,Title,Directed by,Written by,Original air date,U.S. viewers(millions)
0,41,1,The Wars to Come,Michael Slovis,David Benioff & D. B. Weiss,"April 12, 2015 (2015-04-12)",8.0
1,42,2,The House of Black and White,Michael Slovis,David Benioff & D. B. Weiss,"April 19, 2015 (2015-04-19)",6.81
2,43,3,High Sparrow,Mark Mylod,David Benioff & D. B. Weiss,"April 26, 2015 (2015-04-26)",6.71
3,44,4,Sons of the Harpy,Mark Mylod,Dave Hill,"May 3, 2015 (2015-05-03)",6.82
4,45,5,Kill the Boy,Jeremy Podeswa,Bryan Cogman,"May 10, 2015 (2015-05-10)",6.56
5,46,6,"Unbowed, Unbent, Unbroken",Jeremy Podeswa,Bryan Cogman,"May 17, 2015 (2015-05-17)",6.24
6,47,7,The Gift,Miguel Sapochnik,David Benioff & D. B. Weiss,"May 24, 2015 (2015-05-24)",5.4
7,48,8,Hardhome,Miguel Sapochnik,David Benioff & D. B. Weiss,"May 31, 2015 (2015-05-31)",7.01
8,49,9,The Dance of Dragons,David Nutter,David Benioff & D. B. Weiss,"June 7, 2015 (2015-06-07)",7.14
9,50,10,Mother's Mercy,David Nutter,David Benioff & D. B. Weiss,"June 14, 2015 (2015-06-14)",8.11


In [12]:
# Season 6: rows scraped from the sixth season table
season6_df = pd.DataFrame(
    data=all_seasons_rows[5],
    columns=season_headers,
)
season6_df

Unnamed: 0,No.overall,No. inseason,Title,Directed by,Written by,Original air date,U.S. viewers(millions)
0,51,1,The Red Woman,Jeremy Podeswa,David Benioff & D. B. Weiss,"April 24, 2016 (2016-04-24)",7.94
1,52,2,Home,Jeremy Podeswa,Dave Hill,"May 1, 2016 (2016-05-01)",7.29
2,53,3,Oathbreaker,Daniel Sackheim,David Benioff & D. B. Weiss,"May 8, 2016 (2016-05-08)",7.28
3,54,4,Book of the Stranger,Daniel Sackheim,David Benioff & D. B. Weiss,"May 15, 2016 (2016-05-15)",7.82
4,55,5,The Door,Jack Bender,David Benioff & D. B. Weiss,"May 22, 2016 (2016-05-22)",7.89
5,56,6,Blood of My Blood,Jack Bender,Bryan Cogman,"May 29, 2016 (2016-05-29)",6.71
6,57,7,The Broken Man,Mark Mylod,Bryan Cogman,"June 5, 2016 (2016-06-05)",7.8
7,58,8,No One,Mark Mylod,David Benioff & D. B. Weiss,"June 12, 2016 (2016-06-12)",7.6
8,59,9,Battle of the Bastards,Miguel Sapochnik,David Benioff & D. B. Weiss,"June 19, 2016 (2016-06-19)",7.66
9,60,10,The Winds of Winter,Miguel Sapochnik,David Benioff & D. B. Weiss,"June 26, 2016 (2016-06-26)",8.89


In [13]:
# Season 7: rows scraped from the seventh season table
season7_df = pd.DataFrame(
    data=all_seasons_rows[6],
    columns=season_headers,
)
season7_df

Unnamed: 0,No.overall,No. inseason,Title,Directed by,Written by,Original air date,U.S. viewers(millions)
0,61,1,Dragonstone,Jeremy Podeswa,David Benioff & D. B. Weiss,"July 16, 2017 (2017-07-16)",10.11
1,62,2,Stormborn,Mark Mylod,Bryan Cogman,"July 23, 2017 (2017-07-23)",9.27
2,63,3,The Queen's Justice,Mark Mylod,David Benioff & D. B. Weiss,"July 30, 2017 (2017-07-30)",9.25
3,64,4,The Spoils of War,Matt Shakman,David Benioff & D. B. Weiss,"August 6, 2017 (2017-08-06)",10.17
4,65,5,Eastwatch,Matt Shakman,Dave Hill,"August 13, 2017 (2017-08-13)",10.72
5,66,6,Beyond the Wall,Alan Taylor,David Benioff & D. B. Weiss,"August 20, 2017 (2017-08-20)",10.24
6,67,7,The Dragon and the Wolf,Jeremy Podeswa,David Benioff & D. B. Weiss,"August 27, 2017 (2017-08-27)",12.07


In [14]:
# Season 8: rows scraped from the eighth season table
season8_df = pd.DataFrame(
    data=all_seasons_rows[7],
    columns=season_headers,
)
season8_df

Unnamed: 0,No.overall,No. inseason,Title,Directed by,Written by,Original air date,U.S. viewers(millions)
0,68,1,Winterfell,David Nutter,Dave Hill,"April 14, 2019 (2019-04-14)",11.76
1,69,2,A Knight of the Seven Kingdoms,David Nutter,Bryan Cogman,"April 21, 2019 (2019-04-21)",10.29
2,70,3,The Long Night,Miguel Sapochnik,David Benioff & D. B. Weiss,"April 28, 2019 (2019-04-28)",12.02
3,71,4,The Last of the Starks,David Nutter,David Benioff & D. B. Weiss,"May 5, 2019 (2019-05-05)",11.8
4,72,5,The Bells,Miguel Sapochnik,David Benioff & D. B. Weiss,"May 12, 2019 (2019-05-12)",12.48
5,73,6,The Iron Throne,David Benioff & D. B. Weiss,David Benioff & D. B. Weiss,"May 19, 2019 (2019-05-19)",13.61


In [22]:
# All <tr> rows of the last table on the page carrying the "wikitable" class.
ratings_table = html_soup.find_all('table', class_='wikitable')[-1].find_all('tr')

# Build the column names from the first header row. The header cell at
# position 1 is replaced by the <th> cells of the second row, each prefixed
# with "Episode".
ratings_headers = []
for index, header in enumerate(ratings_table[0].find_all('th')):
    if index == 1:
        ratings_headers.extend(
            f'Episode{header_.text}' for header_ in ratings_table[1].find_all('th')
        )
    else:
        ratings_headers.append(header.text)

# Data rows start at the third <tr>; the first cell of each row is skipped.
# NOTE(review): the skipped cell is presumably a decorative colour swatch - confirm.
ratings_rows = []
for row in ratings_table[2:]:
    ratings_row = []
    for col in row.find_all(['th', 'td'])[1:]:
        if col.text == 'N/A':
            # An 'N/A' cell may span several columns: repeat the placeholder
            # once per spanned column so later cells stay aligned with their
            # headers. A cell without a colspan attribute occupies one column
            # (previously int(None) raised TypeError in that case).
            ratings_row.extend(['N/A'] * int(col.get('colspan', 1)))
        else:
            ratings_row.append(col.text)
    ratings_rows.append(ratings_row)

ratings_df = pd.DataFrame(ratings_rows, columns=ratings_headers)
ratings_df

Unnamed: 0,Season,Episode1,Episode2,Episode3,Episode4,Episode5,Episode6,Episode7,Episode8,Episode9,Episode10,Average
0,1,2.22,2.2,2.44,2.45,2.58,2.44,2.4,2.72,2.66,3.04,2.52
1,2,3.86,3.76,3.77,3.65,3.9,3.88,3.69,3.86,3.38,4.2,3.8
2,3,4.37,4.27,4.72,4.87,5.35,5.5,4.84,5.13,5.22,5.39,4.97
3,4,6.64,6.31,6.59,6.95,7.16,6.4,7.2,7.17,6.95,7.09,6.84
4,5,8.0,6.81,6.71,6.82,6.56,6.24,5.4,7.01,7.14,8.11,6.88
5,6,7.94,7.29,7.28,7.82,7.89,6.71,7.8,7.6,7.66,8.89,7.69
6,7,10.11,9.27,9.25,10.17,10.72,10.24,12.07,,,,10.26
7,8,11.76,10.29,12.02,11.8,12.48,13.61,,,,,11.99
