In [60]:
    import requests
    import pandas as pd
    import numpy as np
    from bs4 import BeautifulSoup
    from itertools import product
    
    class HTMLTableParser:
        """Scrape HTML ``<table>`` elements into row lists or pandas DataFrames.

        ``read_table`` expands ``rowspan``/``colspan`` so every table becomes a
        rectangular list of rows, and rewrites cells that carry marker ``<div>``
        elements (points, penalties, lead/call/pass markers) into short text
        labels.
        """

        # (CSS class of a marker <div>, label appended to the cell value), applied
        # in this order.  "leadloss" and "lost" both map to the same label.
        _MARKER_LABELS = (
            ("lead", "Lead "),
            ("leadloss", "LeadLoss "),
            ("lost", "LeadLoss "),
            ("call", "call "),
            ("nopass", "np "),
            ("starpass", "sp "),
        )

        def parse_url(self, url, timeout=30):
            """Download ``url`` and parse every ``<table>`` found in the page.

            Args:
                url: Address of the page to fetch.
                timeout: Seconds passed to ``requests.get`` so a stalled server
                    cannot hang the notebook indefinitely.

            Returns:
                A list with one entry per ``<table>``; each entry is the list of
                rows produced by ``read_table``.

            Raises:
                requests.HTTPError: If the server answers with an error status
                    (otherwise the error page would be parsed as if it were data).
            """
            response = requests.get(url, timeout=timeout)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'lxml')
            return [self.read_table(table) for table in soup.find_all('table')]

        @staticmethod
        def _span(cell, attr):
            """Return the integer ``rowspan``/``colspan`` of ``cell``.

            A missing, non-numeric or negative attribute falls back to 1.  A value
            of 0 is returned unchanged so callers can apply their own
            "span to the end" handling.
            """
            try:
                span = int(cell.get(attr, 1))
            except (TypeError, ValueError):
                return 1
            return span if span >= 0 else 1

        def _cell_value(self, cell):
            """Build the text stored in the matrix for a single ``td``/``th``."""
            value = cell.get_text()
            # NOTE(review): the class string " point" has a leading space; confirm
            # against the site's markup that this actually matches the point divs.
            points = len(cell.find_all("div", {"class": " point"}))
            # A cell with no visible text (empty, whitespace only, or just a
            # non-breaking space) shows the number of point markers instead.
            if value.strip() in ('', '&nbsp'):
                value = str(points)

            # Penalty markers replace the cell text with the first direct text node
            # of each penalty div; divs without direct text are skipped.
            pens = cell.find_all("div", {"class": "penalty major"})
            if pens:
                value = ''
                for pen in pens:
                    direct_text = pen.find_all(string=True, recursive=False)
                    if direct_text:
                        value += direct_text[0] + " "

            # Each marker that occurs exactly once appends its label; a placeholder
            # "0" is dropped first so the label stands alone.
            for css_class, label in self._MARKER_LABELS:
                if len(cell.find_all("div", {"class": css_class})) == 1:
                    if value == "0":
                        value = ""
                    value += label
            return value

        def read_table(self, table_tag):
            """Convert one ``<table>`` tag into a rectangular list of rows.

            Args:
                table_tag: Parsed table element (a BeautifulSoup tag).

            Returns:
                A list with one list per ``<tr>``, all of equal length.  Cells
                covered by a rowspan/colspan repeat the spanning cell's value;
                positions no cell covers stay ``None``.
            """
            # find_all('tr') searches all descendants, so rows of nested tables
            # are included as well.
            rows = table_tag.find_all('tr')
            row_cells = [row.find_all(['td', 'th'], recursive=False) for row in rows]

            # First scan: work out how many columns are needed.
            colcount = 0
            pending = []  # rowspans still active from preceding rows
            for r, cells in enumerate(row_cells):
                # Count columns including spans and active rowspans.  The colspan
                # of the last cell is ignored (counted as 1) to avoid 'phantom'
                # columns made only of extended colspans.  A colspan of 0 means
                # "fill until the end" and only makes sense on the last cell, so
                # it counts as 1 elsewhere.
                width = (
                    sum(self._span(c, 'colspan') or 1 for c in cells[:-1])
                    + len(cells[-1:])
                    + len(pending)
                )
                colcount = max(colcount, width)
                # A rowspan of 0 spans to the bottom of the table.
                pending += [self._span(c, 'rowspan') or len(rows) - r for c in cells]
                pending = [s - 1 for s in pending if s > 1]

            # Rowspans still active after the last row are irrelevant: there are
            # no further rows for them to extend into.
            table = [[None] * colcount for _ in rows]

            # Second scan: fill the matrix.
            rowspans = {}  # column number -> rows still covered by a rowspan
            for row, cells in enumerate(row_cells):
                span_offset = 0  # columns skipped so far due to row/colspans
                for col, cell in enumerate(cells):
                    # Shift right past preceding spans and occupied columns.
                    col += span_offset
                    while rowspans.get(col, 0):
                        span_offset += 1
                        col += 1

                    rowspan = rowspans[col] = self._span(cell, 'rowspan') or len(rows) - row
                    colspan = self._span(cell, 'colspan') or colcount - col
                    # The next cell starts after this cell's colspan.
                    span_offset += colspan - 1
                    value = self._cell_value(cell)

                    for drow, dcol in product(range(rowspan), range(colspan)):
                        try:
                            table[row + drow][col + dcol] = value
                            rowspans[col + dcol] = rowspan
                        except IndexError:
                            # Span reaches outside the table; ignore the overflow.
                            pass

                # One row consumed: age every active rowspan.
                rowspans = {c: s - 1 for c, s in rowspans.items() if s > 1}
            return table

        @staticmethod
        def _to_frame(rows, columns):
            """Build a DataFrame, tolerating a table that has no data rows."""
            if not rows:
                return pd.DataFrame(columns=columns)
            return pd.DataFrame(np.array(rows), columns=columns)

        def parse_url_todf(self, url):
            """Fetch ``url`` and return its tables as a list of DataFrames.

            If ``url`` contains ``"combos"``, every table becomes one DataFrame
            whose columns are the table's first row; a table with no rows yields
            an empty DataFrame so list positions still match the page.

            Otherwise the first table is treated as a header table: row ``i`` of
            it (with empty cells removed) supplies the column names for table
            ``i + 1``.

            Returns:
                A list of DataFrames; empty when the page contains no tables.

            Raises:
                IndexError: In the non-combos layout, if the first table has
                    fewer rows than there are following tables.
                ValueError: If a set of column names does not match the width of
                    its table.
            """
            tables = self.parse_url(url)
            if not tables:
                return []

            dfs = []
            if "combos" in url:
                for table in tables:
                    if not table:
                        dfs.append(pd.DataFrame())
                        continue
                    headers, body = table[0], table[1:]
                    dfs.append(self._to_frame(body, np.array(headers)))
            else:
                header_rows = [[name for name in row if name] for row in tables[0]]
                for i, body in enumerate(tables[1:]):
                    dfs.append(self._to_frame(body, header_rows[i]))

            return dfs

In [61]:
# Fetch the player-combos page directly to eyeball the raw HTML the parser will see.
import requests
url = "http://flattrackstats.com/bouts/89490/playercombos"
# A timeout keeps a stalled server from hanging the kernel; raise_for_status
# stops an HTTP error page from being mistaken for the real document.
response = requests.get(url, timeout=30)
response.raise_for_status()
response.text[:1000] # Access the HTML with the text property

'<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">\n<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en" dir="ltr">\n\n<head>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />\n  <title>3/26/17: Killamazoo (83) @ Ohio (375) | Playercombos | Roller Derby Stats & Rankings | Flat Track Stats</title>\n  <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />\n<link rel=\'shortcut icon\' href=\'http://flattrackstats.com/sites/default/files/favicon.ico\' type=\'image/x-icon\' />  <link type="text/css" rel="stylesheet" media="all" href="/sites/default/files/css/css_77fc50b19351c3898fdde9570cdabd93.css" />\n<link type="text/css" rel="stylesheet" media="print" href="/sites/default/files/css/css_20c5d9daf11cb9ce9bb6c0cac33cd2d2.css" />\n<!--[if IE]>\n<link type="text/css" rel="stylesheet" media="all" href="/sites/all/themes/zen/zen/ie.css?T" />\n<![endif]-->\n    \n    <script type=\'text

In [62]:
# Parse every table on the page fetched above (`url` comes from the previous cell).
hp = HTMLTableParser()
# NOTE: despite its name, `headers` is the list of DataFrames returned by
# parse_url_todf, one per table on the page.
headers = hp.parse_url_todf(url)
# Show the seventh table on the page (index 6).
print(headers[6])

                    blocker 1     blocker 2      blocker 3  \
0                  Amy Spears      Chainsaw   Kelsey Khaos   
1                   Bigg Rigg   Blitz Lemon       Chainsaw   
2                 Blitz Lemon      Chainsaw  Betty T. KayO   
3   Lora Wayman (Outa My W...    Amy Spears      Bigg Rigg   
4   Lora Wayman (Outa My W...      Chainsaw   Kelsey Khaos   
5   Lora Wayman (Outa My W...     Bigg Rigg   Kelsey Khaos   
6                   Bigg Rigg      Chainsaw   Kelsey Khaos   
7   Lora Wayman (Outa My W...   Blitz Lemon    Slammerhead   
8   Lora Wayman (Outa My W...     Bigg Rigg    Slammerhead   
9   Lora Wayman (Outa My W...     Bigg Rigg    Paige Bleed   
10  Lora Wayman (Outa My W...  Kelsey Khaos    Slammerhead   
11  Lora Wayman (Outa My W...   Blitz Lemon       Chainsaw   
12                Blitz Lemon      Chainsaw    Slammerhead   
13                Blitz Lemon      Chainsaw   Kelsey Khaos   
14  Lora Wayman (Outa My W...     Bigg Rigg    Slammerhead   
15  Lora