# Import packages
Oddsportal serves dynamic content, i.e. its pages are rendered with JavaScript. I used the `selenium` package to load and render each URL in Mozilla Firefox, then passed the page source to `BeautifulSoup` to extract the information of interest.

In [63]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
import bs4
import time
import pandas as pd
import numpy as np
from datetime import datetime
from psw import psw, usr

## Helper functions for processing HTML code

In [2]:
def get_unique_href(soup, league, season):
    """
    Collect the match-detail URL of every game listed on a results page.

    Input:
        soup: bs4.BeautifulSoup element (HTML source code parsed from the selenium webdriver page source)
        league: string, e.g. NBA or Euro
        season: string, e.g. 2017/2018 or 2013/2014
    Output:
        List of lists. Each inner list has 3 elements:
        [league, season, url (unique for each game)]
        Rows without a link are skipped.
    """
    _data = list()
    rows = soup.tbody.findAll('tr')
    for row in rows:
        # only rows with exactly 6 child nodes are treated as match rows
        if len(row.contents) != 6:
            continue
        # read url for detailed match coefficient analysis
        link = row.contents[1].find('a', href=True)
        if link is None:
            # no <a href=...> in the cell: skip the row instead of failing on None['href']
            continue
        _data.append([league, season, "https://www.oddsportal.com" + link['href']])
    return _data

def convert_2_int(string):
    """
    Convert a string to an integer.

    Input:
        string: e.g. "123" or "93"
    Output:
        int, or NAN when the conversion raises ValueError
        (the problematic input is printed in that case)
    """
    try:
        parsed = int(string)
    except ValueError:
        message = "Input {} could not be converted to integer".format(string)
        print(message)
        parsed = np.nan
    return parsed
    
def convert_2_float(string):
    """
    Convert a string to a float.

    Input:
        string: e.g. "1.95" or "93"
    Output:
        float, or NAN when the conversion raises ValueError
        (the problematic input is printed in that case)
    """
    try:
        number = float(string)
    except ValueError:
        message = "Input {} could not be converted to float".format(string)
        print(message)
        number = np.nan
    return number

## Initiate webdriver
`executable_path` points to the executable (geckodriver) used to connect to Firefox. To use a different browser, download the appropriate driver (hyperlink) or refer to this Stack Exchange post.

In [3]:
%%time
# start Firefox through the local geckodriver executable
# NOTE(review): `executable_path` is deprecated in Selenium 4 in favour of a Service
# object; it is kept here so the cell still runs on the Selenium 3 API as well.
driver = webdriver.Firefox(executable_path=r"geckodriver.exe")

# go to Oddsportal website
driver.get("https://www.oddsportal.com")

# click on log-in button (the first <button> element found on the page)
# find_element(By.…) is used because the find_element_by_* helpers are not
# available in newer Selenium releases; it behaves the same on older ones.
driver.find_element(By.TAG_NAME, 'button').click()

# enter user name and password; ENTER is sent after the password
driver.find_element(By.ID, 'login-username1').send_keys(usr)
driver.find_element(By.ID, 'login-password1').send_keys(psw, Keys.ENTER)

# set timeout for page loading to 30 sec
driver.set_page_load_timeout(30)

# set wait element for explicit wait (10 sec)
wait = WebDriverWait(driver, 10)

Wall time: 9.85 s


## Load NBA and Euroleague 2013-2018 season data
* `season_dict` dictionary keys are league and season names, while values are tuples (url, number of pages to iterate over).
* iterate over season url pages and read team scored points and home/away team average coefficients
* transform data into pandas DataFrame and save it as .csv file

In [4]:
# league label stored with every collected match URL
league = "Euroleague"
# season label stored with every collected match URL
season = "2017_2018"
# URL prefix of the season results pages; the page number is appended to it
season_url = "https://www.oddsportal.com/basketball/europe/euroleague-2017-2018/results/#/page/"
# number of pages for the season
no_pages = 6

In [20]:
%%time
# collects one [league, season, url] entry per match over all result pages
_data = []

for idx in range(1, no_pages + 1):
    # open results page number idx
    page_url = f"{season_url}{idx}"
    driver.get(page_url)
    # explicit wait: block until the results table is visible
    table_visible = EC.visibility_of_element_located((By.ID, 'tournamentTable'))
    wait.until(table_visible)
    # parse the rendered HTML and extract the match links
    soup = bs4.BeautifulSoup(driver.page_source)
    _data.extend(get_unique_href(soup, league, season))

# one row per match: league, season and the unique match URL
df_urls = pd.DataFrame(data=_data, columns=["League", "Season", "URL"])

Wall time: 5.7 s


## Analyze matches in more detail

In [244]:
def get_team_names(soup):
    """
    Read the two team names from the page's <h1> heading ("<first> - <second>").

    Input:
        soup: BS4 soup element
    Output:
        tuple (first name, second name); (NAN, NAN) if the heading is missing
        or cannot be split (the error is printed)
    """
    try:
        parts = soup.h1.text.split(" - ")
        first_name, second_name = parts[0], parts[1]
    except Exception as err:
        print(err)
        print("Team names not found")
        return np.nan, np.nan
    return first_name, second_name
    
            
def load_page(driver, url):
    """
    Open url with the webdriver.

    Return True if driver.get(url) finished without raising, otherwise print
    the error and return False.
    """
    loaded = True
    try:
        driver.get(url)
    except Exception as err:
        loaded = False
        print("page not loaded")
        print(f"{err}")
    return loaded
    
def get_match_date(soup):
    """
    Return the match date and time as a datetime.datetime object.

    The text of the first <p> inside <div id="col-left"> is parsed with the
    format '%A, %d %b %Y, %H:%M' (e.g. 'Wednesday, 25 Apr  2018, 19:45').
    NAN is returned if the element is missing or the text cannot be parsed.
    """
    try:
        match_date = soup.find("div", {"id": "col-left"}).p.text
        return datetime.strptime(match_date, '%A, %d %b %Y, %H:%M')
    except Exception:
        # `Exception` instead of a bare except, so Ctrl-C (KeyboardInterrupt)
        # during a long scraping run is not silently turned into NAN
        return np.nan
    
def get_opening_odds(driver):
    """
    Return the opening odds from the currently rendered odds tooltip.

    The page source is re-parsed and the text of the second-to-last child of
    <span id="tooltiptext"> is converted to float. NAN is returned if the
    tooltip is missing or has an unexpected structure.
    """
    soup = bs4.BeautifulSoup(driver.page_source)
    try:
        _coef_open = soup.find("span", {"id": "tooltiptext"}).contents[-2].text
        return convert_2_float(_coef_open)
    except Exception:
        # `Exception` instead of a bare except, so KeyboardInterrupt still
        # stops a running scrape
        return np.nan
    
def get_score(soup):
    """
    Parse the final score and the scores of the four quarters of a match.

    Input:
        soup: BS4 soup element; the score is read from <p class="result">
    Output:
        list: [home_s, away_s, OT, Q1_h, Q1_a, Q2_h, Q2_a, Q3_h, Q3_a, Q4_h, Q4_a]
        OT is True when the final score text contains "OT".
        Values that cannot be converted are NAN (see convert_2_int).
    """
    element = soup.find("p", {"class": "result"})

    # final score text, e.g. "85:80" or "85:80 OT"; only the part before "OT" is parsed
    final_text = element.strong.text
    OT = "OT" in final_text
    final_score = final_text.split("OT")[0].strip().split(":")
    home_s = convert_2_int(final_score[0])
    away_s = convert_2_int(final_score[1])

    # period scores are the last child of the element, comma separated and
    # presumably wrapped in parentheses, e.g. " (21:17, 17:20, 20:20, 22:23)"
    period_texts = element.contents[-1].split(",")
    result = [home_s, away_s, OT]
    for idx in range(4):
        # Strip blanks and parentheses instead of slicing by position: with an
        # overtime period the fourth entry is no longer the last one, and a
        # positional [1:-1] slice would cut off its last digit.
        period = period_texts[idx].strip().strip("()").strip().split(":")
        result.append(convert_2_int(period[0]))
        result.append(convert_2_int(period[1]))

    return result
    
def get_h2h_coef(driver):
    """
    Read closing and opening home/away odds of bet365, Dafabet and Pinnacle
    from the main odds table of the currently loaded match page.

    Input:
        driver: selenium webdriver
    Output:
        flat list of 12 values (NAN where a bookmaker is not listed):
        [bet365 home close, bet365 home open, bet365 away close, bet365 away open,
         Dafabet home close, ..., Dafabet away open,
         Pinnacle home close, ..., Pinnacle away open]
    """
    # position of the first of the four values of each bookmaker in the result
    first_slot = {"bet365": 0, "Dafabet": 4, "Pinnacle": 8}

    # find_element(s)(By.…) is used because the find_element(s)_by_* helpers
    # are not available in newer Selenium releases
    main_table = driver.find_element(By.XPATH, "//table[@class='table-main detail-odds sortable']")
    main_table = main_table.find_element(By.TAG_NAME, 'tbody')
    _data = [np.nan]*12
    for table_row in main_table.find_elements(By.TAG_NAME, 'tr'):
        # filter out rows that do not have exactly 5 cells
        row = table_row.find_elements(By.TAG_NAME, 'td')
        if len(row) != 5:
            continue
        book_name = row[0].text[1:-2]
        # exclude empty rows
        if book_name == '':
            continue
        # remove new bookmaker tags
        if book_name[-4:] == '\nNEW':
            book_name = book_name.replace('\nNEW', '')
        if book_name not in first_slot:
            continue

        start = first_slot[book_name]
        # cell 1 holds the home price, cell 2 the away price
        for cell_idx, shift in ((1, 0), (2, 2)):
            _element = row[cell_idx]
            _coef_close = convert_2_float(_element.text)
            # hover over the cell, then read the opening price from the tooltip
            ActionChains(driver).move_to_element(_element).perform()
            _coef_open = get_opening_odds(driver)
            _data[start + shift] = _coef_close
            _data[start + shift + 1] = _coef_open

    return _data

def click_AH_OU_button(driver, wait, AH=True):
    """
    Click the 'AH' tab (AH=True) or the 'O/U' tab (AH=False) and wait until
    the element with id 'odds-data-table' is clickable.

    Input:
        driver: selenium webdriver
        wait: WebDriverWait used for the explicit wait
        AH: boolean, selects which tab is clicked
    Any exception (tab not found, wait timeout, ...) is printed, not raised.
    """
    label = 'AH' if AH else 'O/U'
    try:
        # find_element(By.XPATH, …) is used because find_element_by_xpath is
        # not available in newer Selenium releases
        driver.find_element(By.XPATH, "//span[contains(text(), '" + label + "')]").click()
        wait.until(EC.element_to_be_clickable((By.ID, 'odds-data-table')))
    except Exception as e:
        print("Button not found")
        print(e)
        
def click_xpath(driver, xpath):
    """
    Try to click the first element matching the XPath expression.

    Input:
        driver: selenium webdriver
        xpath: string, XPath of the element to click
    Any exception (element not found, not clickable, ...) is printed, not raised.
    """
    try:
        # find_element(By.XPATH, …) is used because find_element_by_xpath is
        # not available in newer Selenium releases
        driver.find_element(By.XPATH, xpath).click()
    except Exception as e:
        print("Error {} occured".format(e))
        
def click_max_book_market(driver, soup, ah = True):
    """
    Find and click on AH or OU market with highest number of bookmakers

    Input:
        driver: selenium webdriver showing the AH or O/U tab of a match page
        soup: not used, kept for backward compatibility; the page source is
              always re-read from driver
        ah: boolean, True for Asian handicap markets, False for Over/Under
    If no market with at least one bookmaker is found, a message is printed
    and nothing is clicked.
    """
    soup = bs4.BeautifulSoup(driver.page_source)
    rows = soup.find("div", {"id": "odds-data-table"}).findAll("div", {"class": "table-container"})

    # number of leading header characters dropped to get the market value:
    # 15 for Asian handicap rows, 11 for Over/Under rows
    prefix_len = 15 if ah else 11

    max_book_count = 0
    max_type = None
    for row in rows:
        # skip rows without a header and rows whose header starts with "Click"
        if row.strong is None or row.strong.text[:5] == "Click":
            continue
        book_count = convert_2_int(row.find("span", {"class": "odds-cnt"}).text[1:-1])
        # a NAN count compares False and is therefore ignored
        if book_count > max_book_count:
            max_book_count = book_count
            max_type = row.strong.text[prefix_len:]

    if max_type is None:
        # previously this case failed with TypeError (str + float NAN)
        print("No market with bookmakers found, nothing clicked")
        return

    if ah:
        _xpath = "//*[contains(text(), 'Asian handicap " + max_type + "')]"
    else:
        _xpath = "//*[contains(text(), 'Over/Under " + max_type + "')]"

    click_xpath(driver, _xpath)

In [287]:
%%time
# temporary list to store data (one list per successfully loaded match)
_data = list()
# URLs whose page could not be loaded (each stored as a one-element list)
failed_urls = list()

# only the first 10 match URLs are processed in this cell
for url in df_urls.URL.values[:10]:
    if load_page(driver, url):
        # wait till page is loaded
        wait.until(EC.visibility_of_element_located((By.ID , "odds-data-table")))
        # scroll down, so that all bookmakers are accessible
        driver.execute_script("window.scrollTo(0, 320)")
        # get soup element
        soup = bs4.BeautifulSoup(driver.page_source)
        # read team names
        home_n, away_n = get_team_names(soup)
        # get match date
        match_date = get_match_date(soup)
        # get score (final score, overtime flag and four quarter scores)
        score = get_score(soup)
        # get bookmaker prices (closing/opening home and away odds)
        _coefs = get_h2h_coef(driver)
        
        # get Asian handicap info
        # NOTE(review): the AH and O/U markets are only opened below; no AH or
        # O/U prices are read or added to _data in this cell
        # click AH button
        click_AH_OU_button(driver, wait)
        # click on max book AH market
        click_max_book_market(driver, bs4.BeautifulSoup(driver.page_source))
        
        # get OU info
        # click OU button
        click_AH_OU_button(driver, wait, False)
        # click on max book OU market
        click_max_book_market(driver, bs4.BeautifulSoup(driver.page_source), False)
        
        
        # add data: 4 match fields + 11 score fields + 12 odds fields
        _data.append([match_date, home_n, away_n, url]+score+_coefs)
    else:
        failed_urls.append([url])
        
# create DataFrame; the column order must match the list built above
df_events = pd.DataFrame(_data, columns=["Date", "Home_n", "Away_n", "URL", "Home_score", "Away_score", "OT",
                                         "Q1_home", "Q1_away", "Q2_home", "Q2_away", "Q3_Home", "Q3_away",
                                         "Q4_home", "Q4_away",
                                         "H2H_home_bet365_close", "H2H_home_bet365_open",
                                         "H2H_away_bet365_close", "H2H_away_bet365_open",
                                         "H2H_home_Dafa_close", "H2H_home_Dafa_open",
                                         "H2H_away_Dafa_close", "H2H_away_Dafa_open",
                                         "H2H_home_Pinnacle_close", "H2H_home_Pinnacle_open",
                                         "H2H_away_Pinnacle_close", "H2H_away_Pinnacle_open"])

[nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan]
[163.5, 1.9, 2.0, 1.9, 2.0, nan, nan, nan, nan, nan, nan, nan, nan]
[nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan]
[171.5, 1.9, 2.0, 1.9, 2.0, nan, nan, nan, nan, nan, nan, nan, nan]
Dafa
[nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan]
[173.5, nan, nan, nan, nan, 1.86, 1.94, 1.98, 2.06, nan, nan, nan, nan]
[nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan]
[155.5, 1.9, 2.0, 1.9, 2.0, nan, nan, nan, nan, nan, nan, nan, nan]
[nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan]
[165.0, 1.9, 2.0, 1.9, 2.0, nan, nan, nan, nan, nan, nan, nan, nan]
[nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan]
[167.5, 1.9, 2.0, 1.9, 2.0, nan, nan, nan, nan, nan, nan, nan, nan]
[nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan]
[160.5, 1.9, 2.0, 1.9, 2.0, nan, nan, nan, nan, nan, nan, nan, nan]
Dafa
[nan, nan, nan, nan, nan, nan, nan, nan, nan, na

In [189]:
df_events

Unnamed: 0,Date,Home_n,Away_n,URL,Home_score,Away_score,OT,Q1_home,Q1_away,Q2_home,...,H2H_away_bet365_close,H2H_away_bet365_open,H2H_home_Dafa_close,H2H_home_Dafa_open,H2H_away_Dafa_close,H2H_away_Dafa_open,H2H_home_Pinnacle_close,H2H_home_Pinnacle_open,H2H_away_Pinnacle_close,H2H_away_Pinnacle_open
0,2018-05-20 21:00:00,Real Madrid,Fenerbahce,https://www.oddsportal.com/basketball/europe/e...,85,80,False,21,17,17,...,2.0,2.0,1.88,1.92,1.98,1.92,1.94,1.95,1.96,1.95
1,2018-05-20 18:00:00,CSKA Moscow,Zalgiris Kaunas,https://www.oddsportal.com/basketball/europe/e...,77,79,False,19,22,16,...,3.15,3.5,1.33,1.4,3.43,3.04,1.42,1.4,3.14,3.22
2,2018-05-18 22:00:00,CSKA Moscow,Real Madrid,https://www.oddsportal.com/basketball/europe/e...,83,92,False,30,20,16,...,2.55,2.55,1.63,1.6,2.36,2.38,1.63,1.68,2.44,2.32
3,2018-05-18 19:00:00,Fenerbahce,Zalgiris Kaunas,https://www.oddsportal.com/basketball/europe/e...,76,67,False,19,13,20,...,3.0,3.3,1.42,1.38,3.0,3.12,1.43,1.38,3.05,3.34
4,2018-04-27 22:00:00,Real Madrid,Panathinaikos,https://www.oddsportal.com/basketball/europe/e...,89,82,False,19,14,32,...,3.3,3.5,1.35,1.38,3.32,3.12,1.4,1.37,3.21,3.4
5,2018-04-27 20:00:00,Khimki M.,CSKA Moscow,https://www.oddsportal.com/basketball/europe/e...,88,89,False,25,32,19,...,1.52,1.47,2.53,2.42,1.55,1.58,2.6,2.59,1.56,1.56
6,2018-04-26 22:00:00,Baskonia,Fenerbahce,https://www.oddsportal.com/basketball/europe/e...,83,92,False,19,25,13,...,1.83,2.1,2.12,1.86,1.76,1.98,2.0,1.85,1.91,2.06
7,2018-04-26 20:00:00,Zalgiris Kaunas,Olympiakos,https://www.oddsportal.com/basketball/europe/e...,101,91,False,23,22,28,...,2.67,2.67,1.46,1.55,2.81,2.49,1.55,1.55,2.65,2.65
8,2018-04-25 20:00:00,Khimki M.,CSKA Moscow,https://www.oddsportal.com/basketball/europe/e...,79,73,False,17,15,27,...,1.57,1.52,2.49,2.63,1.57,1.5,2.45,2.76,1.62,1.51
9,2018-04-25 19:45:00,Real Madrid,Panathinaikos,https://www.oddsportal.com/basketball/europe/e...,81,74,False,17,17,23,...,3.3,3.5,1.34,1.34,3.38,3.38,1.37,1.3,3.39,3.89


In [284]:
def get_ou_prices(soup):
    """
    Read Over/Under prices of bet365, Dafabet and Pinnacle from the odds table.

    Input:
        soup: bs4 soup element
    Output:
        flat list of 13 values (NAN where nothing was read):
        [O/U line,
         bet365 over, over_prop, under, under_prop,
         Dafabet over, over_prop, under, under_prop,
         Pinnacle over, over_prop, under, under_prop]
        where over_prop = (under + over) / under and
        under_prop = (under + over) / over, both rounded to 2 decimals.
    """
    # bookmakers in lookup order, with the index of their first result slot
    book_slots = (("bet365", 1), ("Dafabet", 5), ("Pinnacle", 9))

    containers = soup.find("div", {"id": "odds-data-table"}).findAll("div", {"class": "table-container"})
    _data_list = [np.nan]*13
    for row in containers:
        avg_p = row.find("tr", {"class": "aver"})
        max_p = row.find("tr", {"class": "highest"})
        # only containers that show both the average and the highest row are read
        if avg_p is None or max_p is None:
            continue
        bet_type = convert_2_float(row.contents[0].strong.text[12:-1])
        # NOTE(review): only the first bookmaker found in a container is
        # recorded (bet365, then Dafabet, then Pinnacle) -- confirm whether
        # all three were meant to be filled
        for book, slot in book_slots:
            link = row.find("a", string=book)
            if link is None:
                continue
            cells = link.parent.parent.parent.findAll("td")
            over = convert_2_float(cells[2].text)
            under = convert_2_float(cells[3].text)
            total = under + over
            group = [over, round((total / under), 2), under, round((total / over), 2)]
            _data_list[0] = bet_type
            _data_list[slot:slot + 4] = group
            break

    return _data_list

In [286]:
soup = bs4.BeautifulSoup(driver.page_source)
get_ou_prices(bs4.BeautifulSoup(driver.page_source))
                
                
# table = soup.find("div", {"id": "odds-data-table"})
# table = table.findAll("div", {"class": "table-container"})
# for row in table:
#     if row.text != "BETTING EXCHANGES":
#         if ah:
#             bet_type = row.strong.text[15:]
#         else:
#             bet_type = row.strong.text[11:]
#         odd_count = int(row.find("span", {"class": "odds-cnt"}).text[1:-1])
#         print(bet_type)
#         print(odd_count)
#         if odd_count >= 10:
#             if ah:
#                 _xpath = "//*[contains(text(), 'Asian handicap " + bet_type + "')]"
#             else:
#                 _xpath = "//*[contains(text(), 'Over/Under " + bet_type + "')]"
#             self.click_xpath(_xpath)

Dafa
[nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan]


[166.5, nan, nan, nan, nan, 1.94, 2.02, 1.9, 1.98, nan, nan, nan, nan]

In [254]:
_data_list = [np.nan]*13
_data_list

[nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan]

In [190]:
# Scratch check of the AH tab click; click_AH_OU_button wraps the same steps.
try:
    # click AH button
    driver.find_element_by_xpath("//span[contains(text(), 'AH')]").click()
    # wait until the odds table is reported as clickable
    wait.until(EC.element_to_be_clickable((By.ID, 'odds-data-table')))
#     # get soup
#     soup = bs4.BeautifulSoup(driver.page_source)
#     ah_prices = get_ah_ou_coef(soup)
except Exception as e:
    # any failure (element missing, wait timeout, ...) is only printed
    print(e)
    print("AH prices not read")

In [185]:
# Scratch version of the score parsing for the page currently open in the
# driver; the same steps are wrapped in get_score.
soup = bs4.BeautifulSoup(driver.page_source)
element = soup.find("p", {"class": "result"})
# period scores: last child of the result element, split on commas
quater_scores = element.contents[-1].split(",")
if len(element.strong.text.split("OT"))==1:
    # no "OT" in the final score text
    OT = False
    home_s = convert_2_int(element.strong.text.split(":")[0])
    away_s = convert_2_int(element.strong.text.split(":")[1])
else:
    OT = True
    # text before "OT" with its last character dropped
    score_string = element.strong.text.split("OT")[0][:-1]
    home_s = convert_2_int(score_string.split(":")[0])
    away_s = convert_2_int(score_string.split(":")[1])
    
# first entry: drop the two leading characters; later entries: drop one
Q1 = quater_scores[0][2:].split(":")
Q1_h = convert_2_int(Q1[0])
Q1_a = convert_2_int(Q1[1])
Q2 = quater_scores[1][1:].split(":")
Q2_h = convert_2_int(Q2[0])
Q2_a = convert_2_int(Q2[1])
Q3 = quater_scores[2][1:].split(":")
Q3_h = convert_2_int(Q3[0])
Q3_a = convert_2_int(Q3[1])
# NOTE(review): [1:-1] also drops the last character of the fourth entry; if
# the list has more than four entries (e.g. an overtime period) that character
# would be a digit rather than a closing parenthesis -- confirm
Q4 = quater_scores[3][1:-1].split(":")
Q4_h = convert_2_int(Q4[0])
Q4_a = convert_2_int(Q4[1])

In [186]:
Q4

['24', '21']

In [117]:
home_s

81

In [79]:
test = [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan]

In [80]:
test[0] = 1
test

[1, nan, nan, nan, nan, nan]

In [68]:
for e in range(1, 3):
    print(e)

1
2


In [67]:
# df_events

In [None]:
df_example['Time'] = pd.to_datetime(df_example['Time'])

In [45]:
match_date

'Wednesday, 25 Apr  2018, 19:45'

In [48]:
# 

datetime.datetime(2018, 4, 25, 19, 45)

In [42]:
get_match_date(bs4.BeautifulSoup(driver.page_source))

'Wednesday, 25 Apr  2018, 19:45'

In [30]:
wait = WebDriverWait(driver, 10)

In [None]:
def get_ah_ou_coef(soup):
    """
    Returns the asian handicap or over/under market which is offered by the
    largest number of bookmakers. Same logic applies for both types of pages.

    input:
        soup: bs4.BeautifulSoup element (HTML source code parsed from the selenium webdriver page source)
    output:
        list: [market header text, first price, second price]
        [NAN, NAN, NAN] when no market with at least one bookmaker is found
    """
    table = soup.find("div", {"id": "odds-data-table"})
    table = table.findAll("div", {"class": "table-container"})
    max_book_count = 0
    # defaults, so the return below cannot hit unbound names when no row qualifies
    bet_type, price_1, price_2 = np.nan, np.nan, np.nan
    for row in table:
        if row.text == "BETTING EXCHANGES":
            continue
        # bookmaker count is shown wrapped in one character on each side, e.g. "(12)"
        odd_count = int(row.find("span", {"class":"odds-cnt"}).text[1:-1])
        if odd_count > max_book_count:
            max_book_count = odd_count
            bet_type = row.strong.text
            spans = row.findAll('span')
            price_1 = convert_2_float(spans[1].text)
            price_2 = convert_2_float(spans[2].text)
    return [bet_type, price_1, price_2]

In [None]:
# league and season to select from the frame of all scraped results
_league = "NBA"
_season = "2018/2019"
# file name built from league and season with "/" replaced by "_", i.e. NBA_2018_2019.csv
filename = _league + "_" + _season.replace("/","_") + ".csv"
# NOTE(review): df_1 is only created in the "OLD code" cell at the end of the
# notebook, so this cell does not run top to bottom on a fresh kernel
cond_1 = df_1.League == _league
cond_2 = df_1.Season == _season
# rows matching both league and season
df_c = df_1[cond_1 & cond_2]
df_c.head()

## Read NBA or Euro League detailed stats

In [None]:
%%time

# explicit wait of up to 20 sec for this run
wait = WebDriverWait(driver, 20)
# one list per match: AH prices + O/U prices + [home name, away name, date, url]
all_data = list()
# number of URLs processed so far
i = 0
for url in df_c.URL.values:
    try:
        driver.get(url)
    except Exception as e:
        # NOTE(review): processing continues after a failed load, so the page
        # source parsed below is whatever the driver currently shows
        print("page not loaded")
    # default used when AH prices cannot be read
    ah_prices = [np.nan, np.nan, np.nan]
    # get soup
    soup = bs4.BeautifulSoup(driver.page_source)
    # get team names
    try:
        names = soup.h1.text.split(" - ")
        name_h = names[0]
        name_a = names[1]
    except Exception as e:
        name_h = np.nan
        name_a = np.nan
        print(e)
        print("Team names not found")
    # get match date (kept as raw text; read from div#col-content here, while
    # get_match_date reads div#col-left)
    try:
        match_date = soup.find("div", {"id": "col-content"}).p.text
    except:
        match_date = np.nan    
    try:
        # click AH button
        driver.find_element_by_xpath("//span[contains(text(), 'AH')]").click()
        wait.until(EC.element_to_be_clickable((By.ID, 'odds-data-table')))
        # get soup
        soup = bs4.BeautifulSoup(driver.page_source)
        ah_prices = get_ah_ou_coef(soup)
    except Exception as e:
        print(e)
        print("AH prices not read")  
    # default used when O/U prices cannot be read
    ou_prices = [np.nan, np.nan, np.nan]
    try:
        # click OU button
        driver.find_element_by_xpath("//span[contains(text(), 'O/U')]").click()
        wait.until(EC.element_to_be_clickable((By.ID, 'odds-data-table')))
        # get soup
        soup = bs4.BeautifulSoup(driver.page_source)
        ou_prices = get_ah_ou_coef(soup)
    except Exception as e:
        print(e)
        print("OU prices not read")
    # add new data
    all_data.append(ah_prices + ou_prices + [name_h, name_a, match_date, url])
    # save temporary data:
    # NOTE(review): i == 0 also satisfies this condition, so the first
    # checkpoint is written after the first URL, with "-100" in its file name
    if i%100==0:
        # make a temporary save in case the webdriver crashes, the internet connection is lost, etc.
        # each checkpoint contains all rows collected so far
        filename = _league + "_" + _season.replace("/","_") + "_" + str(i-100) + "_" + str(i)+ ".csv"
        df_2 = pd.DataFrame(all_data, columns=["AH", "AH_Home", "AH_Away",
                                       "OU", "Over", "Under",
                                       "Home_name", "Away_name", "Date", "URL"])
        df_2.to_csv(filename)
    i+=1
# final save of all rows; the file name ends with the number of processed URLs
filename = _league + "_" + _season.replace("/","_") + "_" + str(i)+ ".csv"
df_2 = pd.DataFrame(all_data, columns=["AH", "AH_Home", "AH_Away",
                                       "OU", "Over", "Under",
                                       "Home_name", "Away_name", "Date", "URL"])
df_2.to_csv(filename)
df_2.head()

## Turn off webdriver

In [288]:
driver.quit()

# OLD code

In [None]:
%%time
# keys: "<league>_<season>"; values: (results page URL prefix, number of pages)
season_dict = {
    "NBA_2018/2019": ("https://www.oddsportal.com/basketball/usa/nba/results//#/page/", 8),
    "NBA_2017/2018": ("https://www.oddsportal.com/basketball/usa/nba-2017-2018/results/#/page/", 28),
    "NBA_2016/2017": ("https://www.oddsportal.com/basketball/usa/nba-2016-2017/results/#/page/", 29),
    "NBA_2015/2016": ("https://www.oddsportal.com/basketball/usa/nba-2015-2016/results/#/page/", 29),
    "NBA_2014/2015": ("https://www.oddsportal.com/basketball/usa/nba-2014-2015/results/#/page/", 29),
    "NBA_2013/2014": ("https://www.oddsportal.com/basketball/usa/nba-2013-2014/results/#/page/", 29),
    "EURO_2017/2018": ("https://www.oddsportal.com/basketball/europe/euroleague-2017-2018/results/#/page/", 6),
    "EURO_2016/2017": ("https://www.oddsportal.com/basketball/europe/euroleague-2016-2017/results/#/page/", 6),
    "EURO_2015/2016": ("https://www.oddsportal.com/basketball/europe/euroleague-2015-2016/results/#/page/", 5),
    "EURO_2014/2015": ("https://www.oddsportal.com/basketball/europe/euroleague-2014-2015/results/#/page/", 6),
    "EURO_2013/2014": ("https://www.oddsportal.com/basketball/europe/euroleague-2013-2014/results/#/page/", 6)
}

all_data = list()
for key, (url, page_count) in season_dict.items():
    # Extract league and season from key string
    league, season = key.split("_")
    # Iterate over all pages for particular season
    for idx in range(1, page_count+1):
        # Load page
        driver.get(url+str(idx))
        # quick and dirty fix: fixed 5 sec sleep, so that the page is really loaded
        time.sleep(5)
        # Process HTML into data
        soup = bs4.BeautifulSoup(driver.page_source)
        # NOTE(review): process_soup is not defined in the visible part of the notebook
        all_data += process_soup(soup, league, season)
        
df_1 = pd.DataFrame(all_data, columns=["League", "Season", "Home_score", "Away_score",
                                       "Win", "OT", "Home_p", "Away_p", "URL"])
df_1.to_csv("basketball_scores.csv")
df_1.head()