In [1]:
from selenium.webdriver import Chrome
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
import pandas as pd
import html5lib
import time
import datetime

In [2]:
# Start a Chrome session using the chromedriver executable at this relative path.
chromedriver_path = "chromedriver_win32/chromedriver.exe"
driver = Chrome(chromedriver_path)

# Build the odds-page URL for the chosen date and load it in the browser.
start_date = "2020-01-26"
my_url = "https://therundown.io/odds/nba/{}".format(start_date)
driver.get(my_url)

In [3]:
# Collect the ellipsis button of every game; clicking one expands that game's graphs view.
elps_buttons = driver.find_elements_by_class_name("glyphicon.glyphicon-option-horizontal")

for ellipsis_button in elps_buttons:
    ellipsis_button.click()
    # Pause two seconds after each click (presumably so the expanded view can render).
    time.sleep(2)

In [4]:
# Click every tab that is not currently selected, so the data is shown as a
# table instead of a graph.
unselected_tabs_xpath = "//ul[@class='ReactTabs__TabList']//li[not(contains(@class,'ReactTabs__Tab--selected'))]"
graph_buttons = driver.find_elements_by_xpath(unselected_tabs_xpath)

for tab in graph_buttons:
    # The page will not accept a click on a tab that is out of view, so scroll to it first.
    # Reading this property scrolls the element into view and returns a dict with 'x' and 'y'.
    position = tab.location_once_scrolled_into_view
    # NOTE(review): these coordinates are then passed to window.scrollTo as page
    # offsets -- confirm the two coordinate systems really agree on this page.
    scroll_script = 'window.scrollTo({}, {});'.format(position['x'], position['y'])
    driver.execute_script(scroll_script)
    tab.click()
    time.sleep(2)
    

In [24]:
# Locate the first odds-table body on the page and move the mouse pointer onto it.
table = driver.find_element_by_xpath("//div[@class='SortableTable__body']")
scroll_to = ActionChains(driver).move_to_element(table)
scroll_to.perform()

In [27]:
# Reusable key-press action: one ARROW_DOWN is queued, and it is replayed on
# every actions.perform() call below to scroll the table.
actions = ActionChains(driver)
actions.send_keys(Keys.ARROW_DOWN)

# Select the table so we can scroll down properly
table = driver.find_element_by_xpath("//div[@class='SortableTable__body']")
scroll_to = ActionChains(driver)
scroll_to.move_to_element(table).perform()
table.click()
time.sleep(1)
table.click()

ROWS_XPATH = "//div[@class='SortableTable__body']//div[@class='SortableTable__row']"
# Row timestamps look like "1/26 8:24:08 am": month/day with no year. The year
# is prepended before parsing so the day is not mistaken for a two-digit year.
ROW_TIME_FORMAT = '%Y %m/%d %I:%M:%S %p'
LOOKBACK = datetime.timedelta(hours=3)   # how far back from the first record to collect
SCROLL_KEYPRESSES = 7                    # ARROW_DOWN presses between two reads of the table
MAX_STALLED_SCROLLS = 5                  # give up after this many scrolls that reveal nothing new


def _parse_row_time(stamp, year):
    """Parse a row timestamp such as '1/26 8:24:08 am' into a datetime in `year`."""
    return datetime.datetime.strptime('{} {}'.format(year, stamp), ROW_TIME_FORMAT)


# The rows carry no year, so take it from the date the page was opened for.
scrape_year = int(start_date.split('-')[0])

rows = []
seen_rows = set()  # tuple copies of `rows`, for constant-time duplicate checks

# We will only get the odds from the first record to 3 hours back
rows_raw = driver.find_elements_by_xpath(ROWS_XPATH)
if not rows_raw:
    raise RuntimeError("No odds rows found; make sure the table view is open before scraping.")
newest_time = _parse_row_time(rows_raw[0].text.split('\n')[0], scrape_year)
curr_time = newest_time
end_time = newest_time - LOOKBACK

stalled_scrolls = 0
while curr_time > end_time:
    rows_raw = driver.find_elements_by_xpath(ROWS_XPATH)
    new_rows = 0
    arr = None
    for row_element in rows_raw:
        arr = row_element.text.split('\n')
        row_key = tuple(arr)
        if row_key not in seen_rows:
            seen_rows.add(row_key)
            rows.append(arr)
            new_rows += 1

    if arr is not None:
        # The last row currently rendered tells us how far back we have reached.
        curr_time = _parse_row_time(arr[0], scrape_year)
        # A row far "in the future" of the newest one means the table crossed
        # New Year, so it actually belongs to the previous year.
        if curr_time - newest_time > datetime.timedelta(days=180):
            curr_time = curr_time.replace(year=curr_time.year - 1)

    # Stop instead of spinning forever when the table holds less than
    # LOOKBACK of history and scrolling no longer reveals new rows.
    stalled_scrolls = 0 if new_rows else stalled_scrolls + 1
    if stalled_scrolls >= MAX_STALLED_SCROLLS:
        break

    # scroll the table
    for _ in range(SCROLL_KEYPRESSES):
        actions.perform()
    if stalled_scrolls:
        # Nothing new appeared last time; give the page a moment to render more rows.
        time.sleep(0.5)

['Time', 'Book', 'Miami Heat', 'San Antonio Spurs']

In [28]:
# Build the odds frame from the scraped rows. Each row is
# [time, book, away-team cell, home-team cell]. Naming the columns in the
# constructor also gives a well-formed (empty) frame when nothing was scraped,
# instead of failing with a length-mismatch error on the column assignment.
df = pd.DataFrame(rows, columns=['Time', 'Book', 'Away Team', 'Home Team'])
df.head()

Unnamed: 0,Time,Book,Away Team,Home Team
0,1/26 8:24:08 am,RedZone,+4½ +178,-4½ -231
1,1/26 8:23:15 am,RedZone,+5½ +126,-5½ -159
2,1/26 8:22:58 am,RedZone,+5½ -112,-5½ -112
3,1/26 8:22:23 am,RedZone,+3½ +139,-3½ -177
4,1/26 8:22:08 am,RedZone,+3½ +144,-3½ -183


In [29]:
# Each team cell holds two space-separated values (e.g. "-4½ -231"):
# the first goes to the "Line" column, the second to the "Odds" column.
for team_col, line_col, odds_col in (
    ("Home Team", 'Home Line', 'Home Odds'),
    ("Away Team", 'Away Line', 'Away Odds'),
):
    pieces = df[team_col].str.split(" ", expand=True)
    df[line_col] = pieces[0]
    df[odds_col] = pieces[1]
df.head()

Unnamed: 0,Time,Book,Away Team,Home Team,Home Line,Home Odds,Away Line,Away Odds
0,1/26 8:24:08 am,RedZone,+4½ +178,-4½ -231,-4½,-231,+4½,178
1,1/26 8:23:15 am,RedZone,+5½ +126,-5½ -159,-5½,-159,+5½,126
2,1/26 8:22:58 am,RedZone,+5½ -112,-5½ -112,-5½,-112,+5½,-112
3,1/26 8:22:23 am,RedZone,+3½ +139,-3½ -177,-3½,-177,+3½,139
4,1/26 8:22:08 am,RedZone,+3½ +144,-3½ -183,-3½,-183,+3½,144


In [30]:
# Read the team names from the first table header row on the page.
header_raw = driver.find_elements_by_xpath("//div[@class='SortableTable__header']//div[@class='SortableTable__header-row']")
if not header_raw:
    raise RuntimeError("No table header row found; make sure the table view is open.")

# Only the first header row is used. Its text splits into one entry per column,
# with the away team at index 2 and the home team at index 3.
header = header_raw[0].text.split('\n')

# Replace the raw "line odds" strings in the team columns with the team names.
df['Away Team'] = header[2]
df['Home Team'] = header[3]
df.head()

Unnamed: 0,Time,Book,Away Team,Home Team,Home Line,Home Odds,Away Line,Away Odds
0,1/26 8:24:08 am,RedZone,Dallas Mavericks,Utah Jazz,-4½,-231,+4½,178
1,1/26 8:23:15 am,RedZone,Dallas Mavericks,Utah Jazz,-5½,-159,+5½,126
2,1/26 8:22:58 am,RedZone,Dallas Mavericks,Utah Jazz,-5½,-112,+5½,-112
3,1/26 8:22:23 am,RedZone,Dallas Mavericks,Utah Jazz,-3½,-177,+3½,139
4,1/26 8:22:08 am,RedZone,Dallas Mavericks,Utah Jazz,-3½,-183,+3½,144


In [31]:
# Only find the scores for the game including the current team: match links
# whose href contains the first word of the away team's name.
away_first_word = header[2].split(' ')[0]
scores_xpath = "//div[@class='lines-teams-container']//a[contains(@href,'{}')]//div[@class='team-score']".format(away_first_word)
scores_raw = driver.find_elements_by_xpath(scores_xpath)

# The first matched score element is stored as the away score, the second as the home score.
away_score_element = scores_raw[0]
home_score_element = scores_raw[1]
df['Home Score'] = int(home_score_element.text)
df['Away Score'] = int(away_score_element.text)
df.head()

Unnamed: 0,Time,Book,Away Team,Home Team,Home Line,Home Odds,Away Line,Away Odds,Away Score,Home Score
0,1/26 8:24:08 am,RedZone,Dallas Mavericks,Utah Jazz,-4½,-231,+4½,178,107,112
1,1/26 8:23:15 am,RedZone,Dallas Mavericks,Utah Jazz,-5½,-159,+5½,126,107,112
2,1/26 8:22:58 am,RedZone,Dallas Mavericks,Utah Jazz,-5½,-112,+5½,-112,107,112
3,1/26 8:22:23 am,RedZone,Dallas Mavericks,Utah Jazz,-3½,-177,+3½,139,107,112
4,1/26 8:22:08 am,RedZone,Dallas Mavericks,Utah Jazz,-3½,-183,+3½,144,107,112


In [37]:
# Rewrite the half-point glyph in the line columns as a decimal suffix
# (e.g. '-5½' -> '-5.5'). The values are still strings afterwards; no numeric
# conversion is done here. regex=False makes the replacement a literal match.
for line_col in ('Home Line', 'Away Line'):
    df[line_col] = df[line_col].str.replace('½', '.5', regex=False)
df.head()

Unnamed: 0,Time,Book,Away Team,Home Team,Home Line,Home Odds,Away Line,Away Odds,Away Score,Home Score
0,1/26 8:24:08 am,RedZone,Dallas Mavericks,Utah Jazz,-4.5,-231,4.5,178,107,112
1,1/26 8:23:15 am,RedZone,Dallas Mavericks,Utah Jazz,-5.5,-159,5.5,126,107,112
2,1/26 8:22:58 am,RedZone,Dallas Mavericks,Utah Jazz,-5.5,-112,5.5,-112,107,112
3,1/26 8:22:23 am,RedZone,Dallas Mavericks,Utah Jazz,-3.5,-177,3.5,139,107,112
4,1/26 8:22:08 am,RedZone,Dallas Mavericks,Utah Jazz,-3.5,-183,3.5,144,107,112


In [50]:
# Copy the line from the last row of the frame into a constant "Open" column
# for each side (the last row is treated as the opening line).
for line_col, open_col in (
    ('Home Line', 'Home Line Open'),
    ('Away Line', 'Away Line Open'),
):
    df[open_col] = df[line_col].iloc[-1]
df.head()

Unnamed: 0,Time,Book,Away Team,Home Team,Home Line,Home Odds,Away Line,Away Odds,Away Score,Home Score,Home Line Open,Away Line Open
0,1/26 8:24:08 am,RedZone,Dallas Mavericks,Utah Jazz,-4.5,-231,4.5,178,107,112,-5,5
1,1/26 8:23:15 am,RedZone,Dallas Mavericks,Utah Jazz,-5.5,-159,5.5,126,107,112,-5,5
2,1/26 8:22:58 am,RedZone,Dallas Mavericks,Utah Jazz,-5.5,-112,5.5,-112,107,112,-5,5
3,1/26 8:22:23 am,RedZone,Dallas Mavericks,Utah Jazz,-3.5,-177,3.5,139,107,112,-5,5
4,1/26 8:22:08 am,RedZone,Dallas Mavericks,Utah Jazz,-3.5,-183,3.5,144,107,112,-5,5
