# **Objective: Get All Match Boards Per League Season Page**

In [1]:
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
import requests
import time, os

In [2]:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys

# Location of the chromedriver binary; the same value is handed to webdriver.Chrome
# when the browser session is started.
chrome_driver = "/Applications/chromedriver" # path to the chromedriver executable
# Also expose the path through this process's environment variables.
# NOTE(review): the path is passed to webdriver.Chrome explicitly, so confirm
# whether anything still reads this environment entry.
os.environ["webdriver.chrome.driver"] = chrome_driver

In [19]:
"""
    For league season websites, get all "target=" locations for matches
    Each one of these will give you a game board
    Ex: https://tv.dartconnect.com/leaguestandings/wcdo/809/all
"""

# Start a Chrome session using the chromedriver at `chrome_driver`, then load the
# league standings page that the following cells parse and click through.
# NOTE(review): passing the driver path positionally depends on the installed
# Selenium version (newer releases expect a Service object) - confirm before upgrading.
driver = webdriver.Chrome(chrome_driver)
driver.get("https://tv.dartconnect.com/leaguestandings/wcdo/809/all")

In [20]:
#Use Beautiful Soup to parse
# The markup is whatever page source the browser holds at the moment this cell
# runs; it is parsed with the 'html.parser' backend.
soup = BeautifulSoup(driver.page_source, 'html.parser')

In [21]:
#Location of each clickable link to a team's matches
# Selects every parsed element whose CSS class is "mp_link".
mplinks = soup.find_all(class_="mp_link")

In [22]:
# Gather the "data-id" attribute of every team link, in page order
# (an entry is None when a link carries no such attribute).
teamid = [team_link.get('data-id') for team_link in mplinks]

In [24]:
#Function for getting matches when team link clicked
# Live Selenium elements for the team links (class "mp_link"); these are the
# objects that get clicked, as opposed to the static Beautiful Soup tags.
# The generic find_elements(By.CLASS_NAME, ...) call is used because the
# find_elements_by_* helpers were removed in Selenium 4.3; the generic form
# is available in Selenium 3 as well.
from selenium.webdriver.common.by import By

teamclick = driver.find_elements(By.CLASS_NAME, 'mp_link')

def seasonmatches(season):
    """Return the ``target`` values of the anchors on the page ``driver`` is showing.

    The page source is read from the module-level ``driver`` at call time, so
    the result reflects the state of the browser when the function is called.

    Args:
        season: Not read by the function body; the calling loop passes the
            team link element it has just clicked.

    Returns:
        A list holding the ``target`` attribute of every ``<a>`` tag found on
        the page (``None`` for anchors without one), minus the first five
        entries.
    """
    current_page = BeautifulSoup(driver.page_source, 'html.parser')
    targets = [anchor.get("target") for anchor in current_page.find_all('a')]
    # NOTE(review): the first five anchors are skipped - presumably fixed page
    # links rather than matches; confirm against the live page.
    return targets[5:]

In [27]:
#Loop for executing the seasonmatches function on all teams

# For each team link: click it, pause one second, then scrape the page and keep
# that team's list of targets. The result is one inner list per team link.
LeagueMatches = []
for team_link in teamclick:
    team_link.click()
    # NOTE(review): fixed one-second pause, presumably to let the page update
    # after the click - confirm it is long enough on slow connections.
    time.sleep(1)
    team_matches = seasonmatches(team_link)
    LeagueMatches.append(team_matches)

In [28]:
#Visual check on list of matches (one inner list per clicked team link)
LeagueMatches

[['5c3d3935b6e2a72828e2e59b',
  '5c4677f2f26c301150bb86cc',
  '5c4fb74b1e0efe61e2677a44',
  '5c58ce418e82d95b2d56f748',
  '5c6b5b037bf62409720964df',
  '5c74a0b67bf62409720b2d2b',
  '5c86ed846fb2ec48debe3808',
  '5c9977cccb036407c224ee2d',
  '5ca4086637e86363e13eb8fe',
  '5cabef4f37e86363e1402399',
  '5cad4125b878511b4d0879eb'],
 ['5c3e9442b6e2a72828e3251a',
  '5c47cb63f26c301150bbc241',
  '5c5108aa1e0efe61e267b2fe',
  '5c6cba127bf624097209b7f8',
  '5c75f3807bf62409720b7258',
  '5c7f2c8034f2bf05f08edfe0',
  '5c885a646fb2ec48debe9831',
  '5c91937b6fb2ec48dec0593a',
  '5c9ad12fb14b0b0a35c5ad0d',
  '5ca406b037e86363e13eb70c'],
 ['5c3d40f2b6e2a72828e2ea49',
  '5c467bbef26c301150bb88e3',
  '5c6228ee4efae707bc57e8ea',
  '5c6b62367bf62409720968e0',
  '5c74a0a57bf62409720b2d1e',
  '5c8707cf6fb2ec48debe4ad9',
  '5c90403a6fb2ec48dec00aed',
  '5c997a4dcb036407c224f02f',
  '5ca406b037e86363e13eb70c',
  '5cabf09d37e86363e1402466',
  '5cad4125b878511b4d0879eb'],
 ['5c3e92eeb6e2a72828e323f7',
  '5c51

In [29]:
# Each 5-digit game code has a page at "https://members.dartconnect.com/history/report/event/<5-digit code>"
# Those pages may contain links to matches

In [30]:
#Get rid of redundant matches, make teamseason lists into a single list
# Flatten the per-team lists into one list, keeping only the first occurrence
# of each match ID and preserving first-seen order. A set tracks the IDs
# already kept so each membership check is constant time instead of a scan
# of the growing result list.
CleanMatch = []
seen_matches = set()
for team in LeagueMatches:
    for match in team:
        if match not in seen_matches:
            seen_matches.add(match)
            CleanMatch.append(match)
CleanMatch

['5c3d3935b6e2a72828e2e59b',
 '5c4677f2f26c301150bb86cc',
 '5c4fb74b1e0efe61e2677a44',
 '5c58ce418e82d95b2d56f748',
 '5c6b5b037bf62409720964df',
 '5c74a0b67bf62409720b2d2b',
 '5c86ed846fb2ec48debe3808',
 '5c9977cccb036407c224ee2d',
 '5ca4086637e86363e13eb8fe',
 '5cabef4f37e86363e1402399',
 '5cad4125b878511b4d0879eb',
 '5c3e9442b6e2a72828e3251a',
 '5c47cb63f26c301150bbc241',
 '5c5108aa1e0efe61e267b2fe',
 '5c6cba127bf624097209b7f8',
 '5c75f3807bf62409720b7258',
 '5c7f2c8034f2bf05f08edfe0',
 '5c885a646fb2ec48debe9831',
 '5c91937b6fb2ec48dec0593a',
 '5c9ad12fb14b0b0a35c5ad0d',
 '5ca406b037e86363e13eb70c',
 '5c3d40f2b6e2a72828e2ea49',
 '5c467bbef26c301150bb88e3',
 '5c6228ee4efae707bc57e8ea',
 '5c6b62367bf62409720968e0',
 '5c74a0a57bf62409720b2d1e',
 '5c8707cf6fb2ec48debe4ad9',
 '5c90403a6fb2ec48dec00aed',
 '5c997a4dcb036407c224f02f',
 '5cabf09d37e86363e1402466',
 '5c3e92eeb6e2a72828e323f7',
 '5c5105bb1e0efe61e267b115',
 '5c5a44b98e82d95b2d57528c',
 '5c6379854efae707bc582ba2',
 '5c6cb7d37bf6

In [31]:
#Put into a data frame
# Builds a one-column frame from the flat list of unique match IDs; no column
# name is given, so pandas labels the column 0.
clean = pd.DataFrame(CleanMatch)

In [32]:
# Display the frame as a visual check before exporting it.
clean

Unnamed: 0,0
0,5c3d3935b6e2a72828e2e59b
1,5c4677f2f26c301150bb86cc
2,5c4fb74b1e0efe61e2677a44
3,5c58ce418e82d95b2d56f748
4,5c6b5b037bf62409720964df
...,...
247,44830
248,46608
249,5c3e9384b6e2a72828e32471
250,5c75f1027bf62409720b6f9a


In [33]:
#Export all match ID's to csv. Format LEAGUE-SEASON-CONTENTS.
# Written to the current working directory with pandas' default to_csv options,
# so the row index is saved as the first column alongside the IDs.
clean.to_csv("WCD-19W-Matches.csv")