In [191]:
# Import packages
import requests
from datetime import datetime
from datetime import date
import time
import pandas as pd
from bs4 import BeautifulSoup
from selenium.webdriver.common.keys import Keys
import pyderman as pydriver
from selenium import webdriver
import numpy as np

In [252]:
class VblApi:
    
    """
    This class employs functions that use the VBL API to extract matchcodes and teamcodes
    """

    def opponents_teamguids(self, own_teamguid):
        """
        Start with our own teamguid and return a dictionary of all opponents teamnames + their guids
        """
        
        url_teamdetail = 'http://vblcb.wisseq.eu/VBLCB_WebService/data/TeamDetailByGuid?teamguid=' + own_teamguid
        opponents_teamguids = {}
        
        try:
            response = requests.get(url_teamdetail)
            for teamdetail in response.json()[0]['poules'][0]['teams']:
                opponents_teamguids[teamdetail['naam']] = teamdetail['guid'].replace(" ","+") # Teamname = key, teamguid = value. Replace spaces with '+' in key 

            opponents_teamguids = {key:val for key, val in opponents_teamguids.items() if val != own_teamguid} # Copy the dictionary but remove our own teamguid
            return(opponents_teamguids)

        except:
            print('Invalid Response or Error: ' + str(response))
        
    def opponent_matchguids(self, opponents_teamguid, opponents_teamname):
        """
        Start with a single opponents teamguid & return a list of all the matchguids that are before the current date, the hometeam, and the away team
        """   
        url_teammatches = 'http://vblcb.wisseq.eu/VBLCB_WebService/data/TeamMatchesByGuid?teamguid=' + opponents_teamguid
        opponents_matchguids = []
        today = date.today()

        try:
            response = requests.get(url_teammatches)
            for teammatches in response.json():
                if datetime.strptime(teammatches['datumString'], "%d-%m-%Y").date() < today: # If matchday is before today, then append the matchguid to list
                    if teammatches['tTNaam'] == opponents_teamname:
                        opponents_matchguids.append([teammatches['guid'], 'thuis', teammatches['datumString'], opponents_teamname])
                    if teammatches['tUNaam'] == opponents_teamname:
                        opponents_matchguids.append([teammatches['guid'], 'uit', teammatches['datumString'], opponents_teamname])
            return(opponents_matchguids)
        except:
            print('Invalid Response or Error: ' + str(response))

    def opponents_matches(self, opponents_teams):
        """
        Use the opponent_matchguids function to return a dictionary of all opponent names & all their matchguids before today
        """
        opponents_matches = {}

        for key, value in opponents_teams.items():
            opponents_matches[key] = self.opponent_matchguids(value, key)
        return(opponents_matches)

In [244]:
class Webdriver:
    
    """
    This is a class that generates a chrome webdriver and can be used to navigate the VBL website
    """
    
    def __init__(self):
        # Automatically create a webdriver when class Webdriver is called
        self.generate_driver()
        
    def generate_driver(self):

        
        try: 
            # Check if chromedriver is installed (necessary for selenium). Else, install it.
            path = pydriver.install(browser=pydriver.chrome)
            print('Installed chromedriver driver to path: %s' % path)

            # Create the chromedriver instance
            self.driver = webdriver.Chrome(path)

        except:
            print('chromedriver not installed correctly')
            
    def login(self, username, password):
        
        element_username = self.driver.find_element_by_xpath("/html/body/div[2]/form/div[1]/input")
        element_password = self.driver.find_element_by_xpath("/html/body/div[2]/form/div[2]/input")
        element_button = self.driver.find_element_by_xpath("/html/body/div[2]/form/div[4]/button")
        
        element_username.clear()
        element_username.send_keys(username)
            
        element_password.clear()
        element_password.send_keys(password)
        
        element_button.send_keys(Keys.RETURN)
        
        time.sleep(5) # make sure we are logged in before redirecting to a new page
     
    def redirect_page(self, url):   
        try:
            url = url
            self.driver.get(url)   
            time.sleep(1)
        except:
            print("Incorrect page")
        
    def export_verslag(self, thuis_uit):
        
        # Press buton 'Verslag'
        element_button = self.driver.find_element_by_link_text("Verslag")
        element_button.click()
        time.sleep(1)
        
        # Download tables under 'Verslag'
        #elem = self.driver.find_elements_by_xpath('//*[@id="teamthuis"]/div[3]/table')
        page = BeautifulSoup(self.driver.page_source, 'html.parser')     
        
        data = []
        
        if thuis_uit == "thuis":
            table_thuis = page.find('div', {'id':'teamthuis'})
            rows_thuis = table_thuis.select('table tbody tr')
            for row in rows_thuis:
                tds = row.select('td')
                if len(tds) == 4: # Check if the table has four columns
                    data.append([tds[0].text, tds[1].text, tds[2].text, tds[3].text]) # check for doubles in dict!!
       
        if thuis_uit == "uit":
            table_uit = page.find('div', {'id':'teamUit'})
            rows_uit = table_uit.select('table tbody tr')
            for row in rows_uit:
                tds = row.select('td')
                if len(tds) == 4: # Check if the table has four columns
                    data.append([tds[0].text, tds[1].text, tds[2].text, tds[3].text])
        
        return data[5:]

In [17]:
# read teamguids
with open("teamguids.txt") as f:
    teamguids = [line.rstrip() for line in f]

In [254]:
vbl_api = VblApi()
opponents_teamguids = vbl_api.opponents_teamguids(teamguids[1])
opponents_matchguids = vbl_api.opponents_matches(opponents_teamguids)

In [285]:
# login credentials
login = pd.read_csv("login.txt", sep = ": ", header = None, engine='python')
username = login[1][0]
password = login[1][1]

### Webdriver
# Call the webdriver class to generate a webdriver instance
website = Webdriver()

# Login using login credentials
website.redirect_page("https://vblweb.wisseq.eu/Home/login") # go to login page
website.login(username, password)

chromedriver is already installed.
Installed chromedriver driver to path: C:\Users\Lukas\Test jupyterlab\lib\chromedriver_86.0.4240.22.exe


In [292]:
# Use our dictionary of matchguids to redirect to each page and download the data tables. Only get tables that have data in them (no unplayed games in the data).

dfs_verslag = []

for teaminfo in [value for value in opponents_matchguids.values()]: # Go through the dictionary per team
    for matchinfo in teaminfo: # Go through each team per match
        url = "https://vblweb.wisseq.eu/Home/MatchDetail?wedguid=" + matchinfo[0] + "&ID=Uitslag" # Matchinfo[0] is the matchguid
        website.redirect_page(url)

        list_verslag = website.export_verslag(matchinfo[1])
        if list_verslag: # If list is not empty (= if match was played) 
            df_verslag = pd.DataFrame(list_verslag, columns = ["nr","naam","starter","punten"])   
            df_verslag['ploeg'] = matchinfo[3]
            df_verslag['thuis/uit'] = matchinfo[1]
            df_verslag['matchguid'] = matchinfo[0]
            df_verslag['datum'] = matchinfo[2]

            df_verslag = df_verslag.astype({'nr':'int32', 'naam':'string', 'starter':'string','punten':'int32','thuis/uit':'string','matchguid':'string'})
            if not all(df_verslag.punten.values == 0): # Extra check to see if match was played; if all the players made 0 points we assume the match wasn't played (yet)
                dfs_verslag.append(df_verslag)

ValueError: invalid literal for int() with base 10: 'Totaal'

In [287]:
pd.concat(dfs_verslag, ignore_index=True)

Unnamed: 0,nr,naam,starter,punten,ploeg,thuis/uit,matchguid,datum
0,4,Toon Ballyn,,2,BC Guco Lier Vzw HSE F,uit,BVBL20211189OR00148407,12-09-2020
1,5,Tim Jansen,100000.0,8,BC Guco Lier Vzw HSE F,uit,BVBL20211189OR00148407,12-09-2020
2,6,Victor Bes,,15,BC Guco Lier Vzw HSE F,uit,BVBL20211189OR00148407,12-09-2020
3,7,Finn De Vries,,7,BC Guco Lier Vzw HSE F,uit,BVBL20211189OR00148407,12-09-2020
4,8,Thomas Vingerhoets,100000.0,4,BC Guco Lier Vzw HSE F,uit,BVBL20211189OR00148407,12-09-2020
5,10,Jeroen Verpoort,100000.0,4,BC Guco Lier Vzw HSE F,uit,BVBL20211189OR00148407,12-09-2020
6,11,Timen De Prince,,10,BC Guco Lier Vzw HSE F,uit,BVBL20211189OR00148407,12-09-2020
7,12,Wout Van Bladel,,9,BC Guco Lier Vzw HSE F,uit,BVBL20211189OR00148407,12-09-2020
8,14,Max Rymen,100000.0,4,BC Guco Lier Vzw HSE F,uit,BVBL20211189OR00148407,12-09-2020
9,20,Wannes Verberck,100000.0,2,BC Guco Lier Vzw HSE F,uit,BVBL20211189OR00148407,12-09-2020


In [240]:
opponents_matchguids

{'BC Guco Lier Vzw HSE F': [['BVBL20211189OR00148407', 'uit'],
  ['BVBL20211195OR00151823', 'thuis'],
  ['BVBL20211195OR00151824', 'thuis'],
  ['BVBL20219110ANHSE41BEG', 'uit'],
  ['BVBL20219110ANHSE41BFG', 'uit'],
  ['BVBL20219110ANHSE41BGA', 'thuis'],
  ['BVBL20219110ANHSE41BGD', 'thuis'],
  ['BVBL20219110ANHSE41BIG', 'uit'],
  ['BVBL20219110BANHSE1608', 'thuis'],
  ['BVBL20219110BANHSE3208', 'thuis']],
 'Phantoms Basket Boom HSE E': [['BVBL20219110ANHSE41BBH', 'uit'],
  ['BVBL20219110ANHSE41BCH', 'uit'],
  ['BVBL20219110ANHSE41BHE', 'thuis'],
  ['BVBL20219110ANHSE41BHF', 'thuis'],
  ['BVBL20219110ANHSE41BHJ', 'thuis'],
  ['BVBL20219110BANHSE3201', 'thuis']],
 'BBC Schelle HSE B': [['BVBL20211216OR00163402', 'thuis'],
  ['BVBL20211216OR00163404', 'thuis'],
  ['BVBL20211216OR00163405', 'thuis'],
  ['BVBL20211216OR00163409', 'thuis'],
  ['BVBL20211216OR00163414', 'thuis'],
  ['BVBL20211216OR00163416', 'thuis'],
  ['BVBL20211216OR00163419', 'thuis'],
  ['BVBL20211216OR00163422', 'thuis'