#Webscrapping Major Football Leagues Using Selenium. Importing Required libraries 

In [None]:
#importing necessary libraries
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.remote.webelement import WebElement
#excel dataframe
import xlsxwriter
#import By for element types
from selenium.webdriver.common.by import By
#import alert
from selenium.webdriver.common.alert import Alert

from selenium.common.exceptions import TimeoutException
from selenium.common.exceptions import ElementClickInterceptedException

Specifying the objects to obtain specifying the Date, Home Team Name, Home Team Half Time Score, Home Team Full Time Score, Away Team Name, Away Team Half Time Score and Away Team Full Time Score. We will start from the premier league, note that we will use the same function for all the other data sets.

In [None]:
# english premier league
#putting an object to have the list
element_list=[]

driver=webdriver.Chrome(ChromeDriverManager().install())
driver.get("https://www.livesport.com/en/soccer/england/premier-league/results/")

date=driver.find_elements(By.CLASS_NAME,"event__time")
home_team=driver.find_elements(By.CLASS_NAME,"event__participant--home")
home_half=driver.find_elements(By.CLASS_NAME,"event__part--home.event__part--1")
home_full=driver.find_elements(By.CLASS_NAME,"event__score--home")
away_team=driver.find_elements(By.CLASS_NAME,"event__participant--away")
away_half=driver.find_elements(By.CLASS_NAME,"event__part--away.event__part--1")
away_full=driver.find_elements(By.CLASS_NAME,"event__score--away")


for i in range(len(date)):
    element_list.append([date[i].text,home_team[i].text,home_half[i].text,home_full[i].text,
                        away_team[i].text,away_half[i].text,away_full[i].text])
print(element_list[:2])

#closing the driver
driver.close()

In [3]:
#turning the element list into a data frame using pandas
#i will go to the steps of dealing with each row/column
#then maybe finally write a function that does that cleaning in
#the above data frame does not look very good

Converting the list of elements obtained into a data frame using Pandas Data Frame

In [10]:
#making the element list a data frame
import pandas as pd 
# List1 
df = pd.DataFrame(element_list, columns =['Date','Home Team','Home Team HT',
                                          'Home Team FT','Away Team',
                                          'Away Team HT','Away Team FT'],
                  dtype = float)

In [4]:
df.head()

Unnamed: 0,Date,Home Team,Home Team HT,Home Team FT,Away Team,Away Team HT,Away Team FT
0,06.11. 19:30,Tottenham,(0),1.0,Liverpool,(2),2.0
1,06.11. 17:00,Aston Villa,(2),3.0,Manchester Utd,(1),1.0
2,06.11. 17:00,Southampton,(0),1.0,Newcastle,(1),4.0
3,06.11. 17:00,West Ham,(1),1.0,Crystal Palace,(1),2.0
4,06.11. 15:00,Chelsea,(0),0.0,Arsenal,(0),1.0


As seen above the data needs some cleaning. First we remove the brackets from the half time scores for both the home team and the away team. Then we deal with separating the date of the match and the time. Also we need to convert the Full Time score into an integer, to represent the number of goals as an integer. So we will make one function to do it all at once.

In [2]:
#making one data cleaning function for the all data sets
def clean_data(df):
    #removing the brackets in Half Time Scores
    df['Home Team HT']=df['Home Team HT'].str.replace('(','')
    df['Home Team HT']=df['Home Team HT'].str.replace(')','')

    df['Away Team HT']=df['Away Team HT'].str.replace('(','')
    df['Away Team HT']=df['Away Team HT'].str.replace(')','')
    
    
    #converting columns to integers
    df['Home Team HT']=df['Home Team HT'].astype(int)
    df['Away Team HT']=df['Away Team HT'].astype(int)
    df['Home Team FT']=df['Home Team FT'].astype(int)
    df['Away Team FT']=df['Away Team FT'].astype(int)
    
    
    #converting date column into date time
    d=df['Date'].str.split(' ',expand=True) #expanding by space
    #labelling d
    d.rename(columns={0:'date',
                 1:'time'},inplace=True)
    #adding 2022 to date column
    d['date']=d['date'] + '2022'
    #turning into date and time types
    d['date']=pd.to_datetime(d['date'],format='%d.%m.%Y')

    #converting time column into time
    #d['time']=pd.to_datetime(d['time'],format='%H:%M')

    #merging with the final data set
    df=pd.merge(df,d,on=df.index)
    
    df.drop(columns=['key_0','Date'],inplace=True)
    
    #creating Columns of No of Goals
    df['Total HT Goals']=df['Home Team HT'] + df['Away Team HT']
    df['Total FT Goals']=df['Home Team FT'] + df['Away Team FT']
    df['FT-HT Goals']=df['Total FT Goals'] - df['Total HT Goals']
    
    #save csv
    return(df)

In [11]:
df2=clean_data(df)

  df['Home Team HT']=df['Home Team HT'].str.replace('(','')
  df['Home Team HT']=df['Home Team HT'].str.replace(')','')
  df['Away Team HT']=df['Away Team HT'].str.replace('(','')
  df['Away Team HT']=df['Away Team HT'].str.replace(')','')


In [7]:
df2.head()

Unnamed: 0,Home Team,Home Team HT,Home Team FT,Away Team,Away Team HT,Away Team FT,date,time,Total HT Goals,Total FT Goals,FT-HT Goals
0,Tottenham,0,1,Liverpool,2,2,2022-11-06,19:30,2,3,1
1,Aston Villa,2,3,Manchester Utd,1,1,2022-11-06,17:00,3,4,1
2,Southampton,0,1,Newcastle,1,4,2022-11-06,17:00,1,5,4
3,West Ham,1,1,Crystal Palace,1,2,2022-11-06,17:00,2,3,1
4,Chelsea,0,0,Arsenal,0,1,2022-11-06,15:00,0,1,1


In [12]:
df2.to_csv('premier league3.csv',index=False)

In [9]:
#la liga
"https://www.livesport.com/en/soccer/spain/laliga/results/"
#english premier league
#putting an object to have the list
element_list=[]

driver=webdriver.Chrome(ChromeDriverManager().install())
driver.get("https://www.livesport.com/en/soccer/spain/laliga/results/")

date=driver.find_elements(By.CLASS_NAME,"event__time")
home_team=driver.find_elements(By.CLASS_NAME,"event__participant--home")
home_half=driver.find_elements(By.CLASS_NAME,"event__part--home.event__part--1")
home_full=driver.find_elements(By.CLASS_NAME,"event__score--home")
away_team=driver.find_elements(By.CLASS_NAME,"event__participant--away")
away_half=driver.find_elements(By.CLASS_NAME,"event__part--away.event__part--1")
away_full=driver.find_elements(By.CLASS_NAME,"event__score--away")


for i in range(len(date)):
    element_list.append([date[i].text,home_team[i].text,home_half[i].text,home_full[i].text,
                        away_team[i].text,away_half[i].text,away_full[i].text])
print(element_list[:2])

#closing the driver
driver.close()

  driver=webdriver.Chrome(ChromeDriverManager().install())


[['04.11. 23:00', 'Girona', '(0)', '2', 'Ath Bilbao', '(0)', '1'], ['31.10. 23:00', 'Elche', '(0)', '0', 'Getafe', '(0)', '1']]


In [10]:
#making the element list a data frame
import pandas as pd 
# List1 
df = pd.DataFrame(element_list, columns =['Date','Home Team','Home Team HT',
                                          'Home Team FT','Away Team',
                                          'Away Team HT','Away Team FT'],
                  dtype = float)


In [11]:
df2=clean_data(df)

df2.to_csv('la liga.csv',index=False)

  df['Home Team HT']=df['Home Team HT'].str.replace('(','')
  df['Home Team HT']=df['Home Team HT'].str.replace(')','')
  df['Away Team HT']=df['Away Team HT'].str.replace('(','')
  df['Away Team HT']=df['Away Team HT'].str.replace(')','')


In [12]:
#

In [13]:
#ligue 1
'https://www.livesport.com/en/soccer/france/ligue-1/results/'
#english premier league
#putting an object to have the list
element_list=[]

driver=webdriver.Chrome(ChromeDriverManager().install())
driver.get("https://www.livesport.com/en/soccer/france/ligue-1/results/")

date=driver.find_elements(By.CLASS_NAME,"event__time")
home_team=driver.find_elements(By.CLASS_NAME,"event__participant--home")
home_half=driver.find_elements(By.CLASS_NAME,"event__part--home.event__part--1")
home_full=driver.find_elements(By.CLASS_NAME,"event__score--home")
away_team=driver.find_elements(By.CLASS_NAME,"event__participant--away")
away_half=driver.find_elements(By.CLASS_NAME,"event__part--away.event__part--1")
away_full=driver.find_elements(By.CLASS_NAME,"event__score--away")


for i in range(len(date)):
    element_list.append([date[i].text,home_team[i].text,home_half[i].text,home_full[i].text,
                        away_team[i].text,away_half[i].text,away_full[i].text])
print(element_list[:2])

#closing the driver
driver.close()

  driver=webdriver.Chrome(ChromeDriverManager().install())


[['04.11. 23:00', 'Troyes', '(1)', '1', 'Auxerre', '(0)', '1'], ['30.10. 22:45', 'Lyon', '(0)', '1', 'Lille', '(0)', '0']]


In [14]:
#making the element list a data frame
import pandas as pd 
# List1 
df = pd.DataFrame(element_list, columns =['Date','Home Team','Home Team HT',
                                          'Home Team FT','Away Team',
                                          'Away Team HT','Away Team FT'],
                  dtype = float)


In [15]:
df2=clean_data(df)

df2.to_csv('ligue one.csv',index=False)

  df['Home Team HT']=df['Home Team HT'].str.replace('(','')
  df['Home Team HT']=df['Home Team HT'].str.replace(')','')
  df['Away Team HT']=df['Away Team HT'].str.replace('(','')
  df['Away Team HT']=df['Away Team HT'].str.replace(')','')


In [6]:
#bundesliga
"https://www.livesport.com/en/soccer/germany/bundesliga/results/"
#english premier league
#putting an object to have the list
element_list=[]

driver=webdriver.Chrome(ChromeDriverManager().install())
driver.get("https://www.livesport.com/en/soccer/germany/bundesliga/results/")

date=driver.find_elements(By.CLASS_NAME,"event__time")
home_team=driver.find_elements(By.CLASS_NAME,"event__participant--home")
home_half=driver.find_elements(By.CLASS_NAME,"event__part--home.event__part--1")
home_full=driver.find_elements(By.CLASS_NAME,"event__score--home")
away_team=driver.find_elements(By.CLASS_NAME,"event__participant--away")
away_half=driver.find_elements(By.CLASS_NAME,"event__part--away.event__part--1")
away_full=driver.find_elements(By.CLASS_NAME,"event__score--away")


for i in range(len(date)):
    element_list.append([date[i].text,home_team[i].text,home_half[i].text,home_full[i].text,
                        away_team[i].text,away_half[i].text,away_full[i].text])
print(element_list[:2])

#closing the driver
driver.close()

  driver=webdriver.Chrome(ChromeDriverManager().install())


[['11.11. 22:30', 'B. Monchengladbach', '(3)', '4', 'Dortmund', '(2)', '2'], ['09.11. 22:30', 'Eintracht Frankfurt', '(3)', '4', 'Hoffenheim', '(1)', '2']]


In [7]:
#making the element list a data frame
import pandas as pd 
# List1 
df = pd.DataFrame(element_list, columns =['Date','Home Team','Home Team HT',
                                          'Home Team FT','Away Team',
                                          'Away Team HT','Away Team FT'],
                  dtype = float)


In [8]:
df2=clean_data(df)

df2.to_csv('bundesliga.csv',index=False)

  df['Home Team HT']=df['Home Team HT'].str.replace('(','')
  df['Home Team HT']=df['Home Team HT'].str.replace(')','')
  df['Away Team HT']=df['Away Team HT'].str.replace('(','')
  df['Away Team HT']=df['Away Team HT'].str.replace(')','')


In [19]:
#seria A
"https://www.livesport.com/en/soccer/italy/serie-a/results/"
#english premier league
#putting an object to have the list
element_list=[]

driver=webdriver.Chrome(ChromeDriverManager().install())
driver.get("https://www.livesport.com/en/soccer/italy/serie-a/results/")

date=driver.find_elements(By.CLASS_NAME,"event__time")
home_team=driver.find_elements(By.CLASS_NAME,"event__participant--home")
home_half=driver.find_elements(By.CLASS_NAME,"event__part--home.event__part--1")
home_full=driver.find_elements(By.CLASS_NAME,"event__score--home")
away_team=driver.find_elements(By.CLASS_NAME,"event__participant--away")
away_half=driver.find_elements(By.CLASS_NAME,"event__part--away.event__part--1")
away_full=driver.find_elements(By.CLASS_NAME,"event__score--away")


for i in range(len(date)):
    element_list.append([date[i].text,home_team[i].text,home_half[i].text,home_full[i].text,
                        away_team[i].text,away_half[i].text,away_full[i].text])
print(element_list[:2])

#closing the driver
driver.close()

  driver=webdriver.Chrome(ChromeDriverManager().install())


[['04.11. 22:45', 'Udinese', '(0)', '1', 'Lecce', '(1)', '1'], ['31.10. 22:45', 'Monza', '(0)', '1', 'Bologna', '(0)', '2']]


In [20]:
#making the element list a data frame
import pandas as pd 
# List1 
df = pd.DataFrame(element_list, columns =['Date','Home Team','Home Team HT',
                                          'Home Team FT','Away Team',
                                          'Away Team HT','Away Team FT'],
                  dtype = float)


In [21]:
df2=clean_data(df)

df2.to_csv('seriea.csv',index=False)

  df['Home Team HT']=df['Home Team HT'].str.replace('(','')
  df['Home Team HT']=df['Home Team HT'].str.replace(')','')
  df['Away Team HT']=df['Away Team HT'].str.replace('(','')
  df['Away Team HT']=df['Away Team HT'].str.replace(')','')


In [3]:
#Europa League
"https://www.livesport.com/en/soccer/europe/europa-league/results/"
#english premier league
#putting an object to have the list
element_list=[]

driver=webdriver.Chrome(ChromeDriverManager().install())
driver.get("https://www.livesport.com/en/soccer/europe/europa-league/results/")

date=driver.find_elements(By.CLASS_NAME,"event__time")
home_team=driver.find_elements(By.CLASS_NAME,"event__participant--home")
home_half=driver.find_elements(By.CLASS_NAME,"event__part--home.event__part--1")
home_full=driver.find_elements(By.CLASS_NAME,"event__score--home")
away_team=driver.find_elements(By.CLASS_NAME,"event__participant--away")
away_half=driver.find_elements(By.CLASS_NAME,"event__part--away.event__part--1")
away_full=driver.find_elements(By.CLASS_NAME,"event__score--away")


for i in range(len(date)):
    element_list.append([date[i].text,home_team[i].text,home_half[i].text,home_full[i].text,
                        away_team[i].text,away_half[i].text,away_full[i].text])
print(element_list[:2])

#closing the driver
driver.close()

  driver=webdriver.Chrome(ChromeDriverManager().install())


[['27.10. 22:00', 'Crvena zvezda', '(1)', '2', 'Trabzonspor', '(1)', '1'], ['27.10. 22:00', 'Ferencvaros', '(0)', '1', 'Monaco', '(1)', '1']]


In [4]:
#making the element list a data frame
import pandas as pd 
# List1 
df = pd.DataFrame(element_list, columns =['Date','Home Team','Home Team HT',
                                          'Home Team FT','Away Team',
                                          'Away Team HT','Away Team FT'],
                  dtype = float)


In [5]:
df2=clean_data(df)

df2.to_csv('europa.csv',index=False)

  df['Home Team HT']=df['Home Team HT'].str.replace('(','')
  df['Home Team HT']=df['Home Team HT'].str.replace(')','')
  df['Away Team HT']=df['Away Team HT'].str.replace('(','')
  df['Away Team HT']=df['Away Team HT'].str.replace(')','')


In [6]:
df2.head()

Unnamed: 0,Home Team,Home Team HT,Home Team FT,Away Team,Away Team HT,Away Team FT,date,time,Total HT Goals,Total FT Goals,FT-HT Goals
0,Crvena zvezda,1,2,Trabzonspor,1,1,2022-10-27,22:00,2,3,1
1,Ferencvaros,0,1,Monaco,1,1,2022-10-27,22:00,1,2,1
2,Freiburg,0,1,Olympiacos Piraeus,1,1,2022-10-27,22:00,1,2,1
3,HJK,0,1,AS Roma,1,2,2022-10-27,22:00,1,3,2
4,Manchester Utd,1,3,Sheriff Tiraspol,0,0,2022-10-27,22:00,1,3,2


In [3]:
#Croatia
"https://www.livesport.com/en/soccer/croatia/druga-nl/results/"
#english premier league
#putting an object to have the list
element_list=[]

driver=webdriver.Chrome(ChromeDriverManager().install())
driver.get("https://www.livesport.com/en/soccer/croatia/druga-nl/results/")

date=driver.find_elements(By.CLASS_NAME,"event__time")
home_team=driver.find_elements(By.CLASS_NAME,"event__participant--home")
home_half=driver.find_elements(By.CLASS_NAME,"event__part--home.event__part--1")
home_full=driver.find_elements(By.CLASS_NAME,"event__score--home")
away_team=driver.find_elements(By.CLASS_NAME,"event__participant--away")
away_half=driver.find_elements(By.CLASS_NAME,"event__part--away.event__part--1")
away_full=driver.find_elements(By.CLASS_NAME,"event__score--away")


for i in range(len(date)):
    element_list.append([date[i].text,home_team[i].text,home_half[i].text,home_full[i].text,
                        away_team[i].text,away_half[i].text,away_full[i].text])
print(element_list[:2])

#closing the driver
driver.close()

  driver=webdriver.Chrome(ChromeDriverManager().install())


[['11.11. 16:00', 'Opatija', '(0)', '1', 'Zrinski Jurjevac', '(0)', '2'], ['11.11. 16:00', 'Sesvete', '(0)', '0', 'Hrvace', '(2)', '3']]


In [4]:
#making the element list a data frame
import pandas as pd 
# List1 
df = pd.DataFrame(element_list, columns =['Date','Home Team','Home Team HT',
                                          'Home Team FT','Away Team',
                                          'Away Team HT','Away Team FT'],
                  dtype = float)


In [5]:
df2=clean_data(df)

df2.to_csv('croatia.csv',index=False)

  df['Home Team HT']=df['Home Team HT'].str.replace('(','')
  df['Home Team HT']=df['Home Team HT'].str.replace(')','')
  df['Away Team HT']=df['Away Team HT'].str.replace('(','')
  df['Away Team HT']=df['Away Team HT'].str.replace(')','')


In [16]:
#seric C
"https://www.livesport.com/en/soccer/italy/serie-c-group-a/results/"
#english premier league
#putting an object to have the list
element_list=[]

driver=webdriver.Chrome(ChromeDriverManager().install())
driver.get("https://www.livesport.com/en/soccer/italy/serie-c-group-a/results/")

date=driver.find_elements(By.CLASS_NAME,"event__time")
home_team=driver.find_elements(By.CLASS_NAME,"event__participant--home")
home_half=driver.find_elements(By.CLASS_NAME,"event__part--home.event__part--1")
home_full=driver.find_elements(By.CLASS_NAME,"event__score--home")
away_team=driver.find_elements(By.CLASS_NAME,"event__participant--away")
away_half=driver.find_elements(By.CLASS_NAME,"event__part--away.event__part--1")
away_full=driver.find_elements(By.CLASS_NAME,"event__score--away")


for i in range(len(date)):
    element_list.append([date[i].text,home_team[i].text,home_half[i].text,home_full[i].text,
                        away_team[i].text,away_half[i].text,away_full[i].text])
print(element_list[:2])

#closing the driver
driver.close()

  driver=webdriver.Chrome(ChromeDriverManager().install())


[['12.11. 16:30', 'AlbinoLeffe', '(1)', '1', 'Sangiuliano City', '(0)', '1'], ['12.11. 16:30', 'Arzignano', '(0)', '0', 'Mantova', '(1)', '1']]


In [17]:
#making the element list a data frame
import pandas as pd 
# List1 
df = pd.DataFrame(element_list, columns =['Date','Home Team','Home Team HT',
                                          'Home Team FT','Away Team',
                                          'Away Team HT','Away Team FT'],
                  dtype = float)


In [18]:
df2=clean_data(df)

df2.to_csv('serie c.csv',index=False)

  df['Home Team HT']=df['Home Team HT'].str.replace('(','')
  df['Home Team HT']=df['Home Team HT'].str.replace(')','')
  df['Away Team HT']=df['Away Team HT'].str.replace('(','')
  df['Away Team HT']=df['Away Team HT'].str.replace(')','')


In [19]:
#ligue 2
"https://www.livesport.com/en/soccer/france/ligue-2/results/"
#english premier league
#putting an object to have the list
element_list=[]

driver=webdriver.Chrome(ChromeDriverManager().install())
driver.get("https://www.livesport.com/en/soccer/france/ligue-2/results/")

date=driver.find_elements(By.CLASS_NAME,"event__time")
home_team=driver.find_elements(By.CLASS_NAME,"event__participant--home")
home_half=driver.find_elements(By.CLASS_NAME,"event__part--home.event__part--1")
home_full=driver.find_elements(By.CLASS_NAME,"event__score--home")
away_team=driver.find_elements(By.CLASS_NAME,"event__participant--away")
away_half=driver.find_elements(By.CLASS_NAME,"event__part--away.event__part--1")
away_full=driver.find_elements(By.CLASS_NAME,"event__score--away")


for i in range(len(date)):
    element_list.append([date[i].text,home_team[i].text,home_half[i].text,home_full[i].text,
                        away_team[i].text,away_half[i].text,away_full[i].text])
print(element_list[:2])

#closing the driver
driver.close()

  driver=webdriver.Chrome(ChromeDriverManager().install())


[['12.11. 17:00', 'St Etienne', '(0)', '0', 'Rodez', '(0)', '2'], ['07.11. 22:45', 'Metz', '(2)', '3', 'St Etienne', '(1)', '2']]


In [20]:
#making the element list a data frame
import pandas as pd 
# List1 
df = pd.DataFrame(element_list, columns =['Date','Home Team','Home Team HT',
                                          'Home Team FT','Away Team',
                                          'Away Team HT','Away Team FT'],
                  dtype = float)


In [21]:
df2=clean_data(df)

df2.to_csv('ligue 2.csv',index=False)

  df['Home Team HT']=df['Home Team HT'].str.replace('(','')
  df['Home Team HT']=df['Home Team HT'].str.replace(')','')
  df['Away Team HT']=df['Away Team HT'].str.replace('(','')
  df['Away Team HT']=df['Away Team HT'].str.replace(')','')
