# Football Fixtures Scraper

In [1]:
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup
import time
import json

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Opening JSON file
with open('teams_demo.json') as json_file:
    team_dict = json.load(json_file)

team_df = pd.DataFrame.from_dict(team_dict)

In [3]:
team_df

Unnamed: 0,Country,League,Team,url
0,England,Premier League,Arsenal,https://ng.soccerway.com//teams/england/arsena...
1,England,Premier League,Manchester City,https://ng.soccerway.com//teams/england/manche...
2,England,Premier League,Newcastle United,https://ng.soccerway.com//teams/england/newcas...
3,England,Premier League,Manchester United,https://ng.soccerway.com//teams/england/manche...
4,England,Premier League,Tottenham Hotspur,https://ng.soccerway.com//teams/england/totten...
...,...,...,...,...
93,Spain,La Liga,Getafe,https://ng.soccerway.com//teams/spain/getafe-c...
94,Spain,La Liga,Celta Vigo,https://ng.soccerway.com//teams/spain/real-clu...
95,Spain,La Liga,Real Valladolid,https://ng.soccerway.com//teams/spain/real-val...
96,Spain,La Liga,Cadiz,https://ng.soccerway.com//teams/spain/cadiz-cl...


In [4]:
team = 'Arsenal'
league = 'Premier League'
header = 'Mozilla/5.0 (Windows NT 10.0; Windows; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.114 Safari/537.36'

In [5]:
url = team_df[(team_df['Team'] == team) & (team_df['League'] == league)]['url'].values[0]
response = requests.get(url, headers={'User-Agent': header})

In [8]:
url

'https://ng.soccerway.com//teams/england/arsenal-fc/660/'

In [6]:
response

<Response [200]>

In [7]:
soup = BeautifulSoup(response.text, 'html.parser')
table = soup.find_all('table', class_='matches')

#### Get Dates

In [117]:
table[0].find_all('td', class_ = "full-date")[0].text

'31/12/22'

In [118]:
dates = [row.text for row in table[0].find_all('td', class_ = "full-date")]
dates

['31/12/22',
 '03/01/23',
 '09/01/23',
 '15/01/23',
 '22/01/23',
 '27/01/23',
 '04/02/23',
 '11/02/23',
 '15/02/23',
 '18/02/23']

#### Get League

In [119]:
table[0].find_all('td', class_="competition")[1].text

'PRL'

In [120]:
leagues = [row.text.strip() for row in table[0].find_all('td', class_="competition")]
leagues

['PRL', 'PRL', 'FAC', 'PRL', 'PRL', 'FAC', 'PRL', 'PRL', 'PRL', 'PRL']

#### Get Home Team

In [121]:
table[0].find_all('td', class_ = "team")[2].text.strip()

'Arsenal'

In [105]:
homes = [row.text.strip() for row in table[0].find_all('td', class_ = "team")[::2]]
homes

['Brighton & Hove Albion',
 'Arsenal',
 'Oxford United',
 'Tottenham Hotspur',
 'Arsenal',
 'Manchester City',
 'Everton',
 'Arsenal',
 'Arsenal',
 'Aston Villa']

#### Get Away Team

In [106]:
aways = [row.text.strip() for row in table[0].find_all('td', class_ = "team")[1::2]]
aways

['Arsenal',
 'Newcastle United',
 'Arsenal',
 'Arsenal',
 'Manchester United',
 'Arsenal',
 'Arsenal',
 'Brentford',
 'Manchester City',
 'Arsenal']

#### Get Score/Time

In [122]:
table[0].find_all('td', class_="score-time")[3].text.strip()

'0 - 2'

In [123]:
scores = [row.text.strip() for row in table[0].find_all('td', class_="score-time")]
scores

['2 - 4',
 '0 - 0',
 '0 - 3',
 '0 - 2',
 '3 - 2',
 '21 : 00',
 '13 : 30',
 '16 : 00',
 '20 : 30',
 '13 : 30']

In [124]:
# Create Empty dataframe
df = pd.DataFrame({'Date': dates,
                    'League': leagues,
                    'Home team': homes,
                    'Score': scores,
                    'Away team': aways
                    })

In [125]:
df

Unnamed: 0,Date,League,Home team,Score,Away team
0,31/12/22,PRL,Brighton & Hove Albion,2 - 4,Arsenal
1,03/01/23,PRL,Arsenal,0 - 0,Newcastle United
2,09/01/23,FAC,Oxford United,0 - 3,Arsenal
3,15/01/23,PRL,Tottenham Hotspur,0 - 2,Arsenal
4,22/01/23,PRL,Arsenal,3 - 2,Manchester United
5,27/01/23,FAC,Manchester City,21 : 00,Arsenal
6,04/02/23,PRL,Everton,13 : 30,Arsenal
7,11/02/23,PRL,Arsenal,16 : 00,Brentford
8,15/02/23,PRL,Arsenal,20 : 30,Manchester City
9,18/02/23,PRL,Aston Villa,13 : 30,Arsenal


In [126]:
class Match():
    '''
    Match Webscraping and Analysis.
    Scraping Matches and

    Parameters
    ----------
    agent : string, default= Mozilla/5.0 (Windows NT 10.0; Windows; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.114 Safari/537.36
        a characteristic string that lets servers and network peers identify the application,
        operating system, vendor, and/or version of the requesting user agent.

    '''

    def __init__(self,
                 agent='Mozilla/5.0 (Windows NT 10.0; Windows; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.114 Safari/537.36'):
        self.agent = agent

    def last_fixtures(self, team, league):
        '''
        Obtain last 5 fixtures

        Parameters
        ----------
        team : string,
            name of team
        
        league : string,
            name of league

        Returns
        -------
        C : dict
            Returns last 5 fixtures

        '''
        url = team_df[(team_df['Team'] == team) & (team_df['League'] == league)]['url'].values[0]
        response = requests.get(url, headers={'User-Agent': self.agent})
        
        try:
            soup = BeautifulSoup(response.text, 'html.parser')
            table = soup.find_all('table', class_='matches')
            dates = [row.text for row in table[0].find_all('td', class_ = "full-date")]
            leagues = [row.text.strip() for row in table[0].find_all('td', class_="competition")]
            homes = [row.text.strip() for row in table[0].find_all('td', class_ = "team")[::2]]
            aways = [row.text.strip() for row in table[0].find_all('td', class_ = "team")[1::2]]
            scores = [row.text.strip() for row in table[0].find_all('td', class_="score-time")]
            
            # Create Empty dataframe
            df = pd.DataFrame({'Date': dates,
                                'League': leagues,
                                'Home team': homes,
                                'Score': scores,
                                'Away team': aways
                                })
            
            return df[:5], df[5:]
        except ImportError:
            return ('incorect teamname, country or both')

In [4]:
select_df = team_df[team_df['Team'] == 'Arsenal']
select_df

Unnamed: 0,Country,League,Team,url
0,England,Premier League,Arsenal,https://ng.soccerway.com//teams/england/arsena...


In [5]:
league = select_df['League'].tolist()
league

['Premier League']

In [127]:
matches = Match()
last_5, next_5 = matches.last_fixtures('Getafe', 'La Liga')

In [128]:
last_5.index = np.arange(1, len(last_5) + 1)
last_5

Unnamed: 0,Date,League,Home team,Score,Away team
1,30/12/22,LAL,Getafe,2 - 0,Mallorca
2,03/01/23,CDR,Levante,3 - 2,Getafe
3,08/01/23,LAL,Sevilla,2 - 1,Getafe
4,15/01/23,LAL,Getafe,1 - 2,Espanyol
5,22/01/23,LAL,Barcelona,1 - 0,Getafe


In [129]:
next_5.rename(columns={'Outcome':'Time'}, inplace=True)
next_5.index = np.arange(1, len(next_5) + 1)
next_5

Unnamed: 0,Date,League,Home team,Score,Away team
1,28/01/23,LAL,Getafe,21 : 00,Real Betis
2,04/02/23,LAL,Atletico Madrid,18 : 30,Getafe
3,12/02/23,LAL,Getafe,14 : 00,Rayo Vallecano
4,19/02/23,LAL,Getafe,-,Valencia
5,26/02/23,LAL,Villarreal,-,Getafe
