# Football Fixtures Scraper

In [1]:
import json
import time
from io import StringIO

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Load the team lookup table from the demo JSON file.
# JSON text is UTF-8; without an explicit encoding open() falls back to the
# platform default, which can garble or reject non-ASCII team names.
with open('teams_demo.json', encoding='utf-8') as json_file:
    team_dict = json.load(json_file)

# One row per team; the Match class reads the Team, League and url columns.
team_df = pd.DataFrame.from_dict(team_dict)




class Match():
    '''
    Match web scraping for football fixtures.

    Resolves a team's page URL from a team table, downloads the page and
    splits its ``matches`` table into already-played and upcoming fixtures.

    Parameters
    ----------
    agent : string, default= Mozilla/5.0 (Windows NT 10.0; Windows; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.114 Safari/537.36
        a characteristic string that lets servers and network peers identify the application,
        operating system, vendor, and/or version of the requesting user agent.
        It is sent as the ``User-Agent`` header of every request.

    teams : pandas.DataFrame, default=None
        table with ``Team``, ``League`` and ``url`` columns used to look up a
        team's page. When None, the notebook-level ``team_df`` is used.

    timeout : float, default=30
        number of seconds to wait for the server before the request is aborted.

    '''

    def __init__(self,
                 agent='Mozilla/5.0 (Windows NT 10.0; Windows; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.114 Safari/537.36',
                 teams=None,
                 timeout=30):
        self.agent = agent
        self.teams = teams
        self.timeout = timeout

    def _team_url(self, team, league):
        '''Return the page URL stored for `team` playing in `league`.'''
        # Fall back to the notebook global only when no table was injected.
        teams = team_df if self.teams is None else self.teams
        urls = teams.loc[(teams['Team'] == team) & (teams['League'] == league), 'url']
        if urls.empty:
            # IndexError is what the previous bare `.values[0]` lookup raised;
            # the type is kept so existing handlers still work.
            raise IndexError(
                f'incorrect team name, league or both: team={team!r}, league={league!r}')
        return urls.iloc[0]

    @staticmethod
    def _tidy_fixtures(raw):
        '''Relabel the scraped table and split it into (played, upcoming) frames.'''
        # The two trailing columns are discarded; presumably they carry no
        # fixture data (TODO confirm against the page markup).
        trimmed = raw.drop(columns=raw.columns[-2:])
        # The scraped headers do not line up with the cell contents, so the
        # labels are moved to the columns that actually hold those values.
        fixtures = trimmed.rename(columns={'Outcome': 'Home team',
                                           'Home team': 'Outcome',
                                           'Score/Time': 'Away team',
                                           'Competition': 'League'})
        # Copies, not slices: callers rename/re-index the results in place.
        return fixtures.iloc[:5].copy(), fixtures.iloc[5:].copy()

    def last_fixtures(self, team, league):
        '''
        Obtain the last 5 fixtures and the remaining (upcoming) fixtures

        Parameters
        ----------
        team : string,
            name of team

        league : string,
            name of league

        Returns
        -------
        last, upcoming : tuple of pandas.DataFrame
            `last` holds the first 5 rows of the page's matches table,
            `upcoming` holds every row after them.

        Raises
        ------
        IndexError
            if no row of the team table matches both `team` and `league`.
        ValueError
            if the downloaded page has no table with class ``matches``.
        requests.HTTPError
            if the server answers with an error status code.

        '''
        url = self._team_url(team, league)
        response = requests.get(url,
                                headers={'User-Agent': self.agent},
                                timeout=self.timeout)
        # Fail loudly on 4xx/5xx instead of trying to parse an error page.
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')
        tables = soup.find_all('table', class_='matches')
        if not tables:
            raise ValueError(f"no table with class 'matches' found at {url}")

        # Newer pandas rejects literal HTML strings; wrap the markup in StringIO.
        raw = pd.read_html(StringIO(str(tables[0])))[0]
        return self._tidy_fixtures(raw)

In [3]:
# Preview the team lookup table built from teams_demo.json.
team_df

Unnamed: 0,Country,League,Team,url
0,England,Premier League,Arsenal,https://ng.soccerway.com//teams/england/arsena...
1,England,Premier League,Manchester City,https://ng.soccerway.com//teams/england/manche...
2,England,Premier League,Newcastle United,https://ng.soccerway.com//teams/england/newcas...
3,England,Premier League,Manchester United,https://ng.soccerway.com//teams/england/manche...
4,England,Premier League,Tottenham Hotspur,https://ng.soccerway.com//teams/england/totten...
...,...,...,...,...
93,Spain,La Liga,Getafe,https://ng.soccerway.com//teams/spain/getafe-c...
94,Spain,La Liga,Celta Vigo,https://ng.soccerway.com//teams/spain/real-clu...
95,Spain,La Liga,Real Valladolid,https://ng.soccerway.com//teams/spain/real-val...
96,Spain,La Liga,Cadiz,https://ng.soccerway.com//teams/spain/cadiz-cl...


In [4]:
# Keep only the rows whose Team value is exactly 'Arsenal'.
select_df = team_df.loc[team_df['Team'].eq('Arsenal')]
select_df

Unnamed: 0,Country,League,Team,url
0,England,Premier League,Arsenal,https://ng.soccerway.com//teams/england/arsena...


In [5]:
# League value(s) of the selected rows, as a plain Python list.
league = select_df.loc[:, 'League'].to_list()
league

['Premier League']

In [6]:
# Scrape Getafe's La Liga page; the call returns two frames that are
# unpacked into the played and the upcoming fixtures.
matches = Match()
last_5, next_5 = matches.last_fixtures(team='Getafe', league='La Liga')

In [7]:
# Number the played fixtures from 1 instead of 0 for display.
last_5.index = np.arange(len(last_5)) + 1
last_5

Unnamed: 0,Date,League,Home team,Outcome,Away team
1,30/12/22,LAL,Getafe,2 - 0,Mallorca
2,03/01/23,CDR,Levante,3 - 2,Getafe
3,08/01/23,LAL,Sevilla,2 - 1,Getafe
4,15/01/23,LAL,Getafe,1 - 2,Espanyol
5,22/01/23,LAL,Barcelona,1 - 0,Getafe


In [8]:
# For fixtures not yet played the 'Outcome' column shows a kick-off time
# (or '-'), so label it 'Time'; then number the rows from 1 for display.
next_5.rename({'Outcome': 'Time'}, axis='columns', inplace=True)
next_5.index = np.arange(len(next_5)) + 1
next_5

Unnamed: 0,Date,League,Home team,Time,Away team
1,28/01/23,LAL,Getafe,21 : 00,Real Betis
2,04/02/23,LAL,Atletico Madrid,18 : 30,Getafe
3,12/02/23,LAL,Getafe,14 : 00,Rayo Vallecano
4,19/02/23,LAL,Getafe,-,Valencia
5,26/02/23,LAL,Villarreal,-,Getafe
