In [3]:
!pip install pandas
!pip install lxml
!pip install html5lib
!pip install beautifulsoup4



In [22]:
import pandas as pd
from bs4 import BeautifulSoup, NavigableString
import requests
from datetime import datetime

In [3]:
# Parse every HTML <table> on the league-table page into a list of DataFrames.
league_table_url = 'https://www.premierleague.com/tables'
tables = pd.read_html(league_table_url)

In [4]:
# Take every other row of the first parsed table (rows 0, 2, 4, ...), remove the
# columns that are not exported, and save what is left.
# NOTE(review): presumably the skipped odd rows are per-club detail rows - confirm.
unwanted_columns = ['Form', 'Unnamed: 12', 'Next']
first_team = tables[0].iloc[0::2].drop(columns=unwanted_columns)
first_team.to_csv('epl_table.csv', index=False)

In [5]:
# Scrape the results page and save one row per match
# (home team, list of integer scores, away team) to epl_results.csv.
web = 'https://www.skysports.com/premier-league-results'
# The timeout keeps a stalled connection from hanging the kernel, and
# raise_for_status() stops an HTTP error page from being parsed as results.
response = requests.get(web, timeout=30)
response.raise_for_status()
content = response.text
soup = BeautifulSoup(content, 'lxml')

matches = soup.find_all('div', class_='fixres__item')
if not matches:
    # Fail loudly before the CSV is touched instead of with a NameError later.
    raise RuntimeError(f"No 'fixres__item' elements found at {web}")

score = []
home = []
away = []

# Collect the raw text of each match; cleaning happens once, after the loop.
for match in matches:
    score.append(match.find('span', class_='matches__teamscores').get_text())
    home.append(match.find('span', class_='matches__item-col matches__participant matches__participant--side1').get_text())
    away.append(match.find('span', class_='matches__item-col matches__participant matches__participant--side2').get_text())

# Each score string is split on whitespace and only the all-digit tokens are kept.
score_lst = [[int(num) for num in item.split() if num.isdigit()] for item in score]
home_lst = [item.strip() for item in home]
away_lst = [item.strip() for item in away]

dict_epl = {'Home': home_lst, 'Score': score_lst, 'Away': away_lst}
df_epl = pd.DataFrame(dict_epl)
df_epl.to_csv('epl_results.csv', index=False)

In [24]:
# Scrape the fixtures page into df_fix with one row per fixture:
# the text of the date header it sits under, the home team and the away team.
web = 'https://www.skysports.com/premier-league-fixtures'
# The timeout keeps a stalled connection from hanging the kernel, and
# raise_for_status() stops an HTTP error page from being parsed as fixtures.
response = requests.get(web, timeout=30)
response.raise_for_status()
content = response.text
soup = BeautifulSoup(content, 'lxml')

fixture_date = soup.find('div', class_='fixres__body')
if fixture_date is None:
    # Without this check the loop below fails with an opaque TypeError on None.
    raise RuntimeError(f"No 'fixres__body' container found at {web}")

date_lst = []
home = []
away = []

# Walk the container's direct children in document order. An <h4> with class
# 'fixres__header2' sets the current date; each following 'fixres__item' <div>
# is recorded under the most recent date seen.
current_date = None
for element in fixture_date:
    if isinstance(element, NavigableString):
        # Bare text nodes between tags carry no fixture data.
        continue
    if element.name == 'h4' and 'fixres__header2' in element.get('class', []):
        current_date = element.text
    elif element.name == 'div' and 'fixres__item' in element.get('class', []):
        # Fixtures that appear before any date header are skipped.
        if current_date is not None:
            date_lst.append(current_date)
            home.append(element.find('span', class_='matches__item-col matches__participant matches__participant--side1').get_text().strip())
            away.append(element.find('span', class_='matches__item-col matches__participant matches__participant--side2').get_text().strip())

dict_fix = {'date': date_lst, 'Home': home, 'Away': away}
df_fix = pd.DataFrame(dict_fix)

# Read the clock once so the year and the month name come from the same instant.
_now = datetime.now()
current_year = _now.year
current_month = _now.strftime('%B')
first_half = ["January", "February", "March", "April", "May"]
second_half = ["June", "July", "August", "September", "October", "November", "December"]

def add_year(date_str, today=None):
    """Append an inferred year to a 'Weekday DDth Month' header string.

    The year is inferred by splitting the calendar into January-May and
    June-December and comparing the header's month with the current month:

    * both in the same part              -> current year
    * header Jan-May, now Jun-Dec        -> current year + 1
    * header Jun-Dec, now Jan-May        -> current year - 1

    Args:
        date_str: Header text such as 'Saturday 30th December'.
        today: Optional datetime used as "now"; defaults to datetime.now().
            Passing it explicitly makes the result reproducible.

    Returns:
        'Weekday, DD Month YYYY' with the ordinal suffix removed, or
        ``date_str`` unchanged when it is not a string, does not consist of
        exactly three whitespace-separated tokens, or names an unknown month.
    """
    if today is None:
        today = datetime.now()

    # Anything that is not a three-token string takes the same pass-through
    # path as an unknown month, instead of raising on the unpack below.
    if not isinstance(date_str, str):
        return date_str
    parts = date_str.split()
    if len(parts) != 3:
        return date_str
    day, date, month = parts

    # Remove an ordinal suffix only when it directly follows the day number.
    if date[-2:] in ("st", "nd", "rd", "th") and date[:-2].isdigit():
        date = date[:-2]

    first_half_months = ("January", "February", "March", "April", "May")
    second_half_months = ("June", "July", "August", "September",
                          "October", "November", "December")

    # The numeric month avoids depending on the locale's month names.
    now_in_first_half = today.month <= 5

    if month in first_half_months:
        year = today.year if now_in_first_half else today.year + 1
    elif month in second_half_months:
        # NOTE(review): for a list of upcoming fixtures the `year - 1` case
        # looks unreachable in practice; kept as originally written - confirm.
        year = today.year - 1 if now_in_first_half else today.year
    else:
        return date_str

    return f"{day}, {date} {month} {year}"
    
# Rewrite every date header through add_year, then print the resulting frame.
df_fix['date'] = df_fix['date'].map(add_year)

print(df_fix)


                            date                     Home  \
0    Saturday 30th December 2023               Luton Town   
1    Saturday 30th December 2023              Aston Villa   
2    Saturday 30th December 2023           Crystal Palace   
3    Saturday 30th December 2023          Manchester City   
4    Saturday 30th December 2023  Wolverhampton Wanderers   
..                           ...                      ...   
186         Sunday 19th May 2024           Crystal Palace   
187         Sunday 19th May 2024                Liverpool   
188         Sunday 19th May 2024               Luton Town   
189         Sunday 19th May 2024          Manchester City   
190         Sunday 19th May 2024         Sheffield United   

                        Away  
0                    Chelsea  
1                    Burnley  
2                  Brentford  
3           Sheffield United  
4                    Everton  
..                       ...  
186              Aston Villa  
187  Wolverhampton 

In [7]:
# Write the fixtures frame to disk without the positional index column.
fixture_csv_path = 'epl_fixture.csv'
df_fix.to_csv(fixture_csv_path, index=False)