In [9]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from fake_useragent import UserAgent

## BBC Sports Premier League Table

In [15]:
# Scrape the Premier League standings table from BBC Sport into `league_table`.
url = 'https://www.bbc.com/sport/football/premier-league/table'

# A timeout stops the cell from hanging forever on a stalled connection, and
# raise_for_status() surfaces HTTP errors (403/404/5xx) here rather than as a
# confusing parsing failure further down.
page = requests.get(url, timeout=30)
page.raise_for_status()
soup = BeautifulSoup(page.text, 'html.parser')

# NOTE(review): this class string is presumably build-generated and may change
# when the site is redeployed -- update it if the lookup below starts failing.
table = soup.find('table', class_ = "ssrcss-14j0ip6-Table e3bga5w5")
if table is None:
    raise ValueError(
        "League table not found on the page; the table's CSS class has probably changed."
    )

# Column names come from the header cells of the table.
headers = [th.text for th in table.find_all('th')]

# Collect every body row first and build the DataFrame once; appending with
# .loc inside the loop re-allocates the frame on each iteration.
rows = []
for tr in table.find_all('tr')[1:]:  # first <tr> is the header row
    cells = [td.text for td in tr.find_all('td')]
    if cells:  # ignore rows that carry no data cells
        rows.append(cells)

league_table = pd.DataFrame(rows, columns = headers)

# Remove the recent-form column; reassigning (instead of inplace=True) keeps
# the step explicit and chain-friendly.
league_table = league_table.drop(columns = ['Form, Last 6 games, Oldest first'])

league_table

Unnamed: 0,Position,Team,Played,Won,Drawn,Lost,Goals For,Goals Against,Goal Difference,Points
0,1,Liverpool,22,16,5,1,54,21,33,53
1,2,Arsenal,23,13,8,2,44,21,23,47
2,3,Nottingham Forest,23,13,5,5,33,27,6,44
3,4,Manchester City,23,12,5,6,47,30,17,41
4,5,Newcastle United,23,12,5,6,41,27,14,41
5,6,Chelsea,23,11,7,5,45,30,15,40
6,7,AFC Bournemouth,23,11,7,5,41,26,15,40
7,8,Aston Villa,23,10,7,6,34,35,-1,37
8,9,Brighton & Hove Albion,23,8,10,5,35,31,4,34
9,10,Fulham,23,8,9,6,34,31,3,33


## Getting table data from worldfootball

In [8]:
import requests
import pandas as pd
from bs4 import BeautifulSoup

# Define the URL of the webpage containing Premier League winners
url = 'https://www.worldfootball.net/winner/eng-premier-league/'

# Send an HTTP GET request to fetch the webpage content.
# The timeout prevents an indefinite hang, and raise_for_status() fails fast
# on an HTTP error instead of parsing an error page.
page = requests.get(url, timeout=30)
page.raise_for_status()

# Parse the HTML content using BeautifulSoup
soup = BeautifulSoup(page.text, "html.parser")

# Find the specific table with class "standard_tabelle" containing the winners data
table = soup.find("table", class_="standard_tabelle")
if table is None:
    raise ValueError('No table with class "standard_tabelle" was found on the page.')

# Extract column headers from the table
headers = [th.text for th in table.find_all('th')]

# Gather the text of every data row (skipping the first row, which is headers).
# Rows are collected in a list and turned into a DataFrame in one step, which
# avoids re-allocating the frame on every appended row.
rows = []
for tr in table.find_all('tr')[1:]:
    cells = [td.text for td in tr.find_all('td')]
    if cells:  # ignore rows that carry no data cells
        rows.append(cells)

winners = pd.DataFrame(rows, columns=headers)

# Drop the empty-named column if it exists; errors='ignore' makes this a no-op
# (rather than a KeyError) when the page has no such column.
winners = winners.drop(columns=[''], errors='ignore')

# Clean the 'Year' column by removing newline characters
# (regex=False: treat '\n' as a literal string on every pandas version)
winners['Year'] = winners['Year'].str.replace('\n', '', regex=False)

winners


Unnamed: 0,Year,Winner,Country
0,2024,Manchester City,England
1,2023,Manchester City,England
2,2022,Manchester City,England
3,2021,Manchester City,England
4,2020,Liverpool FC,England
...,...,...,...
120,1893,Sunderland AFC,England
121,1892,Sunderland AFC,England
122,1891,Everton FC,England
123,1890,Preston North End,England


## Getting depth chart data from Ourlads (URL in the cell below)


In [15]:
import requests
import pandas as pd
from bs4 import BeautifulSoup
from fake_useragent import UserAgent

# Define the URL of the webpage containing NCAA football depth charts
url = 'https://www.ourlads.com/ncaa-football-depth-charts/depth-chart/army/90038'

# Generate a random user agent to mimic a real browser request
ua = UserAgent()
userAgent = ua.random
head = {'User-Agent': userAgent}

# Send an HTTP GET request to fetch the webpage content.
# The timeout prevents an indefinite hang, and raise_for_status() fails fast
# on an HTTP error instead of parsing an error page.
page = requests.get(url, headers=head, timeout=30)
page.raise_for_status()

# Parse the HTML content using BeautifulSoup
soup = BeautifulSoup(page.content, "html.parser")

# Extract the team name from the webpage
team_div = soup.find('div', {'class': 'pt-team'})
if team_div is None:
    raise ValueError('No <div class="pt-team"> element was found on the page.')
team_name = team_div.text.strip()

# Find the specific table with class "table table-bordered" containing depth chart data
table = soup.find('table', class_='table table-bordered')
if table is None:
    raise ValueError('No table with class "table table-bordered" was found on the page.')

# Extract column headers from the table
headers = [th.text.strip() for th in table.find_all('th')]

# Extract the stripped text of every cell, one list per row of the table body
tbody = table.find('tbody')
if tbody is None:
    raise ValueError('The depth chart table has no <tbody> element.')
rows = [[td.text.strip() for td in tr.find_all('td')] for tr in tbody.find_all('tr')]

# Create a DataFrame from the extracted table data
df = pd.DataFrame(rows, columns=headers)

# Add a new column for team name
df['team_name'] = team_name

# Rows whose 'Pos' value is one of these labels mark the start of a section
section_labels = ['Offense', 'Defense', 'Special Teams']

# Copy the section label into 'field_pos' on those rows, then forward fill it
# onto the rows that follow
df['field_pos'] = df['Pos'].where(df['Pos'].isin(section_labels)).ffill()

# Remove rows whose 'Pos' is a section label or one of 'OFF', 'ST', 'DEF'.
# .copy() makes the result an independent frame, so later column assignments
# do not raise SettingWithCopyWarning.
df = df[~df['Pos'].isin(section_labels + ['OFF', 'ST', 'DEF'])].copy()

# Display the final DataFrame
df


Unnamed: 0,Pos,No.,Player 1,No,Player 2,No.1,Player 3,No.2,Player 4,No.3,Player 5,team_name,field_pos
2,WR,4,"Fortner, Liam JR",87.0,"Reynolds, Casey SR",84.0,"Schurr, Cam SR",,,,,Army Black Knights,Offense
3,LT,60,"Finucane, Connor SR",77.0,"Law, Jordyn SR",,,,,,,Army Black Knights,Offense
4,LG,53,"Katsigiannis, Bill SR",50.0,"Bartosh, Braden JR",,,,,,,Army Black Knights,Offense
5,C,51,"Small, Brady SO",75.0,"Kloska, Kyle SO/TR",,,,,,,Army Black Knights,Offense
6,RG,71,"Gennarelli, Paolo SO",59.0,"Jeffcoat, Will SR",,,,,,,Army Black Knights,Offense
7,RT,65,"Scott, Lucas SR",58.0,"Appleton, Henry SO",,,,,,,Army Black Knights,Offense
8,TE,85,"Crossan, David SR",44.0,"Poloskey, Parker SO",83.0,"Williams, Teddy SO",,,,,Army Black Knights,Offense
9,QB,13,"Daily, Bryson SR",10.0,"Coleman, Dewayne JR",3.0,"Hellums, Cale SO",,,,,Army Black Knights,Offense
10,RB-A,8,"Reed, Hayden JR",33.0,"Rendina, Jake SO",48.0,"Smith, Carson SO",,,,,Army Black Knights,Offense
11,RB-B,22,"Stewart, Miles SR",86.0,"Montesi, Will SR",,,,,,,Army Black Knights,Offense
