# HTML Parsing with BeautifulSoup: A Mini Project
* Scrape data from a public website
* Organize the data as a dataframe
* Export the data as an Excel spreadsheet
* URL: https://www.bbc.com/sport/football/premier-league/top-scorers

In [25]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from openpyxl.workbook import Workbook

In [27]:
# Address of the BBC Sport page that holds the Premier League scorers table.
url = (
    "https://www.bbc.com/sport/football/premier-league/top-scorers"
)

In [29]:
# One accumulator list per scraped column; each name gets its own
# independent empty list.
player_names, team_names, goals, assists, num_matches, shots = (
    [] for _ in range(6)
)

In [31]:
# Download the top-scorers page, pull one row of stats per player out of the
# HTML table into the module-level column lists, and assemble `player_df`.
# On a failed request the error is printed and `player_df` is NOT created.
try:
    # A timeout keeps the cell from hanging forever on a stalled connection.
    response = requests.get(url, timeout=10)
    # This has to be *called*: the bare attribute reference is a no-op, so
    # 4xx/5xx error pages used to fall through to the parser unnoticed.
    response.raise_for_status()
except requests.RequestException as e:
    print(e)
else:
    soup = BeautifulSoup(response.content, 'html.parser')
    print("Successful..!")

    # Start from empty columns so that re-running this cell does not append
    # a second copy of every row to the shared lists.
    for column in (player_names, team_names, goals, assists, num_matches, shots):
        column.clear()

    # NOTE(review): the "ssrcss-..." class names look build-generated and may
    # change whenever the site is redeployed - confirm the selectors still match.
    players = soup.find('tbody').find_all("tr", class_="ssrcss-qqhdqi-TableRowBody e1icz100")
    for player in players:
        player_name = player.find('div', class_="ssrcss-m6ah29-PlayerName e1n8xy5b1").get_text(strip=True)
        team_name = player.find('div', class_="ssrcss-qvpga1-TeamsSummary e1n8xy5b0").get_text(strip=True)
        goal_scored = int(player.find("div", class_="ssrcss-18ap757-CellWrapper ef9ipf0").get_text(strip=True))

        # The remaining numeric cells share one class and are picked by position.
        # NOTE(review): the positions 0 / 2 / -3 assume the page's current
        # column order - verify against the live table.
        stats = player.find_all("div", class_="ssrcss-1vo7v3r-CellWrapper ef9ipf0")
        assists_made = int(stats[0].get_text(strip=True))
        matches_played = int(stats[2].get_text(strip=True))
        shots_taken = int(stats[-3].get_text(strip=True))

        goals.append(goal_scored)
        team_names.append(team_name)
        player_names.append(player_name)
        assists.append(assists_made)
        num_matches.append(matches_played)
        shots.append(shots_taken)

    # Column lists are index-aligned: position i in every list is one player.
    data = {
        "player": player_names,
        "team": team_names,
        "matches": num_matches,
        "Goals": goals,
        "Assists": assists,
        "Shots": shots,
    }

    player_df = pd.DataFrame(data)
        

Successful..!


In [32]:
# Bare expression at the end of a notebook cell: displays the scraped table.
player_df

Unnamed: 0,player,team,matches,Goals,Assists,Shots
0,E. Haaland,Man City,12,14,1,50
1,Igor Thiago,Brentford,12,9,0,28
2,D. Welbeck,Brighton,12,7,0,18
3,A. Semenyo,Bournemouth,11,6,3,23
4,J. Mateta,Crystal Palace,12,6,0,33
5,Richarlison,Spurs,12,5,2,20
6,B. Mbeumo,Man Utd,12,5,1,29
7,João Pedro,Chelsea,12,4,3,21
8,E. Eze,Arsenal,11,4,2,31
9,Pedro Neto,Chelsea,12,4,2,18


In [35]:
# Print a structural summary: index range, column names, non-null counts,
# dtypes and memory usage.
player_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 40 entries, 0 to 39
Data columns (total 6 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   player   40 non-null     object
 1   team     40 non-null     object
 2   matches  40 non-null     int64 
 3   Goals    40 non-null     int64 
 4   Assists  40 non-null     int64 
 5   Shots    40 non-null     int64 
dtypes: int64(4), object(2)
memory usage: 2.0+ KB


In [37]:
# Summary statistics (count, mean, std, min, quartiles, max) for the
# numeric columns.
player_df.describe()

Unnamed: 0,matches,Goals,Assists,Shots
count,40.0,40.0,40.0,40.0
mean,11.2,4.075,1.0,19.725
std,1.042679,2.05548,0.9337,8.249281
min,9.0,3.0,0.0,7.0
25%,10.75,3.0,0.0,15.0
50%,12.0,3.0,1.0,17.5
75%,12.0,4.0,2.0,24.25
max,12.0,14.0,3.0,50.0


### Exporting our scraped data into an Excel spreadsheet

In [39]:
# Write the table to an .xlsx file (relative path, so it lands in the current
# working directory). index=False leaves the DataFrame's row index out of
# the sheet.
player_df.to_excel('EPL Top scorers.xlsx', index=False)

In [56]:
# Inspect the raw list of scraped player names.
player_names

['E. Haaland',
 'Igor Thiago',
 'D. Welbeck',
 'A. Semenyo',
 'J. Mateta',
 'Richarlison',
 'B. Mbeumo',
 'João Pedro',
 'E. Eze',
 'Pedro Neto',
 'Mohamed Salah',
 'J. Anthony',
 'E. Fernández',
 'I. Ndiaye',
 'J. Kroupi',
 'C. Wilson',
 'N. Woltemade',
 'W. Isidor',
 'V. Gyökeres',
 'L. Trossard',
 'E. Buendía',
 'C. Gakpo',
 'Bruno Guimarães',
 'M. Rogers',
 'D. Malen',
 'H. Ekitiké',
 'Casemiro',
 'R. Gravenberch',
 'I. Sarr',
 'M. Caicedo',
 'M. Tavernier',
 'M. Gibbs-White',
 'K. Schade',
 'J. Bowen',
 'L. Nmecha',
 'Z. Flemming',
 'H. Barnes',
 'B. Saka',
 'Lucas Paquetá',
 'M. van de Ven']

In [74]:
# Inspect the raw list of scraped team names (same order as player_names).
team_names

['Man City',
 'Brentford',
 'Brighton',
 'Bournemouth',
 'Crystal Palace',
 'Spurs',
 'Man Utd',
 'Chelsea',
 'Arsenal',
 'Chelsea',
 'Liverpool',
 'Burnley',
 'Chelsea',
 'Everton',
 'Bournemouth',
 'West Ham',
 'Newcastle',
 'Sunderland',
 'Arsenal',
 'Arsenal',
 'Aston Villa',
 'Liverpool',
 'Newcastle',
 'Aston Villa',
 'Aston Villa',
 'Liverpool',
 'Man Utd',
 'Liverpool',
 'Crystal Palace',
 'Chelsea',
 'Bournemouth',
 'Nottm Forest',
 'Brentford',
 'West Ham',
 'Leeds',
 'Burnley',
 'Newcastle',
 'Arsenal',
 'West Ham',
 'Spurs']

In [80]:
# Inspect the raw list of scraped goal counts (same order as player_names).
goals

[14,
 9,
 7,
 6,
 6,
 5,
 5,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3]