### Import the necessary libraries and run Beautiful Soup

In [1]:
# Dependencies
from bs4 import BeautifulSoup
import requests
import pandas as pd
from urllib.request import urlopen
import numpy as np

In [2]:
# WNBA season we will be analyzing
year = 2019
# URL of the Basketball Reference per-game stats page for that season
url = "https://www.basketball-reference.com/wnba/years/{}_per_game.html".format(year)
# Download the page. The context manager closes the HTTP connection as soon as
# the body has been read, so `html` holds the raw page bytes.
with urlopen(url) as response:
    html = response.read()
# Name the parser explicitly: without it Beautiful Soup guesses one from what is
# installed and emits a warning, so the parsed tree could differ between machines.
soup = BeautifulSoup(html, "html.parser")

### Get the Player Names (Rows)

In [3]:
# Player names are read from the table header cells carrying class "left";
# the name itself is the text of the anchor inside each of those cells.
result1 = soup.find_all('th', class_='left')
names = [header_cell.a.text for header_cell in result1]

# One-column DataFrame (column label 0) with one player name per row
player_names = pd.DataFrame(names)
player_names


Unnamed: 0,0
0,Natalie Achonwa
1,Kayla Alexander
2,Rebecca Allen
3,Jillian Alleyne
4,Kristine Anigwe
...,...
164,Han Xu
165,Jackie Young
166,Tamera Young
167,Amanda Zahui B.


### Get the Column Headers

In [4]:
# The first table row (<tr>) holds the column headers
header_row = soup.find_all('tr', limit=1)[0]
# use get_text() to extract the text of every header cell into a list
headers = [th.get_text() for th in header_row.find_all('th')]
# Drop the first header so the list lines up with the <td> cells collected for
# the stats table. NOTE(review): the dropped header is presumably the player
# column (player names are scraped separately from the <th> cells) -- confirm.
headers = headers[1:]
headers

['Tm',
 'G',
 'GS',
 'MP',
 'FG',
 'FGA',
 'FG%',
 '3P',
 '3PA',
 '3P%',
 '2P',
 '2PA',
 '2P%',
 'eFG%',
 'FT',
 'FTA',
 'FT%',
 'ORB',
 'TRB',
 'AST',
 'STL',
 'BLK',
 'TOV',
 'PF',
 'PTS']

### Make the Pandas Dataframe

In [5]:
# Skip the first row: it is the header row already captured in `headers`
rows = soup.find_all('tr')[1:]
# One list of cell texts per table row; a row without any <td> cells yields an
# empty list. All values are kept as the strings shown on the page.
player_stats = [[td.get_text() for td in row.find_all('td')] for row in rows]
# Rows that produced no cells end up entirely missing, so drop them and
# renumber the remaining rows from 0.
stats = (
    pd.DataFrame(player_stats, columns=headers)
    .dropna(how='all')
    .reset_index(drop=True)
)
stats

Unnamed: 0,Tm,G,GS,MP,FG,FGA,FG%,3P,3PA,3P%,...,FTA,FT%,ORB,TRB,AST,STL,BLK,TOV,PF,PTS
0,IND,30,18,21.2,3.3,6.8,.488,0.1,0.4,.250,...,2.2,.909,1.6,5.2,1.6,0.6,0.7,1.0,2.6,8.7
1,CHI,3,0,6.7,1.0,1.3,.750,0.0,0.0,,...,1.3,.750,0.7,2.3,0.3,0.0,0.0,0.3,1.0,3.0
2,NYL,24,2,17.2,2.7,6.5,.417,1.2,2.8,.426,...,0.7,.813,0.5,2.5,0.7,0.5,0.8,0.7,2.1,7.2
3,MIN,5,0,2.8,0.4,1.2,.333,0.0,0.0,,...,0.0,,0.4,1.0,0.2,0.0,0.2,0.0,0.0,0.8
4,CON,17,0,7.1,0.6,2.1,.314,0.0,0.0,,...,1.0,.706,0.8,1.8,0.2,0.4,0.2,0.6,1.0,2.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
164,NYL,18,0,7.9,1.3,3.2,.414,0.3,0.6,.500,...,0.1,.500,0.1,0.8,0.1,0.2,0.2,0.1,0.3,3.0
165,LVA,34,34,22.6,2.2,6.9,.322,0.4,1.3,.318,...,2.1,.808,0.7,3.3,4.5,0.8,0.4,1.6,1.4,6.6
166,LVA,34,4,18.5,2.0,5.3,.380,0.1,0.5,.313,...,1.8,.639,1.0,3.6,2.0,0.7,0.1,1.4,1.8,5.3
167,NYL,24,23,23.3,3.4,7.2,.468,0.9,2.9,.319,...,1.1,.852,1.1,6.3,0.9,1.1,1.4,1.3,3.3,8.6


In [6]:
# Put the player names next to their stats to get one table for all WNBA players.
# The two frames are matched on their row index, so row i of `player_names`
# is paired with row i of `stats`.
merge_table = pd.concat((player_names, stats), axis="columns")
# The names column arrives with the default label 0; give it a readable name
merge_table2 = merge_table.rename({0: "Player"}, axis="columns")
merge_table2.head()


Unnamed: 0,Player,Tm,G,GS,MP,FG,FGA,FG%,3P,3PA,...,FTA,FT%,ORB,TRB,AST,STL,BLK,TOV,PF,PTS
0,Natalie Achonwa,IND,30,18,21.2,3.3,6.8,0.488,0.1,0.4,...,2.2,0.909,1.6,5.2,1.6,0.6,0.7,1.0,2.6,8.7
1,Kayla Alexander,CHI,3,0,6.7,1.0,1.3,0.75,0.0,0.0,...,1.3,0.75,0.7,2.3,0.3,0.0,0.0,0.3,1.0,3.0
2,Rebecca Allen,NYL,24,2,17.2,2.7,6.5,0.417,1.2,2.8,...,0.7,0.813,0.5,2.5,0.7,0.5,0.8,0.7,2.1,7.2
3,Jillian Alleyne,MIN,5,0,2.8,0.4,1.2,0.333,0.0,0.0,...,0.0,,0.4,1.0,0.2,0.0,0.2,0.0,0.0,0.8
4,Kristine Anigwe,CON,17,0,7.1,0.6,2.1,0.314,0.0,0.0,...,1.0,0.706,0.8,1.8,0.2,0.4,0.2,0.6,1.0,2.0


### Filter the data by team

In [7]:
# Find all the teams in the WNBA, in order of first appearance in the table
teams_prime = stats["Tm"]

# Labels in the "Tm" column that are not actual teams.
# NOTE(review): this cell used to remove the entry at position 6 of the list,
# which only works for one particular row order. That entry is presumably
# 'TOT' (combined row for a player who appeared for several teams) -- confirm
# against the scraped data and extend this set if the site uses other labels.
NON_TEAM_LABELS = {"TOT"}

teams = [
    team_label
    for team_label in teams_prime.drop_duplicates()
    if team_label not in NON_TEAM_LABELS
]
teams

['IND',
 'CHI',
 'NYL',
 'MIN',
 'CON',
 'DAL',
 'WAS',
 'LAS',
 'ATL',
 'PHO',
 'LVA',
 'SEA']

In [8]:
# Create a team-specific data frame with the information we want
team = teams[0]
# Build the row filter from the same frame that is being sliced, instead of
# relying on a different frame happening to share the same row index.
team_rows = merge_table2["Tm"] == team
team_stats = merge_table2.loc[team_rows, ["Player", "Tm", "G", "PTS"]]
# Save a csv file (pandas does not create the ../csv-files directory, so it must already exist)
team_stats.to_csv('../csv-files/WNBA_Data_Scrape_{}_{}.csv'.format(team, year), index=False)

team_stats

Unnamed: 0,Player,Tm,G,PTS
0,Natalie Achonwa,IND,30,8.7
21,Kennedy Burke,IND,31,4.4
49,Candice Dupree,IND,34,11.6
75,Shenise Johnson,IND,17,4.9
79,Paris Kea,IND,11,2.6
80,Betnijah Laney,IND,34,5.6
88,Stephanie Mavunga,IND,24,2.5
90,Erica McCall,IND,15,0.9
93,Teaira McCowan,IND,34,10.0
96,Kelsey Mitchell,IND,34,13.6
