### Import libraries and run BeautifulSoup

In [1]:
# Import Libraries
from bs4 import BeautifulSoup
import requests
import pandas as pd
from urllib.request import urlopen

In [2]:
# Download the G-League landing page and parse it into a BeautifulSoup tree.
url = 'https://www.basketball-reference.com/gleague/'
# requests waits indefinitely by default; bound the wait so that a stalled
# connection cannot hang the notebook.
response = requests.get(url, timeout=30)
# Stop here on an HTTP error (4xx/5xx) rather than parsing an error page and
# silently producing an empty team list in the cells that follow.
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')


### Create the list of team abbreviations and parse a team page with BeautifulSoup

In [3]:
# The statistics we want live on each team's own page, so start by collecting
# the team abbreviations that appear in those page URLs.

# Every <th class="left"> cell on the landing page, in document order.
result1 = soup.find_all('th', class_='left')
team_names = [cell.text for cell in result1]

# Remove two entries by position. The second index (13) is counted AFTER the
# first deletion has already shifted the list one place to the left.
# NOTE(review): presumably these two entries are table-header cells rather than
# team abbreviations -- confirm against the page markup.
del team_names[0]
del team_names[13]

team_names

['SLS',
 'MEM',
 'AUS',
 'STK',
 'TEX',
 'SAN',
 'SFL',
 'ACC',
 'OKL',
 'SBL',
 'WOL',
 'RIO',
 'NAS',
 'WIS',
 'MAI',
 'CAN',
 'GRR',
 'LKL',
 'DBC',
 'CGG',
 'CPK',
 'RAP',
 'FOR',
 'LIN',
 'WES',
 'WCB',
 'EBH',
 'SWR']

In [4]:
# Fetch and parse a single team's season page.
# Index [1] corresponds to Memphis' team (MEM).

team = team_names[1]
year = 2020
url2 = 'https://www.basketball-reference.com/gleague/teams/{}/{}.html'.format(team, year)
# Use the response as a context manager so the connection is closed as soon as
# the page has been parsed; the timeout keeps a stalled request from hanging.
with urlopen(url2, timeout=30) as html2:
    # Name the parser explicitly (the same one used for the landing page).
    # Without it BeautifulSoup picks whichever parser happens to be installed,
    # which can change the resulting tree from one machine to another.
    soup2 = BeautifulSoup(html2, 'html.parser')

### Get Player names for each team 

In [5]:
# Player names are picked out by selecting the <td> cells that carry the
# class "left" on the team page.

result1 = soup2.find_all('td', class_='left')
# The name is read from the link inside each cell.
# NOTE(review): assumes every matching cell contains an <a> element; a cell
# without one would raise AttributeError here -- confirm.
names = [cell.a.text for cell in result1]
names


['Dusty Hannahs',
 'Josh Jackson',
 'Marko Guduric',
 'Jarrod Uthoff',
 'Yuta Watanabe',
 "De'Anthony Melton",
 'Marquis Teague',
 'Matt Mooney',
 'John Konchar',
 'Bruno Caboclo',
 'Shaq Buchanan',
 'Ahmad Caver',
 'Rahlir Hollis-Jefferson',
 'Venky Jois',
 'Nino Johnson',
 'Raynere Thornton',
 'Phil Cofer']

### Get column headers

In [6]:
# Take the first five table rows of the team page; the column headers we need
# sit in the fourth of them (index 3).
souper = soup2.find_all('tr', limit=5)

# Text of every header cell in that row, in column order.
headers = [th.get_text() for th in souper[3].find_all('th')]

# Drop the first header ("Rank"): we do not need that column, and its values
# are not picked up by the <td> search used to collect the player rows.
del headers[0]
headers

['Player',
 'G',
 'GS',
 'MP',
 'FG',
 'FGA',
 'FG%',
 '3P',
 '3PA',
 '3P%',
 '2P',
 '2PA',
 '2P%',
 'FT',
 'FTA',
 'FT%',
 'ORB',
 'DRB',
 'TRB',
 'AST',
 'STL',
 'BLK',
 'TOV',
 'PF',
 'PTS']

In [7]:
# Walk every table row after the first one and collect the text of each <td>
# cell. The result is a list of lists, one inner list per table row; a player
# row (like the one displayed below) holds 25 values.
rows = soup2.find_all('tr')[1:]
player_stats = [[td.get_text() for td in row.find_all('td')] for row in rows]

# The first two entries are team totals and the third is an empty list.
# Remove all three so that the list starts at the first player.
del player_stats[0:3]
player_stats[0]

['Dusty Hannahs',
 '35',
 '22',
 '30.4',
 '7.4',
 '15.6',
 '.472',
 '3.2',
 '7.0',
 '.461',
 '4.1',
 '8.6',
 '.480',
 '1.9',
 '2.1',
 '.907',
 '0.4',
 '1.8',
 '2.2',
 '1.9',
 '0.6',
 '0.1',
 '1.7',
 '2.2',
 '21.1']

In [8]:
# Sanity check: the number of column headers must equal the number of values
# in a player row before the two are combined into a DataFrame.
# The lengths are printed for the reader and then enforced, so a mismatch
# stops the notebook here instead of going unnoticed.
assert player_stats, "no player rows were scraped"
print("The length of column headers is: " + str(len(headers)))
print("The length of columns of data is: " + str(len(player_stats[0])))
assert len(headers) == len(player_stats[0]), "header count does not match the number of values per row"


The length of column headers is: 25
The length of columns of data is: 25


### Create Pandas DataFrame for each team

In [9]:
# Build a DataFrame from the scraped rows, labelled with the scraped headers.
stats_prime = pd.DataFrame(player_stats, columns=headers)
# Keep only the three columns of interest.
stats = stats_prime[["Player", "G", "PTS"]]
# Save the trimmed table as a CSV file named after the team and season
# (the index is not written), then display it.
csv_path = '../csv-files/G-League_Data_Scrape_{}_{}.csv'.format(team, year)
stats.to_csv(csv_path, index=False)
stats

Unnamed: 0,Player,G,PTS
0,Dusty Hannahs,35,21.1
1,Josh Jackson,26,20.4
2,Marko Guduric,1,20.0
3,Jarrod Uthoff,34,19.0
4,Yuta Watanabe,22,17.2
5,De'Anthony Melton,2,17.0
6,Marquis Teague,39,13.1
7,Matt Mooney,24,12.6
8,John Konchar,20,12.2
9,Bruno Caboclo,2,11.5
