In [1]:
import requests
import json
from bs4 import BeautifulSoup
import pandas as pd
from collections import defaultdict
import csv

# Schema 
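- Team, Abbrev, Year, Playoff, Standing
    - Str, Str, Int, Int (1 if the team made the playoffs, else 0), Int (1 = Finals, 2 = Conference Finals, 3 = Conference Semifinals, 4 = First Round, 0 = missed the playoffs)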
    

- Player, Team, Year, PPG, AST, TRB, 3P%, 2P%, MP, GP
    - Str, Str, Int, float, float, float, float, float, float, int
    

# Team

### Playoff Standings

In [48]:
# Download the 2020 playoffs summary page. Note that `url` holds the HTTP
# response object, not a URL string.
url = requests.get('https://www.basketball-reference.com/playoffs/NBA_2020.html#all_all_playoffs')
# Fail here with a clear HTTP error instead of later with an opaque
# AttributeError when the expected table is missing from an error page.
url.raise_for_status()
soup = BeautifulSoup(url.content, 'html.parser')

In [49]:
# Map each playoff team to the last round it appeared in:
# 4 = First Round, 3 = Conference Semifinals, 2 = Conference Finals, 1 = Finals.
# Teams never assigned here read back as 0 because of defaultdict(int).
dictionary = defaultdict(int)

# Round labels that are matched by substring, with the standing each assigns.
substring_rounds = (
    ('First Round', 4),
    ('Conference Semifinals', 3),
    ('Conference Finals', 2),
)

# Walk the table bottom-up so that later rounds, which sit higher in the
# table, overwrite the standing recorded for earlier rounds.
playoff_rows = soup.find('table', id='all_playoffs').find_all('tr')
for series_row in reversed(playoff_rows):
    cells = series_row.find_all('td')
    # Only rows with exactly three cells are treated as series rows.
    if len(cells) != 3:
        continue

    playoff_round = cells[0].text
    # The matchup cell's text is split on newlines; the two team names are
    # taken from the first and third pieces.
    matchup_lines = cells[1].text.split('\n')
    team1 = matchup_lines[0].strip()
    team2 = matchup_lines[2].strip()

    standing = next(
        (rank for label, rank in substring_rounds if label in playoff_round),
        None,
    )
    # 'Finals' must match exactly so it is not confused with 'Conference Finals'.
    if standing is None and playoff_round == 'Finals':
        standing = 1
    if standing is None:
        continue

    dictionary[team1] = standing
    dictionary[team2] = standing

print(dictionary)

defaultdict(<class 'int'>, {'Los Angeles Lakers': 1, 'Portland Trail Blazers': 4, 'Los Angeles Clippers': 3, 'Dallas Mavericks': 4, 'Houston Rockets': 3, 'Oklahoma City Thunder': 4, 'Denver Nuggets': 2, 'Utah Jazz': 4, 'Toronto Raptors': 3, 'Brooklyn Nets': 4, 'Milwaukee Bucks': 3, 'Orlando Magic': 4, 'Miami Heat': 1, 'Indiana Pacers': 4, 'Boston Celtics': 2, 'Philadelphia 76ers': 4})


### Team - Year - Playoff

In [50]:
# Download the 2020 league summary page. Note that `url` holds the HTTP
# response object, not a URL string.
url = requests.get("https://www.basketball-reference.com/leagues/NBA_2020.html#")
# Fail here with a clear HTTP error instead of later with an opaque
# AttributeError when the standings containers are missing from an error page.
url.raise_for_status()
soup = BeautifulSoup(url.content, 'html.parser')

In [51]:
# Conference standings containers, looked up by element id.
east = soup.find('div', id="all_confs_standings_E")
west = soup.find('div', id="all_confs_standings_W")

# The <h1> span holds the season label; dropping its last three characters
# and adding 1 gives the season's ending year (presumably the label looks
# like "2019-20" -- confirm against the page).
season_label = soup.find('h1').span.text
year = int(season_label[:-3]) + 1

### Script for creating CSV

In [52]:
# Write one CSV row per team: name, abbreviation, season year, playoff flag,
# and playoff standing.
# `with` guarantees the file is closed even if a row fails to parse, and
# newline='' is what the csv module requires so that no blank lines appear
# between rows on platforms that translate line endings.
with open('NBA_Teams_2020.csv', 'w', newline='', encoding='utf-8') as csv_file:
    csv_writer = csv.writer(csv_file)
    csv_writer.writerow(['Team', 'Abbrev', 'Year', 'Playoff', 'Standing'])

    # East rows first, then West, matching the original output order.
    for conference in (east, west):
        for row in conference.find('table').find('tbody').find_all('tr'):
            team = row.th.text
            # The third '/'-separated piece of the team link is used as the
            # abbreviation (e.g. '/teams/PHI/...' -> 'PHI').
            abbrev = row.th.a.get('href').split('/')[2]
            # An asterisk in the standings name is treated as the playoff marker.
            playoff = 1 if "*" in team else 0
            team = team.replace("*", "")
            # `dictionary` is a defaultdict(int), so teams without a recorded
            # playoff standing are written as 0.
            csv_writer.writerow([team, abbrev, year, playoff, dictionary[team]])


# Players

In [2]:
# URL template: {0} is filled with a site-relative href to build an absolute URL.
link = "https://www.basketball-reference.com{0}"

In [34]:
# Download the 2021 league summary page. Note that `url` holds the HTTP
# response object, not a URL string.
url = requests.get('https://www.basketball-reference.com/leagues/NBA_2021.html#all_team-stats-per_game')
# Fail here with a clear HTTP error instead of later with an opaque
# AttributeError when the standings containers are missing from an error page.
url.raise_for_status()
soup = BeautifulSoup(url.content, 'html.parser')

In [35]:
# Locate the standings containers for each conference by element id.
east = soup.find('div', id="all_confs_standings_E")
west = soup.find('div', id="all_confs_standings_W")

# Derive the season's ending year from the <h1> span text: drop the last
# three characters and add 1 (presumably the text looks like "2020-21" --
# confirm against the page).
season_label = soup.find('h1').span.text
year = int(season_label[:-3]) + 1

In [36]:
# Gather the absolute URL of every team page into one list: East teams first,
# then West. Both conferences share the same extraction logic, so a single
# comprehension replaces the two copy-pasted loops. `find_all` is used
# instead of the deprecated `findAll` alias, matching the rest of the notebook.
team_links = [
    link.format(standings_row.th.a['href'])
    for conference in (east, west)
    for standings_row in conference.find('tbody').find_all('tr')
]

In [37]:
# Scrape every team's per-game table and write one CSV row per player.
# `with` closes the file even if a request or a row fails; newline='' is
# required by the csv module; and an explicit UTF-8 encoding keeps names with
# non-ASCII characters from failing on platforms whose default encoding
# cannot represent them.
with open('NBA_Players_2021.csv', 'w', newline='', encoding='utf-8') as csv_file:
    csv_writer = csv.writer(csv_file)
    csv_writer.writerow(['name','team', 'year', 'ppg', 'apg', 'rpg', '3p%', '2p%', 'mp', 'gp'])

    for page in team_links:
        url = requests.get(page)
        # Surface HTTP errors directly instead of failing later with an
        # AttributeError on a missing table.
        url.raise_for_status()
        soup = BeautifulSoup(url.content, 'html.parser')

        # Team abbreviation and year are read from the team page URL:
        # the 5th and 6th '/'-separated pieces (year = first four characters).
        team = page.split('/')[4]
        year = page.split('/')[5][:4]

        for row in soup.find('div', {'id': 'all_per_game'}).tbody.find_all('tr'):
            # Look the cells up once per row instead of once per column.
            cells = row.find_all('td')
            # Skip rows with no data cells (e.g. a repeated header row, if the
            # table has one) rather than raising IndexError.
            if not cells:
                continue

            # Columns are addressed by position; negative indices count from
            # the end of the row.
            name = cells[0].a.text
            PPG = cells[-1].text
            AST = cells[-6].text
            TRB = cells[-7].text
            three_pp = cells[10].text
            two_pp = cells[13].text
            MP = cells[4].text
            GP = cells[2].text
            print(name,team, year, PPG, AST, TRB, three_pp, two_pp, MP, GP)
            csv_writer.writerow([name,team, year, PPG, AST, TRB, three_pp, two_pp, MP, GP])

Tobias Harris PHI 2021 20.1 2.8 6.7 .461 .554 33.8 16
Ben Simmons PHI 2021 13.0 8.0 8.6 .167 .540 33.0 17
Joel Embiid PHI 2021 27.7 2.8 11.1 .400 .579 32.0 15
Seth Curry PHI 2021 14.5 3.0 2.0 .509 .569 28.9 12
Danny Green PHI 2021 9.4 2.3 3.6 .371 .410 28.5 19
Shake Milton PHI 2021 14.7 3.2 1.9 .317 .549 25.4 16
Tyrese Maxey PHI 2021 9.7 1.9 2.3 .293 .515 19.4 19
Mike Scott PHI 2021 4.9 0.1 2.8 .250 .625 19.1 8
Isaiah Joe PHI 2021 6.4 1.3 1.6 .370 .333 18.2 10
Dwight Howard PHI 2021 6.2 0.7 7.7 .429 .600 17.3 19
Furkan Korkmaz PHI 2021 6.8 0.8 2.0 .333 .333 16.3 8
Dakota Mathias PHI 2021 6.0 1.6 0.9 .308 .500 15.4 8
Matisse Thybulle PHI 2021 2.9 0.7 1.3 .235 .500 15.2 16
Tony Bradley PHI 2021 5.8 0.5 7.0 .000 .552 15.0 6
Paul Reed PHI 2021 3.6 0.6 2.0 .000 .474 11.0 5
Vincent Poirier PHI 2021 1.3 0.3 1.0  .333 4.7 3
Terrance Ferguson PHI 2021 0.0 0.3 0.2 .000  3.7 6
Khris Middleton MIL 2021 21.9 5.8 6.5 .441 .566 33.0 17
Jrue Holiday MIL 2021 15.5 5.3 4.5 .358 .543 32.8 17
Giannis Ante

Gordon Hayward CHO 2021 23.6 3.6 5.2 .427 .548 35.4 17
Devonte' Graham CHO 2021 13.9 6.1 3.3 .342 .333 34.1 18
Terry Rozier CHO 2021 18.9 2.9 4.0 .421 .508 33.8 18
P.J. Washington CHO 2021 12.6 2.9 6.7 .292 .496 29.1 18
Bismack Biyombo CHO 2021 6.7 1.7 6.9 .000 .537 25.8 18
Miles Bridges CHO 2021 9.5 1.7 5.1 .391 .514 25.6 18
LaMelo Ball CHO 2021 11.1 5.8 5.9 .308 .472 24.4 18
Cody Zeller CHO 2021 7.2 1.8 7.0 .000 .609 18.4 5
Caleb Martin CHO 2021 5.6 1.4 2.6 .242 .583 14.7 16
Cody Martin CHO 2021 3.5 1.3 2.3 .200 .722 11.4 11
Malik Monk CHO 2021 4.0 1.6 0.8 .444 .400 10.4 5
Jalen McDaniels CHO 2021 3.9 0.6 2.1 .333 .579 9.2 9
Nick Richards CHO 2021 1.3 0.0 1.0  1.000 3.0 4
Vernon Carey CHO 2021 1.0 0.3 1.0 .000 .500 2.0 3
Fred VanVleet TOR 2021 18.9 6.6 4.5 .356 .444 36.2 18
Kyle Lowry TOR 2021 18.0 6.4 6.2 .379 .488 36.1 15
Pascal Siakam TOR 2021 18.1 4.6 8.2 .254 .515 35.5 15
OG Anunoby TOR 2021 14.6 1.6 5.8 .433 .550 34.8 17
Norman Powell TOR 2021 14.3 1.4 2.4 .406 .450 25.9 18
Chr

Kyle Anderson MEM 2021 12.5 4.1 7.2 .333 .477 29.7 13
Brandon Clarke MEM 2021 13.2 1.8 6.2 .259 .511 29.3 13
Dillon Brooks MEM 2021 15.2 3.1 3.9 .305 .411 29.2 13
Ja Morant MEM 2021 22.6 7.0 2.0 .286 .585 29.2 5
Jonas Valančiūnas MEM 2021 14.4 1.8 10.4 .286 .563 26.8 12
Tyus Jones MEM 2021 8.4 5.4 2.8 .229 .519 25.8 13
Desmond Bane MEM 2021 8.6 1.1 2.8 .489 .450 21.9 13
Grayson Allen MEM 2021 8.0 2.0 2.9 .360 .409 21.5 12
Xavier Tillman MEM 2021 8.6 1.6 3.9 .200 .638 20.8 8
De'Anthony Melton MEM 2021 8.1 2.4 2.3 .353 .514 19.9 7
John Konchar MEM 2021 5.5 2.7 4.0 .375 .500 17.5 6
Gorgui Dieng MEM 2021 8.1 1.6 4.4 .440 .588 16.8 10
Sean McDermott MEM 2021 2.3 1.0 1.0 .200 .500 14.0 3
Tim Frazier MEM 2021 1.3 2.3 1.3 .000 .333 11.0 3
Devin Booker PHO 2021 22.9 4.1 3.5 .341 .544 35.7 14
Mikal Bridges PHO 2021 14.9 2.0 5.5 .432 .568 33.2 17
Chris Paul PHO 2021 14.8 8.5 4.6 .298 .506 32.5 17
Deandre Ayton PHO 2021 14.2 1.9 12.4 .222 .605 31.5 17
Jae Crowder PHO 2021 11.0 2.5 5.0 .350 .486 29