# Getting Player Info

In [1]:
import requests

In [1]:
import os
import string
import time

import requests

# The site exposes one player index page per lowercase letter (see the URL template below).
alphabet = list(string.ascii_lowercase)

url_player_info = "https://www.basketball-reference.com/players/{}/"

# Pause between consecutive requests.
# NOTE(review): the site appears to throttle rapid scraping; 3 s is a
# conservative guess - confirm against its current crawl policy.
REQUEST_DELAY_SECONDS = 3.0

# The raw pages are cached on disk so the parsing cell can be re-run offline.
# Create the target directory up front instead of failing on the first write.
os.makedirs('player_info', exist_ok=True)

for letter in alphabet:
    url = url_player_info.format(letter)
    # A timeout keeps a stalled connection from hanging the notebook forever.
    data = requests.get(url, timeout=30)
    # Fail loudly instead of saving an error page (404, 429, ...) as if it were data.
    data.raise_for_status()
    with open('player_info/{}.html'.format(letter), 'w') as f:
        f.write(data.text)
    time.sleep(REQUEST_DELAY_SECONDS)

In [3]:
from io import StringIO

from bs4 import BeautifulSoup
import pandas as pd

# One DataFrame per letter page; concatenated once after the loop.
dfs = []

for letter in alphabet:
    with open('player_info/{}.html'.format(letter)) as f:
        page = f.read()
    soup = BeautifulSoup(page, 'html.parser')
    players_html = soup.find(id="players")
    if players_html is None:
        # Without this guard str(None) == "None" would be handed to read_html
        # and fail with an error that does not mention the offending file.
        raise ValueError(
            'No element with id="players" in player_info/{}.html'.format(letter)
        )
    # Wrap the markup in StringIO: recent pandas releases deprecate passing
    # literal HTML strings to read_html.
    players_table = pd.read_html(StringIO(str(players_html)))[0]
    dfs.append(players_table)

# ignore_index=True gives every player a unique row label instead of
# restarting at 0 for each letter.
players = pd.concat(dfs, ignore_index=True)
players.head()

Unnamed: 0,Player,From,To,Pos,Ht,Wt,Birth Date,Colleges
0,Alaa Abdelnaby,1991,1995,F-C,6-10,240.0,"June 24, 1968",Duke
1,Zaid Abdul-Aziz,1969,1978,C-F,6-9,235.0,"April 7, 1946",Iowa State
2,Kareem Abdul-Jabbar*,1970,1989,C,7-2,225.0,"April 16, 1947",UCLA
3,Mahmoud Abdul-Rauf,1991,2001,G,6-1,162.0,"March 9, 1969",LSU
4,Tariq Abdul-Wahad,1998,2003,F,6-6,223.0,"November 3, 1974","Michigan, San Jose State"


In [19]:
# Remove asterisk from player names.
# regex=False pins literal matching: the default for `regex` differs between
# pandas versions, and a bare '*' is a quantifier when treated as a pattern.
players['Player'] = players['Player'].str.replace('*', '', regex=False)
# Parse Birth Date strings (e.g. "June 24, 1968") into datetimes, which
# display and serialize as YYYY-MM-DD.
players['Birth Date'] = pd.to_datetime(players['Birth Date'])
# Change height to inches
def height_to_inches(height):
    """Convert a 'feet-inches' height string such as '6-10' to total inches.

    Parameters
    ----------
    height : str or any
        Height formatted as '<feet>-<inches>'. Non-string values (a missing
        value such as NaN, or a number produced by a previous run of this
        cell) are returned unchanged so the conversion can be re-applied
        safely.

    Returns
    -------
    int
        Total inches for string input; otherwise the input itself.

    Raises
    ------
    ValueError
        If a string is not exactly two integers separated by one '-'.
    """
    if not isinstance(height, str):
        # Nothing to parse: missing value or already-converted number.
        return height
    feet, inches = height.split('-')
    return int(feet) * 12 + int(inches)
# Convert every 'feet-inches' height in the Ht column to inches.
players['Ht'] = players['Ht'].apply(height_to_inches)
# index=False keeps the row index out of the CSV file.
players.to_csv('players.csv', index=False)
# Only the last expression of a cell is rendered, so the former bare
# `players.shape` line (whose value was discarded) has been dropped.
players.head()

Unnamed: 0,Player,From,To,Pos,Ht,Wt,Birth Date,Colleges
0,Alaa Abdelnaby,1991,1995,F-C,82,240.0,1968-06-24,Duke
1,Zaid Abdul-Aziz,1969,1978,C-F,81,235.0,1946-04-07,Iowa State
2,Kareem Abdul-Jabbar,1970,1989,C,86,225.0,1947-04-16,UCLA
3,Mahmoud Abdul-Rauf,1991,2001,G,73,162.0,1969-03-09,LSU
4,Tariq Abdul-Wahad,1998,2003,F,78,223.0,1974-11-03,"Michigan, San Jose State"


# Getting Team Info

In [11]:
import os

url_team_info = "https://www.basketball-reference.com/teams/"

# Make sure the cache directory exists before writing into it.
os.makedirs('team_info', exist_ok=True)

# A timeout keeps a stalled connection from hanging the notebook forever.
data = requests.get(url_team_info, timeout=30)
# Fail loudly instead of saving an error page (404, 429, ...) as if it were data.
data.raise_for_status()
with open('team_info/team_info.html', 'w') as f:
    f.write(data.text)

In [12]:
from io import StringIO

with open('team_info/team_info.html') as f:
    page = f.read()
soup = BeautifulSoup(page, 'html.parser')
teams_active_html = soup.find(id="all_teams_active")
teams_defunct_html = soup.find(id="all_teams_defunct")

# Without this guard str(None) == "None" would be handed to read_html and
# fail with an error that does not say which section was missing.
for element_id, element in (
    ("all_teams_active", teams_active_html),
    ("all_teams_defunct", teams_defunct_html),
):
    if element is None:
        raise ValueError(
            'No element with id="{}" in team_info/team_info.html'.format(element_id)
        )

# Wrap the markup in StringIO: recent pandas releases deprecate passing
# literal HTML strings to read_html.
teams_active = pd.read_html(StringIO(str(teams_active_html)))[0]
# Every row of the active table is flagged "Y"; as the rendered output shows,
# that table also carries rows for earlier names of a franchise
# (e.g. St. Louis Hawks), which therefore get "Y" as well.
teams_active['Active'] = "Y"
teams_defunct = pd.read_html(StringIO(str(teams_defunct_html)))[0]
teams_defunct['Active'] = "N"
# ignore_index=True gives each row a unique label across both tables.
teams = pd.concat([teams_active, teams_defunct], ignore_index=True)
teams.head()

Unnamed: 0,Franchise,Lg,From,To,Yrs,G,W,L,W/L%,Plyfs,Div,Conf,Champ,Active
0,Atlanta Hawks,NBA,1949-50,2023-24,75,5878,2900,2978,0.493,49,12.0,0,1,Y
1,Atlanta Hawks,NBA,1968-69,2023-24,56,4460,2202,2258,0.494,36,6.0,0,0,Y
2,St. Louis Hawks,NBA,1955-56,1967-68,13,1005,553,452,0.55,12,6.0,0,1,Y
3,Milwaukee Hawks,NBA,1951-52,1954-55,4,281,91,190,0.324,0,0.0,0,0,Y
4,Tri-Cities Blackhawks,NBA,1949-50,1950-51,2,132,54,78,0.409,1,0.0,0,0,Y


In [14]:
# Persist the combined active + defunct team table; the row index is not written.
teams.to_csv(path_or_buf='teams.csv', index=False)

# Getting Player Statistics

### Per Game

In [2]:
# Season years to scrape: 1947 through 2023 inclusive.
years = [season for season in range(1947, 2024)]

# URL templates; both are filled with (league code, season year) via .format().
url_player_stats_leagues = (
    "https://www.basketball-reference.com"
    "/leagues/{}_{}_per_game.html"
)
url_player_stats_playoffs = (
    "https://www.basketball-reference.com"
    "/playoffs/{}_{}_per_game.html"
)

In [3]:
import os
import time

# Pause after each request, as in the original loop.
REQUEST_DELAY_SECONDS = 2.5

# (URL template, output path template) for the two pages fetched per
# league-season; both templates are filled with (league, year).
per_game_downloads = (
    (url_player_stats_leagues, 'player_stats/per_game/{}_{}_per_game.html'),
    (url_player_stats_playoffs, 'player_stats/per_game/{}_{}_playoffs_per_game.html'),
)

# Make sure the cache directory exists before writing into it.
os.makedirs('player_stats/per_game', exist_ok=True)

for year in years:
    print(year)
    # League codes requested for each season: "BAA" before 1950, both "ABA"
    # and "NBA" for 1968-1976, and "NBA" alone otherwise.
    if year < 1950:
        leagues = ["BAA"]
    elif 1968 <= year <= 1976:
        leagues = ["ABA", "NBA"]
    else:
        leagues = ["NBA"]
    for league in leagues:
        for url_template, path_template in per_game_downloads:
            url = url_template.format(league, year)
            # A timeout keeps a stalled connection from hanging the notebook forever.
            data = requests.get(url, timeout=30)
            # Stop at the first failed request instead of saving an error page
            # (404, 429, ...) as if it were data and continuing to hit the site.
            data.raise_for_status()
            time.sleep(REQUEST_DELAY_SECONDS)
            with open(path_template.format(league, year), 'w') as f:
                f.write(data.text)

1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
