# Web scraping Player and Team Links from Basketball-reference

This notebook scrapes player and team names, together with links to their Basketball-Reference profile pages, so they can be reused in other visualizations, ML models, etc.

## Importing Libraries

In [1]:
from requests import get
from bs4 import BeautifulSoup
import re
import pandas as pd
import matplotlib.pyplot as plt

## Scrape Player Names and Links to Profiles

In [2]:
# Collect every player's profile URL and display name from the per-letter
# player index pages. Results are accumulated in two parallel lists:
# player_links[i] is the profile URL of player_names[i].
#
# NOTE(review): 'x' is not in this list — presumably there was no index page
# for it when the notebook was written; confirm before adding it.
last_letters = 'abcdefghijklmnopqrstuvwyz'
player_links = []
player_names = []
for last_name_letter in last_letters:
    # A timeout keeps a stalled connection from hanging the whole scrape, and
    # raise_for_status() reports HTTP errors directly rather than letting them
    # show up later as a confusing parsing failure.
    index_response = get(
        "https://www.basketball-reference.com/players/" + last_name_letter,
        timeout=30,
    )
    index_response.raise_for_status()
    index_soup = BeautifulSoup(index_response.text, "html.parser")

    # Build the href filter once per letter instead of inside the loop header.
    href_pattern = re.compile("/players/" + last_name_letter)
    for link in index_soup.find_all('a', attrs={'href': href_pattern}):
        player_link = "https://www.basketball-reference.com" + link.get('href')

        # Separate names for the profile page so the index page's response
        # and soup are not overwritten while its links are being iterated.
        player_response = get(player_link, timeout=30)
        player_response.raise_for_status()
        player_soup = BeautifulSoup(player_response.text, "html.parser")

        # The name is read from the <span> inside the page's <h1 itemprop="name">.
        heading = str(player_soup.find('h1', {'itemprop': re.compile('name')}))
        name_match = re.search(r'<span>(.*?)</span>', heading)
        if name_match is None:
            # The href filter is loose, so a matched link may not be a player
            # profile. Skip it instead of failing on a missing match.
            print("No player name found, skipping: " + player_link)
            continue

        # Append both values together so the two lists always stay aligned.
        player_links.append(player_link)
        player_names.append(name_match.group(1))

## Save Player dataframe to csv (or reload the saved copy)

In [None]:
# The scraped player data was originally written to disk with the two lines
# below; they are left disabled so the scrape does not have to be re-run.
# data = pd.DataFrame({"Player Name": player_names, "Player Links":player_links})
# data.to_csv("player_links.csv")

# Reload the saved copy. The file is read back with an extra "Unnamed: 0"
# column (apparently the row index written by to_csv above), which is dropped.
data = pd.read_csv("player_links.csv").drop(columns=["Unnamed: 0"])
data

## Scrape Team Names and Links to Profiles

In [None]:
# Abbreviations used to build each team's page URL, one entry per team.
team_abbrs = [
    'ATL', 'BOS', 'NJN', 'CHA', 'CHI', 'CLE', 'DAL', 'DEN', 'DET', 'GSW', 'HOU', 'IND',
    'LAC', 'LAL', 'MEM', 'MIA', 'MIL', 'MIN', 'NOH', 'NYK', 'OKC', 'ORL', 'PHI', 'PHO',
    'POR', 'SAC', 'SAS', 'TOR', 'UTA', 'WAS'
]

# Parallel lists: team_links[i] is the page URL of team_names[i].
team_links = []
team_names = []

# The team name is read from the <span> inside the page's <h1 itemprop="name">.
name_pattern = re.compile(r'<span>(.*?)</span>')

for abbr in team_abbrs:
    team_link = "https://www.basketball-reference.com/teams/" + abbr + "/"

    # A timeout keeps a stalled connection from hanging the loop, and
    # raise_for_status() reports HTTP errors directly rather than letting them
    # show up later as a confusing parsing failure.
    response = get(team_link, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    heading = str(soup.find('h1', {'itemprop': re.compile('name')}))
    name_match = name_pattern.search(heading)
    if name_match is None:
        # Every URL here comes from the fixed list above, so a missing name
        # means the page is not what this code expects. Fail with a message
        # that says which page, instead of an AttributeError on None.
        raise ValueError("Could not find a team name on " + team_link)

    # Append both values together so the two lists always stay aligned.
    team_links.append(team_link)
    team_names.append(name_match.group(1))

## Save dataframe to csv

In [None]:
# Combine the scraped team names and links into a two-column table,
# write it to team_links.csv, then display it as the cell output.
data = pd.DataFrame(
    data={
        "Team Name": team_names,
        "Team Links": team_links,
    }
)
data.to_csv(path_or_buf="team_links.csv")
data