In [1]:

import re
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup

In [2]:

# Basketball Reference page for the Washington Wizards (WAS/2021).
# A plain string is enough here: the URL has no placeholders to interpolate.
wiz_url = 'https://www.basketball-reference.com/teams/WAS/2021.html'

# Send a GET request to wiz_url. The timeout (in seconds) keeps the cell from
# hanging indefinitely on a stalled connection, and raise_for_status() turns an
# HTTP error response into an exception here instead of letting an error page
# be parsed further down as if it were the team page.
wiz_res = requests.get(wiz_url, timeout=30)
wiz_res.raise_for_status()

In [3]:
# BeautifulSoup parses the content of an HTML document, in this case the body
# of wiz_res, using the lxml parser.
wiz_soup = BeautifulSoup(wiz_res.content, 'lxml')

# .find() searches for a tag with the given attributes and returns the first
# match, or None when nothing matches.
wiz_per_game = wiz_soup.find(name='table', attrs={'id': 'per_game'})

# Stop here with a clear message rather than failing in a later cell with an
# AttributeError raised on None.
if wiz_per_game is None:
    raise ValueError("No <table id='per_game'> found in the page fetched from wiz_url")

In [4]:
# Output column -> value of the `data-stat` attribute on the <td> that holds it.
# The insertion order of this mapping is the column order of the DataFrame.
stat_columns = {
    'Age': 'age',
    'Min PG': 'mp_per_g',
    'Field Goal %': 'fg_pct',
    'Rebounds PG': 'trb_per_g',
    'Assists PG': 'ast_per_g',
    'Steals PG': 'stl_per_g',
    'Blocks PG': 'blk_per_g',
    'Turnovers PG': 'tov_per_g',
    'Points PG': 'pts_per_g',
}

# One dictionary per player, collected in a list so that pandas can build the
# DataFrame from all records at once.
wiz_stats = []

# The first 'tr' is the table's title head, so it is skipped.
for row in wiz_per_game.find_all('tr')[1:]:
    # The player's name is the text of the first hyperlink in the row.
    player = {'Name': row.find('a').text.strip()}

    # Every remaining value is the raw text of the matching <td> cell.
    for column, data_stat in stat_columns.items():
        player[column] = row.find('td', {'data-stat': data_stat}).text

    wiz_stats.append(player)

pd.DataFrame(wiz_stats)

Unnamed: 0,Name,Age,Min PG,Field Goal %,Rebounds PG,Assists PG,Steals PG,Blocks PG,Turnovers PG,Points PG
0,Russell Westbrook,32,36.4,0.439,11.5,11.7,1.4,0.4,4.8,22.2
1,Bradley Beal,27,35.8,0.485,4.7,4.4,1.2,0.4,3.1,31.3
2,Rui Hachimura,22,31.5,0.478,5.5,1.4,0.8,0.1,1.2,13.8
3,Thomas Bryant,23,27.1,0.648,6.1,1.5,0.4,0.8,1.1,14.3
4,Dāvis Bertāns,28,25.7,0.404,2.9,0.9,0.6,0.2,0.6,11.5
5,Deni Avdija,20,23.3,0.417,4.9,1.2,0.6,0.3,0.6,6.3
6,Raul Neto,28,21.9,0.468,2.4,2.3,1.1,0.1,0.8,8.7
7,Ish Smith,32,21.0,0.434,3.4,3.9,0.7,0.3,0.9,6.7
8,Robin Lopez,32,19.1,0.633,3.8,0.8,0.2,0.6,1.1,9.0
9,Jerome Robinson,23,17.9,0.295,2.2,1.5,0.7,0.4,1.1,4.9


In [5]:

# One {'Name', 'Twitter Handle'} dictionary per player.
twitter_handle = []

# The first 'tr' is the table's title head, so it is skipped.
for row in wiz_per_game.find_all('tr')[1:]:
    # The first hyperlink in the row supplies both the player's name (its text)
    # and the address of the player's own page (its href).
    player_anchor = row.find('a')
    player = {'Name': player_anchor.text.strip()}

    # urljoin resolves the href against the site root. Plain concatenation with
    # a base that ends in '/' yields a doubled slash whenever the href itself
    # starts with '/'.
    player_url = urljoin('https://www.basketball-reference.com/', player_anchor.attrs['href'])

    # Fetch the player's page and narrow it down to the schema.org/Person <div>.
    # The timeout (seconds) avoids hanging on a stalled connection and
    # raise_for_status() stops the loop on an HTTP error response.
    player_rest = requests.get(player_url, timeout=30)
    player_rest.raise_for_status()
    player_soup = BeautifulSoup(player_rest.content, 'lxml')
    player_info = player_soup.find(name='div', attrs={'itemtype': 'https://schema.org/Person'})

    # Collect the target of every hyperlink inside player_info. Anchors without
    # an href are left out, and a page without the Person <div> yields no links.
    player_links = []
    if player_info is not None:
        player_links = [link.get('href') for link in player_info.find_all('a') if link.get('href')]

    # The Twitter link is not assumed to sit at a fixed position in the list:
    # the first link whose target mentions 'twitter' is used. When there is
    # none, the value is set to 'Not Listed'.
    twitter_url = next((href for href in player_links if 'twitter' in href), None)
    if twitter_url is not None:
        player['Twitter Handle'] = twitter_url.replace('https://twitter.com/', '')
    else:
        player['Twitter Handle'] = 'Not Listed'

    twitter_handle.append(player)

pd.DataFrame(twitter_handle)

Unnamed: 0,Name,Twitter Handle
0,Russell Westbrook,russwest44
1,Bradley Beal,RealDealBeal23
2,Rui Hachimura,rui_8mura
3,Thomas Bryant,nolimittb31
4,Dāvis Bertāns,DBertans_42
5,Deni Avdija,Not Listed
6,Raul Neto,RaulTogni
7,Ish Smith,IshSmith
8,Robin Lopez,rolopez42
9,Jerome Robinson,Rome_Coldbucks1


In [6]:
# One {'Name', 'Height', 'Weight (Lbs)', 'Position'} dictionary per player.
height_weight_position = []

# The first 'tr' is the table's title head, so it is skipped.
for row in wiz_per_game.find_all('tr')[1:]:
    # The first hyperlink in the row supplies both the player's name (its text)
    # and the address of the player's own page (its href).
    player_anchor = row.find('a')
    player = {'Name': player_anchor.text.strip()}

    # Fetch and parse the player's own page. urljoin avoids the doubled slash
    # that plain concatenation produces when the href starts with '/'; the
    # timeout (seconds) and raise_for_status() keep a stalled or failed request
    # from going unnoticed.
    player_url = urljoin('https://www.basketball-reference.com/', player_anchor.attrs['href'])
    player_rest = requests.get(player_url, timeout=30)
    player_rest.raise_for_status()
    player_soup = BeautifulSoup(player_rest.content, 'lxml')
    player_info = player_soup.find(name='div', attrs={'itemtype': 'https://schema.org/Person'})

    # The regular expressions below run on the string form of the list of <p>
    # tags inside player_info. '(.*)' captures the text between the two known
    # substrings written on either side of it. A page without the Person <div>
    # is searched as an empty string, so every field falls back to the default.
    s = str(player_info.find_all('p')) if player_info is not None else ''
    weight = re.search('\"weight\">(.*)lb</span>', s)
    position = re.search('Position:\n  </strong>\n (.*)\n\n', s)
    height = re.search('\"height\">(.*)</span>,\xa0<span itemprop="weight', s)

    # re.search returns None when the pattern is not found; such a field is
    # reported as 'Not Listed' instead of aborting the whole loop.
    player['Height'] = height.group(1).strip() if height else 'Not Listed'
    player['Weight (Lbs)'] = weight.group(1).strip() if weight else 'Not Listed'
    player['Position'] = position.group(1).strip() if position else 'Not Listed'

    height_weight_position.append(player)

pd.DataFrame(height_weight_position)

Unnamed: 0,Name,Height,Weight (Lbs),Position
0,Russell Westbrook,6-3,200,Point Guard
1,Bradley Beal,6-4,207,Shooting Guard
2,Rui Hachimura,6-8,230,Power Forward
3,Thomas Bryant,6-10,248,Center
4,Dāvis Bertāns,6-10,225,Power Forward
5,Deni Avdija,6-9,210,Small Forward
6,Raul Neto,6-1,180,Point Guard
7,Ish Smith,6-0,175,Point Guard
8,Robin Lopez,7-0,281,Center
9,Jerome Robinson,6-4,190,Shooting Guard


In [7]:
# URL of the Basketball Reference page for the Washington Wizards.
# A plain string is enough here: the URL has no placeholders to interpolate.
wiz_url = 'https://www.basketball-reference.com/teams/WAS/2021.html'

# Send a GET request to wiz_url. The timeout (seconds) avoids hanging on a
# stalled connection and raise_for_status() turns an HTTP error response into
# an exception instead of letting an error page be parsed below.
wiz_res = requests.get(wiz_url, timeout=30)
wiz_res.raise_for_status()

# BeautifulSoup parses the content of an HTML document, in this case wiz_res.
wiz_soup = BeautifulSoup(wiz_res.content, 'lxml')

# .find() searches for a tag with the given attributes and returns the first
# match, or None when nothing matches.
wiz_per_game = wiz_soup.find(name='table', attrs={'id': 'per_game'})
if wiz_per_game is None:
    raise ValueError("No <table id='per_game'> found in the page fetched from wiz_url")

# Output column -> value of the `data-stat` attribute on the <td> that holds it.
# The insertion order of this mapping is the column order of the DataFrame.
stat_columns = {
    'Age': 'age',
    'Min PG': 'mp_per_g',
    'Field Goal %': 'fg_pct',
    'Rebounds PG': 'trb_per_g',
    'Assists PG': 'ast_per_g',
    'Steals PG': 'stl_per_g',
    'Blocks PG': 'blk_per_g',
    'Turnovers PG': 'tov_per_g',
    'Points PG': 'pts_per_g',
}

# One dictionary per player, later turned into a pd.DataFrame in a single call.
wiz_info = []

# The first 'tr' is the table's title head, so it is skipped.
for row in wiz_per_game.find_all('tr')[1:]:
    # The first hyperlink in the row supplies both the player's name (its text)
    # and the address of the player's own page (its href).
    player_anchor = row.find('a')
    player = {'Name': player_anchor.text.strip()}

    # Per-game statistics: the raw text of each matching <td> cell in the row.
    for column, data_stat in stat_columns.items():
        player[column] = row.find('td', {'data-stat': data_stat}).text

    # Fetch and parse the player's own page, narrowed down to the
    # schema.org/Person <div>. urljoin avoids the doubled slash that plain
    # concatenation produces when the href starts with '/'.
    player_url = urljoin('https://www.basketball-reference.com/', player_anchor.attrs['href'])
    player_rest = requests.get(player_url, timeout=30)
    player_rest.raise_for_status()
    player_soup = BeautifulSoup(player_rest.content, 'lxml')
    player_info = player_soup.find(name='div', attrs={'itemtype': 'https://schema.org/Person'})

    # Targets of every hyperlink inside player_info. Anchors without an href
    # are left out, and a page without the Person <div> yields no links.
    player_links = []
    if player_info is not None:
        player_links = [link.get('href') for link in player_info.find_all('a') if link.get('href')]

    # The Twitter link is not assumed to sit at a fixed position in the list:
    # the first link whose target mentions 'twitter' is used. When there is
    # none, the value is set to 'Not Listed'.
    twitter_url = next((href for href in player_links if 'twitter' in href), None)
    if twitter_url is not None:
        player['Twitter Handle'] = twitter_url.replace('https://twitter.com/', '')
    else:
        player['Twitter Handle'] = 'Not Listed'

    # Height, weight and position are pulled with regular expressions from the
    # string form of the list of <p> tags inside player_info. '(.*)' captures
    # the text between the two known substrings written on either side of it.
    s = str(player_info.find_all('p')) if player_info is not None else ''

    weight = re.search('\"weight\">(.*)lb</span>', s)
    position = re.search('Position:\n  </strong>\n (.*)\n\n', s)
    height = re.search('\"height\">(.*)</span>,\xa0<span itemprop="weight', s)

    # re.search returns None when the pattern is not found; such a field is
    # reported as 'Not Listed' instead of aborting the whole loop.
    player['Height'] = height.group(1).strip() if height else 'Not Listed'
    player['Weight (Lbs)'] = weight.group(1).strip() if weight else 'Not Listed'
    player['Position'] = position.group(1).strip() if position else 'Not Listed'

    wiz_info.append(player)

pd.DataFrame(wiz_info)

Unnamed: 0,Name,Age,Min PG,Field Goal %,Rebounds PG,Assists PG,Steals PG,Blocks PG,Turnovers PG,Points PG,Twitter Handle,Height,Weight (Lbs),Position
0,Russell Westbrook,32,36.4,0.439,11.5,11.7,1.4,0.4,4.8,22.2,russwest44,6-3,200,Point Guard
1,Bradley Beal,27,35.8,0.485,4.7,4.4,1.2,0.4,3.1,31.3,RealDealBeal23,6-4,207,Shooting Guard
2,Rui Hachimura,22,31.5,0.478,5.5,1.4,0.8,0.1,1.2,13.8,rui_8mura,6-8,230,Power Forward
3,Thomas Bryant,23,27.1,0.648,6.1,1.5,0.4,0.8,1.1,14.3,nolimittb31,6-10,248,Center
4,Dāvis Bertāns,28,25.7,0.404,2.9,0.9,0.6,0.2,0.6,11.5,DBertans_42,6-10,225,Power Forward
5,Deni Avdija,20,23.3,0.417,4.9,1.2,0.6,0.3,0.6,6.3,Not Listed,6-9,210,Small Forward
6,Raul Neto,28,21.9,0.468,2.4,2.3,1.1,0.1,0.8,8.7,RaulTogni,6-1,180,Point Guard
7,Ish Smith,32,21.0,0.434,3.4,3.9,0.7,0.3,0.9,6.7,IshSmith,6-0,175,Point Guard
8,Robin Lopez,32,19.1,0.633,3.8,0.8,0.2,0.6,1.1,9.0,rolopez42,7-0,281,Center
9,Jerome Robinson,23,17.9,0.295,2.2,1.5,0.7,0.4,1.1,4.9,Rome_Coldbucks1,6-4,190,Shooting Guard
