In [1]:
from entities import *

import time
import requests
from bs4 import BeautifulSoup, Comment

from tqdm import tqdm, trange

import pandas as pd
pd.set_option('display.max_columns', None)

In [2]:
URL = "https://www.sports-reference.com/"

In [3]:
def fetch_cbb_page(url, delay=3):
    """
    Fetch a page from Sports Reference with polite rate limiting

    Args:
        url: The URL to fetch
        delay: Seconds to wait before request (respects 20 req/min limit)

    Returns:
        BeautifulSoup object or None if failed
    """
    # Be polite - wait between requests
    time.sleep(delay)

    # Headers to mimic a browser
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }

    try:
        response = requests.get(url, headers=headers)

        if response.status_code == 200:
            print(f"✓ Successfully fetched: {url}")
            return BeautifulSoup(response.content, 'html.parser')
        elif response.status_code == 429:
            print("✗ Rate limited! Wait 60 seconds and try again.")
            return None
        elif response.status_code == 403:
            print("✗ Access forbidden. Your IP may be blocked.")
            return None
        else:
            print(f"✗ Failed with status code: {response.status_code}")
            return None

    except Exception as e:
        print(f"✗ Error: {e}")
        return None

In [4]:
soup = fetch_cbb_page(
    'https://www.sports-reference.com/cbb/seasons/men/2026-school-stats.html')

✓ Successfully fetched: https://www.sports-reference.com/cbb/seasons/men/2026-school-stats.html


In [5]:
school_tds = soup.find_all('td', {'data-stat': 'school_name'})

schools = []
for td in tqdm(school_tds):
    link = td.find('a')

    if link:
        school_dict = {
            'School': link.text,
            'Link': f"{URL}{link['href']}"
        }
        schools.append(school_dict)

print(f"Found {len(schools)} schools")

100%|██████████| 365/365 [00:00<00:00, 80941.15it/s]

Found 365 schools





In [6]:
df = pd.DataFrame(schools)
df

Unnamed: 0,School,Link
0,Abilene Christian,https://www.sports-reference.com//cbb/schools/...
1,Air Force,https://www.sports-reference.com//cbb/schools/...
2,Akron,https://www.sports-reference.com//cbb/schools/...
3,Alabama,https://www.sports-reference.com//cbb/schools/...
4,Alabama A&M,https://www.sports-reference.com//cbb/schools/...
...,...,...
360,Wright State,https://www.sports-reference.com//cbb/schools/...
361,Wyoming,https://www.sports-reference.com//cbb/schools/...
362,Xavier,https://www.sports-reference.com//cbb/schools/...
363,Yale,https://www.sports-reference.com//cbb/schools/...


In [7]:
t = pd.read_html(
    'https://www.sports-reference.com//cbb/schools/abilene-christian/men/2026.html')

a = pd.read_html(
    'https://www.sports-reference.com/cbb/seasons/men/2026-school-stats.html')[0]

In [8]:
d_df = BasketballData(t[0])
p_df = BasketballData(t[6])
record = a[a['Unnamed: 1_level_0', 'School'] == 'Abilene Christian'][a.columns[2:8]]['Overall'].iloc[0]

player_context = PlayerContext(school='Abeline Christian', description_df=d_df, performance_df=p_df, record=record)

In [11]:
team = Team(player_context)

In [17]:
team.df

Unnamed: 0_level_0,#,Height,Weight,Rk,Pos,G,GS,MP,FG,FGA,FG%,3P,3PA,3P%,2P,2PA,2P%,eFG%,FT,FTA,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,ORtg,DRtg,Awards
Player,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1
Bradyn Hubbard,15,79,225.0,1,F,2,1,55,12.2,28.5,0.429,2.0,10.2,0.2,10.2,18.3,0.556,0.464,15.2,16.3,0.938,3.0,7.1,10.2,7.1,3.0,1.0,5.1,6.1,41.7,126.7,88.2,
Joseph Scott,10,81,210.0,5,F,2,1,39,14.3,22.9,0.625,0.0,1.4,0.0,14.3,21.5,0.667,0.625,2.9,2.9,1.0,11.5,21.5,33.0,4.3,0.0,2.9,5.7,5.7,31.5,122.6,80.9,
Chilaydrien Newton,5,75,175.0,2,G,2,2,54,4.1,15.5,0.267,2.1,7.2,0.286,2.1,8.3,0.25,0.333,5.2,5.2,1.0,1.0,8.3,9.3,4.1,2.1,1.0,2.1,4.1,15.5,109.2,89.6,
Zy Wright,0,77,,6,G,2,1,27,12.4,18.6,0.667,4.1,8.3,0.5,8.3,10.4,0.8,0.778,2.1,4.1,0.5,6.2,2.1,8.3,4.1,0.0,0.0,2.1,2.1,31.1,149.8,100.9,
Christian Alston,7,77,165.0,7,G,2,0,25,13.4,29.1,0.462,0.0,4.5,0.0,13.4,24.6,0.545,0.462,4.5,13.4,0.333,0.0,6.7,6.7,2.2,4.5,0.0,2.2,11.2,31.3,96.8,86.0,
Rich Smith,4,76,175.0,4,G,2,2,49,5.7,13.7,0.417,1.1,2.3,0.5,4.6,11.4,0.4,0.458,3.4,6.8,0.5,4.6,3.4,8.0,12.6,5.7,1.1,5.7,5.7,16.0,105.2,85.2,
Ma'Shy Hill,11,81,215.0,8,F,2,1,25,11.2,13.4,0.833,0.0,0.0,,11.2,13.4,0.833,0.833,2.2,8.9,0.25,0.0,8.9,8.9,2.2,0.0,0.0,6.7,8.9,24.6,91.4,94.5,
Yaniel Rivera,2,76,175.0,9,G,1,0,23,9.7,19.4,0.5,7.3,17.0,0.429,2.4,2.4,1.0,0.688,0.0,0.0,,0.0,2.4,2.4,4.9,2.4,0.0,9.7,0.0,26.7,93.4,95.2,
Isaac Dye,24,74,200.0,10,G,2,0,18,9.3,18.6,0.5,6.2,15.5,0.4,3.1,3.1,1.0,0.667,0.0,0.0,,3.1,6.2,9.3,3.1,0.0,3.1,0.0,9.3,24.8,159.6,93.6,
Tylan LEWIS,3,72,200.0,11,G,2,0,18,9.3,15.5,0.6,0.0,0.0,,9.3,15.5,0.6,0.6,0.0,0.0,,6.2,12.4,18.6,6.2,3.1,0.0,0.0,9.3,18.6,154.2,84.3,


In [24]:
# Build ALL Team objects

In [25]:
# Build TABLE with ALL College Players