In [None]:
# Code to scrape SportsReference Conference Page to assign conferences

import time
from io import StringIO

import pandas as pd
import requests

# Conferences to scrape. Each key is the display name that ends up in the
# "conference" column of the result; each value is the slug substituted into
# BASE to build that conference's schools-page URL.
CONF_MAP = {
    # full_name: slug
    "Atlantic Coast Conference": "acc",
    "America East Conference": "america-east",
    "American Conference": "american",
    "Atlantic 10 Conference": "atlantic-10",
    "Atlantic Sun Conference": "atlantic-sun",
    "Big 12 Conference": "big-12",
    "Big East Conference": "big-east",
    "Big Sky Conference": "big-sky",
    "Big South Conference": "big-south",
    "Big Ten Conference": "big-ten",
    "Big West Conference": "big-west",
    "Coastal Athletic Association": "coastal",
    "Conference USA": "cusa",
    "Horizon League": "horizon",
    "Ivy League": "ivy",
    "Metro Atlantic Athletic Conference": "maac",
    "Mid-American Conference": "mac",
    "Mid-Eastern Athletic Conference": "meac",
    "Missouri Valley Conference": "mvc",
    "Mountain West Conference": "mwc",
    # NOTE(review): the only abbreviated key in the map; presumably
    # "Northeast Conference" -- confirm before relying on the display name.
    "NEC": "nec",
    "Ohio Valley Conference": "ovc",
    "Patriot League": "patriot",
    "Southeastern Conference": "sec",
    "Southern Conference": "southern",
    "Southland Conference": "southland",
    "Summit League": "summit",
    "Sun Belt Conference": "sun-belt",
    # NOTE(review): SWAC is usually expanded as "Southwestern Athletic
    # Conference" -- confirm. The saved team_ids.csv already contains this
    # spelling, so renaming the key would change downstream values.
    "Southwest Athletic Conference": "swac",
    "Western Athletic Conference": "wac",
    "West Coast Conference": "wcc",
}

# URL template for a conference's men's schools page; "{}" is filled with a
# slug from CONF_MAP by fetch_conference_teams.
BASE = "https://www.sports-reference.com/cbb/conferences/{}/men/schools.html"

def fetch_conference_teams(full_name, slug, sleep_seconds=5, season=2026):
    """Fetch the schools listed for one conference whose "To" year equals ``season``.

    Downloads the conference's schools page (``BASE`` formatted with ``slug``),
    parses the HTML table with id ``schools``, and keeps only the rows whose
    "To" column equals ``season`` (presumably the last season the school
    played in the conference -- confirm against the page).

    Parameters
    ----------
    full_name : str
        Conference display name; written to the ``conference`` column.
    slug : str
        Conference slug; substituted into ``BASE`` and written to the
        ``conference_slug`` column.
    sleep_seconds : float, default 5
        Seconds to pause after a successful fetch and parse, before returning.
    season : int, default 2026
        Value the "To" column must equal for a row to be kept.

    Returns
    -------
    pandas.DataFrame
        Columns ``team_name``, ``conference`` and ``conference_slug``, one row
        per matching school. The index is the row position in the scraped table.

    Raises
    ------
    requests.HTTPError
        If the page request returns an error status.
    ValueError
        If pandas finds no table with id ``schools`` in the response.
    KeyError
        If the table lacks a "To" or "School" column.

    Notes
    -----
    The delay is skipped when any of the above exceptions is raised.
    """
    url = BASE.format(slug)
    headers = {"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)"}
    r = requests.get(url, headers=headers, timeout=30)
    r.raise_for_status()

    # pandas >= 2.1 deprecates passing literal HTML to read_html; a file-like
    # wrapper works on both older and newer versions.
    schools = pd.read_html(StringIO(r.text), attrs={"id": "schools"})[0]

    # Non-numeric "To" cells are coerced to NaN, so they never equal `season`
    # and drop out of the selection.
    last_season = pd.to_numeric(schools["To"], errors="coerce")
    current = (
        schools.loc[last_season == season]
        .rename(columns={"School": "team_name"})
        .assign(conference=full_name, conference_slug=slug)
    )

    time.sleep(sleep_seconds)  # polite delay
    return current[["team_name", "conference", "conference_slug"]]

# Sports Reference's bot policy blocks clients that send more than roughly
# 20 requests per minute, so the pause between conference pages must stay
# above 3 seconds. The previous 2-second delay risked HTTP 429 responses.
REQUEST_DELAY_SECONDS = 4

# One request per conference in CONF_MAP; each call returns that conference's
# current teams as a small DataFrame.
rows = []
for full_name, slug in CONF_MAP.items():
    rows.append(
        fetch_conference_teams(full_name, slug, sleep_seconds=REQUEST_DELAY_SECONDS)
    )

# Stack the per-conference frames into one table with a fresh 0..n-1 index.
teams_conf_df = pd.concat(rows, ignore_index=True)
teams_conf_df


In [7]:
import os
from pathlib import Path

import pandas as pd

# Directory holding the scraper's data files. The default is the original
# machine-specific location; set the NCAAB_DATA_DIR environment variable to
# run this notebook from another checkout or machine.
DATA_DIR = Path(
    os.environ.get(
        "NCAAB_DATA_DIR",
        "/Users/mavinjames/Projects/Basketball_Modeling/NCAAB_Sports_Reference_Scraper/data",
    )
)

team_ids = pd.read_csv(DATA_DIR / "team_ids.csv")
# pandas labels a header-less first column "Unnamed: 0" (typically an index
# that was saved with the file); drop it when present, otherwise do nothing.
team_ids = team_ids.drop(columns=["Unnamed: 0"], errors="ignore")
team_ids

Unnamed: 0,team_id,school_name,school_slug,conference,conference_slug
0,1,Abilene Christian,abilene-christian,Western Athletic Conference,wac
1,2,Air Force,air-force,Mountain West Conference,mwc
2,3,Akron,akron,Mid-American Conference,mac
3,4,Alabama,alabama,Southeastern Conference,sec
4,5,Alabama A&M,alabama-am,Southwest Athletic Conference,swac
...,...,...,...,...,...
360,361,Wright State,wright-state,Horizon League,horizon
361,362,Wyoming,wyoming,Mountain West Conference,mwc
362,363,Xavier,xavier,Big East Conference,big-east
363,364,Yale,yale,Ivy League,ivy
