In [3]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import requests
import csv
from datetime import date, timedelta

In [None]:
# Gather data from the last 3 seasons (2022-2025 NCAA seasons) and compile into a CSV for easier access and further data manipulation
def daterange(start_date, end_date):
    for n in range((end_date - start_date).days + 1):
        yield start_date + timedelta(n)

def fetch_games_for_date(date_obj):
    date_str = date_obj.strftime('%Y%m%d')
    url = (
        'https://site.api.espn.com/apis/site/v2/'
        'sports/basketball/mens-college-basketball/scoreboard'
        f'?dates={date_str}'
    )
    resp = requests.get(url)
    resp.raise_for_status()
    data = resp.json()
    games = []
    for event in data.get('events', []):
        comp = event.get('competitions', [{}])[0]
        teams = comp.get('competitors', [])
        if len(teams) != 2:
            continue
        away = next(t for t in teams if t.get('homeAway') == 'away')
        home = next(t for t in teams if t.get('homeAway') == 'home')
        games.append({
            'date': date_obj.isoformat(),
            'away_team': away['team']['displayName'],
            'home_team': home['team']['displayName'],
            'away_score': away.get('score'),
            'home_score': home.get('score'),
            'arena': comp.get('venue', {}).get('fullName')
        })
    return games

# Fetch and save games for the past 3 seasons only (Nov → Apr)
from datetime import date

# Define exact season windows
season_ranges = [
    (date(2022, 11, 1), date(2023, 4, 15)),
    (date(2023, 11, 1), date(2024, 4, 15)),
    (date(2024, 11, 1), date(2025, 4, 15)),
]

all_games = []
for start_date, end_date in season_ranges:
    for single_date in daterange(start_date, end_date):
        daily = fetch_games_for_date(single_date)
        all_games.extend(daily)
        if daily:
            print(f"  – {len(daily)} games on {single_date}")

# Write CSV
with open('ncaa_mbb_games_past3seasons.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.DictWriter(
        f,
        fieldnames=[
            'date', 'away_team', 'home_team',
            'away_score', 'home_score', 'arena'
        ]
    )
    writer.writeheader()
    writer.writerows(all_games)

print(f"Saved {len(all_games)} games to ncaa_mbb_games_past3seasons.csv")


  – 24 games on 2022-11-07
  – 10 games on 2022-11-08
  – 26 games on 2022-11-09
  – 5 games on 2022-11-10
  – 16 games on 2022-11-11
  – 59 games on 2022-11-12
  – 1 games on 2022-11-13
  – 7 games on 2022-11-14
  – 6 games on 2022-11-15
  – 3 games on 2022-11-16
  – 3 games on 2022-11-17
  – 5 games on 2022-11-18
  – 1 games on 2022-11-19
  – 5 games on 2022-11-20
  – 1 games on 2022-11-21
  – 69 games on 2022-11-22
  – 3 games on 2022-11-23
  – 22 games on 2022-11-24
  – 3 games on 2022-11-25
  – 1 games on 2022-11-26
  – 2 games on 2022-11-27
  – 2 games on 2022-11-28
  – 3 games on 2022-11-29
  – 2 games on 2022-11-30
  – 2 games on 2022-12-01
  – 4 games on 2022-12-02
  – 6 games on 2022-12-03
  – 7 games on 2022-12-04
  – 2 games on 2022-12-05
  – 6 games on 2022-12-06
  – 4 games on 2022-12-07
  – 2 games on 2022-12-08
  – 1 games on 2022-12-09
  – 11 games on 2022-12-10
  – 2 games on 2022-12-11
  – 2 games on 2022-12-12
  – 3 games on 2022-12-13
  – 3 games on 2022-12-14
  – 

In [7]:
ncaa_games = pd.read_csv('ncaa_mbb_games_past3seasons.csv')
ncaa_games

Unnamed: 0,date,away_team,home_team,away_score,home_score,arena
0,2022-11-07,UNC Wilmington Seahawks,North Carolina Tar Heels,56,69,Dean E. Smith Center
1,2022-11-07,North Florida Ospreys,Gonzaga Bulldogs,63,104,McCarthey Athletic Center
2,2022-11-07,Northern Colorado Bears,Houston Cougars,36,83,Fertitta Center
3,2022-11-07,Howard Bison,Kentucky Wildcats,63,95,Rupp Arena
4,2022-11-07,Mississippi Valley State Delta Devils,Baylor Bears,53,117,Ferrell Center
...,...,...,...,...,...,...
3086,2025-03-30,Tennessee Volunteers,Houston Cougars,50,69,Lucas Oil Stadium
3087,2025-03-30,Michigan State Spartans,Auburn Tigers,64,70,State Farm Arena
3088,2025-04-05,Florida Gators,Auburn Tigers,79,73,Alamodome
3089,2025-04-05,Houston Cougars,Duke Blue Devils,70,67,Alamodome


In [None]:
import numpy as np
# Take 25 random games to ensure randomness and nonbiased results, extract frames out of these games only 
rows_to_idx = np.random.randin
rows_to_idx= np.random.randint(0, 3090, 25)
rows_to_idx

array([3014, 1750, 1263, 1290, 2319,  910,  423, 3041,  279,  706, 2858,
       2414, 1460,  639,  527,  304, 1066, 2012, 2629,  893, 1517,  956,
       1622,  758, 2334])

In [14]:
print(ncaa_games.iloc[3014])
print('---------------------')
print(ncaa_games.iloc[1750])
print('---------------------')
print(ncaa_games.iloc[1263])
print('---------------------')
print(ncaa_games.iloc[1290])
print('---------------------')
print(ncaa_games.iloc[2319])
print('---------------------')
print(ncaa_games.iloc[910])
print('---------------------')
print(ncaa_games.iloc[423])
print('---------------------')
print(ncaa_games.iloc[3041])
print('---------------------')
print(ncaa_games.iloc[279])
print('---------------------')
print(ncaa_games.iloc[706])
print('---------------------')
print(ncaa_games.iloc[2858])
print('---------------------')
print(ncaa_games.iloc[2414])
print('---------------------')
print(ncaa_games.iloc[1460])
print('---------------------')
print(ncaa_games.iloc[639])
print('---------------------')
print(ncaa_games.iloc[527])
print('---------------------')
print(ncaa_games.iloc[304])
print('---------------------')
print(ncaa_games.iloc[1066])
print('---------------------')
print(ncaa_games.iloc[2012])
print('---------------------')
print(ncaa_games.iloc[2629])
print('---------------------')
print(ncaa_games.iloc[893])
print('---------------------')
print(ncaa_games.iloc[1517])
print('---------------------')
print(ncaa_games.iloc[956])
print('---------------------')
print(ncaa_games.iloc[1622])
print('---------------------')
print(ncaa_games.iloc[758])
print('---------------------')
print(ncaa_games.iloc[2334])
print('---------------------')

date                2025-03-15
away_team     Arizona Wildcats
home_team      Houston Cougars
away_score                  64
home_score                  72
arena          T-Mobile Center
Name: 3014, dtype: object
---------------------
date                   2024-01-31
away_team     Boise State Broncos
home_team        New Mexico Lobos
away_score                     86
home_score                     78
arena                     The Pit
Name: 1750, dtype: object
---------------------
date                    2023-11-17
away_team             Mercer Bears
home_team     Alabama Crimson Tide
away_score                      67
home_score                      98
arena             Coleman Coliseum
Name: 1263, dtype: object
---------------------
date                    2023-11-22
away_team     Tennessee Volunteers
home_team          Kansas Jayhawks
away_score                      60
home_score                      69
arena          Stan Sheriff Center
Name: 1290, dtype: object
--------------------