# College Basketball Code
## Dan McDonough
## February 25th, 2021

The purpose of this notebook is to compile Four Factors stats for D1 NCAAM teams ahead of March Madness. My goal is to write a function that produces a very condensed, style-of-play game preview to digest before watching a game.

### 1. Scrape Data

In [1]:
import requests
import urllib.request
import time
from bs4 import BeautifulSoup
import pandas as pd
import random
import timeit
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt
plt.style.use('ggplot')
import seaborn as sns
import scipy as sp

I've scraped the data from Sports Reference, which maintains tables of advanced stats for each team.

In [2]:
# Scrape each team's own advanced stats from the Sports Reference 2022 school-stats page.
url = 'https://www.sports-reference.com/cbb/seasons/2022-advanced-school-stats.html'
response = requests.get(url, timeout=30)
# Stop on an HTTP error (e.g. rate limiting) instead of parsing an error page into an empty table.
response.raise_for_status()
soup = BeautifulSoup(response.text, "html.parser")

datarows = soup.find_all('tr')

# Output column -> position of that stat among a data row's <td> cells.
stat_positions = {'SRS': 5, 'SOS': 6, 'Pace': 20, 'OFF': 21, 'FTR': 22, 'EFG': 29, 'TOV': 30, 'ORB': 31}

records = []
for row in datarows:
    cells = row.find_all('td')  # looked up once per row and reused for every stat
    team_links = cells[0].find_all('a') if cells else []
    if not team_links:
        # Header rows (at the top and repeated inside the table) are recognised by
        # having no team link, rather than by hard-coded row numbers that go stale
        # whenever the number of listed teams changes.
        # NOTE(review): assumes every real data row links the team name in its first <td>.
        continue
    # decode_contents() returns the anchor's inner markup with entities still escaped
    # (e.g. "Texas A&amp;M-Corpus Christi"); that is the exact form the saved CSV and
    # the game_summary() calls later in this notebook use, so it is kept on purpose.
    record = {'Team': team_links[0].decode_contents()}
    for column, position in stat_positions.items():
        # Values stay as text here; they become numeric when the CSV is read back.
        record[column] = cells[position].get_text()
    records.append(record)

# Passing `columns` fixes the column order and keeps the frame well-formed even if no rows parsed.
stats_df = pd.DataFrame(records, columns=['Team', *stat_positions])

In [3]:
# Show the first two scraped rows as a quick check that the columns parsed as expected.
stats_df.head(2)

Unnamed: 0,Team,SRS,SOS,Pace,OFF,FTR,EFG,TOV,ORB
0,Abilene Christian,2.67,-2.09,71.8,104.7,0.374,0.492,14.9,29.2
1,Air Force,-5.24,1.76,63.4,93.1,0.268,0.509,19.6,19.6


In [4]:
# Scrape what each team's opponents did against it from the Sports Reference 2022 opponent-stats page.
url = 'https://www.sports-reference.com/cbb/seasons/2022-advanced-opponent-stats.html'
response = requests.get(url, timeout=30)
# Stop on an HTTP error (e.g. rate limiting) instead of parsing an error page into an empty table.
response.raise_for_status()
soup = BeautifulSoup(response.text, "html.parser")

datarows = soup.find_all('tr')

# Output column -> position of that stat among a data row's <td> cells.
opp_stat_positions = {'SRS': 5, 'SOS': 6, 'Pace': 20, 'DEF': 21, 'Opp FTR': 22, 'Opp EFG': 29,
                      'Opp TOV': 30, 'Opp ORB': 31}

opp_records = []
for row in datarows:
    cells = row.find_all('td')  # looked up once per row and reused for every stat
    team_links = cells[0].find_all('a') if cells else []
    if not team_links:
        # Header rows (at the top and repeated inside the table) are recognised by
        # having no team link, rather than by hard-coded row numbers that go stale
        # whenever the number of listed teams changes.
        # NOTE(review): assumes every real data row links the team name in its first <td>.
        continue
    # decode_contents() returns the anchor's inner markup with entities still escaped
    # (e.g. "Texas A&amp;M-Corpus Christi"), so the names match the ones the rest of
    # this notebook merges on and looks up.
    record = {'Team': team_links[0].decode_contents()}
    for column, position in opp_stat_positions.items():
        # Values stay as text here; they become numeric when the CSV is read back.
        record[column] = cells[position].get_text()
    opp_records.append(record)

# Passing `columns` fixes the column order and keeps the frame well-formed even if no rows parsed.
opp_stats_df = pd.DataFrame(opp_records, columns=['Team', *opp_stat_positions])

In [5]:
# Show the last two scraped rows as a quick check that the end of the table parsed too.
opp_stats_df.tail(2)

Unnamed: 0,Team,SRS,SOS,Pace,DEF,Opp FTR,Opp EFG,Opp TOV,Opp ORB
356,Yale,0.06,-1.98,70.3,97.5,0.309,0.486,16.5,26.1
357,Youngstown State,-6.24,-7.4,67.5,102.6,0.28,0.499,15.3,27.8


In [6]:
# Attach each team's opponent (defensive) columns to its own offensive row.
# SRS/SOS/Pace are taken from stats_df only, so they are left out of the right-hand side.
# validate='one_to_one' makes pandas raise MergeError if a team name appears more than
# once on either side, instead of silently multiplying rows.
# The default inner join keeps only teams present in both tables.
combined_df = stats_df.merge(
    opp_stats_df[['Team', 'DEF', 'Opp FTR', 'Opp EFG', 'Opp TOV', 'Opp ORB']],
    on='Team',
    validate='one_to_one',
)

Save file for future use

In [7]:
# Write the merged table to disk (without the row index) so it can be reloaded without re-scraping.
combined_df.to_csv('college.csv',index=False)

In [8]:
# Reload the saved table from disk.
combined_df = pd.read_csv('college.csv')

# FTR and EFG (own and opponent) are stored as fractions such as 0.374, while TOV and
# ORB are already on a 0-100 scale; multiply by 100 so all four factors share a scale.
for rate_column in ('FTR', 'EFG', 'Opp FTR', 'Opp EFG'):
    combined_df[rate_column] = combined_df[rate_column] * 100

### 2. Write Summary Function

In [9]:
def game_summary(away,home):
    """Build a three-row matchup preview: the away team, the home team, and their average.

    Reads the module-level ``combined_df``. The two rows are aligned so that each
    column describes one side of the ball for the matchup:

    * ``Away *`` columns hold the away team's own offensive numbers (row 0) next to
      what the home team allows its opponents (row 1).
    * ``Home *`` columns hold what the away team allows (row 0) next to the home
      team's own offensive numbers (row 1).

    The ``Average`` row is the column-wise mean of those two rows.

    Parameters
    ----------
    away, home : str
        Team names exactly as they appear in ``combined_df['Team']``.

    Returns
    -------
    pandas.DataFrame
        Rows ``[away, home, 'Average']``; every stat column is cast to ``int``,
        which truncates toward zero rather than rounding.

    Raises
    ------
    ValueError
        If either team name is not present in ``combined_df['Team']``.
    """
    base_cols = ['Team', 'SRS', 'SOS', 'Pace']
    own_cols = ['OFF', 'FTR', 'EFG', 'TOV', 'ORB']
    allowed_cols = ['DEF', 'Opp FTR', 'Opp EFG', 'Opp TOV', 'Opp ORB']
    matchup_cols = base_cols + ['Away Rtg', 'Away FTR', 'Away EFG', 'Away TOV', 'Away ORB',
                                'Home Rtg', 'Home FTR', 'Home EFG', 'Home TOV', 'Home ORB']

    def _team_row(team, source_cols):
        # Select by column name (not position) and copy, so relabelling never touches combined_df.
        row = combined_df.loc[combined_df['Team'] == team, source_cols].copy()
        if row.empty:
            # Without this check a typo surfaces later as an opaque NaN-to-int cast error.
            raise ValueError(f"Team {team!r} not found in combined_df['Team']")
        row.columns = matchup_cols
        return row

    # Away team: its offense fills the 'Away' slots, its defense the 'Home' slots.
    away_df = _team_row(away, base_cols + own_cols + allowed_cols)
    # Home team: the opposite order, so its defense lines up under 'Away' and its offense under 'Home'.
    home_df_adj = _team_row(home, base_cols + allowed_cols + own_cols)

    output_df = pd.concat([away_df,home_df_adj],axis=0)
    # numeric_only=True skips the string 'Team' column; pandas >= 2.0 raises TypeError without it.
    means_df = output_df.mean(numeric_only=True).to_frame().transpose()
    # Label the average row before concatenating instead of patching a hard-coded index afterwards.
    means_df.insert(0, 'Team', 'Average')
    output_df = pd.concat([output_df,means_df],sort=False).reset_index(drop=True)
    # Whole numbers keep the preview compact; the cast truncates (e.g. 7.45 -> 7).
    output_df = output_df.astype({column: int for column in matchup_cols[1:]})
    return output_df[['Team', 'SRS', 'SOS', 'Pace','Away Rtg', 'Away EFG', 'Away TOV', 'Away ORB', 'Away FTR',
                       'Home Rtg', 'Home EFG', 'Home TOV', 'Home ORB','Home FTR']]

Play In Tuesday

In [10]:
# The away name keeps the literal "&amp;" because that is how it is stored in the Team column.
game_summary('Texas A&amp;M-Corpus Christi','Texas Southern')

Unnamed: 0,Team,SRS,SOS,Pace,Away Rtg,Away EFG,Away TOV,Away ORB,Away FTR,Home Rtg,Home EFG,Home TOV,Home ORB,Home FTR
0,Texas A&amp;M-Corpus Christi,-6,-8,72,105,49,16,34,36,96,48,20,27,39
1,Texas Southern,-2,-6,69,93,44,15,27,34,99,48,18,33,33
2,Average,-4,-7,70,99,47,15,31,35,98,48,19,30,36


Pregame: Pass at -3.5

Away: Shooting, rebounding

Home: Rebounding, Free throws

In [11]:
# Preview with Indiana passed as the away argument and Wyoming as the home argument.
game_summary('Indiana','Wyoming')

Unnamed: 0,Team,SRS,SOS,Pace,Away Rtg,Away EFG,Away TOV,Away ORB,Away FTR,Home Rtg,Home EFG,Home TOV,Home ORB,Home FTR
0,Indiana,13,8,67,103,51,15,27,33,95,45,15,24,30
1,Wyoming,9,4,66,97,47,14,24,24,107,53,15,25,35
2,Average,11,6,67,100,49,14,25,28,101,49,15,25,33


Pregame: Pass at +4

Away: Free throws

Home: Shooting

Play In Wednesday

In [12]:
# Preview with Bryant passed as the away argument and Wright State as the home argument.
game_summary('Bryant','Wright State')

Unnamed: 0,Team,SRS,SOS,Pace,Away Rtg,Away EFG,Away TOV,Away ORB,Away FTR,Home Rtg,Home EFG,Home TOV,Home ORB,Home FTR
0,Bryant,-4,-7,73,106,50,16,33,32,99,46,14,31,25
1,Wright State,-1,-5,69,102,50,16,29,25,108,51,15,31,31
2,Average,-3,-6,71,104,50,16,31,29,104,49,14,31,28


Pregame: Pass at -3.5

Away: Free throws

Home: Shooting, free throws

In [13]:
# Preview with Notre Dame passed as the away argument and Rutgers as the home argument.
game_summary('Notre Dame','Rutgers')

Unnamed: 0,Team,SRS,SOS,Pace,Away Rtg,Away EFG,Away TOV,Away ORB,Away FTR,Home Rtg,Home EFG,Home TOV,Home ORB,Home FTR
0,Notre Dame,11,6,65,109,54,13,22,26,101,49,13,24,22
1,Rutgers,9,7,65,100,48,15,26,28,102,49,15,29,26
2,Average,10,6,65,104,51,14,24,27,101,49,14,27,24


Pregame: Notre Dame - at even

Away: Shooting

Home: Rebounding