# NCAA Men's Basketball Pairwise Rankings
Generate rankings for college basketball by pairwise comparisons

In [1]:
import numpy as np
import pandas as pd
from collections import Counter

## Import Data

In [2]:
teams = pd.read_csv('data/2019/Stage2DataFiles/Teams.csv')
results = pd.read_csv('data/2019/Stage2DataFiles/RegularSeasonCompactResults.csv')

In [3]:
def clean_data(teams, results, season):
    active_teams = teams.set_index('TeamID')[['TeamName']]
    season_results = results[results['Season'] == season]
    for team_id in list(teams.TeamID):
        wins = season_results[season_results['WTeamID'] == team_id]
        losses = season_results[season_results['LTeamID'] == team_id]
        if len(wins) + len(losses) > 0:
            active_teams.at[team_id, 'Wins'] = len(wins)
            active_teams.at[team_id, 'Losses'] = len(losses)
        else:
            active_teams.drop(team_id, inplace=True)
    return (active_teams, season_results)

In [4]:
active_teams, season_results = clean_data(teams, results, 2019)

## RPI - Ratings Percentage Index

* The team's own winning percentage (25%)

In [5]:
def WP(team_id, active_teams, season_results):
    wins = season_results[season_results['WTeamID'] == team_id]
    losses = season_results[season_results['LTeamID'] == team_id]
        
    weighted_wins, weighted_losses = 0, 0
    for game_id in wins.index:
        if wins.at[game_id, 'WLoc'] == 'A': weighted_wins += 1.3
        elif wins.at[game_id, 'WLoc'] == 'H': weighted_wins += 0.7
        else: weighted_wins += 1
    for game_id in losses.index:
        if losses.at[game_id, 'WLoc'] == 'A': weighted_losses += 1.3
        elif losses.at[game_id, 'WLoc'] == 'H': weighted_losses += 0.7
        else: weighted_losses += 1
    
    return weighted_wins/(weighted_wins+weighted_losses)

In [6]:
for team_id in active_teams.index:
    active_teams.at[team_id, 'WP'] = WP(team_id, active_teams, season_results)

* The average of the team's opponents' winning percentages (50%)

In [7]:
def OWP(team_id, active_teams, season_results):
    wins = list(season_results[season_results['WTeamID'] == team_id].LTeamID)
    losses = list(season_results[season_results['LTeamID'] == team_id].WTeamID)
    
    accumulator = 0
    for opp_id in wins+losses:
        # wins and losses excluding games against the TUC
        opp_wins = season_results[(season_results['WTeamID'] == opp_id) & (season_results['LTeamID'] != team_id)]
        opp_losses = season_results[(season_results['LTeamID'] == opp_id) & (season_results['WTeamID'] != team_id)]
        accumulator += len(opp_wins)/(len(opp_wins)+len(opp_losses))
    return accumulator/(len(wins)+len(losses))

In [8]:
for team_id in active_teams.index:
    active_teams.at[team_id, 'OWP'] = OWP(team_id, active_teams, season_results)

* The average of the team's opponents opponents' winning percentages (25%)

In [9]:
def OOWP(team_id, active_teams, season_results):
    wins = list(season_results[season_results['WTeamID'] == team_id].LTeamID)
    losses = list(season_results[season_results['LTeamID'] == team_id].WTeamID)
    
    accumulator = 0
    for opp_id in wins+losses:
        accumulator += active_teams.at[opp_id, 'OWP']
    return accumulator/(len(wins)+len(losses))

In [10]:
for team_id in active_teams.index:
    active_teams.at[team_id, 'OOWP'] = OOWP(team_id, active_teams, season_results)

* Combined RPI Formula

In [11]:
for team_id in active_teams.index:
    active_teams.at[team_id, 'RPI'] = (active_teams.at[team_id, 'WP']*0.25) + (active_teams.at[team_id, 'OWP']*0.5) + (active_teams.at[team_id, 'OOWP']*0.25)
active_teams.sort_values(by='RPI', ascending=False).head()

Unnamed: 0_level_0,TeamName,Wins,Losses,WP,OWP,OOWP,RPI
TeamID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1181,Duke,29.0,5.0,0.841772,0.639404,0.56414,0.67118
1438,Virginia,29.0,3.0,0.901639,0.583542,0.554421,0.655786
1242,Kansas,25.0,9.0,0.761733,0.64337,0.569318,0.654448
1314,North Carolina,27.0,6.0,0.805556,0.623452,0.56395,0.654102
1222,Houston,31.0,3.0,0.905063,0.578524,0.531978,0.648523


## Head-to-Head Record

In [12]:
def H2H(active_teams, season_results):
    h2h_matrix = pd.DataFrame(index=active_teams.index, columns=active_teams.index)
    for game_id in season_results.index:
        w_team_id = season_results.at[game_id, 'WTeamID']
        l_team_id = season_results.at[game_id, 'LTeamID']

        record = h2h_matrix.at[w_team_id, l_team_id]
        if record is np.NaN:
            h2h_matrix.at[w_team_id, l_team_id] = [1, 0]
            h2h_matrix.at[l_team_id, w_team_id] = [0, 1]
        else:
            record[0] += 1
            h2h_matrix.at[w_team_id, l_team_id] = record
            h2h_matrix.at[l_team_id, w_team_id] = list(reversed(record))
    return h2h_matrix

In [13]:
head_to_head = H2H(active_teams, season_results)

## Record vs. Common Opponents
A win-loss percentage against each individual common opponent, then average all those percentages together to smooth out situations where a team can beat up on the same opponent multiple times

In [14]:
def record_vs_common(active_teams, season_results, head_to_head):
    COp = pd.DataFrame(index=active_teams.index, columns=active_teams.index)
    for team_id in active_teams.index:
        wins = Counter(season_results[season_results['WTeamID'] == team_id].LTeamID)
        losses = Counter(season_results[season_results['LTeamID'] == team_id].WTeamID)

        opponents = set(list(wins.keys())+list(losses.keys()))
        for opp_id in opponents:
            opp_wins = Counter(season_results[season_results['WTeamID'] == opp_id].LTeamID)
            opp_losses = Counter(season_results[season_results['LTeamID'] == opp_id].WTeamID)

            opponent_opponents = set(list(opp_wins.keys())+list(opp_losses.keys()))
            common_opponents = opponents.intersection(opponent_opponents)
            
            if len(common_opponents) > 0:
                accumulator = 0
                for common_opp_id in common_opponents:
                    h2h_wins, h2h_losses = head_to_head.at[team_id, common_opp_id]
                    accumulator += h2h_wins/(h2h_wins+h2h_losses)
                COp.at[team_id, opp_id] = accumulator/len(common_opponents)
    return COp

In [15]:
common_opponents = record_vs_common(active_teams, season_results, head_to_head)

## Pairwise Comparison

In [16]:
def rank_pairwise(active_teams, head_to_head, common_opponents):
    for team_id in active_teams.index:
        comparison_accumulator = 0
        for opp_id in active_teams.index:
            team_comps_won, opp_comps_won = 0, 0
            
            # RPI
            team_rpi = active_teams.at[team_id, 'RPI']
            opp_rpi = active_teams.at[opp_id, 'RPI']
            
            # Head-to-head comparison
            h2h_record = head_to_head.at[team_id, opp_id]
            if not np.isnan(h2h_record).any():
                h2h_wins, h2h_losses = h2h_record
                team_comps_won += h2h_wins
                opp_comps_won += h2h_losses
            
            # Record vs Common opponents comparison
            team_COp = common_opponents.at[team_id, opp_id]
            opp_COp = common_opponents.at[opp_id, team_id]
            team_comps_won += 1 if team_COp > opp_COp else 0
            opp_comps_won += 1 if opp_COp > team_COp else 0
            
            if team_comps_won > opp_comps_won:
                comparison_accumulator += 1
            elif team_comps_won == opp_comps_won and team_rpi > opp_rpi:
                comparison_accumulator += 1
        
        active_teams.at[team_id, 'PWs'] = comparison_accumulator
    
    # Sort by total comparisons won to create a ranking            
    return active_teams.sort_values(by='PWs', ascending=False)

In [17]:
pairwise_rankings = rank_pairwise(active_teams, head_to_head, common_opponents)

In [18]:
pairwise_rankings

Unnamed: 0_level_0,TeamName,Wins,Losses,WP,OWP,OOWP,RPI,PWs
TeamID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1181,Duke,29.0,5.0,0.841772,0.639404,0.564140,0.671180,351.0
1438,Virginia,29.0,3.0,0.901639,0.583542,0.554421,0.655786,351.0
1242,Kansas,25.0,9.0,0.761733,0.643370,0.569318,0.654448,349.0
1222,Houston,31.0,3.0,0.905063,0.578524,0.531978,0.648523,348.0
1314,North Carolina,27.0,6.0,0.805556,0.623452,0.563950,0.654102,348.0
1277,Michigan St,28.0,6.0,0.835484,0.593853,0.559596,0.645696,345.0
1211,Gonzaga,30.0,3.0,0.910891,0.566335,0.533139,0.644175,345.0
1397,Tennessee,28.0,5.0,0.859107,0.574570,0.565880,0.643532,344.0
1276,Michigan,28.0,6.0,0.830565,0.575517,0.557338,0.634734,343.0
1246,Kentucky,27.0,6.0,0.810000,0.599908,0.563834,0.643412,343.0
