<a href="https://colab.research.google.com/github/DrakeData/March_Madness_Bracket/blob/main/mm_bracket.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# March Madness - Bracket Generator
This is a simple script to generate and predict a March Madness bracket. It will be updated each year with new statistics.

## Helpfull Linkes:
- [March Maddness - Live Bracket](https://www.ncaa.com/march-madness-live/bracket)
- [NCAA Men's Backetball Stats](https://www.ncaa.com/stats/basketball-men/d1/current/team/859)

## Code Year Info
- 2022: Looking at only Defensive Rebounds Per Game.


In [1]:
import requests
from bs4 import BeautifulSoup
import datetime
from datetime import datetime, timedelta, date
from dateutil.parser import parse
import pandas as pd
from IPython.display import clear_output

## Webscrape Data
Data scraped from [NCAA Men's Basketball](https://www.ncaa.com/stats/basketball-men/d1/current/team/859) website.

In [2]:
main_url = 'https://www.ncaa.com/stats/basketball-men/d1/current/team/859'
page = requests.get(main_url)
soup = BeautifulSoup(page.content, 'html.parser')

print(soup.prettify())

clear_output()

In [3]:
cal_data = soup.find('tbody')
print(cal_data.prettify())

clear_output()

In [4]:
date_data = cal_data.find_all('tr')
date_data

clear_output()

In [5]:
date_data[0].find_all('td')

[<td>1</td>,
 <td><img src="https://i.turner.ncaa.com/sites/default/files/images/logos/schools/bgl/gonzaga.svg"/><a class="school" href="/schools/gonzaga">Gonzaga</a></td>,
 <td>29</td>,
 <td>930</td>,
 <td>32.07</td>]

In [6]:
date_data[1]

<tr>
<td>2</td>
<td><img src="https://i.turner.ncaa.com/sites/default/files/images/logos/schools/bgl/arizona.svg"/><a class="school" href="/schools/arizona">Arizona</a></td>
<td>34</td>
<td>1017</td>
<td>29.91</td>
</tr>

In [7]:
test = date_data[0].find_all('td')
test

[<td>1</td>,
 <td><img src="https://i.turner.ncaa.com/sites/default/files/images/logos/schools/bgl/gonzaga.svg"/><a class="school" href="/schools/gonzaga">Gonzaga</a></td>,
 <td>29</td>,
 <td>930</td>,
 <td>32.07</td>]

In [8]:
# loop through NCAA website
url_ls = ['https://www.ncaa.com/stats/basketball-men/d1/current/team/859',
          'https://www.ncaa.com/stats/basketball-men/d1/current/team/859/p2',
          'https://www.ncaa.com/stats/basketball-men/d1/current/team/859/p3',
          'https://www.ncaa.com/stats/basketball-men/d1/current/team/859/p4',
          'https://www.ncaa.com/stats/basketball-men/d1/current/team/859/p5',
          'https://www.ncaa.com/stats/basketball-men/d1/current/team/859/p6',
          'https://www.ncaa.com/stats/basketball-men/d1/current/team/859/p7']

In [9]:
rank_ls = []
team_name_ls = []
dearbs_ls = []
rpg_ls = []


for url in url_ls:
  main_url = url
  page = requests.get(main_url)
  soup = BeautifulSoup(page.content, 'html.parser')

  cal_data = soup.find('tbody')
  date_data = cal_data.find_all('tr')

  for raw_data in date_data:
    main = raw_data.find_all('td')

    rank = main[0].text
    team_name = main[1].text
    dearbs = main[2].text
    rpg = main[3].text

    rank_ls.append(rank)
    team_name_ls.append(team_name)
    dearbs_ls.append(dearbs)
    rpg_ls.append(rpg)

In [10]:
main_df = pd.DataFrame({'rank': rank_ls,
                        'school_name': team_name_ls,
                        'drebs': dearbs_ls,
                        'rpg': rpg_ls})

main_df.head()

Unnamed: 0,rank,school_name,drebs,rpg
0,1,Gonzaga,29,930
1,2,Arizona,34,1017
2,3,Utah Valley,32,944
3,4,Toledo,33,973
4,5,Oral Roberts,31,907


## Sweet 16 Winner Predictor

In [11]:
def first_round(start_teams_dict):
    '''
    Looks at each competing team's Defensive Rebounds per Game (drebs) and selects the team with the highest 
    average score to move on to the next round. Simulates round 1 through the 
    sweet 16.

    return: Sweet 16 winner to go onto the Elite 8
    ''' 
    # First round 
    round1_w_ls = []

    for dict_key in list(start_teams_dict.keys()):
      if start_teams_dict[dict_key][0].iloc[0]['drebs'] >  start_teams_dict[dict_key][1].iloc[0]['drebs']:
        print(f"Predicted winner for Round 1: {start_teams_dict[dict_key][0].iloc[0]['school_name']}")
        round1_w_ls.append(start_teams_dict[dict_key][0].iloc[0]['school_name'])
      else:
        print(f"Predicted winner for Round 1: {start_teams_dict[dict_key][1].iloc[0]['school_name']}")
        round1_w_ls.append(start_teams_dict[dict_key][1].iloc[0]['school_name'])

    # Second Round
    r2_team_ls = []

    for value in round1_w_ls:
      r2_team_ls.append(main_df[main_df['school_name'] == value])
      
      
    r2_dict = {'second_round_s1': [r2_team_ls[0], r2_team_ls[1]],
              'second_round_s2': [r2_team_ls[2], r2_team_ls[3]]}

    round2_w_ls = []

    for dict_key in list(r2_dict.keys()):
      if r2_dict[dict_key][0].iloc[0]['drebs'] >  r2_dict[dict_key][1].iloc[0]['drebs']:
        print(f"Predicted winner for Round 2: {r2_dict[dict_key][0].iloc[0]['school_name']}")
        round2_w_ls.append(r2_dict[dict_key][0].iloc[0]['school_name'])
      else:
        print(f"Predicted winner for Round 2: {r2_dict[dict_key][1].iloc[0]['school_name']}")
        round2_w_ls.append(r2_dict[dict_key][1].iloc[0]['school_name'])

    # SWEET 16
    r3_team_ls = []

    for value in round2_w_ls:
      r3_team_ls.append(main_df[main_df['school_name'] == value])
      
      
    r3_dict = {'third_round_s1': [r2_team_ls[0], r2_team_ls[1]]}

    for dict_key in list(r3_dict.keys()):
      if r3_dict[dict_key][0].iloc[0]['drebs'] >  r3_dict[dict_key][1].iloc[0]['drebs']:
        print(f"Predicted Sweet 16 winner: {r3_dict[dict_key][0].iloc[0]['school_name']}")
        sweet16_winner = r3_dict[dict_key][0].iloc[0]['school_name']
      else:
        print(f"Predicted Sweet 16 winner: {r3_dict[dict_key][1].iloc[0]['school_name']}")
        sweet16_winner = r3_dict[dict_key][1].iloc[0]['school_name']

    return sweet16_winner

### West Division Bracket

In [12]:
## West Division 1
team1 = main_df[main_df['school_name'] == 'Gonzaga']
team2 = main_df[main_df['school_name'] == 'Georgia St.']

team3 = main_df[main_df['school_name'] == 'Boise St.']
team4 = main_df[main_df['school_name'] == 'Memphis']

team5 = main_df[main_df['school_name'] == 'UConn']
team6 = main_df[main_df['school_name'] == 'New Mexico St.']

team7 = main_df[main_df['school_name'] == 'Arkansas']
team8 = main_df[main_df['school_name'] == 'Vermont']

In [13]:
west_dict1 = {'first_round_s1': [team1, team2],
             'first_round_s2': [team3, team4],
             'first_round_s3': [team5, team6],
             'first_round_s4': [team7, team8]}

In [14]:
west_s16_winner1 = first_round(west_dict1)
west_s16_winner1

Predicted winner for Round 1: Gonzaga
Predicted winner for Round 1: Boise St.
Predicted winner for Round 1: New Mexico St.
Predicted winner for Round 1: Vermont
Predicted winner for Round 2: Boise St.
Predicted winner for Round 2: Vermont
Predicted Sweet 16 winner: Boise St.


'Boise St.'

In [83]:
## West Division 2
team1 = main_df[main_df['school_name'] == 'Alabama']
team2 = main_df[main_df['school_name'] == 'Notre Dame']

team3 = main_df[main_df['school_name'] == 'Texas Tech']
team4 = main_df[main_df['school_name'] == 'Montana St.']

team5 = main_df[main_df['school_name'] == 'Michigan St.']
team6 = main_df[main_df['school_name'] == 'Davidson']

team7 = main_df[main_df['school_name'] == 'Duke']
team8 = main_df[main_df['school_name'] == 'Cal St. Fullerton']

In [84]:
west_dict2 = {'first_round_s1': [team1, team2],
             'first_round_s2': [team3, team4],
             'first_round_s3': [team5, team6],
             'first_round_s4': [team7, team8]}

In [85]:
west_s16_winner2 = first_round(west_dict2)
west_s16_winner2

Predicted winner for Round 1: Notre Dame
Predicted winner for Round 1: Montana St.
Predicted winner for Round 1: Michigan St.
Predicted winner for Round 1: Duke
Predicted winner for Round 2: Montana St.
Predicted winner for Round 2: Duke
Predicted Sweet 16 winner: Montana St.


'Montana St.'

### South Division

In [86]:
## South Division 1
team1 = main_df[main_df['school_name'] == 'Arizona']
team2 = main_df[main_df['school_name'] == 'Wright St.']

team3 = main_df[main_df['school_name'] == 'Seton Hall']
team4 = main_df[main_df['school_name'] == 'TCU']

team5 = main_df[main_df['school_name'] == 'Houston']
team6 = main_df[main_df['school_name'] == 'UAB']

team7 = main_df[main_df['school_name'] == 'Illinois']
team8 = main_df[main_df['school_name'] == 'Chattanooga']

In [87]:
south_dict1 = {'first_round_s1': [team1, team2],
             'first_round_s2': [team3, team4],
             'first_round_s3': [team5, team6],
             'first_round_s4': [team7, team8]}

In [88]:
south_e8_winner1 = first_round(south_dict1)
south_e8_winner1

Predicted winner for Round 1: Wright St.
Predicted winner for Round 1: TCU
Predicted winner for Round 1: UAB
Predicted winner for Round 1: Chattanooga
Predicted winner for Round 2: Wright St.
Predicted winner for Round 2: Chattanooga
Predicted Sweet 16 winner: Wright St.


'Wright St.'

In [89]:
## South Division 2
team1 = main_df[main_df['school_name'] == 'Colorado St.']
team2 = main_df[main_df['school_name'] == 'Michigan']

team3 = main_df[main_df['school_name'] == 'Tennessee']
team4 = main_df[main_df['school_name'] == 'Longwood']

team5 = main_df[main_df['school_name'] == 'Ohio St.']
team6 = main_df[main_df['school_name'] == 'Loyola Chicago']

team7 = main_df[main_df['school_name'] == 'Villanova']
team8 = main_df[main_df['school_name'] == 'Delaware']

In [90]:
south_dict2 = {'first_round_s1': [team1, team2],
             'first_round_s2': [team3, team4],
             'first_round_s3': [team5, team6],
             'first_round_s4': [team7, team8]}

In [91]:
south_e8_winner2 = first_round(south_dict2)
south_e8_winner2

Predicted winner for Round 1: Michigan
Predicted winner for Round 1: Tennessee
Predicted winner for Round 1: Loyola Chicago
Predicted winner for Round 1: Delaware
Predicted winner for Round 2: Tennessee
Predicted winner for Round 2: Delaware
Predicted Sweet 16 winner: Tennessee


'Tennessee'

### East Division

In [92]:
## East Division 1
team1 = main_df[main_df['school_name'] == 'Baylor']
team2 = main_df[main_df['school_name'] == 'Norfolk St.']

team3 = main_df[main_df['school_name'] == 'North Carolina']
team4 = main_df[main_df['school_name'] == 'Marquette']

team5 = main_df[main_df['school_name'] == "Saint Mary's (CA)"]
team6 = main_df[main_df['school_name'] == 'Indiana']

team7 = main_df[main_df['school_name'] == 'UCLA']
team8 = main_df[main_df['school_name'] == 'Akron']

In [93]:
east_dict1 = {'first_round_s1': [team1, team2],
             'first_round_s2': [team3, team4],
             'first_round_s3': [team5, team6],
             'first_round_s4': [team7, team8]}

In [94]:
east_s16_winner1 = first_round(east_dict1)
east_s16_winner1

Predicted winner for Round 1: Baylor
Predicted winner for Round 1: North Carolina
Predicted winner for Round 1: Indiana
Predicted winner for Round 1: Akron
Predicted winner for Round 2: North Carolina
Predicted winner for Round 2: Indiana
Predicted Sweet 16 winner: North Carolina


'North Carolina'

In [95]:
## East Division 2
team1 = main_df[main_df['school_name'] == 'Texas']
team2 = main_df[main_df['school_name'] == 'Virginia Tech']

team3 = main_df[main_df['school_name'] == 'Purdue']
team4 = main_df[main_df['school_name'] == 'Yale']

team5 = main_df[main_df['school_name'] == "Murray St."]
team6 = main_df[main_df['school_name'] == 'San Francisco']

team7 = main_df[main_df['school_name'] == 'Kentucky']
team8 = main_df[main_df['school_name'] == "Saint Peter's"]

In [96]:
east_dict2 = {'first_round_s1': [team1, team2],
             'first_round_s2': [team3, team4],
             'first_round_s3': [team5, team6],
             'first_round_s4': [team7, team8]}

In [97]:
east_s16_winner2 = first_round(east_dict2)
east_s16_winner2

Predicted winner for Round 1: Virginia Tech
Predicted winner for Round 1: Purdue
Predicted winner for Round 1: San Francisco
Predicted winner for Round 1: Kentucky
Predicted winner for Round 2: Virginia Tech
Predicted winner for Round 2: Kentucky
Predicted Sweet 16 winner: Virginia Tech


'Virginia Tech'

### Midwest Division

In [98]:
## Midwest Division 1
team1 = main_df[main_df['school_name'] == 'Kansas']
team2 = main_df[main_df['school_name'] == 'Texas Southern']

team3 = main_df[main_df['school_name'] == 'San Diego St.']
team4 = main_df[main_df['school_name'] == 'Creighton']

team5 = main_df[main_df['school_name'] == "Iowa"]
team6 = main_df[main_df['school_name'] == 'Richmond']

team7 = main_df[main_df['school_name'] == 'Providence']
team8 = main_df[main_df['school_name'] == 'South Dakota St.']

In [99]:
midwest_dict1 = {'first_round_s1': [team1, team2],
             'first_round_s2': [team3, team4],
             'first_round_s3': [team5, team6],
             'first_round_s4': [team7, team8]}

In [100]:
midwest_s16_winner1 = first_round(midwest_dict1)
midwest_s16_winner1

Predicted winner for Round 1: Kansas
Predicted winner for Round 1: Creighton
Predicted winner for Round 1: Richmond
Predicted winner for Round 1: South Dakota St.
Predicted winner for Round 2: Kansas
Predicted winner for Round 2: Richmond
Predicted Sweet 16 winner: Kansas


'Kansas'

In [101]:
## Midwest Division 2
team1 = main_df[main_df['school_name'] == 'LSU']
team2 = main_df[main_df['school_name'] == 'Iowa St.']

team3 = main_df[main_df['school_name'] == 'Wisconsin']
team4 = main_df[main_df['school_name'] == 'Colgate']

team5 = main_df[main_df['school_name'] == "USC Upstate"]
team6 = main_df[main_df['school_name'] == 'Miami (FL)']

team7 = main_df[main_df['school_name'] == 'Auburn']
team8 = main_df[main_df['school_name'] == 'Jacksonville St.']

In [102]:
midwest_dict2 = {'first_round_s1': [team1, team2],
             'first_round_s2': [team3, team4],
             'first_round_s3': [team5, team6],
             'first_round_s4': [team7, team8]}

In [103]:
midwest_s16_winner2 = first_round(midwest_dict2)
midwest_s16_winner2

Predicted winner for Round 1: LSU
Predicted winner for Round 1: Colgate
Predicted winner for Round 1: Miami (FL)
Predicted winner for Round 1: Auburn
Predicted winner for Round 2: Colgate
Predicted winner for Round 2: Miami (FL)
Predicted Sweet 16 winner: Colgate


'Colgate'

## Elite 8 Winner Prediction

In [104]:
def mm_winner(e8_teams_dict):
    '''
    Looks at each competing team's Defensive Rebounds per Game (drebs) and selects the team with the highest 
    average score to move on to the next round. Predicts March Madness winner (winner of the big dance).

    return: March Madness winner
    ''' 
    # First round 
    round1_w_ls = []

    for dict_key in list(e8_teams_dict.keys()):
      if e8_teams_dict[dict_key][0].iloc[0]['drebs'] >  e8_teams_dict[dict_key][1].iloc[0]['drebs']:
        print(f"Predicted winner for Elite 8: {e8_teams_dict[dict_key][0].iloc[0]['school_name']}")
        round1_w_ls.append(e8_teams_dict[dict_key][0].iloc[0]['school_name'])
      else:
        print(f"Predicted winner for Elite 8: {e8_teams_dict[dict_key][1].iloc[0]['school_name']}")
        round1_w_ls.append(e8_teams_dict[dict_key][1].iloc[0]['school_name'])

    # Second Round
    r2_team_ls = []

    for value in round1_w_ls:
      r2_team_ls.append(main_df[main_df['school_name'] == value])
      
      
    r2_dict = {'second_round_s1': [r2_team_ls[0], r2_team_ls[1]],
              'second_round_s2': [r2_team_ls[2], r2_team_ls[3]]}

    round2_w_ls = []

    for dict_key in list(r2_dict.keys()):
      if r2_dict[dict_key][0].iloc[0]['drebs'] >  r2_dict[dict_key][1].iloc[0]['drebs']:
        print(f"Predicted winner for Final 4: {r2_dict[dict_key][0].iloc[0]['school_name']}")
        round2_w_ls.append(r2_dict[dict_key][0].iloc[0]['school_name'])
      else:
        print(f"Predicted winner for Final 4: {r2_dict[dict_key][1].iloc[0]['school_name']}")
        round2_w_ls.append(r2_dict[dict_key][1].iloc[0]['school_name'])

    # SWEET 16
    r3_team_ls = []

    for value in round2_w_ls:
      r3_team_ls.append(main_df[main_df['school_name'] == value])
      
      
    r3_dict = {'third_round_s1': [r2_team_ls[0], r2_team_ls[1]]}

    for dict_key in list(r3_dict.keys()):
      if r3_dict[dict_key][0].iloc[0]['drebs'] >  r3_dict[dict_key][1].iloc[0]['drebs']:
        print(f"Predicted winner of the big dance: {r3_dict[dict_key][0].iloc[0]['school_name']}")
        mm_winner = r3_dict[dict_key][0].iloc[0]['school_name']
      else:
        print(f"Predicted winner of the big dance: {r3_dict[dict_key][1].iloc[0]['school_name']}")
        mm_winner = r3_dict[dict_key][1].iloc[0]['school_name']

    return mm_winner

In [107]:
## Midwest Division 1
team1 = main_df[main_df['school_name'] == west_s16_winner1]
team2 = main_df[main_df['school_name'] == west_s16_winner2]

team3 = main_df[main_df['school_name'] == south_e8_winner1]
team4 = main_df[main_df['school_name'] == south_e8_winner2]

team5 = main_df[main_df['school_name'] == east_s16_winner1]
team6 = main_df[main_df['school_name'] == east_s16_winner2]

team7 = main_df[main_df['school_name'] == midwest_s16_winner1]
team8 = main_df[main_df['school_name'] == midwest_s16_winner2]

In [108]:
elite8_dict = {'first_round_s1': [team1, team2],
             'first_round_s2': [team3, team4],
             'first_round_s3': [team5, team6],
             'first_round_s4': [team7, team8]}

In [109]:
mm_winner(elite8_dict)

Predicted winner for Elite 8: Montana St.
Predicted winner for Elite 8: Wright St.
Predicted winner for Elite 8: Virginia Tech
Predicted winner for Elite 8: Colgate
Predicted winner for Final 4: Wright St.
Predicted winner for Final 4: Virginia Tech
Predicted winner of the big dance: Wright St.


'Wright St.'