<h1><ins>Predicting the NBA Championship for 2024:</ins></h1>
<p>In this project, we will gather data in order to predict the NBA champion for the 2023-2024 NBA season.</p>

In [1]:
# Importing the ability to use Pandas to Manipulate data
import pandas as pd
# Importing the ability to work with arrays
import numpy as np
# Show every row and every column whenever a DataFrame is displayed
# (a value of None turns off pandas' truncation limit for that option).
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

In [4]:
# Creating a list containing all of the years we are going to capture data for
# (1990 through 2022 inclusive; range() excludes its upper bound).
a_list = list(range(1990, 2023))

# Going through each year to collect some main statistics associated with a team throughout the regular season.
# The per-year tables are gathered in a list and concatenated once at the end:
# DataFrame.append was removed in pandas 2.0, and it re-copied the whole
# accumulated frame on every iteration.
yearly_frames = []
for year in a_list:
    # read_html returns a list with one DataFrame per <table> it finds on the page
    df = pd.read_html(f"https://www.basketball-reference.com/leagues/NBA_{year}.html#all_per_game_team-oppenent")
    # NOTE(review): the table is picked by position (index 5), which depends on
    # the page layout — confirm it is the same table for every season.
    team_stats = df[5]
    # Adding a 'Year' column to get the year for each set of observations from that season
    team_stats['Year'] = year
    yearly_frames.append(team_stats)

# Stacking the datasets from every year on top of each other with a fresh 0..n-1 index.
# The result keeps the name `empty_df` so later cells that refer to it still work.
empty_df = pd.concat(yearly_frames, ignore_index=True)

In [137]:
# Load the two Excel workbooks used in the rest of the notebook:
# the team statistics table and the win/loss table (file names say 1990-2022).
nba = pd.read_excel(io="NBA Stats 1990-2022.xlsx")
win_losses = pd.read_excel(io="NBA_W_L_1990-2022.xlsx")

In [149]:
# Join the team statistics with the win/loss table, matching rows on the
# 'Team' and 'Year' columns. No `how` is given, so this is pandas' default
# inner join: only Team/Year pairs found in both frames are kept.
nba_final = nba.merge(win_losses, on=['Team', 'Year'])

# Renaming Columns

In [151]:
# Spelled-out replacements for the abbreviated stat column names, chosen so
# that readers unfamiliar with basketball can follow the table.
readable_labels = {
    'G': 'Games Played',
    'MP': 'Minutes Played',
    'FG': 'Field Goals',
    'FGA': 'Field Goal Attempts',
    'FG%': 'Field Goal Percentage',
    '3P': '3-Pointers',
    '3PA': '3-Pointer Attempts',
    '3P%': '3-Pointer Percentage',
    '2P': '2-Pointers',
    '2PA': '2-Pointer Attempts',
    '2P%': '2-Pointer Percentage',
    'FT': 'Free-Throws',
    'FTA': 'Free-Throw Attempts',
    'FT%': 'Free-Throw Percentage',
    'ORB': 'Offensive Rebounds',
    'DRB': 'Defensive Rebounds',
    'TRB': 'Total Rebounds',
    'AST': 'Assists',
    'STL': 'Steals',
    'BLK': 'Blocks',
    'TOV': 'Turnovers',
    'PF': 'Personal Fouls',
    'PTS': 'Points',
}

# rename() returns a new frame; columns not listed in the mapping keep their names.
nba_final = nba_final.rename(columns=readable_labels)

Some of the NBA statistics that we are working with report the **TOTAL** number of *rebounds*, *assists*, or *steals* over a season. While the grand total for a statistic is nice to have, we want to format our data so that we can look at the average value of each stat per game. This will allow us to understand our data much more clearly.

In [153]:
# Convert season totals into per-game averages.
#
# Only observations with Year <= 2015 are converted. Per the original note,
# from 2016 on the stats are already averaged by game instead of being
# cumulative over the entire season, so those rows are left as they are.
#
# This is done as one vectorised division instead of a row-by-row loop. It also
# avoids writing floats into integer columns one cell at a time and works when
# the index is not unique.
#
# NOTE(review): this cell divides `nba` in place, so running it twice divides
# the totals twice — re-load `nba` before re-running.
# NOTE(review): the descriptive column names used here are created by the
# rename cell on `nba_final`, not on `nba`; confirm `nba` really carries these
# columns when the notebook is run top to bottom on a fresh kernel.
counting_stats = [
    "Points", "Personal Fouls", "Turnovers", "Blocks", "Steals", "Assists",
    "Total Rebounds", "Defensive Rebounds", "Offensive Rebounds",
    "Free-Throw Attempts", "Free-Throws",
    "2-Pointer Attempts", "2-Pointers",
    "3-Pointer Attempts", "3-Pointers",
    "Field Goal Attempts", "Field Goals",
    "Minutes Played",
]

# True for the rows whose stats are stored as season totals
totals_rows = nba["Year"] <= 2015

# Divide by games played on the totals rows and by 1 everywhere else, so the
# already-averaged rows keep their values.
games_divisor = nba["Games Played"].where(totals_rows, 1)
nba[counting_stats] = nba[counting_stats].div(games_divisor, axis=0)

# Helper Functions to Clean our Data and Create new Features

In [154]:
def removing_asterisks(team):
    '''
    Returns a clean name of the team in our dataset.

    Any asterisks at the END of the name are stripped. A name with an
    asterisk elsewhere is returned untouched instead of losing its last
    character. Values that are not strings (for example a missing cell)
    are returned unchanged rather than raising.
    '''
    if not isinstance(team, str):
        return team
    return team.rstrip('*')
    
def assign_conference(team):
    '''
    Returns the conference label for a team name.

    Names found in the Eastern list give "Eastern"; every other value,
    including names the list does not know about, gives "Western".
    '''
    eastern_teams = (
        "Atlanta Hawks", "Boston Celtics", "Cleveland Cavaliers",
        "New York Knicks", "Washington Bullets", "Washington Wizards",
        "New Jersey Nets", "Brooklyn Nets", "Detroit Pistons",
        "Indiana Pacers", "Milwaukee Bucks", "Charlotte Hornets",
        "Charlotte Bobcats", "Miami Heat", "Toronto Raptors",
        "Philadelphia 76ers", "Orlando Magic", "Chicago Bulls",
    )
    return "Eastern" if team in eastern_teams else "Western"
    
def assign_division(team):
    '''
    Returns the division label for a team name.

    The divisions are checked in the order listed below and the first one
    whose member list contains the name is returned. A name that appears in
    none of the lists gives None.
    '''
    division_members = (
        ("Atlantic", ("Boston Celtics", "New Jersey Nets", "Brooklyn Nets",
                      "New York Knicks", "Philadelphia 76ers", "Toronto Raptors")),
        ("Central", ("Chicago Bulls", "Cleveland Cavaliers", "Detroit Pistons",
                     "Indiana Pacers", "Milwaukee Bucks")),
        ("Southeast", ("Atlanta Hawks", "Charlotte Bobcats", "Charlotte Hornets",
                       "Miami Heat", "Orlando Magic", "Washington Wizards")),
        ("Pacific", ("Golden State Warriors", "Los Angeles Clippers", "Los Angeles Lakers",
                     "Phoenix Suns", "Sacramento Kings")),
        ("Northwest", ("Denver Nuggets", "Minnesota Timberwolves", "Oklahoma City Thunder",
                       "Portland Trail Blazers", "Utah Jazz")),
        ("Southwest", ("New Orleans Pelicans", "Dallas Mavericks", "Houston Rockets",
                       "New Orleans Hornets", "Memphis Grizzlies", "Vancouver Grizzlies",
                       "San Antonio Spurs")),
    )
    for division, members in division_members:
        if team in members:
            return division
    # No list contains this name
    return None

In [155]:
# First pass at the "Division" column: a default from assign_division, followed by
# per-team, per-season overrides for the years a team was assigned elsewhere.
#
# NOTE(review): this cell sits BEFORE the cell that applies removing_asterisks
# to nba['Team'], so any name still carrying an asterisk will not match
# assign_division's lists or the == comparisons below. The same statements
# appear again in a later cell, which reassigns the whole "Division" column
# — this cell looks redundant; confirm and consider removing it.
nba["Division"] = nba["Team"].apply(assign_division)
# Season ranges used by the overrides below (upper bound of range() is excluded).
# The numeric suffix is the intended number of seasons in the list.
years_to_check_15 = list(range(1990, 2005))  # 1990-2004
years_to_check_14 = list(range(1991, 2005))  # 1991-2004
years_to_check_12 = list(range(1991, 2003))  # 1991-2002
years_to_check_8 = list(range(1996, 2005))   # 1996-2004 (nine values, despite the "_8" suffix)
years_to_check_4 = list(range(2005, 2009))   # 2005-2008
years_to_check_2 = list(range(2003, 2005))   # 2003-2004
years_to_check_2_1 = list(range(2006, 2008)) # 2006-2007
# Creating Accurate Values for the Atlantic Division
nba.loc[(nba["Team"] == "Miami Heat") & (nba["Year"].isin(years_to_check_15)), 'Division'] = 'Atlantic'
nba.loc[((nba["Team"] == "Washington Bullets") | (nba["Team"] == "Washington Wizards")) & (nba["Year"].isin(years_to_check_15))
           , 'Division'] = 'Atlantic'
nba.loc[(nba["Team"] == "Orlando Magic") & (nba["Year"].isin(years_to_check_14)), 'Division'] = 'Atlantic'
# Creating Accurate Values for the Central Division
nba.loc[(nba["Team"] == "Atlanta Hawks") & (nba["Year"].isin(years_to_check_15)), 'Division'] = 'Central'
nba.loc[(nba["Team"] == "Orlando Magic") & (nba["Year"] == 1990), 'Division'] = 'Central'
nba.loc[(nba["Team"] == "Charlotte Hornets") & (nba["Year"].isin(years_to_check_12)), 'Division'] = 'Central'
nba.loc[(nba["Team"] == "Toronto Raptors") & (nba["Year"].isin(years_to_check_8)), 'Division'] = 'Central'
nba.loc[(nba["Team"] == "New Orleans Hornets") & (nba["Year"].isin(years_to_check_2)), 'Division'] = 'Central'
# Creating Accurate Values for the Pacific Division
nba.loc[(nba["Team"] == "Portland Trail Blazers") & (nba["Year"].isin(years_to_check_15)), 'Division'] = 'Pacific'
# "Seattle SuperSonics" is not in any assign_division list, so these overrides are its only source of a division
nba.loc[(nba["Team"] == "Seattle SuperSonics") & (nba["Year"].isin(years_to_check_15)), 'Division'] = 'Pacific'
# Creating Accurate Values for the Northwest Division
nba.loc[(nba["Team"] == "Seattle SuperSonics") & (nba["Year"].isin(years_to_check_4)), 'Division'] = 'Northwest'
# NOTE(review): confirm 'Northwest' is the intended division for this team in these seasons
nba.loc[(nba["Team"] == "New Orleans/Oklahoma City Hornets") & (nba["Year"].isin(years_to_check_2_1)), 'Division'] = 'Northwest'

# Applying our Functions to our Dataset

In [156]:
# Clean the team names in both tables so that they can be compared and joined
# on identical strings.
nba['Team'] = nba['Team'].apply(removing_asterisks)
# Each table is cleaned from its OWN 'Team' column. Previously win_losses was
# assigned the cleaned names from nba (aligned by row index), which overwrote
# its team names with whatever nba held at the same index.
win_losses["Team"] = win_losses['Team'].apply(removing_asterisks)

As with everything in life, there is always change. In this case, the New Orleans Hornets were mainly a Western Conference team, while the Orlando Magic and Charlotte Hornets were mainly Eastern Conference teams. A few seasons needed to be changed, which is what takes place below. For the most part, though, our function takes care of the job!

In [157]:
# Default conference for every row, derived from the team name alone.
nba["Conference"] = nba["Team"].apply(assign_conference)

# Single seasons in which a team's conference differs from that default:
# (team name, year, conference to record).
conference_exceptions = [
    ("New Orleans Hornets", 2003, 'Eastern'),
    ("New Orleans Hornets", 2004, 'Eastern'),
    ("Orlando Magic", 1991, "Western"),
    ("Charlotte Hornets", 1990, "Western"),
]
for team_name, season, conference in conference_exceptions:
    is_exception = (nba["Team"] == team_name) & (nba["Year"] == season)
    nba.loc[is_exception, 'Conference'] = conference

In [158]:
# Default division for every row, derived from the team name alone. Names that
# assign_division does not list get None here and rely on the overrides below.
nba["Division"] = nba["Team"].apply(assign_division)

# Season ranges used by the overrides (range() excludes its upper bound).
years_to_check_15 = list(range(1990, 2005))
years_to_check_14 = list(range(1991, 2005))
years_to_check_12 = list(range(1991, 2003))
years_to_check_8 = list(range(1996, 2005))
years_to_check_4 = list(range(2005, 2009))
years_to_check_2 = list(range(2003, 2005))
years_to_check_2_1 = list(range(2006, 2008))

# (team names, seasons, division to record), applied top to bottom.
division_overrides = [
    # Atlantic Division
    (["Miami Heat"], years_to_check_15, 'Atlantic'),
    (["Washington Bullets", "Washington Wizards"], years_to_check_15, 'Atlantic'),
    (["Orlando Magic"], years_to_check_14, 'Atlantic'),
    # Central Division
    (["Atlanta Hawks"], years_to_check_15, 'Central'),
    (["Orlando Magic"], [1990], 'Central'),
    (["Charlotte Hornets"], years_to_check_12, 'Central'),
    (["Toronto Raptors"], years_to_check_8, 'Central'),
    (["New Orleans Hornets"], years_to_check_2, 'Central'),
    # Pacific Division
    (["Portland Trail Blazers"], years_to_check_15, 'Pacific'),
    (["Seattle SuperSonics"], years_to_check_15, 'Pacific'),
    # Northwest Division
    (["Seattle SuperSonics"], years_to_check_4, 'Northwest'),
    # NOTE(review): confirm 'Northwest' is the intended division for this entry
    (["New Orleans/Oklahoma City Hornets"], years_to_check_2_1, 'Northwest'),
]
for team_names, seasons, division in division_overrides:
    rows_to_fix = nba["Team"].isin(team_names) & nba["Year"].isin(seasons)
    nba.loc[rows_to_fix, 'Division'] = division

In [148]:
# Preview the first rows of the working table (head() defaults to five rows).
# NOTE(review): this cell's execution count (148) is lower than the cells above it,
# and the rendered output includes an "Unnamed: 0" column and "Longest Win Streak",
# which presumably come from a frame loaded or merged outside the order shown —
# re-run the notebook top to bottom and confirm the output still matches.
nba.head()

Unnamed: 0,Rk,Team,Games Played,Minutes Played,Field Goals,Field Goal Attempts,Field Goal Percentage,3-Pointers,3-Pointer Attempts,3-Pointer Percentage,2-Pointers,2-Pointer Attempts,2-Pointer Percentage,Free-Throws,Free-Throw Attempts,Free-Throw Percentage,Offensive Rebounds,Defensive Rebounds,Total Rebounds,Assists,Steals,Blocks,Turnovers,Personal Fouls,Points,Year,Longest Win Streak,Division,Conference
0,1,Detroit Pistons,82,241.52439,37.109756,83.036585,0.447,2.268293,6.804878,0.333,34.841463,76.231707,0.457,21.768293,28.560976,0.762,12.682927,27.817073,40.5,21.512195,7.390244,3.707317,15.219512,25.268293,98.256098,1990,13,Central,Eastern
1,2,Minnesota Timberwolves,82,242.134146,37.573171,77.939024,0.482,1.573171,4.768293,0.33,36.0,73.170732,0.492,22.670732,29.512195,0.768,12.804878,28.317073,41.121951,22.926829,7.04878,5.963415,16.658537,21.317073,99.390244,1990,4,Northwest,Western
2,3,Utah Jazz,82,241.829268,38.585366,84.743902,0.455,2.121951,6.865854,0.309,36.463415,77.878049,0.468,22.743902,29.902439,0.761,13.414634,26.95122,40.365854,22.987805,8.97561,5.512195,15.134146,23.292683,102.036585,1990,9,Northwest,Western
3,4,Dallas Mavericks,82,242.134146,40.097561,85.52439,0.469,2.134146,7.158537,0.298,37.963415,78.365854,0.484,19.841463,26.353659,0.753,13.926829,29.243902,43.170732,23.914634,7.914634,4.731707,15.073171,22.341463,102.170732,1990,7,Southwest,Western
4,5,San Antonio Spurs,82,240.609756,39.865854,86.463415,0.461,2.817073,7.609756,0.37,37.04878,78.853659,0.47,20.280488,26.45122,0.767,13.719512,27.329268,41.04878,23.987805,9.268293,5.073171,17.317073,23.402439,102.829268,1990,7,Southwest,Western
