## Basics of Forecasting: NBA Data Set

### 1. Data Preparation

In [1]:
#Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
import statsmodels.formula.api as smf
from sklearn.metrics import confusion_matrix, classification_report

In [2]:
from IPython.display import display, HTML

display(HTML(data="""
<style>
    div#notebook-container    { width: 95%; }
    div#menubar-container     { width: 65%; }
    div#maintoolbar-container { width: 99%; }
</style>
"""))

In [3]:
#Import Data
NBA = pd.read_csv("Assignment Data/NBA_Games2.csv")

In [4]:
NBA.columns

Index(['CITY', 'TEAM_NAME', 'TEAM_ID', 'NICKNAME', 'STATE', 'YEAR_FOUNDED',
       'SEASON_ID', 'TEAM_ABBREVIATION', 'GAME_ID', 'GAME_DATE', 'MATCHUP',
       'WL', 'MIN', 'PTS', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT',
       'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'STL', 'BLK',
       'TOV', 'PF', 'PLUS_MINUS', 'WIN'],
      dtype='object')

In [5]:
NBA['TEAM_ID'] = NBA['TEAM_ID'].astype(int)
NBA['YEAR_FOUNDED'] = NBA['YEAR_FOUNDED'].astype(int)
NBA['SEASON_ID'] = NBA['SEASON_ID'].astype(int)
NBA['GAME_ID'] = NBA['GAME_ID'].astype(int)
pd.to_datetime(NBA['GAME_DATE'])
NBA[['MIN', 'PTS', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT',
    'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'STL', 'BLK',
    'TOV', 'PF', 'WIN']] = NBA[['MIN', 'PTS', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT',
                                'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'STL', 'BLK',
                                'TOV', 'PF', 'WIN']].astype(int)
NBA[['MIN', 'FG_PCT', 'FG3_PCT', 'FT_PCT', 'PLUS_MINUS']] = NBA[['MIN', 'FG_PCT', 'FG3_PCT', 'FT_PCT', 'PLUS_MINUS']].astype(float)

Load the “NBA_Game2” file, extract 2017 regular season records, named “NBA17”.

In [6]:
NBA17 = NBA[ (NBA["SEASON_ID"] == 22017) & (NBA["GAME_ID"] <= 1000000000) ]

Explore the data set.

In [7]:
NBA17.describe

<bound method NDFrame.describe of             CITY          TEAM_NAME     TEAM_ID NICKNAME           STATE  \
101      Atlanta      Atlanta Hawks  1610612737    Hawks         Atlanta   
102      Atlanta      Atlanta Hawks  1610612737    Hawks         Atlanta   
103      Atlanta      Atlanta Hawks  1610612737    Hawks         Atlanta   
104      Atlanta      Atlanta Hawks  1610612737    Hawks         Atlanta   
105      Atlanta      Atlanta Hawks  1610612737    Hawks         Atlanta   
...          ...                ...         ...      ...             ...   
18106  Charlotte  Charlotte Hornets  1610612766  Hornets  North Carolina   
18107  Charlotte  Charlotte Hornets  1610612766  Hornets  North Carolina   
18108  Charlotte  Charlotte Hornets  1610612766  Hornets  North Carolina   
18109  Charlotte  Charlotte Hornets  1610612766  Hornets  North Carolina   
18110  Charlotte  Charlotte Hornets  1610612766  Hornets  North Carolina   

       YEAR_FOUNDED  SEASON_ID TEAM_ABBREVIATION   GA

In [8]:
NBA17.drop(["CITY", "TEAM_ID", "NICKNAME", "STATE", "YEAR_FOUNDED", "SEASON_ID", "TEAM_ABBREVIATION"], axis=1, inplace=True)

In [9]:
NBA17['HOME'] = NBA17['MATCHUP'].apply(lambda x: 1 if "vs." in x else 0)

In [10]:
home_teams = NBA17[NBA17['HOME'] == 1].copy()
away_teams = NBA17[NBA17['HOME'] == 0].copy()

home_teams.rename(columns=lambda x: 'home_' + x, inplace=True)
away_teams.rename(columns=lambda x: 'away_' + x, inplace=True)

home_teams.rename(columns={'home_GAME_ID': 'GAME_ID'}, inplace=True)
away_teams.rename(columns={'away_GAME_ID': 'GAME_ID'}, inplace=True)

NBA17_combined = pd.merge(home_teams, away_teams, on='GAME_ID')

In [11]:
NBA17_combined

Unnamed: 0,home_TEAM_NAME,GAME_ID,home_GAME_DATE,home_MATCHUP,home_WL,home_MIN,home_PTS,home_FGM,home_FGA,home_FG_PCT,...,away_DREB,away_REB,away_AST,away_STL,away_BLK,away_TOV,away_PF,away_PLUS_MINUS,away_WIN,away_HOME
0,Atlanta Hawks,21701214,2018-04-10,ATL vs. PHI,L,240.0,113,41,98,0.0,...,43,54,29,7,8,18,22,8.0,1,0
1,Atlanta Hawks,21701172,2018-04-04,ATL vs. MIA,L,239.0,86,33,90,0.0,...,44,54,23,9,10,11,17,29.0,1,0
2,Atlanta Hawks,21701147,2018-04-01,ATL vs. ORL,W,241.0,94,41,85,0.0,...,30,41,19,8,6,5,10,-6.0,0,0
3,Atlanta Hawks,21701130,2018-03-30,ATL vs. PHI,L,239.0,91,36,101,0.0,...,44,53,34,10,6,15,18,10.0,1,0
4,Atlanta Hawks,21701021,2018-03-15,ATL vs. CHA,L,242.0,117,42,90,0.0,...,39,45,30,3,3,10,21,12.0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1143,Charlotte Hornets,21700105,2017-11-01,CHA vs. MIL,W,239.0,126,45,84,0.0,...,31,40,29,3,3,14,25,-5.0,0,0
1144,Charlotte Hornets,21700088,2017-10-29,CHA vs. ORL,W,242.0,120,53,97,0.0,...,30,39,28,11,4,11,20,-7.0,0,0
1145,Charlotte Hornets,21700070,2017-10-27,CHA vs. HOU,L,242.0,93,33,88,0.0,...,44,51,21,5,6,15,26,14.0,1,0
1146,Charlotte Hornets,21700055,2017-10-25,CHA vs. DEN,W,240.0,110,38,81,0.0,...,33,51,18,6,1,13,23,-17.0,0,0


### We will use the latter half of the 2017 regular season data as a D.V for forecasting

Obtain the correlation between Pythagorean winning % for the 1st half of the regular season (less than or equal to game ID 21700615 in the pre period and all game ids after this in the post period)

In [12]:
NBA17_1st = NBA17_combined[NBA17_combined["GAME_ID"] <= 21700615]

home_wins = NBA17_1st.groupby("home_TEAM_NAME", as_index=False)["home_WIN"].sum().rename(columns={'home_TEAM_NAME': 'TEAM_NAME'})
away_wins = NBA17_1st.groupby("away_TEAM_NAME", as_index=False)["away_WIN"].sum().rename(columns={'away_TEAM_NAME': 'TEAM_NAME'})
home_games = NBA17_1st.groupby("home_TEAM_NAME", as_index=False)["GAME_ID"].count().rename(columns={'GAME_ID': 'home_GAMES', 'home_TEAM_NAME': 'TEAM_NAME'})
away_games = NBA17_1st.groupby("away_TEAM_NAME", as_index=False)["GAME_ID"].count().rename(columns={'GAME_ID': 'away_GAMES', 'away_TEAM_NAME': 'TEAM_NAME'})

total_wins = pd.merge(home_wins, away_wins, on="TEAM_NAME")
total_games = pd.merge(home_games, away_games, on="TEAM_NAME")

total_wins['Total_WIN'] = total_wins['home_WIN'] + total_wins['away_WIN']
total_games['Total_GAMES'] = total_games['home_GAMES'] + total_games['away_GAMES']
total_games_wl = pd.merge(total_wins, total_games, on="TEAM_NAME")

total_games_wl['wpct_1st'] = total_games_wl['Total_WIN']/total_games_wl['Total_GAMES']

In [13]:
print( total_games[total_games["TEAM_NAME"] == "Atlanta Hawks"], total_games[total_games["TEAM_NAME"] == "Chicago Bulls"] )

       TEAM_NAME  home_GAMES  away_GAMES  Total_GAMES
0  Atlanta Hawks          17          22           39        TEAM_NAME  home_GAMES  away_GAMES  Total_GAMES
4  Chicago Bulls          20          22           42


In [14]:
total_games_wl

Unnamed: 0,TEAM_NAME,home_WIN,away_WIN,Total_WIN,home_GAMES,away_GAMES,Total_GAMES,wpct_1st
0,Atlanta Hawks,7,4,11,17,22,39,0.282051
1,Boston Celtics,18,16,34,23,21,44,0.772727
2,Brooklyn Nets,9,6,15,22,19,41,0.365854
3,Charlotte Hornets,9,5,14,20,17,37,0.378378
4,Chicago Bulls,9,6,15,20,22,42,0.357143
5,Cleveland Cavaliers,14,11,25,18,22,40,0.625
6,Dallas Mavericks,8,6,14,22,19,41,0.341463
7,Denver Nuggets,14,7,21,19,22,41,0.512195
8,Detroit Pistons,13,8,21,18,21,39,0.538462
9,Golden State Warriors,16,15,31,21,18,39,0.794872


In [15]:
home_pts = NBA17_1st.groupby("home_TEAM_NAME", as_index=False)["home_PTS"].sum().rename(columns={'home_PTS': 'PTS_home', 'home_TEAM_NAME': 'TEAM_NAME'})
away_pts = NBA17_1st.groupby("away_TEAM_NAME", as_index=False)["away_PTS"].sum().rename(columns={'away_PTS': 'PTS_away', 'away_TEAM_NAME': 'TEAM_NAME'})
home_pts_allowed = NBA17_1st.groupby("home_TEAM_NAME", as_index=False)["away_PTS"].sum().rename(columns={'away_PTS': 'PTS_allowed_home', 'home_TEAM_NAME': 'TEAM_NAME'})
away_pts_allowed = NBA17_1st.groupby("away_TEAM_NAME", as_index=False)["home_PTS"].sum().rename(columns={'home_PTS': 'PTS_allowed_away', 'away_TEAM_NAME': 'TEAM_NAME'})

total_pts = pd.merge(home_pts, away_pts, on="TEAM_NAME")
total_pts_allowed = pd.merge(home_pts_allowed, away_pts_allowed, on="TEAM_NAME")

total_pts['Total_PTS'] = total_pts['PTS_home'] + total_pts['PTS_away']
total_pts_allowed['Total_PTS_allowed'] = total_pts_allowed['PTS_allowed_home'] + total_pts_allowed['PTS_allowed_away']
total_pts = pd.merge(total_pts, total_pts_allowed, on="TEAM_NAME")

total_pts["pyth_wpct_1st"] = total_pts["Total_PTS"]**2 / (total_pts["Total_PTS"]**2 + total_pts["Total_PTS_allowed"]**2)

In [16]:
total_pts

Unnamed: 0,TEAM_NAME,PTS_home,PTS_away,Total_PTS,PTS_allowed_home,PTS_allowed_away,Total_PTS_allowed,pyth_wpct_1st
0,Atlanta Hawks,1782,2270,4052,1807,2407,4214,0.480409
1,Boston Celtics,2401,2131,4532,2254,2046,4300,0.52625
2,Brooklyn Nets,2322,2020,4342,2362,2104,4466,0.485925
3,Charlotte Hornets,2121,1781,3902,2138,1811,3949,0.494014
4,Chicago Bulls,2023,2272,4295,2073,2478,4551,0.471085
5,Cleveland Cavaliers,1999,2389,4388,1956,2392,4348,0.504579
6,Dallas Mavericks,2277,1927,4204,2274,2015,4289,0.489993
7,Denver Nuggets,2107,2293,4400,1966,2385,4351,0.505599
8,Detroit Pistons,1868,2125,3993,1780,2186,3966,0.503392
9,Golden State Warriors,2395,2098,4493,2193,1941,4134,0.541542


In [17]:
total_stats_1st = pd.merge(total_games_wl[["TEAM_NAME", "wpct_1st"]], total_pts[["TEAM_NAME", "pyth_wpct_1st"]], on="TEAM_NAME")
total_stats_1st 

Unnamed: 0,TEAM_NAME,wpct_1st,pyth_wpct_1st
0,Atlanta Hawks,0.282051,0.480409
1,Boston Celtics,0.772727,0.52625
2,Brooklyn Nets,0.365854,0.485925
3,Charlotte Hornets,0.378378,0.494014
4,Chicago Bulls,0.357143,0.471085
5,Cleveland Cavaliers,0.625,0.504579
6,Dallas Mavericks,0.341463,0.489993
7,Denver Nuggets,0.512195,0.505599
8,Detroit Pistons,0.538462,0.503392
9,Golden State Warriors,0.794872,0.541542


Manipulate the 2nd half of the regular season data to be used as DV for forecasting.

In [18]:
NBA17_2nd = NBA17_combined[NBA17_combined["GAME_ID"] > 21700615]

home_wins = NBA17_2nd.groupby("home_TEAM_NAME", as_index=False)["home_WIN"].sum().rename(columns={'home_TEAM_NAME': 'TEAM_NAME'})
away_wins = NBA17_2nd.groupby("away_TEAM_NAME", as_index=False)["away_WIN"].sum().rename(columns={'away_TEAM_NAME': 'TEAM_NAME'})
home_games = NBA17_2nd.groupby("home_TEAM_NAME", as_index=False)["GAME_ID"].count().rename(columns={'GAME_ID': 'home_GAMES', 'home_TEAM_NAME': 'TEAM_NAME'})
away_games = NBA17_2nd.groupby("away_TEAM_NAME", as_index=False)["GAME_ID"].count().rename(columns={'GAME_ID': 'away_GAMES', 'away_TEAM_NAME': 'TEAM_NAME'})

total_wins = pd.merge(home_wins, away_wins, on="TEAM_NAME")
total_games = pd.merge(home_games, away_games, on="TEAM_NAME")

total_wins['Total_WIN'] = total_wins['home_WIN'] + total_wins['away_WIN']
total_games['Total_GAMES'] = total_games['home_GAMES'] + total_games['away_GAMES']
total_games_wl = pd.merge(total_wins, total_games, on="TEAM_NAME")

total_games_wl['wpct_2nd'] = total_games_wl['Total_WIN']/total_games_wl['Total_GAMES']

In [19]:
home_pts = NBA17_2nd.groupby("home_TEAM_NAME", as_index=False)["home_PTS"].sum().rename(columns={'home_PTS': 'PTS_home', 'home_TEAM_NAME': 'TEAM_NAME'})
away_pts = NBA17_2nd.groupby("away_TEAM_NAME", as_index=False)["away_PTS"].sum().rename(columns={'away_PTS': 'PTS_away', 'away_TEAM_NAME': 'TEAM_NAME'})
home_pts_allowed = NBA17_2nd.groupby("home_TEAM_NAME", as_index=False)["away_PTS"].sum().rename(columns={'away_PTS': 'PTS_allowed_home', 'home_TEAM_NAME': 'TEAM_NAME'})
away_pts_allowed = NBA17_2nd.groupby("away_TEAM_NAME", as_index=False)["home_PTS"].sum().rename(columns={'home_PTS': 'PTS_allowed_away', 'away_TEAM_NAME': 'TEAM_NAME'})

total_pts = pd.merge(home_pts, away_pts, on="TEAM_NAME")
total_pts_allowed = pd.merge(home_pts_allowed, away_pts_allowed, on="TEAM_NAME")

total_pts['Total_PTS'] = total_pts['PTS_home'] + total_pts['PTS_away']
total_pts_allowed['Total_PTS_allowed'] = total_pts_allowed['PTS_allowed_home'] + total_pts_allowed['PTS_allowed_away']
total_pts = pd.merge(total_pts, total_pts_allowed, on="TEAM_NAME")

total_pts["pyth_wpct_2nd"] = total_pts["Total_PTS"]**2 / (total_pts["Total_PTS"]**2 + total_pts["Total_PTS_allowed"]**2)

In [20]:
total_stats_2nd = pd.merge(total_games_wl[["TEAM_NAME", "wpct_2nd"]], total_pts[["TEAM_NAME", "pyth_wpct_2nd"]], on="TEAM_NAME")
total_stats_2nd 

Unnamed: 0,TEAM_NAME,wpct_2nd,pyth_wpct_2nd
0,Atlanta Hawks,0.317073,0.46887
1,Boston Celtics,0.555556,0.508167
2,Brooklyn Nets,0.333333,0.480413
3,Charlotte Hornets,0.488372,0.506432
4,Chicago Bulls,0.315789,0.462752
5,Cleveland Cavaliers,0.6,0.505157
6,Dallas Mavericks,0.236842,0.47906
7,Denver Nuggets,0.631579,0.507088
8,Detroit Pistons,0.414634,0.496086
9,Golden State Warriors,0.615385,0.51162


 ### Merge NBA17_pre_team data to NBA17_post_team data

In [21]:
total_stats = pd.merge(total_stats_1st, total_stats_2nd, on="TEAM_NAME")

In [22]:
total_stats_1st.corr()

Unnamed: 0,wpct_1st,pyth_wpct_1st
wpct_1st,1.0,0.91295
pyth_wpct_1st,0.91295,1.0


In [23]:
total_stats["home_TEAM_NAME"] = total_stats["TEAM_NAME"]
NBA17_all = pd.concat([NBA17_1st, NBA17_2nd], ignore_index=True)
NBA17_all = pd.merge(NBA17_all, total_stats, on="home_TEAM_NAME")

In [24]:
NBA17_all

Unnamed: 0,home_TEAM_NAME,GAME_ID,home_GAME_DATE,home_MATCHUP,home_WL,home_MIN,home_PTS,home_FGM,home_FGA,home_FG_PCT,...,away_TOV,away_PF,away_PLUS_MINUS,away_WIN,away_HOME,TEAM_NAME,wpct_1st,pyth_wpct_1st,wpct_2nd,pyth_wpct_2nd
0,Atlanta Hawks,21700533,2017-12-30,ATL vs. POR,W,241.0,104,40,89,0.0,...,20,18,-15.0,0,0,Atlanta Hawks,0.282051,0.480409,0.317073,0.468870
1,Atlanta Hawks,21700508,2017-12-27,ATL vs. WAS,W,241.0,113,41,94,0.0,...,13,24,-14.0,0,0,Atlanta Hawks,0.282051,0.480409,0.317073,0.468870
2,Atlanta Hawks,21700484,2017-12-23,ATL vs. DAL,W,242.0,112,42,75,0.0,...,10,17,-5.0,0,0,Atlanta Hawks,0.282051,0.480409,0.317073,0.468870
3,Atlanta Hawks,21700455,2017-12-20,ATL vs. IND,L,239.0,95,35,81,0.0,...,9,18,10.0,1,0,Atlanta Hawks,0.282051,0.480409,0.317073,0.468870
4,Atlanta Hawks,21700443,2017-12-18,ATL vs. MIA,W,241.0,110,38,83,0.0,...,19,28,-6.0,0,0,Atlanta Hawks,0.282051,0.480409,0.317073,0.468870
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1143,Charlotte Hornets,21700687,2018-01-22,CHA vs. SAC,W,241.0,112,38,86,0.0,...,7,22,-5.0,0,0,Charlotte Hornets,0.378378,0.494014,0.488372,0.506432
1144,Charlotte Hornets,21700676,2018-01-20,CHA vs. MIA,L,241.0,105,41,90,0.0,...,14,17,1.0,1,0,Charlotte Hornets,0.378378,0.494014,0.488372,0.506432
1145,Charlotte Hornets,21700653,2018-01-17,CHA vs. WAS,W,241.0,133,48,98,0.0,...,16,23,-24.0,0,0,Charlotte Hornets,0.378378,0.494014,0.488372,0.506432
1146,Charlotte Hornets,21700629,2018-01-13,CHA vs. OKC,L,242.0,91,33,83,0.0,...,9,22,10.0,1,0,Charlotte Hornets,0.378378,0.494014,0.488372,0.506432


Fit two regressions using 1) Pythagorean winning % and 2) 1st half winning % as I.V respectively

In [25]:
#1
Est_Wpct_by_P = smf.glm(formula = "wpct_2nd ~ pyth_wpct_2nd", data = NBA17_all, family = sm.families.Binomial()).fit()
Est_Wpct_by_P.summary()

0,1,2,3
Dep. Variable:,wpct_2nd,No. Observations:,1148.0
Model:,GLM,Df Residuals:,1146.0
Model Family:,Binomial,Df Model:,1.0
Link Function:,logit,Scale:,1.0
Method:,IRLS,Log-Likelihood:,-481.58
Date:,"Sun, 17 Mar 2024",Deviance:,14.588
Time:,04:26:43,Pearson chi2:,14.4
No. Iterations:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-15.5174,1.411,-10.999,0.000,-18.282,-12.752
pyth_wpct_2nd,31.0223,2.816,11.018,0.000,25.504,36.541


In [26]:
est_Wpct = Est_Wpct_by_P.predict()
fittedWin = [1 if x > .5 else 0 for x in est_Wpct]
confusion_matrix(NBA17_all["home_WIN"], fittedWin)

array([[254, 225],
       [183, 486]])

In [27]:
Success_Rate = round( (254+486)/(254+486+225+183)*100, 2)
print( "Success Rate: " + str( Success_Rate ) + "%" )

Success Rate: 64.46%


In [28]:
#2
Est_Wpct_by_1st = smf.glm(formula = "wpct_2nd ~ wpct_1st", data = NBA17_all, family = sm.families.Binomial()).fit()
Est_Wpct_by_1st.summary()

0,1,2,3
Dep. Variable:,wpct_2nd,No. Observations:,1148.0
Model:,GLM,Df Residuals:,1146.0
Model Family:,Binomial,Df Model:,1.0
Link Function:,logit,Scale:,1.0
Method:,IRLS,Log-Likelihood:,-524.49
Date:,"Sun, 17 Mar 2024",Deviance:,100.42
Time:,04:26:47,Pearson chi2:,96.0
No. Iterations:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-1.5825,0.220,-7.188,0.000,-2.014,-1.151
wpct_1st,3.1766,0.427,7.439,0.000,2.340,4.014


In [29]:
est_Wpct_2 = Est_Wpct_by_1st.predict()
fittedWin_2 = [1 if x > .5 else 0 for x in est_Wpct_2]
confusion_matrix(NBA17_all["home_WIN"], fittedWin_2)

array([[300, 179],
       [254, 415]])

In [30]:
Success_Rate_2 = round((300 + 415)/ (300 + 415 + 179 + 254)*100, 2)
print( "Success Rate: " + str( Success_Rate_2 ) + "%" )

Success Rate: 62.28%


In [31]:
#3
Est_Wpct = smf.glm(formula = "wpct_2nd ~ wpct_1st + pyth_wpct_1st", data = NBA17_all, family = sm.families.Binomial()).fit()
Est_Wpct.summary()

0,1,2,3
Dep. Variable:,wpct_2nd,No. Observations:,1148.0
Model:,GLM,Df Residuals:,1145.0
Model Family:,Binomial,Df Model:,2.0
Link Function:,logit,Scale:,1.0
Method:,IRLS,Log-Likelihood:,-517.37
Date:,"Sun, 17 Mar 2024",Deviance:,86.164
Time:,04:26:52,Pearson chi2:,82.6
No. Iterations:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-14.2341,3.415,-4.168,0.000,-20.927,-7.541
wpct_1st,-0.3238,1.023,-0.317,0.752,-2.328,1.681
pyth_wpct_1st,28.8065,7.742,3.721,0.000,13.632,43.981


In [32]:
#3
Est_Wpct = smf.ols(formula = "wpct_2nd ~ wpct_1st + pyth_wpct_1st", data = NBA17_all).fit()
Est_Wpct.summary()

0,1,2,3
Dep. Variable:,wpct_2nd,R-squared:,0.471
Model:,OLS,Adj. R-squared:,0.47
Method:,Least Squares,F-statistic:,509.4
Date:,"Sun, 17 Mar 2024",Prob (F-statistic):,5.61e-159
Time:,04:27:06,Log-Likelihood:,700.05
No. Observations:,1148,AIC:,-1394.0
Df Residuals:,1145,BIC:,-1379.0
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-2.7698,0.210,-13.162,0.000,-3.183,-2.357
wpct_1st,-0.0513,0.065,-0.790,0.430,-0.179,0.076
pyth_wpct_1st,6.5927,0.479,13.755,0.000,5.652,7.533

0,1,2,3
Omnibus:,115.151,Durbin-Watson:,0.058
Prob(Omnibus):,0.0,Jarque-Bera (JB):,36.623
Skew:,0.126,Prob(JB):,1.12e-08
Kurtosis:,2.162,Cond. No.,166.0


## The End