In [1]:
from sklearn.linear_model import LinearRegression
from statsmodels.stats.diagnostic import het_white
import statsmodels.api as sm
import pandas as pd

# Remote location of the CSV file used throughout this notebook.
url = "https://raw.githubusercontent.com/Statology/Python-Guides/main/mtcars.csv"

# Download the CSV and parse it into a pandas DataFrame.
data = pd.read_csv(filepath_or_buffer=url)

## Fit regression model

In [2]:
# Response variable: the 'mpg' column of the loaded data.
y = data['mpg']

# Predictor variables: the 'disp' and 'hp' columns, with a constant column
# prepended so that the fitted model includes an intercept term.
x = sm.add_constant(data[['disp', 'hp']])

# Fit an ordinary least squares regression of the response on the predictors.
model = sm.OLS(endog=y, exog=x).fit()

In [3]:
# Run White's test using the fitted model's residuals and the design matrix
# (including the constant) that the model was fitted on.
white_test = het_white(resid=model.resid, exog=model.model.exog)

# Names for the four values returned by het_white, in the order returned.
labels = [
    'Test Statistic',
    'Test Statistic p-value',
    'F-Statistic',
    'F-Test p-value',
]

# Pair each label with its value and print the resulting mapping.
print({label: value for label, value in zip(labels, white_test)})


{'Test Statistic': 7.076620330416613, 'Test Statistic p-value': 0.2150040439426402, 'F-Statistic': 1.4764621093131827, 'F-Test p-value': 0.23147065943879863}


White’s test uses the following null and alternative hypotheses:

Null (H0): Homoscedasticity is present (residuals are equally scattered)
Alternative (HA): Heteroscedasticity is present (residuals are not equally scattered)

In this example, the p-value of the test statistic is 0.2150. Since this p-value is not less than 0.05, we fail to reject the null hypothesis.
This means we do not have sufficient evidence to say that heteroscedasticity is present in the regression model.