# Core Statistics Using Python
### Hana Choi, Simon Business School, University of Rochester


# Testing Multiple Restrictions with F-Tests

## Topics covered

- Method 1: Look at the F-stat and p-value in summary
- Method 2: Compute F-stat by hand
- Method 3: Run F test using ANOVA

## Required packages

In [None]:
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf
from scipy.stats import f

# House Prices Example


## Unrestricted model with three regressors (full model)

In [None]:
# Read the house-price data (hprices2.csv) into a DataFrame
hprices2_csv = "/Users/hanachoi/Dropbox/teaching/core_statistics/Data/hprices2.csv"
hprices2 = pd.read_csv(hprices2_csv)

# Preview the first few rows, then print a separator line
print(hprices2.head(), '----', sep='\n')

# Unrestricted (full) model: regress price on sqrft, lotsize and bdrms
hprices_full_model = smf.ols(
    formula='price ~ sqrft + lotsize + bdrms',
    data=hprices2,
).fit()

# Print only the summary's second table (index 1), i.e. the coefficient estimates
print(hprices_full_model.summary().tables[1])

## Test $H_0: \beta_1 = \beta_2 = \beta_3 = 0$

### Restricted model with intercept only

In [None]:
# Restricted model under H0: regress price on a constant only
hprices_model0 = smf.ols(
    formula='price ~ 1',
    data=hprices2,
).fit()

# Print the coefficient table of the fitted intercept-only model
print(hprices_model0.summary().tables[1])

### Method 1: Look at the F-stat and p-value in summary (testing all coefficients = 0)

In [None]:
# Overall-significance F test stored on the fitted full model.
# f_pvalue is the value reported as Prob(F-statistic) in the summary table.
f_statistic, f_pvalue = hprices_full_model.fvalue, hprices_full_model.f_pvalue

print("F-statistic:", f_statistic)
print("p-value for F-statistic:", f_pvalue)

### Method 2: Compute F-stat by hand

In [None]:
# Manual F test of H0: beta_1 = beta_2 = beta_3 = 0, using the R-squared form
#   F = ((R2_U - R2_R) / q) / ((1 - R2_U) / (n - p - 1))
# where U is the unrestricted (full) model and R is the restricted model.

# R-squared values
R2_U = hprices_full_model.rsquared
R2_0 = hprices_model0.rsquared

# Number of observations and restrictions imposed under the null.
# n is taken from the fitted model rather than len(hprices2) so that it stays
# correct if rows with missing values were dropped during estimation.
n = int(hprices_full_model.nobs)
p_full = 3  # Three predictors: sqrft, lotsize, bdrms
q_0 = 3  # The number of restrictions imposed under the null

# F-statistic for full model vs. model0
fstat_full_vs_0 = ((R2_U - R2_0) / q_0) / ((1 - R2_U) / (n - p_full - 1))
print("F-statistic:", fstat_full_vs_0)

# p-value for full model vs. model0: upper-tail probability of F(q_0, n - p_full - 1).
# f.sf is used instead of 1 - f.cdf because the subtraction loses precision
# (and can round to exactly 0) when the p-value is very small.
p_value_full_vs_0 = f.sf(fstat_full_vs_0, q_0, n - p_full - 1)
print("p-value for F-statistic:", p_value_full_vs_0)

### Method 3: Run F test using ANOVA

In [None]:
# F test on the nested pair of house-price models
anova_results = sm.stats.anova_lm(
    hprices_model0,      # restricted: intercept only
    hprices_full_model,  # unrestricted: sqrft + lotsize + bdrms
)
print(anova_results)

## Test $H_0: \beta_2 = \beta_3 = 0$

### Restricted model with only sqrft

In [None]:
# Restricted model under H0: sqrft is the only regressor
hprices_model1 = smf.ols(
    formula='price ~ sqrft',
    data=hprices2,
).fit()

# Print the coefficient table of the fitted restricted model
print(hprices_model1.summary().tables[1])

### Method 2: Compute F-stat by hand

In [None]:
# Manual F test of H0: beta_2 = beta_3 = 0 (full model vs. the sqrft-only model).
# R2_U, n and p_full are reused from the earlier hand-computed test.

# R-squared of the restricted model
R2_1 = hprices_model1.rsquared

# Number of restrictions imposed under the null
q_1 = 2  # The number of restrictions imposed under the null

# F-statistic for full model vs. model1
fstat_full_vs_1 = ((R2_U - R2_1) / q_1) / ((1 - R2_U) / (n - p_full - 1))
print("F-statistic:", fstat_full_vs_1)

# p-value for full model vs. model1: upper-tail probability of F(q_1, n - p_full - 1).
# f.sf is used instead of 1 - f.cdf because the subtraction loses precision
# (and can round to exactly 0) when the p-value is very small.
p_value_full_vs_1 = f.sf(fstat_full_vs_1, q_1, n - p_full - 1)
print("p-value for F-statistic:", p_value_full_vs_1)

### Method 3: Run F test using ANOVA

In [None]:
# F test on the nested pair of house-price models
anova_results = sm.stats.anova_lm(
    hprices_model1,      # restricted: sqrft only
    hprices_full_model,  # unrestricted: sqrft + lotsize + bdrms
)
print(anova_results)

# RFJ Example

## Unrestricted model with p1, p2, p3 (full model)

In [None]:
# Read the rfj_small.csv data into a DataFrame
rfj_small_csv = "/Users/hanachoi/Dropbox/teaching/core_statistics/Data/rfj_small.csv"
rfj_small = pd.read_csv(rfj_small_csv)

# Preview the first few rows, then print a separator line
print(rfj_small.head(), '----', sep='\n')

# Unrestricted (full) model: regress q1 on p1, p2 and p3
rfj_full_model = smf.ols(
    formula='q1 ~ p1 + p2 + p3',
    data=rfj_small,
).fit()

# Print only the summary's second table (index 1), i.e. the coefficient estimates
print(rfj_full_model.summary().tables[1])

## Test $H_0: \beta_2 = \beta_3 = 0$

- This tests whether Tropicana is a monopoly, i.e., whether the rival prices p2 and p3 have no effect on q1

### Restricted model with only p1

In [None]:
# Restricted model under H0: p1 is the only regressor
rfj_model2 = smf.ols(
    formula='q1 ~ p1',
    data=rfj_small,
).fit()

# Print the coefficient table of the fitted restricted model
print(rfj_model2.summary().tables[1])

### Method 2: Compute F-stat by hand

In [None]:
# Manual F test of H0: beta_2 = beta_3 = 0 for the RFJ data, using the R-squared form
#   F = ((R2_U - R2_R) / q) / ((1 - R2_U) / (n - p - 1))
# where U is the unrestricted (full) model and R is the restricted model.

# R-squared values
R2_U_rfj = rfj_full_model.rsquared
R2_2 = rfj_model2.rsquared

# Number of observations and restrictions imposed under the null.
# n_rfj is taken from the fitted model rather than len(rfj_small) so that it
# stays correct if rows with missing values were dropped during estimation.
n_rfj = int(rfj_full_model.nobs)
p_full_rfj = 3  # Three predictors: p1, p2, p3
q_2 = 2  # The number of restrictions imposed under the null

# F-statistic for full model vs. model2
fstat_full_vs_2 = ((R2_U_rfj - R2_2) / q_2) / ((1 - R2_U_rfj) / (n_rfj - p_full_rfj - 1))
print("F-statistic:", fstat_full_vs_2)

# p-value for full model vs. model2: upper-tail probability of
# F(q_2, n_rfj - p_full_rfj - 1). f.sf is used instead of 1 - f.cdf because the
# subtraction loses precision (and can round to exactly 0) for very small p-values.
p_value_full_vs_2 = f.sf(fstat_full_vs_2, q_2, n_rfj - p_full_rfj - 1)
print("p-value for F-statistic:", p_value_full_vs_2)

### Method 3: Run F test using ANOVA

In [None]:
# F test on the nested pair of RFJ models
anova_results = sm.stats.anova_lm(
    rfj_model2,      # restricted: p1 only
    rfj_full_model,  # unrestricted: p1 + p2 + p3
)
print(anova_results)

## Test $H_0: \beta_2 = \beta_3$

- This is testing whether Minute Maid and Private Label are symmetric


### Restricted model with symmetric rivals

In [None]:
# Restricted model under H0: beta_2 = beta_3, imposed by entering p2 and p3
# as the single regressor (p2 + p3) so they share one coefficient
rfj_model3 = smf.ols(
    formula='q1 ~ p1 + I(p2+p3)',
    data=rfj_small,
).fit()

# Print the coefficient table of the fitted restricted model
print(rfj_model3.summary().tables[1])

### Method 2: Compute F-stat by hand

In [None]:
# Manual F test of H0: beta_2 = beta_3 (full model vs. the symmetric-rivals model).
# R2_U_rfj, n_rfj and p_full_rfj are reused from the earlier RFJ hand-computed test.

# R-squared of the restricted model
R2_3 = rfj_model3.rsquared

# Number of restrictions imposed under the null
q_3 = 1

# F-statistic for full model vs. model3
fstat_full_vs_3 = ((R2_U_rfj - R2_3) / q_3) / ((1 - R2_U_rfj) / (n_rfj - p_full_rfj - 1))
print("F-statistic:", fstat_full_vs_3)

# p-value for full model vs. model3: upper-tail probability of
# F(q_3, n_rfj - p_full_rfj - 1). f.sf is used instead of 1 - f.cdf because the
# subtraction loses precision (and can round to exactly 0) for very small p-values.
p_value_full_vs_3 = f.sf(fstat_full_vs_3, q_3, n_rfj - p_full_rfj - 1)
print("p-value for F-statistic:", p_value_full_vs_3)

### Method 3: Run F test using ANOVA

In [None]:
# F test on the nested pair of RFJ models
anova_results = sm.stats.anova_lm(
    rfj_model3,      # restricted: p2 and p3 share one coefficient
    rfj_full_model,  # unrestricted: p1 + p2 + p3
)
print(anova_results)