In [1]:
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# from sklearn.linear_model import LinearRegression
import statsmodels.api as sm

In [22]:
import warnings
# Suppress every warning for the rest of the session to keep cell output short.
# NOTE(review): this is a blanket filter, so deprecation and numerical warnings
# are hidden as well; consider narrowing it to specific categories.
warnings.filterwarnings("ignore")

## Cobb-Douglas production function

The **Cobb-Douglas production function** is a mathematical model used in economics to show the relationship between the quantity of a product and the inputs used to produce it, such as 
* **labor** and 
* **capital**. 

It is expressed as 

$$ Q = A K^{\alpha} L^{\beta},$$

where 
- $Q$ is output, 
- $K$ is capital, 
- $L$ is labor, 
- $A$ is total factor productivity, and 
- $\alpha$ and $\beta$ are the output elasticities for capital and labor, respectively. 

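This function is widely used to estimate how changes in inputs affect output and to analyze technological change.

Taking logarithms of both sides gives a model that is linear in the parameters, $\ln Q = \ln A + \alpha \ln K + \beta \ln L$, so $\ln A$, $\alpha$ and $\beta$ can be estimated by ordinary least squares on the logged data.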

In [9]:
# Sample data, one record per observation: (output Q, capital K, labour L).
observations = [
    (1989, 3606, 7.6),
    (6922, 49716, 8),
    (11957, 13687, 27.4),
    (44019, 172228, 20.1),
    (14616, 24742, 20.1),
    (3688, 2971, 15.2),
    (46747, 111341, 22.1),
    (9223, 34610, 17.9),
]

# Split the records into one array per variable.
Q, K, L = (np.array(column) for column in zip(*observations))

# Column of ones for the intercept term (same integer dtype and length as Q).
I = np.ones_like(Q)

In [5]:
# Natural log of the capital series K (one value per observation).
np.log(K)

array([ 8.1903544 , 10.81408209,  9.52420176, 12.05657446, 10.11625748,
        7.99665388, 11.62035284, 10.45189794])

In [21]:
# Regressors stored row-wise: intercept, ln K, ln L.
# Built once and reused, instead of constructing the same array (and taking
# the same logs) twice inside a single expression.
log_rows = np.array([I, np.log(K), np.log(L)])

# Cross-product matrix X'X of the normal equations (3 x 3).
x = log_rows @ log_rows.T
x

array([[  8.        ,  80.77037485,  22.12124618],
       [ 80.77037485, 830.65759289, 225.04784453],
       [ 22.12124618, 225.04784453,  62.71592099]])

In [20]:
# Inverse of the cross-product matrix x built above.
np.linalg.inv(x)

array([[ 8.80355523, -0.53012272, -1.2029264 ],
       [-0.53012272,  0.07520683, -0.08288422],
       [-1.2029264 , -0.08288422,  0.73766191]])

In [16]:
# Sanity check: the inverse times x should be the identity up to rounding.
x_inv = np.linalg.inv(x)
print(x_inv, "\n", x_inv @ x)

[[ 8.80355523 -0.53012272 -1.2029264 ]
 [-0.53012272  0.07520683 -0.08288422]
 [-1.2029264  -0.08288422  0.73766191]] 
 [[ 1.00000000e+00 -2.98699619e-13 -7.95652970e-14]
 [ 1.90421963e-15  1.00000000e+00  3.38216036e-15]
 [-7.67334624e-17  2.98650270e-14  1.00000000e+00]]


In [22]:
# OLS estimate for the log-linearised model
#     ln Q = ln A + alpha * ln K + beta * ln L.
# The normal equations (X'X) b = X' ln Q are solved directly rather than by
# multiplying with an explicit inverse, which is the numerically safer route.
log_rows = np.array([I, np.log(K), np.log(L)])
beta = np.linalg.solve(x, log_rows @ np.log(Q))

# Order of the estimates: [ln A, alpha (capital), beta (labour)].
print(beta)

[0.91888887 0.5460661  1.02828269]


## Seasonal approach

In [4]:
import statsmodels.formula.api as smf

In [19]:
from IPython.display import display, Latex

In [7]:
# Response series: 12 observations.
y = np.array([2, 2, 3, 4, 3, 4, 5, 5, 3, 4, 3, 2])
n_obs = y.size

# Three 0/1 indicator regressors; positions 8-10 carry no indicator and so
# form the baseline group absorbed by the intercept.
x1, x2, x3 = (np.zeros(n_obs, dtype=int) for _ in range(3))
x1[[0, 1, 11]] = 1
x2[2:5] = 1
x3[5:8] = 1

# Intercept column (float ones) and the number of regressors excluding it.
I = np.ones(n_obs)
k = 3

In [9]:
# Regressors stored row-wise: intercept and the three indicator variables.
# Built once and reused, instead of constructing the same array twice inside
# a single expression.
seasonal_rows = np.array([I, x1, x2, x3])

# Cross-product matrix X'X of the normal equations (4 x 4).
x = seasonal_rows @ seasonal_rows.T
x

array([[12.,  3.,  3.,  3.],
       [ 3.,  3.,  0.,  0.],
       [ 3.,  0.,  3.,  0.],
       [ 3.,  0.,  0.,  3.]])

In [12]:
# Inverse of the cross-product matrix x of the indicator regression built above.
np.linalg.inv(x)

array([[ 0.33333333, -0.33333333, -0.33333333, -0.33333333],
       [-0.33333333,  0.66666667,  0.33333333,  0.33333333],
       [-0.33333333,  0.33333333,  0.66666667,  0.33333333],
       [-0.33333333,  0.33333333,  0.33333333,  0.66666667]])

In [11]:
# OLS estimate for y on the intercept and the three indicators.
# The normal equations (X'X) b = X'y are solved directly rather than by
# multiplying with an explicit inverse, which is the numerically safer route.
seasonal_rows = np.array([I, x1, x2, x3])
beta = np.linalg.solve(x, seasonal_rows @ y)

# Order of the estimates: [intercept, x1, x2, x3].
print(beta)

[ 3.33333333e+00 -1.33333333e+00  1.11022302e-16  1.33333333e+00]


In [63]:
# Residual sum of squares of the indicator regression.
# The residuals are rebuilt here so that this cell does not rely on `e`,
# which is only assigned in a later cell and is undefined on a fresh top-to-bottom run.
np.sum((y - beta @ np.array([I, x1, x2, x3])) ** 2)

np.float64(2.0)

In [23]:
# Residuals: observed values minus fitted values.
e = y - beta @ np.array([I, x1, x2, x3])

# Residual sum of squares, computed once and reused for the display below.
sse = np.sum(e ** 2)

# Unbiased estimate of the error variance: SSE divided by the residual
# degrees of freedom n - k - 1 (the displayed formula writes p for k).
sigma2 = sse / (len(y) - k - 1)
sigma = np.sqrt(sigma2)

# Raw strings keep the LaTeX backslashes literal; "\s" is not a valid Python
# escape sequence and triggers a warning in a normal string literal.
display(Latex(rf"$\sum  e^2 = {sse}$"))
display(Latex(r"$\sigma^2 = \frac{1}{n-p-1}\sum e^2" + f" =  {sigma2}$"))
display(Latex(rf"$\sigma = {sigma}$"))

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

In [44]:
# Standard error of coefficient j: sqrt(sigma2 * [(X'X)^{-1}]_jj).
# Invert x once and read its diagonal instead of re-inverting per coefficient.
xtx_inv_diag = np.diag(np.linalg.inv(x))
sigma_0, sigma_1, sigma_2, sigma_3 = np.sqrt(sigma2 * xtx_inv_diag)

# Show variance and standard error for the three indicator coefficients.
# Looping over (index, value) pairs keeps each label tied to its own value;
# the hand-copied version printed sigma_2**2 under the sigma_3 label.
# Raw strings keep the LaTeX backslashes literal ("\s" is not a valid escape).
for j, std_err in enumerate((sigma_1, sigma_2, sigma_3), start=1):
    display(Latex(rf"$\sigma^2_{j} = " + rf"\sigma^2x^{{-1}}_{{{j}{j}}}" + f" =  {std_err**2}$"))
    display(Latex(rf"$\sigma_{j} =  {std_err}$"))

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

In [65]:
# Overall mean of the response y.
np.mean(y)

np.float64(3.3333333333333335)

In [69]:
# Mean square due to regression: squared deviations of the fitted values
# from the mean of y, summed and divided by the number of regressors k.
fitted = beta @ np.array([I, x1, x2, x3])
deviations = fitted - np.mean(y)
msr = sum(dev ** 2 for dev in deviations) / k
msr

np.float64(3.555555555555555)

In [64]:
# Regression sum of squares: squared deviations of the fitted values from
# the mean of y (the numerator of msr, before dividing by k).
deviations = beta @ np.array([I, x1, x2, x3]) - np.mean(y)
sum(dev ** 2 for dev in deviations)

np.float64(10.666666666666664)

In [37]:
# Re-display the mean square due to regression.
msr

np.float64(3.555555555555555)

In [40]:
# Mean squared error is the residual variance estimate sigma2, i.e. SSE / (n - k - 1).
mse = sigma2

In [41]:
# F ratio of the regression: mean square due to regression over the residual variance.
msr/sigma2

np.float64(14.22222222222222)

In [13]:
# Collect the indicators and the response in one DataFrame so the formula
# API can refer to them by column name.
df = pd.DataFrame({"x1": x1, "x2": x2, "x3": x3, "y": y})

In [14]:
# Regressor table with a constant column added for the intercept term.
# This is the input form needed by the non-formula API (sm.OLS).
# NOTE(review): no later cell in this part of the notebook uses X_with_constant;
# confirm whether an sm.OLS fit was intended or the cell can be removed.
X_with_constant = sm.add_constant(df[['x1', 'x2', 'x3']])

In [15]:
# Fit the same regression with the statsmodels formula API
# (statsmodels.formula.api, imported as smf); the formula adds the intercept.
seasonal_formula = "y ~ x1 + x2 + x3"
model_formula = smf.ols(formula=seasonal_formula, data=df)
results_formula = model_formula.fit()

In [16]:
# Print a heading followed by the full regression summary table.
print("Results from formula OLS:", results_formula.summary(), sep="\n")

Results from formula OLS:
                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.842
Model:                            OLS   Adj. R-squared:                  0.783
Method:                 Least Squares   F-statistic:                     14.22
Date:                Sat, 15 Nov 2025   Prob (F-statistic):            0.00143
Time:                        13:19:00   Log-Likelihood:                -6.2767
No. Observations:                  12   AIC:                             20.55
Df Residuals:                       8   BIC:                             22.49
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      3.3333     

In [45]:
# First line: standard errors of the four coefficients.
# Second line: t statistics, each coefficient divided by its standard error.
std_errs = (sigma_0, sigma_1, sigma_2, sigma_3)
print(*std_errs)
print(*(coef / std_err for coef, std_err in zip(beta, std_errs)))

0.28867513459481287 0.408248290463863 0.408248290463863 0.408248290463863
11.547005383792515 -3.265986323710904 2.7194799110210365e-16 3.2659863237109032


In [70]:
# Hand check of the indicator standard errors:
# sigma2 * [(X'X)^{-1}]_jj = (2/8) * (2/3) = 1/6, so the standard error is sqrt(1/6).
np.sqrt(1/6)

np.float64(0.408248290463863)

In [46]:
from scipy.stats import t, f

In [62]:
# Two-sided p-values for H0: coefficient = 0, using the t distribution with
# n - k - 1 residual degrees of freedom.
dof = len(y) - k - 1
t_stats = np.array([
    beta[0] / sigma_0,
    beta[1] / sigma_1,
    beta[2] / sigma_2,
    beta[3] / sigma_3,
])

# 2 * P(T > |t|) is correct whatever the sign of the statistic, so no tail has
# to be chosen by hand per coefficient and the result cannot exceed 1.
# The survival function also avoids the precision loss of 1 - cdf for large |t|.
p_values = 2 * t.sf(np.abs(t_stats), dof)
print(*p_values)

2.872879641957482e-06 0.01142455437783389 1.0000000000000002 0.011424554377833962


In [None]:
# Two-sided p-value for the x1 coefficient, taken as twice the lower-tail probability.
# NOTE(review): doubling the lower tail is only valid while beta[1]/sigma_1 is
# negative; this cell has not been executed and repeats part of the cell above.
2*t.cdf(beta[1]/sigma_1, len(y)-k-1)