In [2]:
import numpy as np # linear algebra 
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
# stats models: regression fitting via formulas
import statsmodels.formula.api as smf
# stats models: regression fitting via matrices of regression design
import statsmodels.api as sm

# Sleep equation
# Модель 1.1

In [4]:
# Load the sleep75 CSV from the course repository into a DataFrame.
df = pd.read_csv(
    filepath_or_buffer='https://raw.githubusercontent.com/artamonoff/Econometrica/master/python-notebooks/data-csv/sleep75.csv'
)
# Bare last expression: the frame is rendered as the cell output.
df

Unnamed: 0,age,black,case,clerical,construc,educ,earns74,gdhlth,inlf,leis1,...,spwrk75,totwrk,union,worknrm,workscnd,exper,yngkid,yrsmarr,hrwage,agesq
0,32,0,1,0.000000,0.000000,12,0,0,1,3529,...,0,3438,0,3438,0,14,0,13,7.070004,1024
1,31,0,2,0.000000,0.000000,14,9500,1,1,2140,...,0,5020,0,5020,0,11,0,0,1.429999,961
2,44,0,3,0.000000,0.000000,17,42500,1,1,4595,...,1,2815,0,2815,0,21,0,0,20.529997,1936
3,30,0,4,0.000000,0.000000,12,42500,1,1,3211,...,1,3786,0,3786,0,12,0,12,9.619998,900
4,64,0,5,0.000000,0.000000,14,2500,1,1,4052,...,1,2580,0,2580,0,44,0,33,2.750000,4096
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
701,45,0,702,0.182331,0.030075,12,5500,1,0,5069,...,1,2026,0,2026,0,27,0,18,,2025
702,34,0,703,0.182331,0.030075,10,2500,0,0,5885,...,0,675,1,465,210,18,0,4,,1156
703,37,0,704,0.182331,0.030075,12,3500,1,0,4719,...,1,1851,0,1851,0,19,0,17,,1369
704,54,0,705,0.182331,0.030075,17,32500,1,0,5149,...,1,1961,1,1481,480,31,0,22,,2916


## Спецификация модели:
$$
sleep = \beta_0 + \beta_1*totwrk + \beta_2*male
$$

In [4]:
# Model 1.1: specify the regression of sleep on totwrk and male via a formula
# and fit it by OLS.
sleep_eq1 = smf.ols('sleep~totwrk+male', df).fit()
# Estimated coefficients, rounded to two decimal places.
sleep_eq1.params.round(decimals=2)

Intercept    3573.20
totwrk         -0.17
male           88.84
dtype: float64

$$
sleep = 3573.2 - 0.17*totwrk + 88.84*male
$$

In [6]:
# Display the regression summary of the fitted result stored in sleep_eq1.
sleep_eq1.summary()

0,1,2,3
Dep. Variable:,sleep,R-squared:,0.112
Model:,OLS,Adj. R-squared:,0.109
Method:,Least Squares,F-statistic:,44.21
Date:,"Wed, 01 Mar 2023",Prob (F-statistic):,8.22e-19
Time:,09:30:32,Log-Likelihood:,-5263.8
No. Observations:,706,AIC:,10530.0
Df Residuals:,703,BIC:,10550.0
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,3573.2044,39.090,91.409,0.000,3496.456,3649.952
totwrk,-0.1682,0.018,-9.349,0.000,-0.204,-0.133
male,88.8352,34.386,2.584,0.010,21.325,156.346

0,1,2,3
Omnibus:,64.996,Durbin-Watson:,1.951
Prob(Omnibus):,0.0,Jarque-Bera (JB):,179.109
Skew:,-0.46,Prob(JB):,1.28e-39
Kurtosis:,5.29,Cond. No.,5890.0


### Интерпретация
1. При увеличении количества рабочих минут в неделю на единицу количество сна в неделю уменьшается в среднем на 0.17 минуты, при прочих равных
2. Мужчины спят в среднем на 88.84 минуты в неделю больше, чем женщины, при прочих равных

# Модель 1.2

## Спецификация модели:
$$
sleep=\beta_0+\beta_1*totwrk+\beta_2*male+\beta_3*smsa+\beta_4*age+\beta_5*south+\beta_6*yngkid+\beta_7*marr+\beta_8*union
$$

In [7]:
# Model 1.2: specify the extended sleep regression via a formula and fit it by OLS.
sleep_eq2 = smf.ols(
    'sleep~totwrk+male+smsa+age+south+yngkid+marr+union',
    df,
).fit()
# Estimated coefficients, rounded to two decimal places.
sleep_eq2.params.round(decimals=2)

Intercept    3446.83
totwrk         -0.17
male           87.11
smsa          -54.19
age             2.71
south         102.27
yngkid        -13.05
marr           31.36
union          11.87
dtype: float64

### Интерпретация
1. При увеличении количества рабочих минут в неделю на единицу количество сна в неделю уменьшается в среднем на 0.17 минуты, при прочих равных
2. Мужчины спят в среднем на 87.11 минуты в неделю больше


# Модель 1.3

## Спецификация модели:
$$
sleep=\beta_0+\beta_1*totwrk+\beta_2*male+\beta_3*smsa+\beta_4*age+\beta_5*south+\beta_6*yngkid+\beta_7*marr+\beta_8*log(hrwage)
$$

In [11]:
# Model 1.3: specify the regression via a formula and fit it by OLS; the hourly
# wage enters in logs through np.log(hrwage) inside the formula.
# The result is stored under its own name (sleep_eq3) so that the Model 1.2 fit
# kept in sleep_eq2 is not silently overwritten and the variable name matches
# the model number.
sleep_eq3 = smf.ols(formula='sleep~totwrk+male+smsa+age+south+yngkid+marr+np.log(hrwage)', data=df).fit()
# Estimated coefficients, rounded to two decimal places.
sleep_eq3.params.round(2)

Intercept         3440.19
totwrk              -0.16
male                36.87
smsa               -36.96
age                  2.37
south               76.27
yngkid              47.92
marr                53.34
np.log(hrwage)      -1.39
dtype: float64

### Интерпретация
1. Человек, проживающий в мегаполисе, спит в среднем на 36.96 минут в неделю меньше
2. При увеличении почасовой оплаты на 1% человек спит в среднем на 0.0139 минуты в неделю меньше (1.39/100), при прочих равных

# Wage equation
# Модель 2.1

In [17]:
# Load the wage2 CSV from the course repository into a DataFrame.
df_wage = pd.read_csv(
    filepath_or_buffer='https://raw.githubusercontent.com/artamonoff/Econometrica/master/python-notebooks/data-csv/wage2.csv'
)
# Bare last expression: the frame is rendered as the cell output.
df_wage

Unnamed: 0,wage,hours,IQ,KWW,educ,exper,tenure,age,married,black,south,urban,sibs,brthord,meduc,feduc,lwage
0,769,40,93,35,12,11,2,31,1,0,0,1,1,2.0,8.0,8.0,6.645091
1,808,50,119,41,18,11,16,37,1,0,0,1,1,,14.0,14.0,6.694562
2,825,40,108,46,14,11,9,33,1,0,0,1,1,2.0,14.0,14.0,6.715384
3,650,40,96,32,12,13,7,32,1,0,0,1,4,3.0,12.0,12.0,6.476973
4,562,40,74,27,11,14,5,34,1,0,0,1,10,6.0,6.0,11.0,6.331502
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
930,520,40,79,28,16,6,1,30,1,1,1,0,0,1.0,11.0,,6.253829
931,1202,40,102,32,13,10,3,31,1,0,1,1,7,7.0,8.0,6.0,7.091742
932,538,45,77,22,12,12,10,28,1,1,1,0,9,,7.0,,6.287858
933,873,44,109,25,12,12,12,28,1,0,1,0,1,1.0,,11.0,6.771935


## Спецификация: 
$$
log(wage) = \beta_0 + \beta_1*age + \beta_2*IQ
$$

In [18]:
# Model 2.1: OLS fit of log(wage) on age and IQ via the formula interface.
# Stored as wage_eq1 rather than labour_eq: this is a wage equation, and the
# name labour_eq is used further down for the output-equation fits, so sharing
# it would make the result depend on which cell was run last.
wage_eq1 = smf.ols(formula='np.log(wage)~age+IQ', data=df_wage).fit()
# Estimated coefficients, rounded to three decimal places.
wage_eq1.params.round(3)

Intercept    5.077
age          0.024
IQ           0.009
dtype: float64

### Интерпретация
1. Прирост возраста на 1 год увеличивает месячную зарплату на 2.4%
2. Увеличение результата теста IQ на 1 балл увеличивает месячную зарплату на 0.9%

# Модель 2.2

## Спецификация: 
$$
log(wage) = \beta_0 + \beta_1*age + \beta_2*IQ + \beta_3*south + \beta_4*urban + \beta_5*married + \beta_6*KWW
$$

In [19]:
# Model 2.2: OLS fit of log(wage) on the extended regressor set (formula interface).
wage_eq = smf.ols(
    'np.log(wage)~age+IQ+south+urban+married+KWW',
    df_wage,
).fit()
# Estimated coefficients, rounded to three decimal places.
wage_eq.params.round(decimals=3)

Intercept    5.126
age          0.014
IQ           0.007
south       -0.101
urban        0.165
married      0.191
KWW          0.007
dtype: float64

### Интерпретация
1. При увеличении возраста на 1 год, зарплата увеличивается на 1.4% при прочих равных
2. При увеличении IQ на 1 единицу, зарплата возрастает на 0.7% при прочих равных условиях

# Output equation
# Модель 3.1

In [20]:
# Load the Labour CSV from the course repository into a DataFrame.
df_lab = pd.read_csv(
    filepath_or_buffer='https://raw.githubusercontent.com/artamonoff/Econometrica/master/python-notebooks/data-csv/Labour.csv'
)
# Bare last expression: the frame is rendered as the cell output.
df_lab

Unnamed: 0,capital,labour,output,wage
0,2.606563,184,9.250759,43.080307
1,1.323237,91,3.664310,27.780016
2,22.093692,426,28.781516,44.467748
3,10.737851,72,4.124642,39.734710
4,1.161365,46,2.890150,34.650709
...,...,...,...,...
564,2.625403,20,1.424376,33.477545
565,1.276386,61,2.109048,26.300732
566,1.953869,117,6.241870,41.153979
567,1.318527,46,7.902237,66.720139


## Спецификация: 
$$
log(output) = \beta_0 + \beta_1*log(capital) + \beta_2*log(labour)
$$

In [21]:
# Model 3.1: OLS fit of log(output) on log(capital) and log(labour); the logs
# are taken inside the formula via np.log.
labour_eq = smf.ols(
    'np.log(output)~np.log(capital)+np.log(labour)',
    df_lab,
).fit()
# Estimated coefficients, rounded to three decimal places.
labour_eq.params.round(decimals=3)

Intercept         -1.711
np.log(capital)    0.208
np.log(labour)     0.715
dtype: float64

### Интерпретация
1. При увеличении количества капитала на 1%, кол-во выпуска увеличивается на 0.208%, при прочих равных
2. При увеличении количества рабочих на 1%, кол-во выпуска возрастает на 0.715% при прочих равных условиях

$$
log(output)=-1.711+0.208*log(capital)+0.715*log(labour)
$$

# Модель 3.2

## Спецификация: 
$$
log(output) = \beta_0 + \beta_1*log(capital) + \beta_2*log(labour) + \beta_3*log(wage)
$$

In [24]:
# Model 3.2: OLS fit of log(output) on log(capital), log(labour) and log(wage);
# the logs are taken inside the formula via np.log.
labour_eq = smf.ols(
    'np.log(output)~np.log(capital)+np.log(labour)+np.log(wage)',
    df_lab,
).fit()
# Estimated coefficients, rounded to three decimal places.
labour_eq.params.round(decimals=3)

Intercept         -5.007
np.log(capital)    0.149
np.log(labour)     0.720
np.log(wage)       0.921
dtype: float64

### Интерпретация
1. При увеличении количества капитала на 1%, кол-во выпуска увеличивается на 0.149%, при прочих равных
2. При увеличении количества рабочих на 1%, кол-во выпуска возрастает на 0.72% при прочих равных условиях

$$
log(output)=-5.007+0.149*log(capital)+0.72*log(labour)+0.921*log(wage)
$$

# Cost equation
# Модель 4.1

In [26]:
# Load the Electricity CSV from the course repository into a DataFrame.
df_elec = pd.read_csv(
    filepath_or_buffer='https://raw.githubusercontent.com/artamonoff/Econometrica/master/python-notebooks/data-csv/Electricity.csv'
)
# Bare last expression: the frame is rendered as the cell output.
df_elec

Unnamed: 0,cost,q,pl,sl,pk,sk,pf,sf
0,0.2130,8.0,6869.47,0.3291,64.945,0.4197,18.000,0.2512
1,3.0427,869.0,8372.96,0.1030,68.227,0.2913,21.067,0.6057
2,9.4059,1412.0,7960.90,0.0891,40.692,0.1567,41.530,0.7542
3,0.7606,65.0,8971.89,0.2802,41.243,0.1282,28.539,0.5916
4,2.2587,295.0,8218.40,0.1772,71.940,0.1623,39.200,0.6606
...,...,...,...,...,...,...,...,...
153,6.8293,946.6,10642.16,0.0883,43.600,0.1914,51.463,0.7203
154,3.7605,377.0,7432.24,0.2117,74.120,0.2274,33.436,0.5609
155,3.9822,391.0,5826.04,0.1926,78.288,0.0924,44.633,0.7151
156,30.1880,5317.0,9586.63,0.0845,78.008,0.2009,41.840,0.7147


## Спецификация: 
$$
log(cost) = \beta_0 + \beta_1*log(q)
$$

In [29]:
# Model 4.1: OLS fit of log(cost) on log(q) via the formula interface.
electricity_eq = smf.ols('np.log(cost)~np.log(q)', df_elec).fit()
# Estimated coefficients, rounded to three decimal places.
electricity_eq.params.round(decimals=3)

Intercept   -3.841
np.log(q)    0.836
dtype: float64

### Интерпретация
1. При увеличении общего выпуска электроэнергии на 1%, общие издержки за год увеличиваются на 0.836%

$$
log(cost) = -3.841 + 0.836*log(q)
$$

# Модель 4.2

## Спецификация: 
$$
log(cost) = \beta_0 + \beta_1*log(q) + \beta_2*log(pl) + \beta_3*log(pf) + \beta_4*log(pk)
$$

In [31]:
# Model 4.2: OLS fit of log(cost) on log(q), log(pl), log(pf) and log(pk);
# the logs are taken inside the formula via np.log.
electricity_eq = smf.ols(
    'np.log(cost)~np.log(q)+np.log(pl)+np.log(pf)+np.log(pk)',
    df_elec,
).fit()
# Estimated coefficients, rounded to three decimal places.
electricity_eq.params.round(decimals=3)

Intercept    -7.472
np.log(q)     0.838
np.log(pl)    0.044
np.log(pf)    0.713
np.log(pk)    0.188
dtype: float64

### Интерпретация
1. При увеличении общего выпуска электроэнергии на 1%, общие издержки за год увеличиваются на 0.838%

$$
log(cost) = -7.472 + 0.838*log(q) + 0.044*log(pl) + 0.713*log(pf) + 0.188*log(pk)
$$