In [4]:

import pandas as pd
import numpy as np
import statsmodels.formula.api as smf
import io
import scipy.stats as stats
import re

1 F-тест: значимость регрессии

In [10]:
# Summary statistics for the four fitted models, kept as a whitespace-aligned text table.
# Each row is written as its own literal so the trailing padding stays visible.
data_string = (
    'Observations      706        706         706         706    \n'
    'R2              0.132       0.131       0.128       0.007   \n'
    'Adjusted_R2     0.121       0.123       0.121       0.002  '
)

In [17]:
# Parse the whitespace-separated summary table; the first field of every row
# becomes the index label and the remaining fields are the per-model values.
# The separator is a raw string: '\s' inside a plain literal is an invalid
# escape sequence and triggers a SyntaxWarning on recent Python versions.
df = pd.read_csv(io.StringIO(data_string), sep=r'\s+', header=None, index_col=0)
df

Unnamed: 0_level_0,1,2,3,4
0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Observations,706.0,706.0,706.0,706.0
R2,0.132,0.131,0.128,0.007
Adjusted_R2,0.121,0.123,0.121,0.002


In [19]:
# Preview only: the result is not assigned, so `df` itself keeps the row labels as its index.
df.reset_index(drop=False)

Unnamed: 0,0,1,2,3,4
0,Observations,706.0,706.0,706.0,706.0
1,R2,0.132,0.131,0.128,0.007
2,Adjusted_R2,0.121,0.123,0.121,0.002


In [21]:
# Swap rows and columns so the statistics (Observations, R2, Adjusted_R2) become columns.
# This rebinds `df`, so re-running the cell on its own transposes the table back.
df = df.T
df

Unnamed: 0,Observations,R2,Adjusted_R2
1,706.0,0.132,0.121
2,706.0,0.131,0.123
3,706.0,0.128,0.121
4,706.0,0.007,0.002


In [22]:
# Values for the `m` column, one per row of `df`, in row order.
# Presumably the number of estimated coefficients (incl. intercept) per model — confirm.
coef_counts = [10, 8, 6, 5]
df['m'] = coef_counts
df

Unnamed: 0,Observations,R2,Adjusted_R2,m
1,706.0,0.132,0.121,10
2,706.0,0.131,0.123,8
3,706.0,0.128,0.121,6
4,706.0,0.007,0.002,5


In [37]:
# Observed F statistic per row:
#   F = R2 / (1 - R2) * (Observations - m) / (m - 1)
r2_ratio = df['R2'] / (1 - df['R2'])
df['F_obs'] = r2_ratio * (df['Observations'] - df['m']) / (df['m'] - 1)
df

Unnamed: 0,Observations,R2,Adjusted_R2,m,F_obs,F_cr,Znachimosty
1,706.0,0.132,0.121,10,11.760369,1.893317,znachim
2,706.0,0.131,0.123,8,15.031728,2.022681,znachim
3,706.0,0.128,0.121,6,20.550459,2.226901,znachim
4,706.0,0.007,0.002,5,1.235398,2.384638,znachim


In [40]:
# Critical value of the F distribution at the 1 - alpha quantile, computed per row
# with (m - 1, Observations - m) degrees of freedom.
alpha = 0.05
dof_numerator = df['m'] - 1
dof_denominator = df['Observations'] - df['m']
df['F_cr'] = stats.f.ppf(1 - alpha, dof_numerator, dof_denominator)
df

Unnamed: 0,Observations,R2,Adjusted_R2,m,F_obs,F_cr,Znachimosty
1,706.0,0.132,0.121,10,11.760369,1.893317,znachim
2,706.0,0.131,0.123,8,15.031728,2.022681,znachim
3,706.0,0.128,0.121,6,20.550459,2.226901,znachim
4,706.0,0.007,0.002,5,1.235398,2.384638,ne znachim


In [41]:
# Label each row: 'znachim' when the observed F exceeds the critical value,
# 'ne znachim' otherwise. Done column-wise instead of row by row.
exceeds_critical = df['F_obs'] > df['F_cr']
df['Znachimosty'] = np.where(exceeds_critical, 'znachim', 'ne znachim')
df

Unnamed: 0,Observations,R2,Adjusted_R2,m,F_obs,F_cr,Znachimosty
1,706.0,0.132,0.121,10,11.760369,1.893317,znachim
2,706.0,0.131,0.123,8,15.031728,2.022681,znachim
3,706.0,0.128,0.121,6,20.550459,2.226901,znachim
4,706.0,0.007,0.002,5,1.235398,2.384638,ne znachim


$$H_0: \beta_{1}=\dots=\beta_{k}=0$$
$$H_1: \beta_{1}^2+\dots+\beta_{k}^2>0$$

2.1 sleep equation #1

$$H_0: \beta_{marr}=\beta_{union}=\beta_{ynkid}=\beta_{smsa}=0$$
$$H_1: \beta_{marr}^2+\beta_{union}^2+\beta_{ynkid}^2+\beta_{smsa}^2>0$$

In [44]:
# Pull the two R^2 values (first match -> `unrest`, second -> `rest`) out of the
# pasted summary row.
# - The decimal point is escaped so it matches a literal '.', not any character.
# - `np.float_` was removed in NumPy 2.0; an explicit float array is used instead.
#   The elements stay NumPy scalars, so a later `.round(...)` call keeps working.
r2_matches = re.findall(r'[-+]?\d+\.\d+', 'R2                      0.131          0.127')
unrest, rest = np.array(r2_matches, dtype=float)
print(unrest, rest)

0.131 0.127


In [46]:
# Observed F statistic for the joint restriction:
#   F = (R2_unrest - R2_rest) / (1 - R2_unrest) * (706 - 9) / 4
# The constants are kept exactly as written; presumably 706 observations,
# 9 coefficients in the unrestricted model and 4 restrictions — confirm.
r2_gain = unrest - rest
F_obs = r2_gain / (1 - unrest) * (706 - 9) / 4
F_obs.round(3)

0.802

In [48]:
# Critical value: the (1 - alpha) quantile of F with (4, 706 - 9) degrees of freedom.
alpha = 0.05
dof_restrictions, dof_residual = 4, 706 - 9
F_cr = stats.f.ppf(1 - alpha, dof_restrictions, dof_residual)
F_cr.round(3)

2.385

$F_{obs}<F_{cr}$, следовательно, данные согласуются с гипотезой $H_0$: коэффициенты совместно незначимы.

# Задача 3.1.1

In [51]:
# Load the Electricity dataset directly from its raw GitHub URL.
# Note: this rebinds `df`, replacing the table built in the previous section.
electricity_url = 'https://raw.githubusercontent.com/artamonoff/Econometrica/master/python-notebooks/data-csv/Electricity.csv'
df = pd.read_csv(electricity_url)
df

Unnamed: 0,cost,q,pl,sl,pk,sk,pf,sf
0,0.2130,8.0,6869.47,0.3291,64.945,0.4197,18.000,0.2512
1,3.0427,869.0,8372.96,0.1030,68.227,0.2913,21.067,0.6057
2,9.4059,1412.0,7960.90,0.0891,40.692,0.1567,41.530,0.7542
3,0.7606,65.0,8971.89,0.2802,41.243,0.1282,28.539,0.5916
4,2.2587,295.0,8218.40,0.1772,71.940,0.1623,39.200,0.6606
...,...,...,...,...,...,...,...,...
153,6.8293,946.6,10642.16,0.0883,43.600,0.1914,51.463,0.7203
154,3.7605,377.0,7432.24,0.2117,74.120,0.2274,33.436,0.5609
155,3.9822,391.0,5826.04,0.1926,78.288,0.0924,44.633,0.7151
156,30.1880,5317.0,9586.63,0.0845,78.008,0.2009,41.840,0.7147


In [67]:
# Cost function regression: log(cost) on log(q), log(q)^2 and the log input prices.
# Inside a formula, `**` is a formula operator (interaction expansion), so a bare
# `np.log(q)**2` reduces to plain `np.log(q)` and the squared term is never estimated.
# Wrapping the expression in I(...) makes it ordinary arithmetic.
elec_eq = smf.ols(
    data=df,
    formula='np.log(cost)~np.log(q)+I(np.log(q)**2)+np.log(pl)+np.log(pk)+np.log(pf)',
).fit()
                