## List 6

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import io
import statsmodels.formula.api as smf
import scipy.stats as stats

In [2]:
# Raw text of a regression results table, pasted as-is.
# Layout: for every regressor there is a line with its name and coefficient
# estimate, followed by a line with a parenthesised value (loaded later into
# the 'std.err. str' column), then a whitespace-only separator line.
# The trailing '*' marks are presumably significance stars; the model that
# produced the table is not shown in this notebook.
data_string = '''totwrk                       -0.169***         
                              (0.018)          
                                               
age                           2.689*           
                              (1.469)          
                                               
south                        101.568**         
                             (41.837)          
                                               
male                         87.669**          
                             (35.104)          
                                               
smsa                         -54.748*          
                             (33.123)          
                                               
yngkid                        -13.962          
                             (50.341)          
                                               
marr                          31.211           
                             (42.233)          
                                               
Constant                    3450.913***        
                             (80.726)  '''

In [4]:
# The raw text breaks down into 8 records of 3 tokens each (name, estimate,
# parenthesised standard error), separated by varying amounts of whitespace.
# Splitting on whitespace and reshaping with numpy recovers that 8x3 layout.
np.array(data_string.split()).reshape(8, 3)

array([['totwrk', '-0.169***', '(0.018)'],
       ['age', '2.689*', '(1.469)'],
       ['south', '101.568**', '(41.837)'],
       ['male', '87.669**', '(35.104)'],
       ['smsa', '-54.748*', '(33.123)'],
       ['yngkid', '-13.962', '(50.341)'],
       ['marr', '31.211', '(42.233)'],
       ['Constant', '3450.913***', '(80.726)']], dtype='<U11')

In [6]:
# Build the working table: one row per regressor, with the estimate and the
# standard error still kept as the raw strings taken from the pasted text.
column_labels = ['Coeff', 'Estimate', 'std.err. str']
table_cells = np.array(data_string.split()).reshape(8, 3)
df = pd.DataFrame(table_cells, columns=column_labels)
df

Unnamed: 0,Coeff,Estimate,std.err. str
0,totwrk,-0.169***,(0.018)
1,age,2.689*,(1.469)
2,south,101.568**,(41.837)
3,male,87.669**,(35.104)
4,smsa,-54.748*,(33.123)
5,yngkid,-13.962,(50.341)
6,marr,31.211,(42.233)
7,Constant,3450.913***,(80.726)


In [7]:
# Numeric estimates and standard errors.
# Extract the signed number from each raw cell, leaving behind the trailing
# '*' marks and the surrounding parentheses.
# The decimal point is escaped: an unescaped '.' matches ANY character, so a
# malformed cell such as '12x34' would be captured and then break astype(float).
# The fractional part is optional so that integer-valued cells still parse.
number_re = r'([-+]?\d+(?:\.\d+)?)'
# expand=False returns a Series (not a one-column DataFrame) for a single group.
df['Estimate_numb'] = df['Estimate'].str.extract(number_re, expand=False).astype(float)
df['Std.error'] = df['std.err. str'].str.extract(number_re, expand=False).astype(float)
df

Unnamed: 0,Coeff,Estimate,std.err. str,Estimate_numb,Std.error
0,totwrk,-0.169***,(0.018),-0.169,0.018
1,age,2.689*,(1.469),2.689,1.469
2,south,101.568**,(41.837),101.568,41.837
3,male,87.669**,(35.104),87.669,35.104
4,smsa,-54.748*,(33.123),-54.748,33.123
5,yngkid,-13.962,(50.341),-13.962,50.341
6,marr,31.211,(42.233),31.211,42.233
7,Constant,3450.913***,(80.726),3450.913,80.726


In [9]:
# Two-sided critical value of Student's t distribution used for the
# confidence intervals below.
alpha = 0.1  # significance level, i.e. a 1 - alpha = 0.9 confidence level
# 8 matches the number of rows in the coefficient table (7 regressors + Constant);
# 706 is presumably the sample size of the regression -- it is not shown in
# this notebook, so confirm against the original model output.
n_obs, n_params = 706, 8
resid_dof = n_obs - n_params
t_cr = stats.t.ppf(1 - alpha/2, resid_dof)
t_cr

1.6470395952054095

In [10]:
# Confidence-interval bounds for every coefficient:
# estimate -/+ t_cr * standard error, rounded to two decimals.
half_width = df['Std.error'] * t_cr
df['low'] = (df['Estimate_numb'] - half_width).round(2)
df['up'] = (df['Estimate_numb'] + half_width).round(2)
df

Unnamed: 0,Coeff,Estimate,std.err. str,Estimate_numb,Std.error,low,up
0,totwrk,-0.169***,(0.018),-0.169,0.018,-0.2,-0.14
1,age,2.689*,(1.469),2.689,1.469,0.27,5.11
2,south,101.568**,(41.837),101.568,41.837,32.66,170.48
3,male,87.669**,(35.104),87.669,35.104,29.85,145.49
4,smsa,-54.748*,(33.123),-54.748,33.123,-109.3,-0.19
5,yngkid,-13.962,(50.341),-13.962,50.341,-96.88,68.95
6,marr,31.211,(42.233),31.211,42.233,-38.35,100.77
7,Constant,3450.913***,(80.726),3450.913,80.726,3317.95,3583.87


$$
P(\hat{\beta}_j - t_{cr} \cdot s_j < \beta_j < \hat{\beta}_j + t_{cr} \cdot s_j) = \gamma
$$