In [None]:
library("lmtest")
library("GGally")
library("car")
library("rlms")
library("dplyr")
library("sandwich")

In [None]:
data <- rlms_read("r23i_os26c.sav")

In [11]:
data = select(
    data,
    sh5,      # пол
    s_age,    # возраст
    s_marst,  # семейное положение
    s_diplom, # образование
    status,   # тип населенного пункта
    sj13.2,   # средняя зарплата за последний год
    sj6.2)    # средняя рабочая неделя
data = na.omit(data)
glimpse(data)

Rows: 4,572
Columns: 7
$ sh5      [3m[90m<dbl+lbl>[39m[23m 2, 2, 2, 1, 2, 2, 2, 1, 2, 1, 1, 2, 1, 1, 2, 2, 2, 2, 2, …
$ s_age    [3m[90m<dbl+lbl>[39m[23m 59, 46, 56, 60, 61, 60, 61, 59, 51, 45, 33, 52, 51, 52, 5…
$ s_marst  [3m[90m<dbl+lbl>[39m[23m 5, 2, 2, 2, 5, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 4, 5, 2, 5, …
$ s_diplom [3m[90m<dbl+lbl>[39m[23m 4, 4, 4, 4, 5, 5, 4, 2, 4, 4, 3, 6, 6, 4, 5, 5, 4, 6, 5, …
$ status   [3m[90m<dbl+lbl>[39m[23m 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 1, 1, …
$ sj13.2   [3m[90m<dbl+lbl>[39m[23m 25000, 15000, 16000, 23000, 13000,  8000, 20000, 12000, 2…
$ sj6.2    [3m[90m<dbl+lbl>[39m[23m 48, 40, 37, 50, 48, 33, 60, 48, 36, 40, 40, 40, 40, 48, 2…


In [13]:
data_normalized = select(data,) # База данных для нормализованных значений

In [14]:
# пол
data_normalized["sex"] = 0
data_normalized$sex[which(data$sh5 == 1)] <- 1

In [15]:
# возраст
age = data$s_age
data_normalized["age"] = (age - mean(age)) / sqrt(var(age))

In [16]:
# семейное положение

# в браке/не в браке
data_normalized$wed1 = 0
data_normalized$wed1[which(data$s_marst==2)] <- 1
data_normalized$wed1[which(data$s_marst==6)] <- 1

# разведен/вдовец
data_normalized$wed2 = 0
data_normalized$wed2[which(data$s_marst==4)] <- 1
data_normalized$wed2[which(data$s_marst==5)] <- 1

# состоял/не состоял в браке
data_normalized$wed3 = 0
data_normalized$wed3[which(data$s_marst==1)] <- 1

# проверяем наличие линейной зависимости между переменными
vif(lm(data$sj13.2 ~ data_normalized$wed1 + data_normalized$wed2 + data_normalized$wed3)) 

In [17]:
# есть/нет высшее образование
data_normalized$higher_educ = 0
data_normalized$higher_educ[which(data$s_diplom==6)] <- 1

In [18]:
# живет/не живет в городе
data_normalized$city_status = 0
data_normalized$city_status[which(data$status==1)] <- 1
data_normalized$city_status[which(data$status==2)] <- 1

In [19]:
# среднее количество рабочих часов в неделю (нормализованное)
working_hours = data$sj6.2
data_normalized$working_hours = (working_hours - mean(working_hours)) / sqrt(var(working_hours))

In [20]:
# средняя зарплата (нормализованная)
salary = data$sj13.2
data_normalized$salary = (salary - mean(salary)) / sqrt(var(salary))

In [22]:
model1 = lm(data = data_normalized, salary ~ sex + age + wed1 + wed2 + wed3 + higher_educ + city_status + working_hours)
vif(model1) # корреляцией между перменными можно пренебречь (1<корреляция<5)
summary(model1)
# R^2 ~ 0,154
# p-статистика очень плохая для переменных wed1 и wed2


Call:
lm(formula = salary ~ sex + age + wed1 + wed2 + wed3 + higher_educ + 
    city_status + working_hours, data = data_normalized)

Residuals:
    Min      1Q  Median      3Q     Max 
-2.0993 -0.4952 -0.1422  0.2731 15.3665 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept)   -0.63161    0.04817 -13.113  < 2e-16 ***
sex            0.42071    0.02906  14.479  < 2e-16 ***
age           -0.04542    0.01532  -2.964  0.00305 ** 
wed1           0.03503    0.04300   0.815  0.41535    
wed2           0.02429    0.05441   0.446  0.65531    
wed3          -0.13327    0.05398  -2.469  0.01358 *  
higher_educ    0.55315    0.02960  18.687  < 2e-16 ***
city_status    0.34525    0.03116  11.080  < 2e-16 ***
working_hours  0.13145    0.01405   9.357  < 2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.9207 on 4563 degrees of freedom
Multiple R-squared:  0.1539,	Adjusted R-squared:  0.1524 
F-statistic: 103.7 on 

In [23]:
model1 = lm(data = data_normalized, salary ~ sex + age + wed3 + higher_educ + city_status + working_hours)
vif(model1) # корреляцией между перменными можно пренебречь (1<корреляция<5)
summary(model1)
# R^2 ~ 0,154 - совершенно не изменился после удаления незначительных переменных
# В дальнейшем будем использовать эту модель


Call:
lm(formula = salary ~ sex + age + wed3 + higher_educ + city_status + 
    working_hours, data = data_normalized)

Residuals:
    Min      1Q  Median      3Q     Max 
-2.0925 -0.4960 -0.1426  0.2747 15.3723 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept)   -0.60373    0.03096 -19.499  < 2e-16 ***
sex            0.42217    0.02811  15.020  < 2e-16 ***
age           -0.04431    0.01487  -2.981  0.00289 ** 
wed3          -0.16012    0.04220  -3.794  0.00015 ***
higher_educ    0.55489    0.02952  18.797  < 2e-16 ***
city_status    0.34353    0.03108  11.055  < 2e-16 ***
working_hours  0.13140    0.01405   9.355  < 2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.9205 on 4565 degrees of freedom
Multiple R-squared:  0.1538,	Adjusted R-squared:  0.1526 
F-statistic: 138.2 on 6 and 4565 DF,  p-value: < 2.2e-16


In [24]:
# логарифмы
model1 = lm(data = data_normalized, salary ~ sex + working_hours + age + wed3 + higher_educ + city_status + I(log(working_hours)) + I(log(age)))
data = na.omit(data)
vif(model1)
summary(model1)
# R^2 ~ 0.21
# Плохие VIF и p-статистика

“NaNs produced”
“NaNs produced”



Call:
lm(formula = salary ~ sex + working_hours + age + wed3 + higher_educ + 
    city_status + I(log(working_hours)) + I(log(age)), data = data_normalized)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.8613 -0.4870 -0.1407  0.2778  5.5884 

Coefficients:
                       Estimate Std. Error t value Pr(>|t|)    
(Intercept)           -0.400036   0.178717  -2.238 0.025509 *  
sex                    0.394531   0.064386   6.128 1.49e-09 ***
working_hours          0.150452   0.072520   2.075 0.038385 *  
age                   -0.413392   0.115648  -3.575 0.000375 ***
wed3                  -0.077339   0.184072  -0.420 0.674501    
higher_educ            0.647010   0.084304   7.675 5.56e-14 ***
city_status            0.409445   0.069111   5.924 4.91e-09 ***
I(log(working_hours))  0.005042   0.076073   0.066 0.947177    
I(log(age))            0.049540   0.054440   0.910 0.363135    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 

In [25]:
model1 = lm(data = data_normalized, salary ~ sex + working_hours + age + wed3 + higher_educ + city_status + I(log(age)))
vif(model1)
summary(model1)
# R^2 ~ 0.179
# приемлемый VIF, плохая p-статистика для wed3 и log(age)

“NaNs produced”



Call:
lm(formula = salary ~ sex + working_hours + age + wed3 + higher_educ + 
    city_status + I(log(age)), data = data_normalized)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.8761 -0.4719 -0.1546  0.2382 12.7759 

Coefficients:
                Estimate Std. Error t value Pr(>|t|)    
(Intercept)   -0.3862657  0.0762609  -5.065 4.42e-07 ***
sex            0.3292445  0.0387949   8.487  < 2e-16 ***
working_hours  0.1378853  0.0193108   7.140 1.25e-12 ***
age           -0.2901435  0.0605743  -4.790 1.78e-06 ***
wed3           0.0004961  0.0967844   0.005    0.996    
higher_educ    0.6297315  0.0419518  15.011  < 2e-16 ***
city_status    0.3908087  0.0410071   9.530  < 2e-16 ***
I(log(age))    0.0270292  0.0328456   0.823    0.411    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.8778 on 2228 degrees of freedom
  (2336 observations deleted due to missingness)
Multiple R-squared:  0.1792,	Adjusted R-squared:  0.1766 
F-statisti

In [26]:
model1 = lm(data = data_normalized, salary ~ sex + age + wed3 + higher_educ + city_status + I(log(working_hours)) + I(log(age)))
vif(model1)
summary(model1)
# R^2 ~ 0.2082
# приемлемый VIF, плохая p-статистика для wed3 и log(age)

“NaNs produced”
“NaNs produced”



Call:
lm(formula = salary ~ sex + age + wed3 + higher_educ + city_status + 
    I(log(working_hours)) + I(log(age)), data = data_normalized)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.8370 -0.4920 -0.1491  0.2900  5.6769 

Coefficients:
                      Estimate Std. Error t value Pr(>|t|)    
(Intercept)           -0.16741    0.13949  -1.200 0.230491    
sex                    0.40447    0.06436   6.285 5.76e-10 ***
age                   -0.43232    0.11556  -3.741 0.000198 ***
wed3                  -0.06969    0.18447  -0.378 0.705690    
higher_educ            0.63628    0.08434   7.544 1.41e-13 ***
city_status            0.40799    0.06927   5.890 5.99e-09 ***
I(log(working_hours))  0.14659    0.03373   4.347 1.59e-05 ***
I(log(age))            0.05669    0.05446   1.041 0.298244    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.8452 on 703 degrees of freedom
  (3861 observations deleted due to missingness)
Multipl

In [27]:
model1 = lm(data = data_normalized, salary ~ sex + higher_educ + city_status + I(log(working_hours)) + I(log(age)))
vif(model1)
summary(model1)
# R^2 ~ 0.1924
# приемлемый VIF, отличная p-статистика

“NaNs produced”
“NaNs produced”



Call:
lm(formula = salary ~ sex + higher_educ + city_status + I(log(working_hours)) + 
    I(log(age)), data = data_normalized)

Residuals:
    Min      1Q  Median      3Q     Max 
-2.0121 -0.5129 -0.1591  0.2921  5.7837 

Coefficients:
                      Estimate Std. Error t value Pr(>|t|)    
(Intercept)           -0.61335    0.07337  -8.360 3.34e-16 ***
sex                    0.40160    0.06471   6.206 9.26e-10 ***
higher_educ            0.63431    0.08504   7.459 2.57e-13 ***
city_status            0.40314    0.06974   5.780 1.12e-08 ***
I(log(working_hours))  0.15653    0.03390   4.617 4.63e-06 ***
I(log(age))           -0.11755    0.02829  -4.155 3.65e-05 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.8524 on 705 degrees of freedom
  (3861 observations deleted due to missingness)
Multiple R-squared:  0.1924,	Adjusted R-squared:  0.1866 
F-statistic: 33.58 on 5 and 705 DF,  p-value: < 2.2e-16


In [28]:
# степени
current_pow = 0.1
model1 = lm(data = data_normalized, salary ~ sex + working_hours + age + wed3 + higher_educ + city_status + I(working_hours^current_pow) + I(age^current_pow))
vif(model1)
summary(model1) 
# R^2 ~  0.213
# плохой VIF, плохая p-статистика


Call:
lm(formula = salary ~ sex + working_hours + age + wed3 + higher_educ + 
    city_status + I(working_hours^current_pow) + I(age^current_pow), 
    data = data_normalized)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.8641 -0.4866 -0.1413  0.2797  5.5844 

Coefficients:
                             Estimate Std. Error t value Pr(>|t|)    
(Intercept)                  -1.17746    0.98953  -1.190 0.234479    
sex                           0.39483    0.06439   6.132 1.45e-09 ***
working_hours                 0.14071    0.08226   1.711 0.087592 .  
age                          -0.42897    0.12886  -3.329 0.000917 ***
wed3                         -0.07694    0.18406  -0.418 0.676076    
higher_educ                   0.64689    0.08430   7.674 5.60e-14 ***
city_status                   0.40972    0.06911   5.928 4.80e-09 ***
I(working_hours^current_pow)  0.16587    0.89509   0.185 0.853036    
I(age^current_pow)            0.64218    0.69312   0.927 0.354503    
---
Signif. co

In [29]:
model1 = lm(data = data_normalized, salary ~ sex + wed3 + higher_educ + city_status + I(working_hours^current_pow) + I(age^current_pow))
vif(model1)
summary(model1) 
# R^2 ~  0.196
# отличный VIF, плохое p-значение у wed3


Call:
lm(formula = salary ~ sex + wed3 + higher_educ + city_status + 
    I(working_hours^current_pow) + I(age^current_pow), data = data_normalized)

Residuals:
    Min      1Q  Median      3Q     Max 
-2.0211 -0.5127 -0.1541  0.2967  5.7492 

Coefficients:
                             Estimate Std. Error t value Pr(>|t|)    
(Intercept)                  -0.88509    0.47906  -1.848   0.0651 .  
sex                           0.39844    0.06480   6.148 1.31e-09 ***
wed3                         -0.04638    0.18557  -0.250   0.8027    
higher_educ                   0.63733    0.08493   7.504 1.87e-13 ***
city_status                   0.40380    0.06973   5.791 1.05e-08 ***
I(working_hours^current_pow)  1.67312    0.35104   4.766 2.28e-06 ***
I(age^current_pow)           -1.40726    0.32311  -4.355 1.53e-05 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.8509 on 704 degrees of freedom
  (3861 observations deleted due to missingness)
Multip

In [30]:
model1 = lm(data = data_normalized, salary ~ sex + higher_educ + city_status + I(working_hours^current_pow) + I(age^current_pow))
vif(model1)
summary(model1) 
# R^2 ~  0.196
# отличный VIF, отличная p-статистика


Call:
lm(formula = salary ~ sex + higher_educ + city_status + I(working_hours^current_pow) + 
    I(age^current_pow), data = data_normalized)

Residuals:
    Min      1Q  Median      3Q     Max 
-2.0188 -0.5103 -0.1557  0.2884  5.7506 

Coefficients:
                             Estimate Std. Error t value Pr(>|t|)    
(Intercept)                  -0.88950    0.47842  -1.859   0.0634 .  
sex                           0.39967    0.06457   6.190 1.02e-09 ***
higher_educ                   0.63689    0.08486   7.505 1.85e-13 ***
city_status                   0.40284    0.06957   5.790 1.06e-08 ***
I(working_hours^current_pow)  1.67215    0.35079   4.767 2.27e-06 ***
I(age^current_pow)           -1.40301    0.32245  -4.351 1.55e-05 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.8503 on 705 degrees of freedom
  (3861 observations deleted due to missingness)
Multiple R-squared:  0.1963,	Adjusted R-squared:  0.1906 
F-statistic: 34.44 on 5 a

In [31]:
current_pow = 0.2
model1 = lm(data = data_normalized, salary ~ sex + working_hours + age + wed3 + higher_educ + city_status + I(working_hours^current_pow) + I(age^current_pow))
vif(model1)
summary(model1) 
# R^2 ~  0.213
# плохой VIF, плохая p-статистика


Call:
lm(formula = salary ~ sex + working_hours + age + wed3 + higher_educ + 
    city_status + I(working_hours^current_pow) + I(age^current_pow), 
    data = data_normalized)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.8675 -0.4855 -0.1418  0.2823  5.5802 

Coefficients:
                             Estimate Std. Error t value Pr(>|t|)    
(Intercept)                  -0.90802    0.51566  -1.761  0.07869 .  
sex                           0.39514    0.06439   6.137 1.41e-09 ***
working_hours                 0.12753    0.09456   1.349  0.17785    
age                          -0.44827    0.14599  -3.070  0.00222 ** 
wed3                         -0.07654    0.18405  -0.416  0.67766    
higher_educ                   0.64679    0.08429   7.673 5.63e-14 ***
city_status                   0.40995    0.06912   5.931 4.71e-09 ***
I(working_hours^current_pow)  0.16134    0.52757   0.306  0.75983    
I(age^current_pow)            0.41538    0.44177   0.940  0.34741    
---
Signif. co

In [32]:
model1 = lm(data = data_normalized, salary ~ sex  + age + wed3 + higher_educ + city_status + I(working_hours^current_pow) + I(age^current_pow))
vif(model1)
summary(model1) 
# R^2 ~  0.21
# плохой VIF, плохие p-значения переменных wed3 и age


Call:
lm(formula = salary ~ sex + age + wed3 + higher_educ + city_status + 
    I(working_hours^current_pow) + I(age^current_pow), data = data_normalized)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.8644 -0.4857 -0.1433  0.2832  5.6421 

Coefficients:
                             Estimate Std. Error t value Pr(>|t|)    
(Intercept)                  -1.42408    0.34589  -4.117 4.29e-05 ***
sex                           0.40080    0.06429   6.234 7.82e-10 ***
age                          -0.46020    0.14581  -3.156  0.00167 ** 
wed3                         -0.07175    0.18413  -0.390  0.69688    
higher_educ                   0.64108    0.08424   7.611 8.80e-14 ***
city_status                   0.40930    0.06915   5.919 5.07e-09 ***
I(working_hours^current_pow)  0.83073    0.17904   4.640 4.16e-06 ***
I(age^current_pow)            0.44828    0.44135   1.016  0.31012    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.8437 on 70

In [33]:
model1 = lm(data = data_normalized, salary ~ sex + wed3 + higher_educ + city_status + I(working_hours^current_pow) + I(age^current_pow))
vif(model1)
summary(model1) 
# R^2 ~  0.199
# хороший VIF, плохое p-значение переменной wed3


Call:
lm(formula = salary ~ sex + wed3 + higher_educ + city_status + 
    I(working_hours^current_pow) + I(age^current_pow), data = data_normalized)

Residuals:
    Min      1Q  Median      3Q     Max 
-2.0223 -0.5123 -0.1477  0.2946  5.7182 

Coefficients:
                             Estimate Std. Error t value Pr(>|t|)    
(Intercept)                  -0.67986    0.25467  -2.670  0.00777 ** 
sex                           0.39672    0.06468   6.133 1.43e-09 ***
wed3                         -0.05172    0.18519  -0.279  0.78009    
higher_educ                   0.63960    0.08477   7.545 1.40e-13 ***
city_status                   0.40366    0.06957   5.802 9.89e-09 ***
I(working_hours^current_pow)  0.87620    0.17959   4.879 1.32e-06 ***
I(age^current_pow)           -0.82324    0.18138  -4.539 6.65e-06 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.849 on 704 degrees of freedom
  (3861 observations deleted due to missingness)
Multipl

In [35]:
model1 = lm(data = data_normalized, salary ~ sex + higher_educ + city_status + I(working_hours^current_pow) + I(age^current_pow))
vif(model1)
summary(model1) 
# R^2 ~  0.199
# хороший VIF, отличная p-статистика


Call:
lm(formula = salary ~ sex + higher_educ + city_status + I(working_hours^current_pow) + 
    I(age^current_pow), data = data_normalized)

Residuals:
    Min      1Q  Median      3Q     Max 
-2.0198 -0.5101 -0.1450  0.2837  5.7198 

Coefficients:
                             Estimate Std. Error t value Pr(>|t|)    
(Intercept)                  -0.68330    0.25420  -2.688  0.00736 ** 
sex                           0.39810    0.06445   6.177 1.11e-09 ***
higher_educ                   0.63910    0.08469   7.546 1.39e-13 ***
city_status                   0.40259    0.06942   5.799 1.00e-08 ***
I(working_hours^current_pow)  0.87561    0.17946   4.879 1.32e-06 ***
I(age^current_pow)           -0.82046    0.18098  -4.533 6.82e-06 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.8485 on 705 degrees of freedom
  (3861 observations deleted due to missingness)
Multiple R-squared:  0.1998,	Adjusted R-squared:  0.1942 
F-statistic: 35.21 on 5 a

In [36]:
current_pow = 0.3
model1 = lm(data = data_normalized, salary ~ sex + working_hours + age + wed3 + higher_educ + city_status + I(working_hours^current_pow) + I(age^current_pow))
vif(model1)
summary(model1) 
# R^2 ~  0.213
# очень плохой VIF, очень плохая p-статистика


Call:
lm(formula = salary ~ sex + working_hours + age + wed3 + higher_educ + 
    city_status + I(working_hours^current_pow) + I(age^current_pow), 
    data = data_normalized)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.8713 -0.4836 -0.1414  0.2770  5.5828 

Coefficients:
                             Estimate Std. Error t value Pr(>|t|)    
(Intercept)                  -0.81931    0.35960  -2.278  0.02300 *  
sex                           0.39543    0.06438   6.142 1.37e-09 ***
working_hours                 0.10976    0.11043   0.994  0.32061    
age                          -0.47270    0.16866  -2.803  0.00521 ** 
wed3                         -0.07614    0.18404  -0.414  0.67921    
higher_educ                   0.64672    0.08429   7.673 5.64e-14 ***
city_status                   0.41014    0.06912   5.934 4.64e-09 ***
I(working_hours^current_pow)  0.17740    0.41683   0.426  0.67053    
I(age^current_pow)            0.35859    0.37722   0.951  0.34213    
---
Signif. co

In [37]:
model1 = lm(data = data_normalized, salary ~ sex + wed3 + higher_educ + city_status + I(working_hours^current_pow) + I(age^current_pow))
vif(model1)
summary(model1) 
# R^2 ~  0.20
# отличный VIF, плохое p-значение у переменной wed3


Call:
lm(formula = salary ~ sex + wed3 + higher_educ + city_status + 
    I(working_hours^current_pow) + I(age^current_pow), data = data_normalized)

Residuals:
    Min      1Q  Median      3Q     Max 
-2.0183 -0.5054 -0.1439  0.2860  5.6900 

Coefficients:
                             Estimate Std. Error t value Pr(>|t|)    
(Intercept)                  -0.60629    0.18244  -3.323 0.000936 ***
sex                           0.39536    0.06458   6.122 1.53e-09 ***
wed3                         -0.05661    0.18485  -0.306 0.759495    
higher_educ                   0.64150    0.08463   7.580 1.09e-13 ***
city_status                   0.40361    0.06943   5.813 9.30e-09 ***
I(working_hours^current_pow)  0.60119    0.12128   4.957 8.97e-07 ***
I(age^current_pow)           -0.62980    0.13381  -4.707 3.03e-06 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.8474 on 704 degrees of freedom
  (3861 observations deleted due to missingness)
Multip

In [38]:
model1 = lm(data = data_normalized, salary ~ sex + higher_educ + city_status + I(working_hours^current_pow) + I(age^current_pow))
vif(model1)
summary(model1) 
# R^2 ~  0.202
# хороший VIF, отличная p-статистика


Call:
lm(formula = salary ~ sex + higher_educ + city_status + I(working_hours^current_pow) + 
    I(age^current_pow), data = data_normalized)

Residuals:
    Min      1Q  Median      3Q     Max 
-2.0156 -0.5041 -0.1420  0.2841  5.6919 

Coefficients:
                             Estimate Std. Error t value Pr(>|t|)    
(Intercept)                  -0.60952    0.18202  -3.349 0.000856 ***
sex                           0.39687    0.06435   6.167 1.17e-09 ***
higher_educ                   0.64096    0.08455   7.580 1.09e-13 ***
city_status                   0.40244    0.06928   5.809 9.53e-09 ***
I(working_hours^current_pow)  0.60073    0.12119   4.957 8.98e-07 ***
I(age^current_pow)           -0.62747    0.13351  -4.700 3.13e-06 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.8469 on 705 degrees of freedom
  (3861 observations deleted due to missingness)
Multiple R-squared:  0.2029,	Adjusted R-squared:  0.1972 
F-statistic: 35.89 on 5 a

In [39]:
current_pow = 0.4
model1 = lm(data = data_normalized, salary ~ sex + working_hours + age + wed3 + higher_educ + city_status + I(working_hours^current_pow) + I(age^current_pow))
vif(model1)
summary(model1) 
# R^2 ~  0.213
# очень плохой VIF, очень плохая p-статистика


Call:
lm(formula = salary ~ sex + working_hours + age + wed3 + higher_educ + 
    city_status + I(working_hours^current_pow) + I(age^current_pow), 
    data = data_normalized)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.8755 -0.4852 -0.1406  0.2766  5.5875 

Coefficients:
                             Estimate Std. Error t value Pr(>|t|)    
(Intercept)                  -0.77485    0.28271  -2.741  0.00628 ** 
sex                           0.39569    0.06438   6.146 1.33e-09 ***
working_hours                 0.08549    0.13159   0.650  0.51610    
age                          -0.50465    0.19953  -2.529  0.01165 *  
wed3                         -0.07577    0.18402  -0.412  0.68067    
higher_educ                   0.64667    0.08428   7.673 5.63e-14 ***
city_status                   0.41028    0.06912   5.936 4.59e-09 ***
I(working_hours^current_pow)  0.20316    0.37443   0.543  0.58759    
I(age^current_pow)            0.35030    0.36600   0.957  0.33884    
---
Signif. co

In [40]:
model1 = lm(data = data_normalized, salary ~ sex + wed3 + higher_educ + city_status + I(working_hours^current_pow) + I(age^current_pow))
vif(model1)
summary(model1) 
# R^2 ~  0.205
# хороший VIF, плохое p-значение у переменной wed3


Call:
lm(formula = salary ~ sex + wed3 + higher_educ + city_status + 
    I(working_hours^current_pow) + I(age^current_pow), data = data_normalized)

Residuals:
    Min      1Q  Median      3Q     Max 
-2.0100 -0.4980 -0.1441  0.2920  5.6650 

Coefficients:
                             Estimate Std. Error t value Pr(>|t|)    
(Intercept)                  -0.56748    0.14782  -3.839 0.000135 ***
sex                           0.39435    0.06449   6.115 1.60e-09 ***
wed3                         -0.06098    0.18458  -0.330 0.741219    
higher_educ                   0.64306    0.08451   7.609 8.85e-14 ***
city_status                   0.40368    0.06931   5.824 8.74e-09 ***
I(working_hours^current_pow)  0.45698    0.09130   5.006 7.05e-07 ***
I(age^current_pow)           -0.53232    0.10958  -4.858 1.46e-06 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.846 on 704 degrees of freedom
  (3861 observations deleted due to missingness)
Multipl

In [41]:
model1 = lm(data = data_normalized, salary ~ sex + higher_educ + city_status + I(working_hours^current_pow) + I(age^current_pow))
vif(model1)
summary(model1) 
# R^2 ~  0.205
# хороший VIF, отличная p-статистика


Call:
lm(formula = salary ~ sex + higher_educ + city_status + I(working_hours^current_pow) + 
    I(age^current_pow), data = data_normalized)

Residuals:
    Min      1Q  Median      3Q     Max 
-2.0071 -0.4953 -0.1426  0.2889  5.6671 

Coefficients:
                             Estimate Std. Error t value Pr(>|t|)    
(Intercept)                  -0.57066    0.14742  -3.871 0.000118 ***
sex                           0.39597    0.06426   6.162 1.21e-09 ***
higher_educ                   0.64246    0.08444   7.609 8.87e-14 ***
city_status                   0.40242    0.06916   5.818 9.03e-09 ***
I(working_hours^current_pow)  0.45658    0.09123   5.005 7.07e-07 ***
I(age^current_pow)           -0.53020    0.10932  -4.850 1.52e-06 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.8455 on 705 degrees of freedom
  (3861 observations deleted due to missingness)
Multiple R-squared:  0.2054,	Adjusted R-squared:  0.1998 
F-statistic: 36.46 on 5 a

In [42]:
current_pow = 0.5
model1 = lm(data = data_normalized, salary ~ sex + working_hours + age + wed3 + higher_educ + city_status + I(working_hours^current_pow) + I(age^current_pow))
vif(model1)
summary(model1)
# R^2 ~ 0,214
# очень плохой VIF, плохая p-статистика


Call:
lm(formula = salary ~ sex + working_hours + age + wed3 + higher_educ + 
    city_status + I(working_hours^current_pow) + I(age^current_pow), 
    data = data_normalized)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.8799 -0.4851 -0.1395  0.2747  5.5922 

Coefficients:
                             Estimate Std. Error t value Pr(>|t|)    
(Intercept)                  -0.74739    0.23731  -3.149  0.00171 ** 
sex                           0.39592    0.06438   6.150 1.30e-09 ***
working_hours                 0.05133    0.16112   0.319  0.75014    
age                          -0.54842    0.24337  -2.253  0.02454 *  
wed3                         -0.07542    0.18401  -0.410  0.68203    
higher_educ                   0.64665    0.08427   7.674 5.60e-14 ***
city_status                   0.41036    0.06911   5.937 4.55e-09 ***
I(working_hours^current_pow)  0.23939    0.36546   0.655  0.51266    
I(age^current_pow)            0.37003    0.38567   0.959  0.33766    
---
Signif. co

In [43]:
model1 = lm(data = data_normalized, salary ~ sex + wed3 + higher_educ + city_status + I(working_hours^current_pow) + I(age^current_pow))
vif(model1)
summary(model1)
# R^2 ~ 0,208
# хороший VIF, плохое p-значение у переменной wed3


Call:
lm(formula = salary ~ sex + wed3 + higher_educ + city_status + 
    I(working_hours^current_pow) + I(age^current_pow), data = data_normalized)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.9982 -0.4942 -0.1450  0.2966  5.6434 

Coefficients:
                             Estimate Std. Error t value Pr(>|t|)    
(Intercept)                  -0.54389    0.12795  -4.251 2.42e-05 ***
sex                           0.39364    0.06443   6.110 1.65e-09 ***
wed3                         -0.06479    0.18435  -0.351    0.725    
higher_educ                   0.64430    0.08441   7.633 7.49e-14 ***
city_status                   0.40388    0.06921   5.835 8.20e-09 ***
I(working_hours^current_pow)  0.36554    0.07270   5.028 6.29e-07 ***
I(age^current_pow)           -0.47200    0.09455  -4.992 7.54e-07 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.8449 on 704 degrees of freedom
  (3861 observations deleted due to missingness)
Multip

In [44]:
model1 = lm(data = data_normalized, salary ~ sex + higher_educ + city_status + I(working_hours^current_pow) + I(age^current_pow))
vif(model1)
summary(model1)
# R^2 ~ 0,208
# хороший VIF, отличная p-статистика


Call:
lm(formula = salary ~ sex + higher_educ + city_status + I(working_hours^current_pow) + 
    I(age^current_pow), data = data_normalized)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.9952 -0.4911 -0.1429  0.2853  5.6457 

Coefficients:
                             Estimate Std. Error t value Pr(>|t|)    
(Intercept)                  -0.54707    0.12755  -4.289 2.05e-05 ***
sex                           0.39537    0.06420   6.159 1.23e-09 ***
higher_educ                   0.64366    0.08434   7.632 7.53e-14 ***
city_status                   0.40254    0.06907   5.828 8.52e-09 ***
I(working_hours^current_pow)  0.36520    0.07265   5.027 6.32e-07 ***
I(age^current_pow)           -0.47000    0.09432  -4.983 7.88e-07 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.8444 on 705 degrees of freedom
  (3861 observations deleted due to missingness)
Multiple R-squared:  0.2075,	Adjusted R-squared:  0.2019 
F-statistic: 36.92 on 5 a

In [45]:
current_pow = 0.9
model1 = lm(data = data_normalized, salary ~ sex + higher_educ + city_status + I(working_hours^current_pow) + I(age^current_pow))
vif(model1)
summary(model1)
# R^2 ~ 0.2117
# хороший VIF, отличная p-статистика


Call:
lm(formula = salary ~ sex + higher_educ + city_status + I(working_hours^current_pow) + 
    I(age^current_pow), data = data_normalized)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.9277 -0.4900 -0.1437  0.2735  5.6103 

Coefficients:
                             Estimate Std. Error t value Pr(>|t|)    
(Intercept)                  -0.51336    0.09513  -5.396 9.29e-08 ***
sex                           0.39521    0.06408   6.167 1.17e-09 ***
higher_educ                   0.64601    0.08415   7.677 5.46e-14 ***
city_status                   0.40440    0.06885   5.873 6.58e-09 ***
I(working_hours^current_pow)  0.18215    0.03690   4.936 9.97e-07 ***
I(age^current_pow)           -0.34321    0.06402  -5.361 1.12e-07 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.8422 on 705 degrees of freedom
  (3861 observations deleted due to missingness)
Multiple R-squared:  0.2117,	Adjusted R-squared:  0.2061 
F-statistic: 37.87 on 5 a

In [46]:
current_pow = 1.1
model1 = lm(data = data_normalized, salary ~ sex + higher_educ + city_status + I(working_hours^current_pow) + I(age^current_pow))
vif(model1)
summary(model1)
# R^2 ~ 0.2118
# хороший VIF, отличная p-статистика


Call:
lm(formula = salary ~ sex + higher_educ + city_status + I(working_hours^current_pow) + 
    I(age^current_pow), data = data_normalized)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.8889 -0.4925 -0.1366  0.2710  5.6172 

Coefficients:
                             Estimate Std. Error t value Pr(>|t|)    
(Intercept)                  -0.51265    0.08853  -5.791 1.05e-08 ***
sex                           0.39602    0.06409   6.179 1.09e-09 ***
higher_educ                   0.64611    0.08415   7.678 5.41e-14 ***
city_status                   0.40595    0.06883   5.897 5.73e-09 ***
I(working_hours^current_pow)  0.13419    0.02784   4.820 1.76e-06 ***
I(age^current_pow)           -0.30164    0.05515  -5.469 6.28e-08 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.8421 on 705 degrees of freedom
  (3861 observations deleted due to missingness)
Multiple R-squared:  0.2118,	Adjusted R-squared:  0.2062 
F-statistic: 37.88 on 5 a

In [47]:
current_pow = 1.2
model1 = lm(data = data_normalized, salary ~ sex + higher_educ + city_status + I(working_hours^current_pow) + I(age^current_pow))
vif(model1)
summary(model1)
# R^2 ~  0.2114
# хороший VIF, отличная p-статистика


Call:
lm(formula = salary ~ sex + higher_educ + city_status + I(working_hours^current_pow) + 
    I(age^current_pow), data = data_normalized)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.8692 -0.4902 -0.1332  0.2742  5.6216 

Coefficients:
                             Estimate Std. Error t value Pr(>|t|)    
(Intercept)                  -0.51419    0.08615  -5.969 3.79e-09 ***
sex                           0.39656    0.06411   6.185 1.05e-09 ***
higher_educ                   0.64597    0.08417   7.674 5.55e-14 ***
city_status                   0.40681    0.06884   5.909 5.35e-09 ***
I(working_hours^current_pow)  0.11548    0.02431   4.751 2.45e-06 ***
I(age^current_pow)           -0.28288    0.05138  -5.506 5.16e-08 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.8423 on 705 degrees of freedom
  (3861 observations deleted due to missingness)
Multiple R-squared:  0.2114,	Adjusted R-squared:  0.2058 
F-statistic:  37.8 on 5 a

In [48]:
current_pow = 1.3
model1 = lm(data = data_normalized, salary ~ sex + higher_educ + city_status + I(working_hours^current_pow) + I(age^current_pow))
vif(model1)
summary(model1)
# R^2 ~  0.2108
# хороший VIF, отличная p-статистика


Call:
lm(formula = salary ~ sex + higher_educ + city_status + I(working_hours^current_pow) + 
    I(age^current_pow), data = data_normalized)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.8496 -0.4916 -0.1362  0.2727  5.6264 

Coefficients:
                             Estimate Std. Error t value Pr(>|t|)    
(Intercept)                  -0.51658    0.08419  -6.136 1.41e-09 ***
sex                           0.39716    0.06414   6.192 1.01e-09 ***
higher_educ                   0.64570    0.08420   7.668 5.79e-14 ***
city_status                   0.40771    0.06886   5.921 5.01e-09 ***
I(working_hours^current_pow)  0.09943    0.02126   4.676 3.51e-06 ***
I(age^current_pow)           -0.26511    0.04793  -5.531 4.49e-08 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.8426 on 705 degrees of freedom
  (3861 observations deleted due to missingness)
Multiple R-squared:  0.2108,	Adjusted R-squared:  0.2052 
F-statistic: 37.67 on 5 a

In [49]:
current_pow = 1.4
model1 = lm(data = data_normalized, salary ~ sex + higher_educ + city_status + I(working_hours^current_pow) + I(age^current_pow))
vif(model1)
summary(model1)
# R^2 ~  0.21
# хороший VIF, отличная p-статистика


Call:
lm(formula = salary ~ sex + higher_educ + city_status + I(working_hours^current_pow) + 
    I(age^current_pow), data = data_normalized)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.8346 -0.4883 -0.1409  0.2746  5.6315 

Coefficients:
                             Estimate Std. Error t value Pr(>|t|)    
(Intercept)                  -0.51959    0.08255  -6.294 5.42e-10 ***
sex                           0.39782    0.06417   6.200 9.64e-10 ***
higher_educ                   0.64534    0.08424   7.661 6.13e-14 ***
city_status                   0.40863    0.06889   5.932 4.70e-09 ***
I(working_hours^current_pow)  0.08558    0.01862   4.595 5.12e-06 ***
I(age^current_pow)           -0.24816    0.04474  -5.546 4.13e-08 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.843 on 705 degrees of freedom
  (3861 observations deleted due to missingness)
Multiple R-squared:   0.21,	Adjusted R-squared:  0.2044 
F-statistic: 37.49 on 5 and

In [50]:
current_pow = 1.5
model1 = lm(data = data_normalized, salary ~ sex + higher_educ + city_status + I(working_hours^current_pow) + I(age^current_pow))
vif(model1)
summary(model1)
# R^2 ~  0.209
# хороший VIF, отличная p-статистика


Call:
lm(formula = salary ~ sex + higher_educ + city_status + I(working_hours^current_pow) + 
    I(age^current_pow), data = data_normalized)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.8302 -0.4865 -0.1399  0.2771  5.6369 

Coefficients:
                             Estimate Std. Error t value Pr(>|t|)    
(Intercept)                  -0.52305    0.08116  -6.445 2.15e-10 ***
sex                           0.39853    0.06421   6.207 9.22e-10 ***
higher_educ                   0.64488    0.08429   7.651 6.57e-14 ***
city_status                   0.40956    0.06893   5.942 4.43e-09 ***
I(working_hours^current_pow)  0.07358    0.01632   4.510 7.59e-06 ***
I(age^current_pow)           -0.23196    0.04178  -5.552 4.01e-08 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.8436 on 705 degrees of freedom
  (3861 observations deleted due to missingness)
Multiple R-squared:  0.2091,	Adjusted R-squared:  0.2035 
F-statistic: 37.27 on 5 a

In [51]:
current_pow = 1.7
model1 = lm(data = data_normalized, salary ~ sex + higher_educ + city_status + I(working_hours^current_pow) + I(age^current_pow))
vif(model1)
summary(model1)
# R^2 ~  0.206
# хороший VIF, отличная p-статистика


Call:
lm(formula = salary ~ sex + higher_educ + city_status + I(working_hours^current_pow) + 
    I(age^current_pow), data = data_normalized)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.8214 -0.4848 -0.1501  0.2846  5.6479 

Coefficients:
                             Estimate Std. Error t value Pr(>|t|)    
(Intercept)                  -0.53075    0.07896  -6.722 3.70e-11 ***
sex                           0.40006    0.06430   6.222 8.42e-10 ***
higher_educ                   0.64370    0.08441   7.626 7.84e-14 ***
city_status                   0.41139    0.06903   5.960 3.99e-09 ***
I(working_hours^current_pow)  0.05414    0.01251   4.327 1.73e-05 ***
I(age^current_pow)           -0.20163    0.03642  -5.536 4.38e-08 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.8448 on 705 degrees of freedom
  (3861 observations deleted due to missingness)
Multiple R-squared:  0.2067,	Adjusted R-squared:  0.201 
F-statistic: 36.73 on 5 an

In [52]:
current_pow = 1.9
model1 = lm(data = data_normalized, salary ~ sex + higher_educ + city_status + I(working_hours^current_pow) + I(age^current_pow))
vif(model1)
summary(model1)
# R^2 ~  0.203
# хороший VIF, отличная p-статистика


Call:
lm(formula = salary ~ sex + higher_educ + city_status + I(working_hours^current_pow) + 
    I(age^current_pow), data = data_normalized)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.8126 -0.4901 -0.1525  0.2868  5.6587 

Coefficients:
                              Estimate Std. Error t value Pr(>|t|)    
(Intercept)                  -0.538834   0.077304  -6.970 7.28e-12 ***
sex                           0.401708   0.064404   6.237 7.67e-10 ***
higher_educ                   0.642208   0.084548   7.596 9.75e-14 ***
city_status                   0.413089   0.069151   5.974 3.68e-09 ***
I(working_hours^current_pow)  0.039482   0.009564   4.128 4.09e-05 ***
I(age^current_pow)           -0.173963   0.031702  -5.488 5.69e-08 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.8464 on 705 degrees of freedom
  (3861 observations deleted due to missingness)
Multiple R-squared:  0.2037,	Adjusted R-squared:  0.1981 
F-statistic: 36.07

In [53]:
current_pow = 2
model1 = lm(data = data_normalized, salary ~ sex + higher_educ + city_status + I(working_hours^current_pow) + I(age^current_pow))
vif(model1)
summary(model1)
# R^2 ~  0.154
# отличный VIF, плохое p-значение у переменной I(working_hours^current_pow)


Call:
lm(formula = salary ~ sex + higher_educ + city_status + I(working_hours^current_pow) + 
    I(age^current_pow), data = data_normalized)

Residuals:
    Min      1Q  Median      3Q     Max 
-2.0793 -0.5032 -0.1519  0.2745 15.3247 

Coefficients:
                              Estimate Std. Error t value Pr(>|t|)    
(Intercept)                  -0.532728   0.032595 -16.344   <2e-16 ***
sex                           0.476045   0.027654  17.215   <2e-16 ***
higher_educ                   0.535505   0.029291  18.282   <2e-16 ***
city_status                   0.358494   0.031064  11.540   <2e-16 ***
I(working_hours^current_pow)  0.007734   0.004232   1.827   0.0677 .  
I(age^current_pow)           -0.129989   0.012227 -10.632   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.9201 on 4566 degrees of freedom
Multiple R-squared:  0.1544,	Adjusted R-squared:  0.1535 
F-statistic: 166.7 on 5 and 4566 DF,  p-value: < 2.2e-16


In [55]:
current_pow = 1.2
model1 = lm(data = data_normalized, salary ~ sex + higher_educ + city_status + I(working_hours^current_pow) + I(age^current_pow))
vif(model1)
summary(model1)
# Multiple R-squared:  0.2114
# Adjusted R-squared:  0.2058


Call:
lm(formula = salary ~ sex + higher_educ + city_status + I(working_hours^current_pow) + 
    I(age^current_pow), data = data_normalized)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.8692 -0.4902 -0.1332  0.2742  5.6216 

Coefficients:
                             Estimate Std. Error t value Pr(>|t|)    
(Intercept)                  -0.51419    0.08615  -5.969 3.79e-09 ***
sex                           0.39656    0.06411   6.185 1.05e-09 ***
higher_educ                   0.64597    0.08417   7.674 5.55e-14 ***
city_status                   0.40681    0.06884   5.909 5.35e-09 ***
I(working_hours^current_pow)  0.11548    0.02431   4.751 2.45e-06 ***
I(age^current_pow)           -0.28288    0.05138  -5.506 5.16e-08 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.8423 on 705 degrees of freedom
  (3861 observations deleted due to missingness)
Multiple R-squared:  0.2114,	Adjusted R-squared:  0.2058 
F-statistic:  37.8 on 5 a

In [56]:
current_pow = 1.1
model1 = lm(data = data_normalized, salary ~ sex + higher_educ + city_status + I(working_hours^current_pow) + I(age^current_pow))
vif(model1)
summary(model1)
# Multiple R-squared:  0.2118
# Adjusted R-squared:  0.2062 


Call:
lm(formula = salary ~ sex + higher_educ + city_status + I(working_hours^current_pow) + 
    I(age^current_pow), data = data_normalized)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.8889 -0.4925 -0.1366  0.2710  5.6172 

Coefficients:
                             Estimate Std. Error t value Pr(>|t|)    
(Intercept)                  -0.51265    0.08853  -5.791 1.05e-08 ***
sex                           0.39602    0.06409   6.179 1.09e-09 ***
higher_educ                   0.64611    0.08415   7.678 5.41e-14 ***
city_status                   0.40595    0.06883   5.897 5.73e-09 ***
I(working_hours^current_pow)  0.13419    0.02784   4.820 1.76e-06 ***
I(age^current_pow)           -0.30164    0.05515  -5.469 6.28e-08 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.8421 on 705 degrees of freedom
  (3861 observations deleted due to missingness)
Multiple R-squared:  0.2118,	Adjusted R-squared:  0.2062 
F-statistic: 37.88 on 5 a

In [57]:
current_pow = 0.9
model1 = lm(data = data_normalized, salary ~ sex + higher_educ + city_status + I(working_hours^current_pow) + I(age^current_pow))
vif(model1)
summary(model1)
# Multiple R-squared:  0.2117
# Adjusted R-squared:  0.2061 


Call:
lm(formula = salary ~ sex + higher_educ + city_status + I(working_hours^current_pow) + 
    I(age^current_pow), data = data_normalized)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.9277 -0.4900 -0.1437  0.2735  5.6103 

Coefficients:
                             Estimate Std. Error t value Pr(>|t|)    
(Intercept)                  -0.51336    0.09513  -5.396 9.29e-08 ***
sex                           0.39521    0.06408   6.167 1.17e-09 ***
higher_educ                   0.64601    0.08415   7.677 5.46e-14 ***
city_status                   0.40440    0.06885   5.873 6.58e-09 ***
I(working_hours^current_pow)  0.18215    0.03690   4.936 9.97e-07 ***
I(age^current_pow)           -0.34321    0.06402  -5.361 1.12e-07 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.8422 on 705 degrees of freedom
  (3861 observations deleted due to missingness)
Multiple R-squared:  0.2117,	Adjusted R-squared:  0.2061 
F-statistic: 37.87 on 5 a

In [58]:
# Данная модель показала наилучшие результаты, то есть:
# Хороший VIF, отличная p-статистика и лушчий R^2 = 0,2118
current_pow = 1.1
model1 = lm(data = data_normalized, salary ~ sex + higher_educ + city_status + I(working_hours^current_pow) + I(age^current_pow))
vif(model1)
summary(model1)


Call:
lm(formula = salary ~ sex + higher_educ + city_status + I(working_hours^current_pow) + 
    I(age^current_pow), data = data_normalized)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.8889 -0.4925 -0.1366  0.2710  5.6172 

Coefficients:
                             Estimate Std. Error t value Pr(>|t|)    
(Intercept)                  -0.51265    0.08853  -5.791 1.05e-08 ***
sex                           0.39602    0.06409   6.179 1.09e-09 ***
higher_educ                   0.64611    0.08415   7.678 5.41e-14 ***
city_status                   0.40595    0.06883   5.897 5.73e-09 ***
I(working_hours^current_pow)  0.13419    0.02784   4.820 1.76e-06 ***
I(age^current_pow)           -0.30164    0.05515  -5.469 6.28e-08 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.8421 on 705 degrees of freedom
  (3861 observations deleted due to missingness)
Multiple R-squared:  0.2118,	Adjusted R-squared:  0.2062 
F-statistic: 37.88 on 5 a

In [None]:
current_pow = 1.1

In [59]:
# не вступавшие в брак мужчины без высшего образования
data_picked = subset(data_normalized, sex == 1)
data_picked = subset(data_picked, higher_educ == 0)
data_picked = subset(data_picked, wed3 == 0)

model1 = lm(data = data_picked, salary ~ city_status + I(working_hours^current_pow) + I(age^current_pow))
vif(model1)
summary(model1)
# R^2 ~ 0,1746
# Все регрессоры являются значимыми
# Среди этой группы наибольшую зарплату имеют молодые люди, проживающие в городах и много работающие


Call:
lm(formula = salary ~ city_status + I(working_hours^current_pow) + 
    I(age^current_pow), data = data_picked)

Residuals:
    Min      1Q  Median      3Q     Max 
-2.0276 -0.5429 -0.1680  0.3812  5.4258 

Coefficients:
                             Estimate Std. Error t value Pr(>|t|)    
(Intercept)                  -0.08348    0.12868  -0.649   0.5170    
city_status                   0.40932    0.10377   3.944   0.0001 ***
I(working_hours^current_pow)  0.17450    0.03743   4.662 4.78e-06 ***
I(age^current_pow)           -0.39727    0.08911  -4.458 1.18e-05 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.8552 on 291 degrees of freedom
  (938 observations deleted due to missingness)
Multiple R-squared:  0.1746,	Adjusted R-squared:  0.1661 
F-statistic: 20.51 on 3 and 291 DF,  p-value: 4.365e-12


In [61]:
# городские жители, мужчины, состоящие в браке
data_picked = subset(data_normalized, sex == 1)
data_picked = subset(data_picked, city_status == 1)
data_picked = subset(data_picked, wed1 == 1)

model1 = lm(data = data_picked, salary ~ higher_educ + I(working_hours^current_pow) + I(age^current_pow))
vif(model1)
summary(model1)
# R^2 ~ 0, 1858
# Регрессоры higher_educ и I(age^current_pow) являются самыми значимыми,
# I(working_hours^current pow) - второй по значимости
# Среди этой группы наибольшую зарплату имеют молодые люди, имеющие высшее образование и много работающие


Call:
lm(formula = salary ~ higher_educ + I(working_hours^current_pow) + 
    I(age^current_pow), data = data_picked)

Residuals:
    Min      1Q  Median      3Q     Max 
-2.2351 -0.5702 -0.1559  0.4263  5.2282 

Coefficients:
                             Estimate Std. Error t value Pr(>|t|)    
(Intercept)                   0.47885    0.15902   3.011 0.002982 ** 
higher_educ                   0.67940    0.19663   3.455 0.000688 ***
I(working_hours^current_pow)  0.19651    0.06551   3.000 0.003093 ** 
I(age^current_pow)           -0.52387    0.13255  -3.952 0.000112 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 1.03 on 177 degrees of freedom
  (767 observations deleted due to missingness)
Multiple R-squared:  0.1858,	Adjusted R-squared:  0.172 
F-statistic: 13.46 on 3 and 177 DF,  p-value: 5.913e-08
