In [1]:
import pandas as pd
import numpy as np

In [2]:
import statsmodels.api as sm

In [3]:
# read data
data = pd.read_csv("bm.csv")

In [4]:
# Q1 percentage of resume received a call back
callback_percentage = (data['call'].sum() / data['call'].count()) * 100
print(callback_percentage)

8.049281314168377


In [5]:
callback_rate_by_race = data.groupby('black')['call'].mean()
print(callback_rate_by_race)

black
0    0.096509
1    0.064476
Name: call, dtype: float64


In [6]:
# 3. 计算黑人和白人申请者的平均回电率差异
callback_difference = callback_rate_by_race[1] - callback_rate_by_race[0]

# 4. 进行 t-检验以检验回电率差异是否显著
t_test_result = sm.stats.ttest_ind(data[data['black'] == 1]['call'], data[data['black'] == 0]['call'])

# 5. 运行回归模型
model = sm.OLS(data['call'], data['black']).fit()

# (a) 检查回归系数是否与之前估计的平均差异相匹配
coefficient_match = model.params['black'] == callback_difference

# (b) 检查回归系数是否显著
coefficient_significance = model.pvalues['black'] < 0.05

# 6. 运行单独的回归模型
model_female = sm.OLS(data['female'], data['black']).fit()
model_exper = sm.OLS(data['exper'], data['black']).fit()

In [7]:
# 输出回归结果
print("1. Callback Percentage:", callback_percentage)
print("2. Callback Rate by Race:\n", callback_rate_by_race)
print("3. Callback Rate Difference (Black - White):", callback_difference)
print("4. T-Test for Difference in Callback Rates:", t_test_result)
print("5. Regression Results for Black:\n", model.summary())
print("(a) Coefficient Match:", coefficient_match)
print("(b) Coefficient Significance:", coefficient_significance)
print("6. Regression Results for Female and Experience (Exper):\n", model_female.summary(), model_exper.summary())

1. Callback Percentage: 8.049281314168377
2. Callback Rate by Race:
 black
0    0.096509
1    0.064476
Name: call, dtype: float64
3. Callback Rate Difference (Black - White): -0.032032854209445585
4. T-Test for Difference in Callback Rates: (-4.114705266723098, 3.940802514069475e-05, 4868.0)
5. Regression Results for Black:
                                  OLS Regression Results                                
Dep. Variable:                   call   R-squared (uncentered):                   0.026
Model:                            OLS   Adj. R-squared (uncentered):              0.026
Method:                 Least Squares   F-statistic:                              129.1
Date:                Sat, 14 Oct 2023   Prob (F-statistic):                    1.54e-29
Time:                        22:24:17   Log-Likelihood:                         -711.33
No. Observations:                4870   AIC:                                      1425.
Df Residuals:                    4869   BIC:             

In [9]:
import statsmodels.api as sm
import pandas as pd

# 拟合线性回归模型
X = data["black"]
X = sm.add_constant(X)  # 添加截距项
y = data["call"]
model = sm.OLS(y, X).fit()

# 打印回归结果
print(model.summary())


                            OLS Regression Results                            
Dep. Variable:                   call   R-squared:                       0.003
Model:                            OLS   Adj. R-squared:                  0.003
Method:                 Least Squares   F-statistic:                     16.93
Date:                Sat, 14 Oct 2023   Prob (F-statistic):           3.94e-05
Time:                        22:38:38   Log-Likelihood:                -562.24
No. Observations:                4870   AIC:                             1128.
Df Residuals:                    4868   BIC:                             1141.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0965      0.006     17.532      0.0

In [10]:
# 拟合回归模型：female on black
X_female = data["black"]
X_female = sm.add_constant(X_female)  # 添加截距项
y_female = data["female"]
model_female = sm.OLS(y_female, X_female).fit()

# 打印回归结果
print("Regression Results for Female on Black:")
print(model_female.summary())

# 拟合回归模型：exper on black
X_exper = data["black"]
X_exper = sm.add_constant(X_exper)  # 添加截距项
y_exper = data["exper"]
model_exper = sm.OLS(y_exper, X_exper).fit()

# 打印回归结果
print("Regression Results for Exper on Black:")
print(model_exper.summary())


Regression Results for Female on Black:
                            OLS Regression Results                            
Dep. Variable:                 female   R-squared:                       0.000
Model:                            OLS   Adj. R-squared:                 -0.000
Method:                 Least Squares   F-statistic:                    0.7817
Date:                Sat, 14 Oct 2023   Prob (F-statistic):              0.377
Time:                        22:46:15   Log-Likelihood:                -2700.7
No. Observations:                4870   AIC:                             5405.
Df Residuals:                    4868   BIC:                             5418.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const       

In [11]:
np.log(3^10)

2.1972245773362196

In [13]:
np.log(0.05)

-2.995732273553991