In [2]:
# Import necessary libraries
import pandas as pd
import statsmodels.api as sm

# Define file path (Make sure this matches where you saved the dataset)
file_path = "/Users/jakubriha/Desktop/ECO225/Project/Data/data_df.csv"  

# Load the dataset
data_df = pd.read_csv(file_path)

In [4]:
# Define function to run robust regressions
def run_robust_regression(y, X, model_name):
    """Runs OLS regression with robust standard errors and prints the summary."""
    X = sm.add_constant(X)  # Add intercept
    model = sm.OLS(y, X, missing='drop').fit(cov_type='HC0')  # HC0 for robust SE
    print(f"\nRegression: {model_name}")
    print(model.summary())

# Define dependent variables
funding_time_y = data_df['funding_time_days']  # Raw funding time
log_funding_time_y = data_df['log_funding_time']  # Log-transformed funding time

# Define independent variables
X_female = data_df[['gender_dummy']]
X_loan_amount = data_df[['loan_amount']]
X_hdi = data_df[['hdi']]
X_loan_terms = data_df[['term_in_months']]
X_picture = data_df[['borrower_pictured']]

# Interaction terms
data_df['hdi_female_interaction'] = data_df['hdi'] * data_df['gender_dummy']
data_df['picture_female_interaction'] = data_df['borrower_pictured'] * data_df['gender_dummy']

# Multiple independent variable sets
X_hdi_female = data_df[['hdi', 'gender_dummy', 'hdi_female_interaction']]
X_picture_female = data_df[['borrower_pictured', 'gender_dummy', 'picture_female_interaction']]
X_full1 = data_df[['gender_dummy', 'loan_amount', 'hdi', 'borrower_pictured', 'term_in_months', 'hdi_female_interaction']]
X_full2 = data_df[['gender_dummy', 'loan_amount', 'hdi', 'borrower_pictured', 'term_in_months', 'picture_female_interaction']]


In [6]:
# Run all regressions (Raw Funding Time)
print("\n=== Regressions with Raw Funding Time ===")
run_robust_regression(funding_time_y, X_female, "Funding Time ~ Female")
run_robust_regression(funding_time_y, X_loan_amount, "Funding Time ~ Loan Amount")
run_robust_regression(funding_time_y, X_hdi, "Funding Time ~ HDI")
run_robust_regression(funding_time_y, X_loan_terms, "Funding Time ~ Loan Terms")
run_robust_regression(funding_time_y, X_picture, "Funding Time ~ Picture")
run_robust_regression(funding_time_y, X_hdi_female, "Funding Time ~ HDI + Female + HDI*Female")
run_robust_regression(funding_time_y, X_picture_female, "Funding Time ~ Picture + Female + Picture*Female")
run_robust_regression(funding_time_y, X_full1, "Funding Time ~ Female + Loan Amount + HDI + Picture + Loan Terms + HDI*Female")
run_robust_regression(funding_time_y, X_full2, "Funding Time ~ Female + Loan Amount + HDI + Picture + Loan Terms + Picture*Female")


=== Regressions with Raw Funding Time ===

Regression: Funding Time ~ Female
                            OLS Regression Results                            
Dep. Variable:      funding_time_days   R-squared:                       0.020
Model:                            OLS   Adj. R-squared:                  0.020
Method:                 Least Squares   F-statistic:                 1.316e+04
Date:                Wed, 29 Jan 2025   Prob (F-statistic):               0.00
Time:                        20:17:20   Log-Likelihood:            -2.4899e+06
No. Observations:              611828   AIC:                         4.980e+06
Df Residuals:                  611826   BIC:                         4.980e+06
Df Model:                           1                                         
Covariance Type:                  HC0                                         
                   coef    std err          z      P>|z|      [0.025      0.975]
---------------------------------------------------

ValueError: Pandas data cast to numpy dtype of object. Check input data with np.asarray(data).