### Empirical Example for RobustiPY (Type 1)!

#### This example is designed to show how basic functionality -- such as OLS with a cross-sectional dataset -- works.

First we load some utility libraries. All of these should either be in your standard Python installation or come as dependencies of `robustipy`.

In [1]:
import os # for filepath handling
import pandas as pd
import requests # for grabbing the dataset
import matplotlib.pyplot as plt # for plotting
from robustipy.models import OLSRobust

We now create a helper function to grab the data from the web and save it in an appropriate directory:

In [2]:
def get_data(data_dir=os.path.join('..', 'data', 'ehrlich_example'),
             url="https://fermin.perso.math.cnrs.fr/Files/UScrime.txt"):
    """Load the US crime dataset, caching a local copy on first use.

    If ``uscrime.txt`` already exists inside ``data_dir`` it is read from
    disk; otherwise the table is read from ``url`` and written to that file
    so later calls do not need the network.

    Parameters
    ----------
    data_dir : str, optional
        Directory holding the cached ``uscrime.txt``. It is created if it
        does not exist. Defaults to ``../data/ehrlich_example``.
    url : str, optional
        Location of the raw whitespace-delimited file, used only when no
        cached copy is present.

    Returns
    -------
    pandas.DataFrame
        The dataset with the short column codes ``M``, ``LF``, ``W``, ``X``,
        ``U1`` and ``Ex0`` renamed to descriptive labels; all other columns
        keep their original names.
    """
    os.makedirs(data_dir, exist_ok=True)
    uscrime_path = os.path.join(data_dir, 'uscrime.txt')
    if os.path.exists(uscrime_path):
        # The cache is written below with the index as a first column that
        # has no header name, so the header row is one name shorter than the
        # data rows; pandas then uses that first column as the index.
        df = pd.read_csv(uscrime_path, sep=r"\s+")
    else:
        # NOTE(review): skiprows=35 presumably skips a descriptive preamble
        # in the remote file -- confirm if the upstream file ever changes.
        df = pd.read_csv(url, sep=r"\s+", skiprows=35, index_col=0)
        df.to_csv(uscrime_path, sep='\t')
    df = df.rename(columns={'M': 'Males',
                            'LF': 'Labour Force',
                            'W': 'Wealth',
                            'X': 'Inequality',
                            'U1': 'Unemployment',
                            'Ex0': 'Expenditure'})
    return df

Let's now load the data into a pandas dataframe and peek at what's inside:

In [3]:
# Load the dataset and move its index into an ordinary column
# (drop=False keeps the old index values instead of discarding them).
df = get_data().reset_index(drop=False)
# Show the first rows as a quick sanity check.
df.head()

Unnamed: 0,index,R,Age,Ed,Expenditure,Ex1,Labour Force,Males,N,NW,Unemployment,U2,Wealth,Inequality
0,0,79.1,151,91,58,56,510,950,33,301,108,41,394,261
1,1,163.5,143,113,103,95,583,1012,13,102,96,36,557,194
2,2,57.8,142,89,45,44,533,969,18,219,94,33,318,250
3,3,196.9,136,121,149,141,577,994,157,80,102,39,673,167
4,4,123.4,141,121,109,101,591,985,18,30,91,20,578,174


In [7]:
def run_regression(formula, data, label):
    """Fit an OLS model from a formula and print its summary.

    The model is built with ``statsmodels.formula.api.ols`` and fitted with
    ``cov_type='HC1'``. Nothing is returned; a header line containing
    ``label`` and the model summary are printed.

    Parameters
    ----------
    formula : str
        Formula passed to ``smf.ols``.
    data : object
        Data passed to ``smf.ols`` (a pandas DataFrame in this notebook).
    label : object
        Value interpolated into the printed "Column ..." header.
    """
    # Imported here so the dependency is only needed when a regression runs.
    import statsmodels.formula.api as smf

    ols_spec = smf.ols(formula=formula, data=data)
    fitted = ols_spec.fit(cov_type='HC1')
    header = f"\n--- Column {label} (HC1) ---"
    print(header)
    print(fitted.summary())
    
# Baseline: R regressed on Inequality only.
# (Plain string: the formula has no placeholders, so no f-prefix is needed.)
formula_str = "R ~ Inequality"
run_regression(formula_str, df, label=1)


--- Column 1 (HC1) ---
                            OLS Regression Results                            
Dep. Variable:                      R   R-squared:                       0.032
Model:                            OLS   Adj. R-squared:                  0.011
Method:                 Least Squares   F-statistic:                     2.010
Date:                Sun, 12 Oct 2025   Prob (F-statistic):              0.163
Time:                        05:04:45   Log-Likelihood:                -237.21
No. Observations:                  47   AIC:                             478.4
Df Residuals:                      45   BIC:                             482.1
Df Model:                           1                                         
Covariance Type:                  HC1                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept    124.1773     26

In [8]:
# Adds Age as a control alongside Inequality.
# (Plain string: the formula has no placeholders, so no f-prefix is needed.)
formula_str = "R ~ Inequality + Age"
# NOTE(review): label=1 repeats the label used for the first regression --
# confirm whether this column should be labelled 2.
run_regression(formula_str, df, label=1)


--- Column 1 (HC1) ---
                            OLS Regression Results                            
Dep. Variable:                      R   R-squared:                       0.033
Model:                            OLS   Adj. R-squared:                 -0.011
Method:                 Least Squares   F-statistic:                    0.9856
Date:                Sun, 12 Oct 2025   Prob (F-statistic):              0.381
Time:                        05:05:32   Log-Likelihood:                -237.19
No. Observations:                  47   AIC:                             480.4
Df Residuals:                      44   BIC:                             485.9
Df Model:                           2                                         
Covariance Type:                  HC1                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept    111.2518     46

In [9]:
# Inequality plus six further controls: Age, Ed, Unemployment, Males, N, Wealth.
# (Plain string: the formula has no placeholders, so no f-prefix is needed.)
formula_str = "R ~ Inequality + Age + Ed + Unemployment + Males + N + Wealth"
# NOTE(review): label=1 repeats the label used for the first regression --
# confirm whether this column should be labelled 3.
run_regression(formula_str, df, label=1)


--- Column 1 (HC1) ---
                            OLS Regression Results                            
Dep. Variable:                      R   R-squared:                       0.522
Model:                            OLS   Adj. R-squared:                  0.436
Method:                 Least Squares   F-statistic:                     7.276
Date:                Sun, 12 Oct 2025   Prob (F-statistic):           1.39e-05
Time:                        05:06:00   Log-Likelihood:                -220.64
No. Observations:                  47   AIC:                             457.3
Df Residuals:                      39   BIC:                             472.1
Df Model:                           7                                         
Covariance Type:                  HC1                                         
                   coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------------------------------------------------------
Intercept     -764.2379 

We now create a `robustipy` object with our variables of interest and allow it to fit models with all the possible combinations of covariates:

In [None]:
# Outcome is R and the regressor of interest is Inequality; the controls
# listed here are the covariates handed to fit().
ehrlich_controls = [
    'Expenditure',
    'Age',
    'Ed',
    'Unemployment',
    'Males',
    'N',
    'Wealth',
]
ehrlich_robust = OLSRobust(y=['R'], x=['Inequality'], data=df)
ehrlich_robust.fit(
    controls=ehrlich_controls,
    draws=1000,
    kfold=10,
    oos_metric='pseudo-r2',
    seed=192735,  # fixed seed -- presumably for reproducibility; confirm
)

Get the results:

In [None]:
# Retrieve the results object from the fitted OLSRobust instance; it is
# used below for plotting and for the summary.
ehrlich_results = ehrlich_robust.get_results()

Plot them:

In [None]:
# Two sets of controls passed as `specs` -- presumably the specifications
# singled out in the figure; confirm against the robustipy documentation.
highlighted_specs = [
    ['Ed', 'N'],
    ['Age', 'Males', 'Wealth'],
]
ehrlich_results.plot(
    specs=highlighted_specs,
    ic='hqic',
    ci=1,
    ext='pdf',
    project_name='ehrlich_example',
    figpath='../figures',
    figsize=(16, 16),
)

Summarize them (with four digits):

In [None]:
# Summarise the results; digits=4 requests four digits in the reported numbers.
ehrlich_results.summary(digits=4)