# OLS using Statsmodels

###### Importing libraries

In [1]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf

###### Loading Dataset

In [2]:
dataset = sm.datasets.get_rdataset("Guerry", "HistData").data

###### Fit regression model (using the natural log of one of the regressors)

In [3]:
results = smf.ols('Lottery ~ Literacy + np.log(Pop1831)', data=dataset).fit()

###### Inspect the results

In [4]:
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                Lottery   R-squared:                       0.348
Model:                            OLS   Adj. R-squared:                  0.333
Method:                 Least Squares   F-statistic:                     22.20
Date:                Fri, 05 Jun 2020   Prob (F-statistic):           1.90e-08
Time:                        16:29:26   Log-Likelihood:                -379.82
No. Observations:                  86   AIC:                             765.6
Df Residuals:                      83   BIC:                             773.0
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                      coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------
Intercept         246.4341     35.233     

###### Generate artificial data (2 regressors + constant)

In [5]:
nobs = 100

In [6]:
X = np.random.random((nobs, 2))
X = sm.add_constant(X)

In [7]:
beta = [1, .1, .5]

In [8]:
e = np.random.random(nobs)

In [9]:
y = np.dot(X, beta) + e

###### Fit regression model

In [10]:
results = sm.OLS(y, X).fit()

In [11]:
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.145
Model:                            OLS   Adj. R-squared:                  0.128
Method:                 Least Squares   F-statistic:                     8.255
Date:                Fri, 05 Jun 2020   Prob (F-statistic):           0.000489
Time:                        16:29:31   Log-Likelihood:                -18.646
No. Observations:                 100   AIC:                             43.29
Df Residuals:                      97   BIC:                             51.11
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          1.5578      0.078     19.845      0.0