# Count Data Analysis - Poisson Regression

## Introduction

## Dependencies

In [2]:
import pandas as pd
from scipy import stats  
import matplotlib.pylab as plt

import autograd as ag
import autograd.numpy as np
import autograd.scipy as sp
import functools
import scipy.optimize
import patsy
import math
import scipy

import warnings
warnings.filterwarnings('ignore')

## Functions

In [3]:
def get_aic(y, X, theta):
    return 2*len(theta) + 2*get_poisson_neg_ll(y, X, theta)

In [4]:
def get_poisson_neg_ll(y, X, theta):
    beta = theta
    mu = np.exp(np.dot(X, beta))
    
    ll = np.sum( y*np.log(mu) - mu - np.log(scipy.special.factorial(y)))
    
    neg_ll = -ll
    return neg_ll

### Open Dataset

In [5]:
data_path = 'https://raw.githubusercontent.com/PyTrans/Statistics/master/CountData_HOV_Accidents_SoCal.csv'
hov_acci_socal = pd.read_csv(data_path)

### Create a Model

In [6]:
model = 'Accidents~Lanes+Limited+AccessNum+RoadWidth+LaneWidth+InnerShoulderWidth+OuterShoulderWidth'

y_patsy, X_patsy = patsy.dmatrices(model, hov_acci_socal)
y = np.array(y_patsy).flatten()
X = np.array(X_patsy)

### Estimation

#### Log-likelihood Function

In [7]:
# init the search for max likelihood parameters
varLength = len(model.split(sep='+'))

# assume t == 1
init_theta = np.array([np.log(np.mean(y))]+[0]*(varLength), dtype=float)
    
neg_ll = functools.partial(get_poisson_neg_ll, y, X)
jacobian = ag.jacobian(neg_ll)
hessian = ag.hessian(neg_ll)

#### Optimization

In [8]:
coefficients = scipy.optimize.fmin_ncg(
        neg_ll,
        init_theta,
        fprime= jacobian,
        fhess= hessian,
        avextol=1e-8
        )

Optimization terminated successfully.
         Current function value: 29355.506033
         Iterations: 24
         Function evaluations: 41
         Gradient evaluations: 64
         Hessian evaluations: 24


### Result

In [9]:
res_var_list = model.split(sep='~')[1].split(sep='+')
res_var_list.insert(0, '(Intercept)')
print(res_var_list)

['(Intercept)', 'Lanes', 'Limited', 'AccessNum', 'RoadWidth', 'LaneWidth', 'InnerShoulderWidth', 'OuterShoulderWidth']


#### Coefficients

In [10]:
coefficients

array([ 3.09564987,  0.20361721,  0.16058925,  0.14483635,  0.00403671,
       -0.11552242, -0.02981278,  0.02585838])

#### The standard error of each parameter
Each of the model parameters has its own standard error, which is the estimated standard deviation of the error in estimating it. Note that asymptotic standard errors for the βs can be estimated by inverting the expected Hessian matrix (http://home.cc.umanitoba.ca/~godwinrt/7010/poissonregression.pdf).

In [18]:
stdErr = np.diag(np.sqrt(np.linalg.inv(hessian(coefficients))))
print(stdErr)

[0.10140769 0.02355695 0.01197166 0.00774832 0.00042171 0.00821016
 0.00167327 0.00286008]


#### The z value

In [13]:
z_value = coefficients/stdErr
print(z_value)

[ 30.52677579   8.64361615  13.4141158   18.69260141   9.5723434
 -14.07066419 -17.81707947   9.04114825]


#### Pr(>|z|)

In [14]:
p_value = np.round(stats.norm.sf(abs(z_value)) * 2, 4)
print(p_value)

[0. 0. 0. 0. 0. 0. 0. 0.]


#### The estimated y value

$\hat{Y}= exp(X\beta)$

In [15]:
y_hat = np.exp(np.dot(X, coefficients))
y_hat

array([12.54281975, 15.4050247 , 15.64961999, ..., 15.53502949,
       14.45762775, 13.14292092])

#### Deviance

In [16]:
# ll(regression)
ll_reg = - neg_ll(coefficients)

In [19]:
# ll(null)
model_null = 'Accidents~1'
y_patsy, X_patsy = patsy.matrices(model_null, sca_hov_acc)
y_null = np.array(y_patsy).flatten()
X_null = np.array(X_patsy)

AttributeError: module 'patsy' has no attribute 'matrices'

In [None]:
init_theta = [0]
neg_ll = functools.partial(get_poisson_neg_ll, y_null, X_null)
jacobian = ag.jacobian(neg_ll)
hessian = ag.hessian(neg_ll)

In [None]:
intercept = scipy.optimize.fmin_ncg(
        neg_ll,
        init_theta,
        fprime= jacobian,
        fhess= hessian,
        avextol=1e-8
        )

In [None]:
ll_null = - neg_ll(intercept)

In [None]:
## ll(saturated)
ll_sat_arr = y * np.log(y) - y - np.log(scipy.special.factorial(y))
ll_sat_list = []
for ls in ll_sat_arr:
    if np.isnan(ls):
        ll_sat_list.append(0)
    else:
        ll_sat_list.append(ls)
ll_sat = np.sum(ll_sat_list)
print(ll_sat)

https://stats.stackexchange.com/questions/316763/log-likelihood-function-in-poisson-regression

https://stats.stackexchange.com/questions/108995/interpreting-residual-and-null-deviance-in-glm-r

In [None]:
print('Null deviance: {}'.format(-2 * (ll_null - np.sum(ll_sat))))
print('Residual deviance: {}'.format(-2 * (ll_reg - np.sum(ll_sat))))

#### Deviance residuals

residuals.glm in r

$$d_i = sgn\left(y_i - exp(X_i \hat{\beta})\right) \sqrt{2\left\{y_i log\left(\frac{y_i}{exp(X_i \hat{\beta})}\right)-\left(y_i - exp(X_i \hat{\beta})\right)\right\}}$$

In [None]:
y_hat = np.exp(np.dot(X, coefficients))

residuals = y - y_hat

rtTerm = []
for i in range(len(y)):
    rtTerm1 = y[i] * np.log(y[i] / y_hat[i])
    rtTerm2 = y[i] - y_hat[i]
    
    if np.isnan(rtTerm1):
        rtTerm.append(2*(0 - rtTerm2))
    else:
        rtTerm.append(2*(rtTerm1 - rtTerm2))
rtTerm  = np.array(rtTerm)

d = np.sign(residuals) * np.sqrt(rtTerm)
print(d.max())
print(d.min())

#### AICc

In [None]:
get_aic(y, X, coefficients)