### Contrasts Overview

In [5]:
import statsmodels.api as sm

  from pandas.core import datetools


This document is based heavily on this excellent resource from UCLA http://www.ats.ucla.edu/stat/r/library/contrast_coding.htm

#### Example Data

In [8]:
import pandas
url = 'http://www.ats.ucla.edu/stat/data/hsb2.csv'
hsb2 = pandas.read_table(url, delimiter=",")

AttributeError: 'str' object has no attribute 'read_table'

In [4]:
hsb2.head(10)

NameError: name 'hsb2' is not defined

In [None]:
hsb2.groupby('race')['write'].mean()

#### Treatment (Dummy) Coding

In [9]:
from patsy.contrasts import Treatment
levels = [1,2,3,4]
contrast = Treatment(reference=0).code_without_intercept(levels)
print contrast.matrix

[[ 0.  0.  0.]
 [ 1.  0.  0.]
 [ 0.  1.  0.]
 [ 0.  0.  1.]]


In [None]:
hsb2.race.head(10)

In [None]:
print contrast.matrix[hsb2.race-1, :][:20]

In [None]:
sm.categorical(hsb2.race.values)

In [None]:
from statsmodels.formula.api import ols
mod = ols("write ~ C(race, Treatment)", data=hsb2)
res = mod.fit()
print res.summary()

### Simple Coding

In [11]:
from patsy.contrasts import ContrastMatrix

def _name_levels(prefix, levels):
    return ["[%s%s]" % (prefix, level) for level in levels]

class Simple(object):
    def _simple_contrast(self, levels):
        nlevels = len(levels)
        contr = -1./nlevels * np.ones((nlevels, nlevels-1))
        contr[1:][np.diag_indices(nlevels-1)] = (nlevels-1.)/nlevels
        return contr

    def code_with_intercept(self, levels):
        contrast = np.column_stack((np.ones(len(levels)),
                                    self._simple_contrast(levels)))
        return ContrastMatrix(contrast, _name_levels("Simp.", levels))

    def code_without_intercept(self, levels):
        contrast = self._simple_contrast(levels)
        return ContrastMatrix(contrast, _name_levels("Simp.", levels[:-1]))

In [None]:
hsb2.groupby('race')['write'].mean().mean()

In [13]:
import numpy as np
contrast = Simple().code_without_intercept(levels)
print contrast.matrix

[[-0.25 -0.25 -0.25]
 [ 0.75 -0.25 -0.25]
 [-0.25  0.75 -0.25]
 [-0.25 -0.25  0.75]]


In [None]:
mod = ols("write ~ C(race, Simple)", data=hsb2)
res = mod.fit()
print res.summary()

### Sum (Deviation) Coding

In [14]:
from patsy.contrasts import Sum
contrast = Sum().code_without_intercept(levels)
print contrast.matrix

[[ 1.  0.  0.]
 [ 0.  1.  0.]
 [ 0.  0.  1.]
 [-1. -1. -1.]]


In [None]:
mod = ols("write ~ C(race, Sum)", data=hsb2)
res = mod.fit()
print res.summary()

In [None]:
hsb2.groupby('race')['write'].mean().mean()

### Backward Difference Coding

In [None]:
from patsy.contrasts import Diff
contrast = Diff().code_without_intercept(levels)
print contrast.matrix

In [None]:
mod = ols("write ~ C(race, Diff)", data=hsb2)
res = mod.fit()
print res.summary()

In [None]:
res.params["C(race, Diff)[D.1]"]
hsb2.groupby('race').mean()["write"][2] - \
     hsb2.groupby('race').mean()["write"][1]

### Helmert Coding

In [15]:
from patsy.contrasts import Helmert
contrast = Helmert().code_without_intercept(levels)
print contrast.matrix

[[-1. -1. -1.]
 [ 1. -1. -1.]
 [ 0.  2. -1.]
 [ 0.  0.  3.]]


In [None]:
mod = ols("write ~ C(race, Helmert)", data=hsb2)
res = mod.fit()
print res.summary()

In [None]:
grouped = hsb2.groupby('race')
grouped.mean()["write"][4] - grouped.mean()["write"][:3].mean()

In [None]:
k = 4
1./k * (grouped.mean()["write"][k] - grouped.mean()["write"][:k-1].mean())
k = 3
1./k * (grouped.mean()["write"][k] - grouped.mean()["write"][:k-1].mean())

### Orthogonal Polynomial Coding

In [16]:
hsb2['readcat'] = pandas.cut(hsb2.read, bins=3)
hsb2.groupby('readcat').mean()['write']

NameError: name 'hsb2' is not defined

In [18]:
from patsy.contrasts import Poly
levels = [1,2,3,4]
contrast = Poly().code_without_intercept(levels)
print contrast.matrix

[[-0.67082039  0.5        -0.2236068 ]
 [-0.2236068  -0.5         0.67082039]
 [ 0.2236068  -0.5        -0.67082039]
 [ 0.67082039  0.5         0.2236068 ]]


In [None]:
mod = ols("write ~ C(readcat, Poly)", data=hsb2)
res = mod.fit()
print res.summary()