In [129]:
import pysal
import matplotlib.pyplot as plt
import shapefile 

In [130]:
#To illustrate this we will use data on per capita income from the lower 48 US 
#states over the period 1929-2010. The goal is to form contiguous
#regions of states displaying similar levels of income throughout this period:
#us48.shp: shapefile
#us48.dbf: dbf for shapefile
#us48.shx: index for shapefile
#usjoin.csv: attribute data (comma delimited file)

In [131]:
import numpy as np
import random


In [178]:
pysal.examples.explain('us_income')

{'description': 'Nominal per capita income for the lower 48 US states 1929-2009',
 'explanation': ['* states48.gal: queen contiguity weights in GAL format.',
  '* us48.dbf: attribute data. (k=8)',
  '* us48.shp: Polygon shapefile. (n=48)',
  '* us48.shx: spatial index.',
  '* usjoin.csv: 48 US states nominal per capita income time series 1929-2009.'],
 'name': 'us_income'}

In [180]:
#csv_path = pysal.examples.get_path('usjoin.csv')

In [158]:
shx = pysal.open(pysal.examples.get_path("us48.shx"))
dbf = pysal.open(pysal.examples.get_path("us48.dbf"))
shp= pysal.open(pysal.examples.get_path("us48.shp"))
f=pysal.open(pysal.examples.get_path("usjoin.csv"))

In [154]:
shx.FORMATS

['shp', 'shx']

In [155]:
dbf.field_info

[('DeletionFlag', 'C', 1, 0),
 ('AREA', 'N', 12, 3),
 ('PERIMETER', 'N', 12, 3),
 ('STATE_', 'N', 11, 0),
 ('STATE_ID', 'N', 11, 0),
 ('STATE_NAME', 'C', 25, 0),
 ('STATE_FIPS', 'C', 2, 0),
 ('SUB_REGION', 'C', 7, 0),
 ('STATE_ABBR', 'C', 2, 0)]

In [156]:
pci = np.array([f.by_col[str(y)] for y in range(1929, 2010)])
pci = pci.transpose()
pci.shape

<pysal.core.util.shapefile.shp_file at 0x5fc68ed9e8>

In [148]:
pci

array([[  323,   267,   224, ..., 31988, 32819, 32274],
       [  600,   520,   429, ..., 33470, 33445, 32077],
       [  310,   228,   215, ..., 31070, 31800, 31493],
       ...,
       [  460,   408,   356, ..., 29769, 31265, 31843],
       [  673,   588,   469, ..., 35839, 36594, 35676],
       [  675,   585,   476, ..., 43453, 45177, 42504]])

In [133]:
#We also require set of binary contiguity weights for the Maxp class
#w = pysal.open("../pysal/examples/states48.gal").read()
w = pysal.open(pysal.examples.get_path("states48.gal")).read()


In [134]:
w.histogram

[(1, 1), (2, 4), (3, 9), (4, 11), (5, 10), (6, 9), (7, 2), (8, 2)]

In [135]:
#Once we have the attribute data and our weights object we can create an instance of Maxp

In [136]:
np.random.seed(100)
random.seed(10)

In [137]:
r = pysal.Maxp(w, pci, floor = 5, floor_variable = np.ones((48, 1)), initial = 99)

Here we are forming regions with a minimum of 5 states in each region, so we set the floor_variable to a simple unit vector to ensure this floor constraint is satisfied. We also specify the initial number of feasible solutions to 99 - which are then searched over to pick the optimal feasible solution to then commence with the more expensive swapping component of the algorithm. [2]

The Maxp instance s has a number of attributes regarding the solution. First is the definition of the regions:

In [138]:
r.regions

[['44', '34', '3', '25', '41'],
 ['9', '47', '23', '31', '4', '24', '13', '38', '12'],
 ['8', '37', '30', '43', '14', '11', '7', '45'],
 ['16', '26', '18', '42', '36'],
 ['27', '29', '6', '17', '5'],
 ['32', '35', '19', '46', '20', '10'],
 ['22', '33', '28', '40', '1'],
 ['15', '21', '2', '39', '0']]

In [139]:
f.header

['Name',
 'STATE_FIPS',
 '1929',
 '1930',
 '1931',
 '1932',
 '1933',
 '1934',
 '1935',
 '1936',
 '1937',
 '1938',
 '1939',
 '1940',
 '1941',
 '1942',
 '1943',
 '1944',
 '1945',
 '1946',
 '1947',
 '1948',
 '1949',
 '1950',
 '1951',
 '1952',
 '1953',
 '1954',
 '1955',
 '1956',
 '1957',
 '1958',
 '1959',
 '1960',
 '1961',
 '1962',
 '1963',
 '1964',
 '1965',
 '1966',
 '1967',
 '1968',
 '1969',
 '1970',
 '1971',
 '1972',
 '1973',
 '1974',
 '1975',
 '1976',
 '1977',
 '1978',
 '1979',
 '1980',
 '1981',
 '1982',
 '1983',
 '1984',
 '1985',
 '1986',
 '1987',
 '1988',
 '1989',
 '1990',
 '1991',
 '1992',
 '1993',
 '1994',
 '1995',
 '1996',
 '1997',
 '1998',
 '1999',
 '2000',
 '2001',
 '2002',
 '2003',
 '2004',
 '2005',
 '2006',
 '2007',
 '2008',
 '2009']

In [140]:
names = f.by_col('Name')

In [141]:
names = np.array(names)

In [142]:
print(names)

['Alabama' 'Arizona' 'Arkansas' 'California' 'Colorado' 'Connecticut'
 'Delaware' 'Florida' 'Georgia' 'Idaho' 'Illinois' 'Indiana' 'Iowa'
 'Kansas' 'Kentucky' 'Louisiana' 'Maine' 'Maryland' 'Massachusetts'
 'Michigan' 'Minnesota' 'Mississippi' 'Missouri' 'Montana' 'Nebraska'
 'Nevada' 'New Hampshire' 'New Jersey' 'New Mexico' 'New York'
 'North Carolina' 'North Dakota' 'Ohio' 'Oklahoma' 'Oregon' 'Pennsylvania'
 'Rhode Island' 'South Carolina' 'South Dakota' 'Tennessee' 'Texas' 'Utah'
 'Vermont' 'Virginia' 'Washington' 'West Virginia' 'Wisconsin' 'Wyoming']


In [175]:
for region in r.regions:
     ids = map(int,region)
     #print(names[ids])
     print(ids)
     print(list(ids))
     print(names[int(ids)])


<map object at 0x0000005FCAD12278>
[44, 34, 3, 25, 41]


TypeError: int() argument must be a string, a bytes-like object or a number, not 'map'

In [None]:
r.inference()

In [None]:
r.pvalue

In [159]:
r.area2region

{'0': 7,
 '1': 6,
 '10': 5,
 '11': 2,
 '12': 1,
 '13': 1,
 '14': 2,
 '15': 7,
 '16': 3,
 '17': 4,
 '18': 3,
 '19': 5,
 '2': 7,
 '20': 5,
 '21': 7,
 '22': 6,
 '23': 1,
 '24': 1,
 '25': 0,
 '26': 3,
 '27': 4,
 '28': 6,
 '29': 4,
 '3': 0,
 '30': 2,
 '31': 1,
 '32': 5,
 '33': 6,
 '34': 0,
 '35': 5,
 '36': 3,
 '37': 2,
 '38': 1,
 '39': 7,
 '4': 1,
 '40': 6,
 '41': 0,
 '42': 3,
 '43': 2,
 '44': 0,
 '45': 2,
 '46': 5,
 '47': 1,
 '5': 4,
 '6': 4,
 '7': 2,
 '8': 2,
 '9': 1}