In [1]:
import arcpy as ARCPY
import arcgisscripting as ARC
import SSDataObject as SSDO
import SSUtilities as UTILS
import WeightsUtilities as WU
import numpy as NUM
import scipy as SCIPY
import pysal as PYSAL
import os as OS
import pandas as PANDAS

In [7]:
#### Analysis Settings ####
uniqueIDField = "MYID"
fieldNames = ['PCR2010', 'POP2010', 'PERCNOHS']

#### Resolve the Input Feature Class Location ####
inputFC = r'../data/CA_Polygons.shp'
fullFC = OS.path.abspath(inputFC)
fullPath = OS.path.dirname(fullFC)
fcName = OS.path.basename(fullFC)

#### Read the Unique ID and Analysis Fields into the Data Object ####
ssdo = SSDO.SSDataObject(inputFC)
ssdo.obtainData(uniqueIDField, fieldNames)

#### Preview the First Rows of the Attribute Table ####
df = ssdo.getDataFrame()
print(df.head())

      PCR2010  PERCNOHS  POP2010
158  1.206422      37.0  1513043
159  1.079837      38.3     1162
160  0.886305      41.4    38026
161  0.816018      42.9   220000
162  0.877746      48.1    45485


In [8]:
import scipy.cluster.vq as CLUST
#### Seed NumPy's Global RNG: kmeans Picks Its Starting Centroids ####
#### at Random, so an Unseeded Run Gives Different Groups Each Time ####
KMEANS_SEED = 100
NUM.random.seed(KMEANS_SEED)

#### Attribute Table as a 2-D Array (Rows = Features) ####
#### (.values replaces the deprecated DataFrame.as_matrix()) ####
X = df.values

#### Rescale Each Column to Unit Variance Before Clustering ####
whiteData = CLUST.whiten(X)

#### Solve for 6 Centroids ####
centers, distortion = CLUST.kmeans(whiteData, 6)

#### Label Each Feature With a Centroid Index ####
# NOTE(review): ARC._ss is a private arcgisscripting namespace; presumably
# this returns the index of the nearest centroid per row. The public
# scipy.cluster.vq.vq(whiteData, centers)[0] looks equivalent - confirm
# before swapping it in.
groups = ARC._ss.closest_centroid(whiteData, centers)
print(groups)

[4 3 0 0 1 3 4 1 3 1 0 0 1 3 1 1 1 0 5 1 2 0 0 1 0 3 3 3 3 4 3 3 0 3 1 0 4
 2 1 3 2 3 4 3 0 0 0 3 3 1 0 0 0 1 0 3 3 0]




In [9]:
import pysal as PYSAL
import WeightsUtilities as WU
import SSUtilities as UTILS

def swm2Weights(ssdo, swmfile):
    """Converts ArcGIS Sparse Spatial Weights Matrix (*.swm) file to
    PySAL Sparse Spatial Weights Class.

    INPUTS:
    ssdo (class): instance of SSDataObject [1,2]
    swmfile (str): full path to swm file

    OUTPUT:
    w (class): PYSAL.W instance keyed by the ssdo order IDs; its
               transform is set to 'R' when the swm file is flagged
               as row standardized.

    RAISES:
    SystemExit: after reporting message 842, if the ssdo holds more
                observations than the swm file.

    NOTES:
    (1) Data must already be obtained using ssdo.obtainData()
    (2) The masterField for the swm file and the ssdo object must be
        the same and may NOT be the OID/FID/ObjectID
    (3) Features with no neighbors left after filtering against
        ssdo.master2Order are not added to the resulting weights.
    (4) The swm reader is closed on every exit path, including errors.
    """
    neighbors = {}
    weights = {}

    #### Create SWM Reader Object ####
    swm = WU.SWMReader(swmfile)

    try:
        #### SWM May NOT be a Subset of the Data ####
        if ssdo.numObs > swm.numObs:
            ARCPY.AddIDMessage("ERROR", 842, ssdo.numObs, swm.numObs)
            raise SystemExit()

        rowStandard = swm.rowStandard

        #### Parse All SWM Records ####
        for r in UTILS.ssRange(swm.numObs):
            #### Every Record is Read, Even if it is Skipped Below ####
            info = swm.swm.readEntry()
            masterID, nn, nhs, entryWeights, sumUnstandard = info

            #### Must Have at Least One Neighbor ####
            if not nn:
                continue

            #### Must be in Selection Set (If Exists) ####
            if masterID not in ssdo.master2Order:
                continue

            #### Transform Master ID to Order ID ####
            orderID = ssdo.master2Order[masterID]

            #### Neighbors and Weights Adjusted for Selection ####
            outNHS = []
            outW = []
            for nhInd, nhVal in enumerate(nhs):
                #### Only the ID Lookup May Miss; Skip Such Neighbors ####
                try:
                    nhOrder = ssdo.master2Order[nhVal]
                except KeyError:
                    continue

                #### Undo Row Standardization; Reapplied via transform ####
                weightVal = entryWeights[nhInd]
                if rowStandard:
                    weightVal = weightVal * sumUnstandard[0]

                #### Append Together so Both Lists Stay Aligned ####
                outNHS.append(nhOrder)
                outW.append(weightVal)

            #### Add Selected Neighbors/Weights ####
            if len(outNHS):
                neighbors[orderID] = outNHS
                weights[orderID] = outW
    finally:
        #### Release the File Handle on Success and on Failure ####
        swm.close()

    #### Construct PySAL Spatial Weights and Standardize as per SWM ####
    w = PYSAL.W(neighbors, weights)
    if rowStandard:
        w.transform = 'R'

    return w


In [11]:
#### Spatial Weights From the Rook Contiguity SWM Next to the Data ####
swmFile = OS.path.join(fullPath, "rook_bin.swm")
w = swm2Weights(ssdo, swmFile)

#### Select Columns by Name: the DataFrame Column Order Differs ####
#### From fieldNames, so Positional Slicing is Easy to Get Wrong ####
maxpAttributes = df[['PCR2010', 'PERCNOHS']].values
maxpFloorValues = df['POP2010'].values.astype(float)

#### Minimum Total POP2010 Required in Each Region ####
MAXP_FLOOR = 3000000.

#### Seed NumPy's Global RNG so the Regions are Repeatable ####
# NOTE(review): assumes PYSAL.region.Maxp draws from numpy.random - confirm.
MAXP_SEED = 100
NUM.random.seed(MAXP_SEED)
maxp = PYSAL.region.Maxp(w, maxpAttributes, MAXP_FLOOR,
                         floor_variable = maxpFloorValues)

#### Region ID per Feature; -1 Marks Features Left Out of Every ####
#### Region (e.g. Those Without Neighbors in the Weights) ####
maxpGroups = NUM.full((ssdo.numObs,), -1, dtype = int)
for regionID, orderIDs in enumerate(maxp.regions):
    maxpGroups[orderIDs] = regionID
    print((regionID, orderIDs))

(0, [47, 56, 50, 3, 31, 27, 10, 30, 8, 16, 17, 28, 2, 1, 22, 45, 24, 57, 44, 54, 33, 46, 11, 7, 52, 51, 48, 20, 5])
(1, [42, 0, 40, 6, 37])
(2, [23, 34, 9, 49, 53, 38, 14, 4, 15])
(3, [29])
(4, [55, 41, 18, 39, 26, 43])
(5, [25, 19, 13, 35, 21, 32, 12])
(6, [36])


In [12]:
import Partition as PART
#### Partition Settings: Spatial Weights are Read From swmFile ####
partitionOptions = {"spaceConcept": "GET_SPATIAL_WEIGHTS_FROM_FILE",
                    "weightsFile": swmFile,
                    "kPartitions": 6}

#### Run the Partition on the Analysis Fields ####
skater = PART.Partition(ssdo, fieldNames, **partitionOptions)

#### Show the Resulting Partition Array ####
print(skater.partition)

[1 4 4 4 5 4 1 4 4 5 4 4 5 4 5 5 4 4 0 5 2 5 4 5 4 4 5 4 4 3 4 4 5 4 5 5 3
 1 5 5 1 5 1 5 4 4 4 4 4 5 4 4 4 5 4 5 4 4]


In [13]:
#### Allow an Existing Output Feature Class to be Replaced ####
ARCPY.env.overwriteOutput = True

#### Output Location: Set the PYDEMO_OUTPUT_FC Environment Variable ####
#### to Override the Original Machine-Specific Default Path ####
outputFC = OS.environ.get(
    'PYDEMO_OUTPUT_FC',
    r'E:\Data\Conferences\esri_stat_summit_16\PYDemo\PYDemo.gdb\cluster_output')

#### One Output Field per Method; +1 Shifts 0-Based Labels to 1-Based ####
outK = SSDO.CandidateField('KMEANS', 'LONG', groups + 1)
outMax = SSDO.CandidateField('MAXP', 'LONG', maxpGroups + 1)
# NOTE(review): partitionOutput is written as-is; presumably it is already
# 1-based, unlike skater.partition - confirm.
outSKATER = SSDO.CandidateField('SKATER', 'LONG', skater.partitionOutput)
outFields = {'KMEANS': outK, 'MAXP': outMax, 'SKATER': outSKATER}

#### Carry Over the Analysis Fields Plus NEW_NAME ####
# NOTE(review): "NEW_NAME" is not among the fields passed to obtainData();
# presumably it exists on the input feature class - confirm.
appendFields = fieldNames + ["NEW_NAME"]
ssdo.output2NewFC(outputFC, outFields, appendFields = appendFields)