# Spatial Data Processing with PySAL

This notebook covers basic spatial data processing and shows how to use PySAL in the Notebook.


## Common Imports

In [None]:
# our convention is to alias PySAL, NumPy and Pandas
import pysal as ps
import numpy as np
import pandas as pd

In [None]:
# check the versions
ps.version

In [None]:
np.version.short_version

In [None]:
pd.__version__

## Reading a CSV File

In [None]:
!head data/mexico.csv

In [None]:
# Open the Mexico CSV through PySAL and build the names of the per-decade
# "pcgdp" columns (pcgdp1940, pcgdp1950, ..., pcgdp2000).
f = ps.open("data/mexico.csv")
decades = range(1940, 2010, 10)  # 1940 through 2000 in steps of ten
vnames = ["pcgdp%d" % year for year in decades]

In [None]:
vnames

In [None]:
# Gather one sequence per variable, then transpose so that each row is an
# observation and each column is one of the variables in vnames.
csv_columns = [f.by_col[name] for name in vnames]
Y = np.array(csv_columns).T

In [None]:
Y

In [None]:
# Pull the 'State' column out of the still-open CSV handle.
state = f.by_col['State']

In [None]:
f.close() # done with the file

In [None]:
state

## Reading a Shapefile

In [None]:
# us counties example

In [None]:
### First the attributes from the dbf

In [None]:
# Open the attribute table that accompanies the NAT shapefile and keep its
# header (the column names) for the lookups below.
dbf = ps.open('data/NAT.dbf')
header = dbf.header

In [None]:
header

In [None]:
# Read all the numeric variables into a big array
# find the first offset we need
start_col = header.index("SOUTH")

In [None]:
start_col

In [None]:
# Slice from the "SOUTH" column located above rather than a hard-coded 8, so
# the selection stays correct if the dbf's column order ever changes.
# NOTE: `vars` shadows the builtin of the same name; the name is kept because
# the following cells refer to it.
vars = header[start_col:]

In [None]:
vars

In [None]:
# Read every selected dbf column into its own array, then stack them; at this
# point each row of nat_array holds one variable.
dbf_columns = [np.array(dbf.by_col(name)) for name in vars]
nat_array = np.array(dbf_columns)

In [None]:
nat_array.shape

In [None]:
nat_array = nat_array.T

In [None]:
nat_array.shape

In [None]:
dbf.close() # done with the dbf file


### Now the geometries 

In [None]:
# Open the shapefile holding the geometries through PySAL.
shp_file = ps.open("data/NAT.shp")

In [None]:
shp_file.header

In [None]:
len(shp_file)

In [None]:
# The PySAL file object is iterable, so collect every shape directly instead
# of calling next() once per index; this also drops the Python 2-only
# xrange() and .next() spellings.
shapes = list(shp_file)

In [None]:
type(shapes)

In [None]:
len(shapes)

In [None]:
type(shapes)

In [None]:
s0 = shapes[0]

In [None]:
s0

In [None]:
dir(s0)

In [None]:
shp_file.close()

## Reading a GeoJSON File

In [None]:
import json

In [None]:
# Load the GeoJSON inside a context manager so the file handle is closed even
# if parsing raises.
with open('data/nat.json') as f:
    fj = json.load(f)

In [None]:
fj.keys()

In [None]:
# Copy the feature records into a fresh list.
features = list(fj['features'])

In [None]:
features[0]