# Fiona

- Docs: http://toblerity.org/fiona/fiona.html
- More recent: http://juanlu001-fiona.readthedocs.io/en/latest/

In [1]:
import os
import zipfile
import geopandas as gpd
import shapely.geometry

In [63]:
# Root folder of the road data, given as a relative path.
# An absolute alternative location: os.path.join("/media", "disk", "TIGER Data")
datadir = os.path.join("..", "..", "..", "Data")
roads_dir = os.path.join(datadir, 'tl_2016_17031_roads')
gpd.GeoDataFrame.from_file(roads_dir).head()

Unnamed: 0,FULLNAME,LINEARID,MTFCC,RTTYP,geometry
0,47th Pl Exd,110380277026,S1400,M,"LINESTRING (-87.595765 41.81460799999999, -87...."
1,Golden Spr,110380298305,S1400,M,"LINESTRING (-87.921616 41.67378399999999, -87...."
2,Edens Expy Spr,1104259027148,S1100,M,LINESTRING (-87.86542699999998 42.149693999999...
3,Edens Expy Spr,1104259564382,S1100,M,"LINESTRING (-87.832262 42.14520399999999, -87...."
4,Edens Expy Spr,1104472109755,S1100,M,"LINESTRING (-87.874499 42.152788, -87.87439399..."


In [3]:
import fiona

In [28]:
# Open the roads data source directly with fiona, report its schema and CRS,
# and keep every feature record in memory for the cells that follow.
roads_dir = os.path.join(datadir, 'tl_2016_17031_roads')
with fiona.open(roads_dir) as inp:
    print(inp.schema)
    print(inp.crs)
    data = [feature for feature in inp]

{'properties': OrderedDict([('LINEARID', 'str:22'), ('FULLNAME', 'str:100'), ('RTTYP', 'str:1'), ('MTFCC', 'str:5')]), 'geometry': 'LineString'}
{'init': 'epsg:4269'}


In [29]:
# First feature record: a dict with "geometry", "id", "properties" and "type".
data[0]

{'geometry': {'coordinates': [(-87.595765, 41.814607999999986),
   (-87.595702, 41.81463399999999),
   (-87.59551599999999, 41.81471299999999),
   (-87.59545399999999, 41.81474)],
  'type': 'LineString'},
 'id': '0',
 'properties': OrderedDict([('LINEARID', '110380277026'),
              ('FULLNAME', '47th Pl Exd'),
              ('RTTYP', 'M'),
              ('MTFCC', 'S1400')]),
 'type': 'Feature'}

In [32]:
# Attribute values live under "properties", keyed by field name.
data[0]["properties"]["FULLNAME"]

'47th Pl Exd'

# Make a zip file

In [12]:
import random

# Fixed seed so the generated fixture is identical on every run of the
# notebook (Restart & Run All reproduces the same numbers).
random.seed(0)

# Number of random rows / points written to the test shapefile.
n_points = 10

# Three columns of uniform random floats ...
frame = gpd.GeoDataFrame({
    "one" : [random.random() for _ in range(n_points)],
    "two" : [random.random() for _ in range(n_points)],
    "three" : [random.random() for _ in range(n_points)]
    })
# ... plus one random point in the unit square per row as the geometry.
frame.geometry = [
    shapely.geometry.Point(random.random(), random.random())
    for _ in range(n_points)
]
# Written to the "test" path; a later cell packs the resulting files into a zip.
frame.to_file("test")

In [13]:
# Preview the first rows of the randomly generated frame.
frame.head()

Unnamed: 0,one,three,two,geometry
0,0.102479,0.195078,0.04415,POINT (0.485480121638858 0.2280057304317772)
1,0.654113,0.958358,0.576193,POINT (0.9782933885712286 0.6094664305216008)
2,0.881228,0.941013,0.592834,POINT (0.6352825070182115 0.515952063220373)
3,0.469183,0.145761,0.664387,POINT (0.06406475315659543 0.8629081633097599)
4,0.95866,0.327739,0.888699,POINT (0.8347326380976192 0.5642710462492633)


In [15]:
# Pack every file from the "test" directory into test.zip, stored flat at the
# archive root under its bare file name.
# The `with` block closes (and thereby finalises) the archive even if adding a
# member fails.  `ZipFile.write` copies each file straight from disk, so no
# file contents are held in notebook-level variables.
with zipfile.ZipFile("test.zip", "w") as zf:
    for name in sorted(os.listdir("test")):
        zf.write(os.path.join("test", name), arcname=name)

## Load back using `fiona`

In [17]:
# List the layers in the archive: the path is "" and the zip is passed via `vfs`.
fiona.listlayers("", vfs="zip://test.zip")

['test']

In [18]:
# Read the zipped test data back: show schema, CRS and the first feature.
test_archive = "zip://test.zip"
with fiona.open("", vfs=test_archive) as inp:
    print(inp.schema)
    print(inp.crs)
    first_feature = next(iter(inp))
    print(first_feature)

{'properties': OrderedDict([('one', 'float:24.15'), ('three', 'float:24.15'), ('two', 'float:24.15')]), 'geometry': 'Point'}
{}
{'type': 'Feature', 'id': '0', 'geometry': {'type': 'Point', 'coordinates': (0.485480121638858, 0.2280057304317772)}, 'properties': OrderedDict([('one', 0.10247870954811), ('three', 0.195078343273763), ('two', 0.044149706782881)])}


# Open an externally produced file

This didn't work on Linux, but it does work on Windows. **TODO:** find out what is different.

In [33]:
# Archive location relative to `datadir` ...
filename = os.path.join(datadir, 'tl_2016_17031_roads.zip')
# ... which is immediately overridden by a hard-coded Windows location.
# NOTE(review): machine-specific path; remove this line to use `datadir`.
filename = os.path.join("f:\\", "TIGER Data", 'tl_2016_17031_roads.zip')
print(filename)
# Close the archive as soon as its member list has been read rather than
# leaving the file handle open for the rest of the session.
with zipfile.ZipFile(filename) as zf:
    members = zf.infolist()
members

f:\TIGER Data\tl_2016_17031_roads.zip


[<ZipInfo filename='tl_2016_17031_roads.cpg' filemode='-rwxrwxr-x' file_size=5>,
 <ZipInfo filename='tl_2016_17031_roads.dbf' compress_type=deflate filemode='-rwxrwxr-x' file_size=10236312 compress_size=771563>,
 <ZipInfo filename='tl_2016_17031_roads.prj' compress_type=deflate filemode='-rwxrwxr-x' file_size=165 compress_size=133>,
 <ZipInfo filename='tl_2016_17031_roads.shp' compress_type=deflate filemode='-rwxrwxr-x' file_size=23335716 compress_size=13720745>,
 <ZipInfo filename='tl_2016_17031_roads.shp.ea.iso.xml' compress_type=deflate filemode='-rwxrwxrwx' file_size=20621 compress_size=2000>,
 <ZipInfo filename='tl_2016_17031_roads.shp.iso.xml' compress_type=deflate filemode='-rwxrwxrwx' file_size=291967 compress_size=11976>,
 <ZipInfo filename='tl_2016_17031_roads.shp.xml' compress_type=deflate filemode='-rwxrwxrwx' file_size=92101 compress_size=8070>,
 <ZipInfo filename='tl_2016_17031_roads.shx' compress_type=deflate filemode='-rwxrwxr-x' file_size=634900 compress_size=308395>]

In [34]:
# Layer listing for the externally produced archive, again passed via `vfs`.
fiona.listlayers("", vfs="zip://"+filename)

['tl_2016_17031_roads']

In [60]:
# Read the externally produced archive: keep the schema and all features for
# the cells below, printing schema and CRS along the way.
archive_uri = "zip://"+filename
with fiona.open("", vfs=archive_uri) as inp:
    schema = inp.schema
    print(schema)
    print(inp.crs)
    data = [feature for feature in inp]
# Show the first feature as the cell result.
data[0]

{'properties': OrderedDict([('LINEARID', 'str:22'), ('FULLNAME', 'str:100'), ('RTTYP', 'str:1'), ('MTFCC', 'str:5')]), 'geometry': 'LineString'}
{'init': 'epsg:4269'}


{'geometry': {'coordinates': [(-87.595765, 41.814607999999986),
   (-87.595702, 41.81463399999999),
   (-87.59551599999999, 41.81471299999999),
   (-87.59545399999999, 41.81474)],
  'type': 'LineString'},
 'id': '0',
 'properties': OrderedDict([('LINEARID', '110380277026'),
              ('FULLNAME', '47th Pl Exd'),
              ('RTTYP', 'M'),
              ('MTFCC', 'S1400')]),
 'type': 'Feature'}

In [52]:
# Record the fiona version these results were produced with.
fiona.__version__

'1.7.4'

# With shapely

`shapely` has a very nice way of building geometry objects directly from the GeoJSON-like geometry dictionaries that `fiona` returns.

In [61]:
# Turn each feature's GeoJSON-like geometry mapping into a shapely geometry.
# `shape` is used rather than `asShape`, which newer shapely releases no
# longer provide.
geometry = [shapely.geometry.shape(row["geometry"]) for row in data]

In [62]:
# Assemble a GeoDataFrame by hand: one column per schema property, added in
# schema order, followed by the geometry column.
frame = gpd.GeoDataFrame()
property_rows = [row["properties"] for row in data]
for column in schema["properties"]:
    frame[column] = [props[column] for props in property_rows]
frame.geometry = geometry
frame.head()

Unnamed: 0,LINEARID,FULLNAME,RTTYP,MTFCC,geometry
0,110380277026,47th Pl Exd,M,S1400,"LINESTRING (-87.595765 41.81460799999999, -87...."
1,110380298305,Golden Spr,M,S1400,"LINESTRING (-87.921616 41.67378399999999, -87...."
2,1104259027148,Edens Expy Spr,M,S1100,LINESTRING (-87.86542699999998 42.149693999999...
3,1104259564382,Edens Expy Spr,M,S1100,"LINESTRING (-87.832262 42.14520399999999, -87...."
4,1104472109755,Edens Expy Spr,M,S1100,"LINESTRING (-87.874499 42.152788, -87.87439399..."


In [64]:
def our_load(name):
    """Load a vector data source with `fiona` into a `GeoDataFrame`.

    Reads every feature from `name`, collecting one column per property in
    the source schema and converting each GeoJSON-like geometry mapping into
    a `shapely` geometry.

    Parameters
    ----------
    name : str
        Path handed to `fiona.open`.

    Returns
    -------
    geopandas.GeoDataFrame
        One row per feature and one column per schema property, plus the
        geometry column; `crs` is copied from the source.
    """
    with fiona.open(name) as inp:
        header = list(inp.schema["properties"])
        # Read the CRS while the collection is still open; a closed
        # collection may no longer be able to report it.
        crs = inp.crs
        columns = [[] for _ in header]
        geo = []
        for row in inp:
            properties = row["properties"]
            # `field`, not `name`: the loop must not clobber the parameter.
            for column, field in zip(columns, header):
                column.append(properties[field])
            geo.append(shapely.geometry.shape(row["geometry"]))
    frame = gpd.GeoDataFrame(dict(zip(header, columns)))
    frame.geometry = geo
    frame.crs = crs
    return frame

In [66]:
# Load the roads data with the hand-written loader and report its row count.
filename = os.path.join(datadir, 'tl_2016_17031_roads')
frame = our_load(filename)
print(frame.shape[0])
frame.head()

79350


Unnamed: 0,FULLNAME,LINEARID,MTFCC,RTTYP,geometry
0,47th Pl Exd,110380277026,S1400,M,"LINESTRING (-87.595765 41.81460799999999, -87...."
1,Golden Spr,110380298305,S1400,M,"LINESTRING (-87.921616 41.67378399999999, -87...."
2,Edens Expy Spr,1104259027148,S1100,M,LINESTRING (-87.86542699999998 42.149693999999...
3,Edens Expy Spr,1104259564382,S1100,M,"LINESTRING (-87.832262 42.14520399999999, -87...."
4,Edens Expy Spr,1104472109755,S1100,M,"LINESTRING (-87.874499 42.152788, -87.87439399..."


In [67]:
# Same path through geopandas' own reader, for comparison with `our_load`.
frame = gpd.GeoDataFrame.from_file(filename)
print(frame.shape[0])
frame.head()

79350


Unnamed: 0,FULLNAME,LINEARID,MTFCC,RTTYP,geometry
0,47th Pl Exd,110380277026,S1400,M,"LINESTRING (-87.595765 41.81460799999999, -87...."
1,Golden Spr,110380298305,S1400,M,"LINESTRING (-87.921616 41.67378399999999, -87...."
2,Edens Expy Spr,1104259027148,S1100,M,LINESTRING (-87.86542699999998 42.149693999999...
3,Edens Expy Spr,1104259564382,S1100,M,"LINESTRING (-87.832262 42.14520399999999, -87...."
4,Edens Expy Spr,1104472109755,S1100,M,"LINESTRING (-87.874499 42.152788, -87.87439399..."


In [68]:
# Time the hand-written fiona + shapely loader.
%timeit(our_load(filename))

1 loop, best of 3: 3.24 s per loop


In [69]:
# Time geopandas' own loader on the same path for comparison.
%timeit(gpd.GeoDataFrame.from_file(filename))

1 loop, best of 3: 9.45 s per loop
