Skip to content

Commit

Permalink
Initial version
Browse files Browse the repository at this point in the history
  • Loading branch information
hannesj committed Dec 18, 2015
1 parent ca275b4 commit 8de480b
Show file tree
Hide file tree
Showing 3 changed files with 150 additions and 0 deletions.
94 changes: 94 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
FROM elasticsearch:2.1
# MAINTAINER is deprecated; the same information is carried as image labels.
LABEL maintainer="Reittiopas" version="0.1"

# Finalize elasticsearch installation

# Plain local file with no archive extraction needed, so COPY is used instead of ADD.
COPY config/elasticsearch.yml /usr/share/elasticsearch/config/

# Directory referenced by path.data in config/elasticsearch.yml.
RUN mkdir -p /var/lib/elasticsearch/pelias_data

ENV ES_HEAP_SIZE=4g

# Install dependencies for importers
# Node.js 5.3.0 from the NodeSource .deb package.
# -f makes curl exit non-zero on an HTTP error instead of writing the error
# page into node.deb; -sS hides the progress meter but still prints errors.
RUN curl -fsS -o node.deb https://deb.nodesource.com/node_5.x/pool/main/n/nodejs/nodejs_5.3.0-1nodesource1~jessie1_amd64.deb \
 && dpkg -i node.deb \
 && rm node.deb

# System packages needed by the later download/convert/import steps.
# The apt package lists are removed in the same layer that fetched them.
RUN set -x \
 && apt-get update \
 && apt-get install -y --no-install-recommends \
      gdal-bin \
      git \
      python \
      python-pip \
      unzip \
 && rm -rf /var/lib/apt/lists/*

# Address deduper service (started during the final import step).
# Clone into an explicit absolute path: the import step launches
# /address_deduper/app.py, so the checkout location must not depend on
# whatever working directory the base image happens to set.
RUN git clone https://github.com/openvenues/address_deduper.git /address_deduper \
 && cd /address_deduper \
 && pip install -r requirements.txt

# Auxiliary folders
# The commands are chained with && (not a single &): the directories must be
# created only after /mnt has been removed, and a failure must abort the build.
# A single & would background each command and race the rm against the mkdirs.
RUN rm -rf /mnt \
 && mkdir -p \
      /mnt/data/openstreetmap \
      /tmp/openstreetmap \
      /mnt/data/openaddresses \
      /mnt/data/quattroshapes \
      /mnt/data/nls-places

# Download quattroshapes data (only higher levels will be used)
WORKDIR /mnt/data/quattroshapes
# Each admin level is re-written as a qs_* shapefile. SHAPE_ENCODING tells
# ogr2ogr the source files are ISO-8859-1; -lco ENCODING=UTF-8 sets the
# encoding of the written shapefile. The archive and the extracted FIN/
# directory are removed in the same layer.
RUN curl -sS -O http://quattroshapes.mapzen.com/quattroshapes/alpha3/FIN.tgz \
 && tar zxvf FIN.tgz && rm -f FIN.tgz \
 && SHAPE_ENCODING="ISO-8859-1" ogr2ogr qs_adm0.shp FIN/FIN_admin0.shp -lco ENCODING=UTF-8 \
 && SHAPE_ENCODING="ISO-8859-1" ogr2ogr qs_adm1.shp FIN/FIN_admin1.shp -lco ENCODING=UTF-8 \
 && SHAPE_ENCODING="ISO-8859-1" ogr2ogr qs_adm2.shp FIN/FIN_admin2.shp -lco ENCODING=UTF-8 \
 && SHAPE_ENCODING="ISO-8859-1" ogr2ogr qs_localadmin.shp FIN/FIN_localadmin.shp -lco ENCODING=UTF-8 \
 && rm -rf FIN

# Download Finnish municipalities and convert these to quattroshapes format
# Step 1: fetch and unpack the municipality division archive.
# Step 2: extract the 4thOrder AdministrativeUnit features into kunnat.shp (EPSG:4326).
# Step 3: project the text1 field to qs_loc in qs_localities.shp.
# Step 4: delete the archive, the unpacked directory and the intermediate shapefile.
RUN curl -sS -O http://kartat.kapsi.fi/files/kuntajako/kuntajako_10k/etrs89/gml/TietoaKuntajaosta_2015_10k.zip \
 && unzip TietoaKuntajaosta_2015_10k.zip \
 && ogr2ogr \
      -t_srs EPSG:4326 \
      -nlt POLYGON \
      -splitlistfields \
      -where "nationalLevel='4thOrder'" \
      -f "ESRI Shapefile" \
      kunnat.shp \
      TietoaKuntajaosta_2015_10k/SuomenKuntajako_2015_10k.xml \
      AdministrativeUnit \
      -lco ENCODING=UTF-8 \
 && ogr2ogr \
      -sql "SELECT text1 AS qs_loc FROM kunnat" \
      -f "ESRI Shapefile" \
      qs_localities.shp \
      kunnat.shp \
      -lco ENCODING=UTF-8 \
 && rm -rf TietoaKuntajaosta_2015_10k.zip TietoaKuntajaosta_2015_10k/ kunnat.*

# Download zip codes and convert these to quattroshapes format
# Reads the postal-area layer straight from the WFS endpoint and writes
# qs_neighborhoods.shp with a single "name" field built from code and name.
RUN ogr2ogr \
      -t_srs EPSG:4326 \
      -nlt POLYGON \
      -sql "SELECT CONCAT( posti_alue, ' ', nimi ) AS name from 'postialue:pno_meri_2015'" \
      -f "ESRI Shapefile" \
      qs_neighborhoods.shp \
      WFS:http://geo.stat.fi/geoserver/postialue/postialue%3Apno_meri_2015/wfs \
      -lco ENCODING=UTF-8

# Download OpenStreetMap
WORKDIR /mnt/data/openstreetmap
# -f makes curl exit non-zero on an HTTP error. Without it an error page would
# be saved as finland-latest.osm.pbf and the build would only fail much later,
# in the openstreetmap import.
RUN curl -fsS -O http://download.geofabrik.de/europe/finland-latest.osm.pbf

#TODO: find out run number from http://results.openaddresses.io/state.txt
#TODO: Add Tampere after their data has been fixed
WORKDIR /mnt/data/openaddresses
# Each entry is "<run>/<path>/<city>"; the archive name is the last path
# component plus ".zip". Every archive is downloaded, unpacked (overwriting
# existing files) and deleted in turn; the first failing step aborts the build.
RUN for run in 37881/fi/18/helsinki 37878/fi/14/oulu 32517/fi/19/turku; do \
      zip="${run##*/}.zip"; \
      curl -sS -O "http://data.openaddresses.io.s3.amazonaws.com/runs/${run}.zip" \
      && unzip -o "$zip" \
      && rm "$zip" \
      || exit 1; \
    done

# Download nls paikat data
WORKDIR /mnt/data/nls-places
# Fetch the places archive, unpack it here and drop the zip in the same layer.
RUN archive=paikat_2015_05.zip \
 && curl -sS -O "http://kartat.kapsi.fi/files/nimisto/paikat/etrs89/gml/$archive" \
 && unzip "$archive" \
 && rm "$archive"

# NLS places importer: check it out under $HOME/.pelias and install its npm
# dependencies there. The final import step runs lib/index from this checkout.
RUN importer="$HOME/.pelias/nls-fi-places" \
 && git clone https://github.com/HSLdevcom/pelias-nlsfi-places-importer.git "$importer" \
 && cd "$importer" \
 && npm install

WORKDIR /root

# Copying pelias config file
# Plain local file with no archive extraction needed, so COPY is used instead of ADD.
# The relative destination resolves against the WORKDIR above.
COPY pelias.json pelias.json

# The address deduper will run through the build process, so start it in the background
# Hence the single ampersand after the deduper process
# In shell grammar `&&` binds tighter than `&`, so the backgrounded list is
# `elasticsearch -d && python /address_deduper/app.py serve`; every command
# after the single `&` is chained with `&&` and runs in the foreground:
# install pelias-cli, create the index, import NLS places, import
# openaddresses (with admin values and deduplication), import openstreetmap.
# NOTE(review): nothing here waits for Elasticsearch to be reachable before
# `pelias schema create_index` - presumably the preceding `npm install` takes
# long enough for it to start; confirm.
RUN elasticsearch -d \
&& python /address_deduper/app.py serve \
& npm install pelias-cli \
&& pelias schema create_index \
&& node $HOME/.pelias/nls-fi-places/lib/index -d /mnt/data/nls-places \
&& pelias openaddresses import --admin-values --deduplicate \
&& pelias openstreetmap import
5 changes: 5 additions & 0 deletions config/elasticsearch.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Elasticsearch settings; the Dockerfile places this file in
# /usr/share/elasticsearch/config/.

# Turn off multicast pings for zen discovery.
discovery.zen.ping.multicast.enabled: false

# Network host set to the wildcard address 0.0.0.0.
network.host: 0.0.0.0

# Data directory; the Dockerfile creates this path with mkdir -p.
path.data: /var/lib/elasticsearch/pelias_data
51 changes: 51 additions & 0 deletions pelias.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
{
  "esclient": {
    "keepAlive": true,
    "maxRetries": "3",
    "deadTimeout": "3000",
    "maxSockets": "20",
    "hosts": [
      {
        "env": "dev",
        "protocol": "http",
        "host": "localhost",
        "port": 9200
      }
    ],
    "log": [
      {
        "type": "stdio",
        "level": ["error", "warning"]
      }
    ]
  },
  "logger": {
    "level": "verbose"
  },
  "imports": {
    "quattroshapes": {
      "datapath": "/mnt/data/quattroshapes/",
      "adminLookup": true
    },
    "openstreetmap": {
      "datapath": "/mnt/data/openstreetmap/",
      "adminLookup": true,
      "deduplicate": true,
      "leveldbpath": "/tmp/openstreetmap/",
      "import": [
        {
          "type": {
            "node": "osmnode",
            "way": "osmway"
          },
          "filename": "finland-latest.osm.pbf"
        }
      ]
    },
    "openaddresses": {
      "datapath": "/mnt/data/openaddresses/"
    }
  },
  "elasticsearch": {
    "settings": {
      "index": {
        "number_of_replicas": "0",
        "number_of_shards": "1",
        "index.index_concurrency": "24"
      }
    }
  }
}

0 comments on commit 8de480b

Please sign in to comment.