Permalink
Browse files

added new feature to replicate sequences script

  • Loading branch information...
1 parent cdd451e commit 5f5a4436e267eba593dd6ecd0a8dd5617759d3cc Claus Stadler committed Jun 29, 2017
View
@@ -126,13 +126,13 @@ Again, note that Sparqlify is still in development and the supported features ar
* `lgd-osm-replicate-sequences`: Convert a timestamp to a sequence ID. This is similar to [mazdermind's replicate sequences tool](https://github.com/MaZderMind/replicate-sequences); however, our version does not require a local index. Instead, our tool combines binary search with linear interpolation: First, the two most recent state.txt files from the given repository URL are fetched, then the time difference is computed, and based on linear interpolation a sequence ID close to the given timestamp is computed. This process is repeated recursively.
```bash
-lgd-osm-replicate-sequences -u "http://planet.openstreetmap.org/replication/hour/" -d "2017-05-28T15:00:00Z"
+lgd-osm-replicate-sequences -u "http://planet.openstreetmap.org/replication/hour/" -t "2017-05-28T15:00:00Z"
# The above command from the debian package is a wrapper for:
java -cp linkedgeodata-debian/target/linkedgeodata-debian-*-jar-with-dependencies.jar \
"org.aksw.linkedgeodata.cli.command.osm.CommandOsmReplicateSequences" \
- -u "http://planet.openstreetmap.org/replication/hour/" -d "2017-05-28T15:00:00Z"
+ -u "http://planet.openstreetmap.org/replication/hour/" -t "2017-05-28T15:00:00Z"
```
The output is (presently a subset of) the appropriate state.txt file whose timestamp is strictly less than the one given as the argument.
```
@@ -146,6 +146,13 @@ timestamp=`osmconvert --out-timestamp "data.osm.pbf"`
lgd-osm-replicate-sequences -u "url-to-repo" -t "$timestamp"
```
+```bash
+# Use the -d option to obtain the (d)uration between the most recently published files
+lgd-osm-replicate-sequences -u "http://planet.openstreetmap.org/replication/day/" -d
+# This yields simply the output (possibly off by a few seconds)
+# 86400
+```
+
### Postgresql Database Tuning
It is recommended to tune the database according to [these recommendations](http://wiki.postgresql.org/wiki/Tuning_Your_PostgreSQL_Server). Here is a brief summary:
Edit `/etc/postgresql/9.1/main/postgresql.conf` and set the following properties:
@@ -16,9 +16,13 @@
@Parameter(names = {"-u", "-url"}, description = "OSM Repository base URL")
public String osmReplicationRepoBaseUrl = null;
- @Parameter(names = {"-d", "-date"}, description = "Timestamp")
+ @Parameter(names = {"-t", "-timestamp"}, description = "Timestamp")
public String timestamp = null;
+ @Parameter(names = {"-d", "-duration"}, description = "Duration")
+ public Boolean returnDuration = false;
+ //public String cmd
+
public static void main(String[] args) throws Exception {
CommandOsmReplicateSequences options = new CommandOsmReplicateSequences();
@@ -27,14 +31,17 @@ public static void main(String[] args) throws Exception {
OsmRepoDao repoDao = OsmRepoDaoImpl.create(options.osmReplicationRepoBaseUrl);
- Instant instant = Instant.parse(options.timestamp);
-
+ if(options.returnDuration) {
+ System.out.println(repoDao.getUpdateInterval().getSeconds());
+ } else { // By default, print the state file for the timestamp
+ Instant instant = Instant.parse(options.timestamp);
- State state = repoDao.findState(instant);
- java.util.Properties properties = StateImpl.toProperties(new Properties(), state);
- properties.store(System.out, null);
+ State state = repoDao.findState(instant);
+ java.util.Properties properties = StateImpl.toProperties(new Properties(), state);
+ properties.store(System.out, null);
+ }
// if(commandLineArgs.isHelp())
// {
@@ -1,5 +1,6 @@
package org.aksw.linkedgeodata.osm.replication.dao;
+import java.time.Duration;
import java.time.Instant;
/**
@@ -20,4 +21,5 @@
// State getState(long seqId) throws Exception;
State findState(Instant searchTimestamp) throws Exception;
+ Duration getUpdateInterval();
}
@@ -1,6 +1,7 @@
package org.aksw.linkedgeodata.osm.replication.dao;
import java.net.URI;
+import java.time.Duration;
import java.time.Instant;
import java.time.temporal.ChronoUnit;
import java.util.ArrayList;
@@ -13,13 +14,18 @@
implements OsmRepoDao
{
protected OsmRepoCoreDao coreDao;
- protected long avgUpdateIntervalInSec;
+ protected Duration updateInterval;
- public OsmRepoDaoImpl(OsmRepoCoreDao repoAccessor, long avgUpdateIntervalInSec) {
+ public OsmRepoDaoImpl(OsmRepoCoreDao repoAccessor, Duration updateInterval) {
super();
this.coreDao = repoAccessor;
- this.avgUpdateIntervalInSec = avgUpdateIntervalInSec;
+ this.updateInterval = updateInterval;
+ }
+
+ @Override
+ public Duration getUpdateInterval() {
+ return updateInterval;
}
@Override
@@ -45,12 +51,12 @@ public State findState(Instant searchTimestamp) throws Exception {
State currentState = this.getMostRecentState();
Range<Long> range = Range.closedOpen(0l, currentState.getSeqId());
- State result = findStatePreceedingTimestamp(currentState, searchTimestamp, avgUpdateIntervalInSec, range);
+ State result = findStatePreceedingTimestamp(currentState, searchTimestamp, updateInterval, range);
return result;
}
- public State findStatePreceedingTimestamp(State currentState, Instant searchTimestamp, long avgUpdateIntervalInS, Range<Long> seqIdRange) throws Exception {
+ public State findStatePreceedingTimestamp(State currentState, Instant searchTimestamp, Duration updateInterval, Range<Long> seqIdRange) throws Exception {
State result = null;
Range<Long> subRange;
@@ -63,7 +69,7 @@ public State findStatePreceedingTimestamp(State currentState, Instant searchTime
//Instant lowerTimestamp = timestamp.toInstant();
// Interpolate the sequence id of the lower state
- long delta = (long)(ChronoUnit.SECONDS.between(searchTimestamp, currentTimestamp) / (double)avgUpdateIntervalInS);
+ long delta = (long)(ChronoUnit.SECONDS.between(searchTimestamp, currentTimestamp) / (double)updateInterval.getSeconds());
// If the searchTimestamp is after the checkTimestamp, we need to go further back by one updateInterval
if(delta == 0) {
if(searchTimestamp.compareTo(currentTimestamp) < 0) {
@@ -87,13 +93,13 @@ public State findStatePreceedingTimestamp(State currentState, Instant searchTime
State checkState = this.getState(lowerSeqId);
//Instant checkTimestamp = checkState.getTimestamp().toInstant();
- result = findStatePreceedingTimestamp(checkState, searchTimestamp, avgUpdateIntervalInS, subRange);
+ result = findStatePreceedingTimestamp(checkState, searchTimestamp, updateInterval, subRange);
}
}
return result;
}
- public static double determineUpdateIntervalInSec(OsmRepoCoreDao repoCoreDao) throws Exception {
+ public static Duration determineUpdateIntervalInSec(OsmRepoCoreDao repoCoreDao) throws Exception {
int n = 2;
List<Instant> instants = new ArrayList<>(n);
State latest = repoCoreDao.getMostRecentState();
@@ -103,11 +109,12 @@ public static double determineUpdateIntervalInSec(OsmRepoCoreDao repoCoreDao) th
instants.add(state.getTimestamp().toInstant());
}
- double result = IntStream.range(0, n - 1)
+ double tmp = IntStream.range(0, n - 1)
.mapToLong(i -> ChronoUnit.SECONDS.between(instants.get(i + 1), instants.get(i)))
.average()
.getAsDouble();
+ Duration result = Duration.ofSeconds((long)tmp);
return result;
}
@@ -120,9 +127,11 @@ public static OsmRepoDao create(String repoBaseUriStr) throws Exception {
}
public static OsmRepoDao create(OsmRepoCoreDao coreDao) throws Exception {
- long updateIntervalInSec = (long)determineUpdateIntervalInSec(coreDao);
+ Duration updateInterval = determineUpdateIntervalInSec(coreDao);
- OsmRepoDao result = new OsmRepoDaoImpl(coreDao, updateIntervalInSec);
+ OsmRepoDao result = new OsmRepoDaoImpl(coreDao, updateInterval);
return result;
}
}
+
+
@@ -21,5 +21,8 @@ DB_MAINTENANCE_WORK_MEM=256MB
OSM_DATA_BASE_URL=http://downloads.linkedgeodata.org/debugging/monaco-170618.osm.pbf
OSM_DATA_SYNC_URL=http://download.geofabrik.de/europe/monaco-updates/
-OSM_DATA_SYNC_SLEEP=600
+
+
+#OSM_DATA_SYNC_RECHECK_INTERVAL=900
+#OSM_DATA_SYNC_UPDATE_INTERVAL=3600
@@ -36,7 +36,6 @@ services:
environment:
- OSM_DATA_BASE_URL=${OSM_DATA_BASE_URL}
- OSM_DATA_SYNC_URL=${OSM_DATA_SYNC_URL}
- - OSM_DATA_SYNC_SLEEP=${OSM_DATA_SYNC_SLEEP}
depends_on:
lgd-db:
condition: service_healthy
@@ -69,7 +68,6 @@ services:
environment:
- OSM_DATA_BASE_URL=${OSM_DATA_BASE_URL}
- OSM_DATA_SYNC_URL=${OSM_DATA_SYNC_URL}
- - OSM_DATA_SYNC_SLEEP=${OSM_DATA_SYNC_SLEEP}
depends_on:
lgd-db:
condition: service_healthy
@@ -7,8 +7,8 @@
// Website settings
@define('CONST_Database_DSN', 'pgsql://lgd:lgdpwd@lgd-db:5432/lgd'); // <driver>://<username>:<password>@<host>:<port>/<database>
@define('CONST_Website_BaseURL', '/');
- @define('CONST_Replication_Url', '${OSM_DATA_SYNC_URL');
- @define('CONST_Replication_MaxInterval', '86400'); // Process each update separately, osmosis cannot merge multiple updates
- @define('CONST_Replication_Update_Interval', '86400'); // How often upstream publishes diffs
- @define('CONST_Replication_Recheck_Interval', '900'); // How long to sleep if no update found yet
+ @define('CONST_Replication_Url', '${OSM_DATA_SYNC_URL}');
+ @define('CONST_Replication_MaxInterval', '${OSM_DATA_SYNC_UPDATE_INTERVAL}'); // Process each update separately, osmosis cannot merge multiple updates
+ @define('CONST_Replication_Update_Interval', '${OSM_DATA_SYNC_UPDATE_INTERVAL}'); // How often upstream publishes diffs
+ @define('CONST_Replication_Recheck_Interval', '${OSM_DATA_SYNC_RECHECK_INTERVAL}'); // How long to sleep if no update found yet
?>
@@ -27,7 +27,7 @@ if [ -z "$statusVal" ]; then
timestamp=`osmconvert --out-timestamp "$syncDir/data.osm.pbf"`
#curl "https://osm.mazdermind.de/replicate-sequences/?$timestamp" > sync/state.txt
- lgd-osm-replicate-sequences -u "$OSM_DATA_SYNC_URL" -d "$timestamp" > "$syncDir/state.txt"
+ lgd-osm-replicate-sequences -u "$OSM_DATA_SYNC_URL" -t "$timestamp" > "$syncDir/state.txt"
# TODO Fix lgd-createdb to include port

0 comments on commit 5f5a443

Please sign in to comment.