Permalink
Browse files

switch backend to webstore, load directly from there.

  • Loading branch information...
pudo committed Jul 24, 2011
1 parent f0308ae commit 2142567d25d316bc29894f6704be174cded07d9f
View
@@ -1,5 +1,4 @@
from flask import Flask, request
-from pymongo import Connection
from solr import SolrConnection
from helmut import default_settings
@@ -21,11 +20,7 @@ def request_format(fmt):
app = Flask(__name__)
app.config.from_object(default_settings)
-app.config.from_envvar('RECON_SETTINGS', silent=True)
-
-conn = Connection(app.config['MONGO_HOST'])
-db = conn[app.config['MONGO_DB']]
-entities = db[app.config['MONGO_COLLECTION']]
+app.config.from_envvar('HELMUT_SETTINGS', silent=True)
solr_host = app.config['SOLR_HOST']
@@ -6,9 +6,10 @@
TITLE = 'Helmut the Reconciliation Server'
ENTITY_NAME = 'entity'
-MONGO_HOST = 'localhost'
-MONGO_DB = 'recondb'
-MONGO_COLLECTION = ENTITY_NAME
+WEBSTORE_SERVER = 'webstore.thedatahub.org'
+WEBSTORE_USER = 'pudo'
+WEBSTORE_DB = 'helmut'
+WEBSTORE_TABLE = 'types'
SOLR_HOST = 'http://localhost:8983/solr/helmut'
View
@@ -1,61 +0,0 @@
-from datetime import datetime
-#from urllib import quote
-from dateutil import tz
-
-from pymongo import ASCENDING
-
-from helmut.core import entities, solr
-from helmut.text import normalize
-
-def datetime_add_tz(dt):
- """ Solr requires time zone information on all dates. """
- return datetime(dt.year, dt.month, dt.day, dt.hour,
- dt.minute, dt.second, tzinfo=tz.tzutc())
-
-def save_entity(path, title, alias=[], description=None, **kwargs):
- """ Save an entity to the database and to solr.
-
- Each entity is uniquely described by its path, which will be the last
- aspect of its URL.
- """
- entity = kwargs.copy()
-
- assert not '.' in path, "Full stop in path is invalid: %s" % path
- #assert quote(path)==path, "Path changes when URL quoted: %s" % path
- entity['path'] = path
-
- assert len(title), "Title has no length: %s" % title
- entity['title'] = title
- entity['alias'] = alias
-
- if description is not None:
- entity['description'] = description
-
- entity['updated_at'] = datetime.utcnow()
-
- existing = entities.find_one({'path': path})
- if existing is not None:
- existing.update(entity)
- entity = existing
- else:
- entity['created_at'] = entity['updated_at']
- entities.update({'path': path}, entity, upsert=True)
-
- entity['_collection'] = entities.name
- entity['title.n'] = normalize(title)
- entity['alias.n'] = map(normalize, alias)
- conn = solr()
- _entity = {}
- for k, v in entity.items():
- if isinstance(v, datetime):
- v = datetime_add_tz(v)
- _entity[str(k)] = v
- conn.add(**_entity)
- conn.commit()
-
-def finalize():
- """ After loading, run a few optimization operations. """
- entities.ensure_index([('path', ASCENDING)])
- conn = solr()
- conn.optimize()
- conn.commit()
View
@@ -1,6 +1,5 @@
import json
-from helmut.core import entities
from helmut.text import normalize
def field(k, v, boost=None):
@@ -12,7 +11,7 @@ def field(k, v, boost=None):
def query(solr, q, filters=(), **kw):
fq = ['+' + field(k, v) for k, v in filters]
- fq.append('_collection:%s' % entities.name)
+ #fq.append('_collection:%s' % entities.name)
if len(q) and q != '*:*':
nq = normalize(q)
_q = [
View
@@ -91,30 +91,25 @@
<fields>
- <field name="path" type="string" indexed="true" stored="true" required="true" />
- <field name="_collection" type="string" indexed="true" stored="true" />
- <field name="title" type="string" indexed="true" stored="true" required="true" />
- <field name="title.n" type="string" indexed="true" stored="true" required="true" />
+ <field name="__id__" type="string" indexed="true" stored="true" required="true" />
+ <field name="__type__" type="string" indexed="true" stored="true" />
<field name="alias" type="string" indexed="true" stored="true" multiValued="true"/>
<field name="alias.n" type="string" indexed="true" stored="true" multiValued="true"/>
- <field name="description" type="text" indexed="true" stored="true" />
-
- <field name="created_at" type="date" indexed="true" stored="true" />
- <field name="updated_at" type="date" indexed="true" stored="true" />
<!-- catchall field, containing all other searchable text fields (implemented
 via copyField further on in this schema) -->
- <field name="text" type="text" indexed="true" stored="false" multiValued="true"/>
- <field name="indexed_ts" type="date" indexed="true" stored="true" default="NOW" multiValued="false"/>
- <dynamicField name="*" type="text" indexed="true" stored="true"
- multiValued="true" />
+ <field name="__text__" type="text" indexed="true" stored="false" multiValued="true"/>
+ <field name="indexed_ts" type="date" indexed="true" stored="true" default="NOW" />
+ <dynamicField name="*.n" type="text" indexed="true" stored="true" />
+ <dynamicField name="*.s" type="string" indexed="true" stored="true" />
+ <dynamicField name="*" type="text" indexed="true" stored="true" />
</fields>
<uniqueKey>path</uniqueKey>
<defaultSearchField>text</defaultSearchField>
<solrQueryParser defaultOperator="AND"/>
- <copyField source="*" dest="text"/>
+ <copyField source="*" dest="__text__"/>
</schema>
View
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@@ -117,6 +117,12 @@ header {
}
header .site-title {
+ display: block;
+ text-align: bottom;
+ background: url(/static/img/pear.png) no-repeat;
+ padding-left: 3em;
+ padding-top: 20px;
+ min-height: 50px;
font-size: 1.7em;
font-weight: bold;
}
View
@@ -10,11 +10,18 @@
<h1>{{ entity.get('title') }}</h1>
<hr/>
- Created: {{ entity.get('created_at') | date }} &middot;
- Updated: {{ entity.get('updated_at') | date }} &middot;
-
- <a href="{{ url_for('entity', path=entity.get('path')) + '.json' }}">
- <img src="/static/img/json.png"></a>
+ <table width="100%">
+ <tr>
+ <th>Key</th>
+ <th>Value</th>
+ </tr>
+ {% for k, v in entity.items() %}
+ <tr>
+ <td>{{ k }}</td>
+ <td>{{ v }}</td>
+ </tr>
+ {% endfor %}
+ </table>
</div>
{% endblock %}
View
@@ -0,0 +1,91 @@
+from datetime import datetime
+#from urllib import quote
+from dateutil import tz
+
+from webstore.client import Database
+
+from helmut.core import app, solr
+from helmut.text import normalize
+
def datetime_add_tz(dt):
    """Return a copy of ``dt`` tagged as UTC, truncated to whole seconds.

    Solr requires time zone information on all dates.  Only the calendar
    and clock fields down to the second are carried over; microseconds and
    any tzinfo already attached to ``dt`` are not copied (the wall-clock
    values are kept as-is and simply labelled UTC).
    """
    field_names = ('year', 'month', 'day', 'hour', 'minute', 'second')
    fields = [getattr(dt, name) for name in field_names]
    return datetime(*fields, tzinfo=tz.tzutc())
+
+
class Type(object):
    """An entity type backed by an entity table and an alias table.

    Instances hold a Solr connection plus handles on two webstore tables
    (entities and their aliases).  ``index`` copies all entities, with
    their aliases attached, into Solr; ``by_key`` fetches a single entity.
    The classmethods read the list of available types from the table named
    by the ``WEBSTORE_*`` application settings.
    """

    def __init__(self, db_user, db_name, entity_table, entity_key,
                 alias_table, alias_text, alias_key):
        """Open the Solr connection and the two webstore tables.

        :param db_user: webstore user owning the database.
        :param db_name: name of the webstore database.
        :param entity_table: name of the table holding the entities; also
            stored on every indexed row as ``__type__``.
        :param entity_key: column of the entity table used as lookup key.
        :param alias_table: name of the table holding the aliases.
        :param alias_text: column of the alias table holding the alias text.
        :param alias_key: column of the alias table that is matched against
            the entity's ``entity_key`` value.
        """
        self.entity_table = entity_table
        self.entity_key = entity_key
        self.alias_text = alias_text
        self.alias_key = alias_key
        self.conn = solr()
        self.database = Database(app.config['WEBSTORE_SERVER'],
                                 db_user, db_name)
        self.alias = self.database[alias_table]
        self.entity = self.database[entity_table]

    def index(self, step=500):
        """Load every entity row into Solr in batches of ``step`` rows.

        Full batches are added with ``_commit=True``.  A trailing partial
        batch is added without it and is committed by ``finalize()``,
        which always runs last.

        :param step: batch size; also passed to ``traverse`` as ``_step``.
        """
        rows = []
        for row in self.entity.traverse(_step=step):
            rows.append(self.row_to_index(row))
            # Flush on the buffer size, not on the row counter: a test of
            # ``i % step == 0`` already fires for the very first row
            # (i == 0) and sends a one-row batch, shifting all later ones.
            if len(rows) >= step:
                self.conn.add_many(rows, _commit=True)
                rows = []
        if rows:
            self.conn.add_many(rows)
        self.finalize()

    def row_to_index(self, row):
        """Turn an entity row into the document that is sent to Solr.

        The row is modified in place and returned.  It gains ``alias``
        (all alias texts whose ``alias_key`` equals this row's key),
        ``title.n`` and ``alias.n`` (normalized title / aliases) and
        ``__type__`` (the entity table name).
        """
        key = row.get(self.entity_key)
        matching = self.alias.traverse(**{self.alias_key: key})
        # Build real lists: the aliases are used twice below, which a
        # lazy ``map`` object (Python 3) would not survive.
        aliases = [alias.get(self.alias_text) for alias in matching]
        row['alias'] = aliases
        row['title.n'] = normalize(row.get('title'))
        row['alias.n'] = [normalize(alias) for alias in aliases]
        row['__type__'] = self.entity_table
        return row

    def finalize(self):
        """ After loading, run a few optimization operations. """
        self.conn.optimize()
        self.conn.commit()

    def by_key(self, key):
        """Return the entity whose ``entity_key`` column equals ``key``.

        The result is whatever the table's ``find_one`` returns
        (presumably ``None`` when nothing matches -- callers test for it).
        """
        return self.entity.find_one(**{self.entity_key: key})

    @classmethod
    def config(cls):
        """Return the webstore table that lists the configured types.

        Server and user come from ``WEBSTORE_SERVER`` / ``WEBSTORE_USER``;
        database and table default to ``'helmut'`` and ``'types'`` when
        ``WEBSTORE_DB`` / ``WEBSTORE_TABLE`` are not set.
        """
        db = Database(app.config['WEBSTORE_SERVER'],
                      app.config['WEBSTORE_USER'],
                      app.config.get('WEBSTORE_DB', 'helmut'))
        return db[app.config.get('WEBSTORE_TABLE', 'types')]

    @classmethod
    def _row_to_type(cls, row):
        """Build a ``Type`` from one row of the configuration table."""
        return cls(row['db_user'],
                   row['db_name'],
                   row['entity_table'],
                   row['entity_key'],
                   row['alias_table'],
                   row['alias_text'],
                   row['alias_key'])

    @classmethod
    def types(cls):
        """Return a list with one ``Type`` per configuration row."""
        return [cls._row_to_type(row) for row in cls.config().traverse()]

    @classmethod
    def by_name(cls, name):
        """Return the ``Type`` configured under ``name``, or ``None``."""
        row = cls.config().find_one(name=name)
        if row is not None:
            row = cls._row_to_type(row)
        return row
+
View
@@ -7,8 +7,9 @@
from bson.dbref import DBRef
from bson.objectid import ObjectId
-from helmut.core import app, entities, solr, request_format
+from helmut.core import app, solr, request_format
from helmut.query import query
+from helmut.types import Type
from helmut.pager import Pager
# Specific to Freebase: type system. TODO: properly implement
@@ -63,21 +64,23 @@ def search():
pager = Pager(request.args)
return render_template('search.tmpl', pager=pager)
-@app.route('/%(ENTITY_NAME)s/<path:path>.<format>' % app.config)
-@app.route('/%(ENTITY_NAME)s/<path:path>' % app.config)
-def entity(path, format=None):
- entity = entities.find_one({'path': path})
+@app.route('/<type>/<path:key>.<format>')
+@app.route('/<type>/<path:key>')
+def entity(type, key, format=None):
+ type_ = Type.by_name(type)
+ if type_ is None:
+ abort(404)
+ entity = type_.by_key(key)
if entity is None:
abort(404)
- del entity['_id']
- entity['%(ENTITY_NAME)s_url' % app.config] = \
- url_for('entity', path=path, _external=True)
+ del entity['__id__']
+ entity['__url__'] = url_for('entity', type=type, key=key, _external=True)
format = request_format(format)
if format == 'json':
return jsonify(entity)
- if 'redirect_url' in entity:
- return redirect(entity.get('redirect_url'),
- code=303)
+ #if 'redirect_url' in entity:
+ # return redirect(entity.get('redirect_url'),
+ # code=303)
return render_template('view.tmpl', entity=entity)
@@ -155,5 +158,5 @@ def reconcile():
return jsonify(meta)
if __name__ == "__main__":
- app.run()
+ app.run(port=5005)
Oops, something went wrong.

0 comments on commit 2142567

Please sign in to comment.