/
directed_edge.rb
558 lines (453 loc) · 16.6 KB
/
directed_edge.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
# Copyright (C) 2009 Directed Edge Ltd.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
require 'rubygems'
require 'rest_client'
require 'rexml/document'
require 'cgi'
require 'set'
require 'uri'
# The DirectedEdge module contains three classes:
#
# - Database - encapsulation of a connection to a database hosted by Directed Edge.
# - Exporter - simple mechanism for exporting data from existing data sources.
# - Item - item (user, product, page) in a Directed Edge database.
module DirectedEdge
# A Database is an encapsulation of a database being accessed via the Directed
# Edge web-services API. You can request database creation by visiting
# http://www.directededge.com and will receive a user name and password which
# are then used to connect to your DirectedEdge::Database instance.
#
# Usually when getting started with a DirectedEdge database, users would like to
# import some pre-existing data, usually from their web application's database.
# The Database class has an import method which can be used to import data using
# Directed Edge's XML format. Files formatted in that way may be created with
# the Exporter.
#
# A database is typically instantiated via:
#
# database = DirectedEdge::Database.new('mydatabase', 'mypassword')
class Database
  # The name of the database.
  attr_reader :name

  # The REST resource used for connecting to the database.
  attr_reader :resource

  # Creates a connection to a Directed Edge database. The name and password
  # should have been provided when the account was created. The protocol
  # parameter is optional and may be <tt>http</tt> or <tt>https</tt>.
  # <tt>http</tt> is used by default as it is somewhat lower latency.
  #
  # The host defaults to <tt>webservices.directededge.com</tt> and may be
  # overridden with the <tt>DIRECTEDEDGE_HOST</tt> environment variable.
  #
  # The name and password are percent-escaped before being embedded in the
  # credentials part of the URL, so characters such as <tt>@</tt>, <tt>:</tt>
  # or <tt>/</tt> in a password can no longer corrupt the URL. Pass the
  # password verbatim, not pre-escaped.
  def initialize(name, password='', protocol='http')
    @name = name
    host = ENV['DIRECTEDEDGE_HOST'] || 'webservices.directededge.com'
    # Only the userinfo part is escaped; the path keeps the name exactly as
    # given, as before.
    credentials = "#{CGI.escape(name.to_s)}:#{CGI.escape(password.to_s)}"
    @resource =
      RestClient::Resource.new("#{protocol}://#{credentials}@#{host}/api/v1/#{name}")
  end

  # Imports a Directed Edge XML file to the database.
  #
  # The whole file is read into memory and sent in a single PUT request with
  # a <tt>text/xml</tt> content type.
  #
  # See http://developer.directededge.com for more information on the XML format or the
  # Exporter for help on creating a file for importing.
  def import(file_name)
    @resource.put(File.read(file_name), :content_type => 'text/xml')
  end
end
# A very simple class for creating Directed Edge XML files. This can be done for
# example with:
#
# exporter = DirectedEdge::Exporter.new('mydatabase.xml')
# item = DirectedEdge::Item.new(exporter.database, 'product_1')
# item.add_tag('product')
# exporter.export(item)
# exporter.finish
#
# <tt>mydatabase.xml</tt> now contains:
#
# <?xml version="1.0" encoding="UTF-8"?>
# <directededge version="0.1">
# <item id='product_1'><tag>product</tag></item>
# </directededge>
#
# Which can then be imported to a database on the server with:
#
# database = DirectedEdge::Database.new('mydatabase', 'mypassword')
# database.import('mydatabase.xml')
#
# Items may also be exported from existing databases.
class Exporter
  # Placeholder database handed to items that are built only to be exported.
  attr_reader :database

  # Opens file_name for writing, replacing whatever it contained, and emits
  # the XML declaration followed by the opening <tt>directededge</tt> element.
  def initialize(file_name)
    @database = Database.new('exporter')
    @file = File.open(file_name, 'w')
    preamble = [
      "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n\n",
      "<directededge version=\"0.1\">\n"
    ]
    preamble.each { |chunk| @file.write(chunk) }
  end

  # Appends the XML form of item (as returned by its to_xml method), followed
  # by a newline, to the export file.
  def export(item)
    line = "#{item.to_xml}\n"
    @file.write(line)
  end

  # Emits the closing <tt>directededge</tt> element and closes the file. The
  # exporter cannot be written to after this call.
  def finish
    closing_tag = "</directededge>\n"
    @file.write(closing_tag)
    @file.close
  end
end
# Represents an item in a Directed Edge database. Items can be products, pages
# or users, for instance. Usually item groups are differentiated from one
# another by a set of tags that are provided.
#
# For instance, a user in the Directed Edge database could be modeled as:
#
# user = DirectedEdge::Item.new(database, 'user_1')
# user.add_tag('user')
# user.save
#
# Similarly a product could be:
#
# product = DirectedEdge::Item.new(database, 'product_1')
# product.add_tag('product')
# product['price'] = '$42'
# product.save
#
# Note here that items have tags and properties. Tags are a free-form set of
# text identifiers that can be associated with an item, e.g. "user", "product",
# "page", "science fiction", etc.
#
# Properties are a set of key-value pairs associated with the item. For example,
# <tt>product['price'] = '$42'</tt>, or <tt>user['first name'] = 'Bob'</tt>.
#
# If we wanted to link the user to the product, for instance, indicating that the
# user had purchased the product we can use:
#
# user.link_to(product)
# user.save
class Item
  # The unique item identifier used by the database and specified in the item's
  # constructor.
  attr_reader :id

  # Initializes the item with the value id.
  # * Note this does not create the item in the database if it does not exist
  # * See also create
  #
  # database must respond to +resource+; the item's own REST resource is
  # derived from it using the escaped id.
  def initialize(database, id)
    @database = database
    @id = id
    @links = {}
    @tags = Set.new
    @properties = {}
    @links_to_remove = Set.new
    @tags_to_remove = Set.new
    @properties_to_remove = Set.new
    # URI.escape was removed in Ruby 3.0. It was a thin wrapper around the
    # default parser's escape method, which is still available.
    @resource = @database.resource[URI::DEFAULT_PARSER.escape(@id.to_s)]
    @cached = false
  end

  # Returns true if the other item is the same. The item given can either be
  # a string or an item object.
  def ==(other)
    if other.is_a?(Item)
      other.id == id
    else
      other.to_s == id
    end
  end

  # Returns the item's ID.
  def name
    @id
  end

  # Creates an item if it does not already exist in the database or overwrites
  # an existing item if one does.
  #
  # links is a hash of item ID => weight, tags a Set of tag names and
  # properties a hash of name => value. Returns this item.
  def create(links={}, tags=Set.new, properties={})
    @links = links
    @tags = tags
    @properties = properties

    # Here we pretend that it's cached since this is now the authoritative
    # copy of the values.
    @cached = true
    save
  end

  # Writes all changes to links, tags and properties back to the database and
  # returns this item.
  def save
    if @cached
      put(complete_document)
    else
      # The web services API allows to add or remove things incrementally.
      # Since we're not in the cached case, let's check to see which action(s)
      # are appropriate.
      unless @links.empty? && @tags.empty? && @properties.empty?
        put(complete_document, 'add')
      end

      unless @links_to_remove.empty? && @tags_to_remove.empty? && @properties_to_remove.empty?
        put(removal_document, 'remove')
        @links_to_remove.clear
        @tags_to_remove.clear
        @properties_to_remove.clear
      end
    end
    self
  end

  # Reloads (or loads) the item from the database. Any unsaved changes will
  # be discarded.
  def reload
    document = read_document

    @links = hash_from_document(document, 'link', 'weight')
    @tags = Set.new(list_from_document(document, 'tag'))
    @properties = {}

    @links_to_remove.clear
    @tags_to_remove.clear
    @properties_to_remove.clear

    document.elements.each('//property') do |element|
      @properties[element.attribute('name').value] = element.text
    end
    @cached = true
  end

  # Returns a hash of the items that are linked to from this item, mapping
  # each item ID to the weight of the link.
  def links
    read
    @links
  end

  # Returns a set containing all of this item's tags.
  def tags
    read
    @tags
  end

  # Returns a hash of all of this item's properties.
  def properties
    read
    @properties
  end

  # Returns the property for the name specified.
  def [](property_name)
    read
    @properties[property_name]
  end

  # Assigns value to the given property_name.
  #
  # This will not be written back to the database until save is called.
  def []=(property_name, value)
    @properties_to_remove.delete(property_name)
    @properties[property_name] = value
  end

  # Remove the given property_name.
  #
  # This will not be written back to the database until save is called.
  def clear_property(property_name)
    # When the item has not been read yet, the removal has to be remembered
    # so that save can send it as an incremental update.
    @properties_to_remove.add(property_name) unless @cached
    @properties.delete(property_name)
  end

  # Removes an item from the database, including deleting all links to and
  # from this item.
  def destroy
    @resource.delete
  end

  # Creates a link from this item to other.
  #
  # Weighted links are typically used to encode ratings. For instance, if
  # a user has rated a given product that can be specified via:
  #
  #   user = DirectedEdge::Item.new(database, 'user_1')
  #   product = DirectedEdge::Item.new(database, 'product_1') # preexisting item
  #   user.link_to(product, 5)
  #   user.save
  #
  # If no weight is specified then a traditional, unweighted link will be
  # created. This is typical to, for instance, indicate a purchase or click
  # from a user to a page or item.
  #
  # Weights may be in the range of 1 to 10; 0 (the default) denotes an
  # unweighted link. Raises RangeError for any other weight.
  #
  # The changes will not be reflected in the database until save is called.
  def link_to(other, weight=0)
    if weight < 0 || weight > 10
      raise RangeError, "link weight must be between 0 and 10 (got #{weight})"
    end
    # Pending removals are stored by ID string (see unlink_from), so the
    # lookup has to use the string form too; an Item object would never match.
    target = other.to_s
    @links_to_remove.delete(target)
    @links[target] = weight
  end

  # Deletes a link from this item to other.
  #
  # The changes will not be reflected in the database until save is called.
  def unlink_from(other)
    target = other.to_s
    @links_to_remove.add(target) unless @cached
    @links.delete(target)
  end

  # If there is a link for "other" then it returns the weight for the given
  # item. Zero indicates that no weight is assigned; nil is returned when
  # there is no link at all.
  def weight_for(other)
    read
    @links[other.to_s]
  end

  # Adds a tag to this item.
  #
  # The changes will not be reflected in the database until save is called.
  def add_tag(tag)
    @tags_to_remove.delete(tag)
    @tags.add(tag)
  end

  # Removes a tag from this item.
  #
  # The changes will not be reflected in the database until save is called.
  def remove_tag(tag)
    @tags_to_remove.add(tag) unless @cached
    @tags.delete(tag)
  end

  # Returns the list of items related to this one. Unlike "recommended" this
  # may include items which are directly linked from this item. If any tags
  # are specified, only items which have one or more of the specified tags
  # will be returned.
  #
  # Tags are percent-escaped before being placed in the request, so they may
  # contain spaces or other reserved characters.
  #
  # This will not reflect any unsaved changes to items.
  def related(tags=Set.new)
    document = read_document('related?tags=' + tag_query(tags))
    list_from_document(document, 'related')
  end

  # Returns the list of items recommended for this item, usually a user.
  # Unlike "related" this does not include items linked from this item. If
  # any tags are specified, only items which have one or more of the specified
  # tags will be returned.
  #
  # Tags are percent-escaped before being placed in the request, so they may
  # contain spaces or other reserved characters.
  #
  # This will not reflect any unsaved changes to items.
  def recommended(tags=Set.new)
    document = read_document('recommended?excludeLinked=true&tags=' + tag_query(tags))
    list_from_document(document, 'recommended')
  end

  # Returns the ID of the item.
  def to_s
    @id
  end

  # Returns an XML representation of the item as a string not including the
  # usual document regalia, e.g. starting with <item> (used for exporting the
  # item to a file)
  def to_xml
    insert_item(REXML::Document.new).to_s
  end

  private

  # Builds the comma separated value of the <tt>tags</tt> query parameter.
  # Each tag is percent-escaped; spaces are written as %20 rather than "+".
  def tag_query(tags)
    tags.to_a.map { |tag| CGI.escape(tag.to_s).gsub('+', '%20') }.join(',')
  end

  # Returns an array of the elements from the document matching the given
  # element name.
  def list_from_document(document, element)
    values = []
    document.elements.each("//#{element}") { |v| values.push(v.text) }
    values
  end

  # Returns a hash of the elements from the document matching the given
  # element name. If the specified attribute is present, its value will
  # be assigned to the hash, otherwise the default value given will be
  # used. Attribute values that are plain integers are converted to Integer;
  # anything else is kept as a string.
  def hash_from_document(document, element, attribute, default=0)
    values = {}
    document.elements.each("//#{element}") do |v|
      # A missing attribute is nil, which to_s turns into an empty string.
      value = v.attribute(attribute).to_s
      values[v.text] =
        if value.empty?
          default
        elsif value.to_i.to_s == value
          value.to_i
        else
          value
        end
    end
    values
  end

  # Reads the tags / links / properties from the server if they are not
  # already cached. Unsaved local changes are kept: locally set link weights
  # and properties win over the stored values, and pending removals are
  # applied to the merged result.
  #
  # A failed read is reported on standard output and leaves the item
  # uncached.
  def read
    return if @cached

    begin
      document = read_document

      # Keep the local weight when a link exists on both sides, the same way
      # local properties are kept below.
      stored_links = hash_from_document(document, 'link', 'weight')
      @links.merge!(stored_links) { |_id, local, _stored| local }
      @tags.merge(list_from_document(document, 'tag'))

      document.elements.each('//property') do |element|
        name = element.attribute('name').value
        @properties[name] = element.text unless @properties.has_key?(name)
      end

      @links_to_remove.each { |link| @links.delete(link) }
      @tags_to_remove.each { |tag| @tags.delete(tag) }
      @properties_to_remove.each { |property| @properties.delete(property) }

      @links_to_remove.clear
      @tags_to_remove.clear
      @properties_to_remove.clear

      @cached = true
    rescue
      puts "Couldn't read \"#{@id}\" from the database."
    end
  end

  # Uploads the changes to the Directed Edge database. The optional method
  # parameter may be used for either add or remove which do only incremental
  # updates to the item.
  def put(document, method='')
    @resource[method].put(document.to_s, :content_type => 'text/xml')
  end

  # Reads an item from the database and puts it into an XML document. The
  # optional method parameter is the sub-resource (and query) to request.
  def read_document(method='')
    REXML::Document.new(@resource[method].get(:accept => 'text/xml'))
  end

  # Creates a document for an entire item including the links, tags and
  # properties.
  #
  # NOTE(review): this returns the <item> element produced by insert_item,
  # not the enclosing document, so the uploaded text has no <directededge>
  # wrapper - confirm that this is what the web service expects.
  def complete_document
    document = REXML::Document.new
    insert_item(document)
  end

  # Creates the payload listing the links, tags and properties that are
  # pending removal. Like complete_document it returns the <item> element.
  def removal_document
    item = setup_document(REXML::Document.new)
    @links_to_remove.each { |link| item.add_element('link').add_text(link.to_s) }
    @tags_to_remove.each { |tag| item.add_element('tag').add_text(tag.to_s) }
    @properties_to_remove.each do |property|
      item.add_element('property').add_attribute('name', property.to_s)
    end
    item
  end

  # Adds this item with its links, tags and properties to the document and
  # returns the newly created <item> element.
  def insert_item(document)
    item = setup_document(document)
    @links.each do |link, weight|
      element = item.add_element('link')
      # A weight of zero means "unweighted" and is written without attribute.
      element.add_attribute('weight', weight.to_s) if weight != 0
      element.add_text(link.to_s)
    end
    @tags.each { |tag| item.add_element('tag').add_text(tag.to_s) }
    @properties.each do |key, value|
      property = item.add_element('property')
      property.add_attribute('name', key.to_s)
      property.add_text(value.to_s)
    end
    item
  end

  # Creates a skeleton of an XML document for a given item containing a
  # single child element with the given text value.
  def item_document(element, value)
    document = REXML::Document.new
    item = setup_document(document)
    item.add_element(element).add_text(value.to_s)
    document
  end

  # Sets up an existing XML document with the skeleton Directed Edge elements
  # and returns the <item> element for this item.
  def setup_document(document)
    directededge = document.add_element('directededge')
    directededge.add_attribute('version', '0.1')
    item = directededge.add_element('item')
    item.add_attribute('id', @id.to_s)
    item
  end
end
end