public
Description: Small RDF Python scraper for Gumtree ads (includes geocoding)
Clone URL: git://github.com/moustaki/flatscrap.git
Search Repo:
RDF serialisation
moustaki (author)
Thu Mar 06 11:07:45 -0800 2008
commit  419a804d446669005734e621d0990d3078bf34b4
tree    10e835ad624db7caaf13189c70fd4e422fb3959f
parent  f1a66b250ddc871ec225d054f5af65ee11db8e92
...
2
3
4
 
 
 
5
 
6
7
8
...
35
36
37
38
39
40
41
42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
...
2
3
4
5
6
7
8
9
10
11
12
...
39
40
41
 
 
 
 
 
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
0
@@ -2,7 +2,11 @@
0
 import urllib
0
 import sys
0
 from BeautifulSoup import BeautifulSoup
0
+from rdflib import ConjunctiveGraph
0
+from rdflib import BNode, Literal, Namespace, URIRef
0
+from rdflib import plugin
0
 
0
+
0
 print "Scrapping "+sys.argv[1]
0
 
0
 f = urllib.urlopen(sys.argv[1])
0
@@ -35,9 +39,31 @@
0
 #tel = clean(soup('div',id="replyto")[0].contents[0].contents[3])
0
 
0
 
0
-print location
0
-print title
0
-print description
0
-print email
0
-print image
0
+print "Location: " + location
0
+print "Title: " + title
0
+print "Description: " + description
0
+print "Email: "+email
0
+print "Image: "+image
0
+
0
+
0
+RDF = Namespace("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
0
+RDFS = Namespace("http://www.w3.org/2000/01/rdf-schema#")
0
+GT = Namespace("http://purl.org/ontology/flat/")
0
+FOAF = Namespace("http://xmlns.com/foaf/0.1/")
0
+DC = Namespace("http://purl.org/dc/elements/1.1/")
0
+graph = ConjunctiveGraph()
0
+
0
+flat = BNode()
0
+p = BNode()
0
+e = URIRef(email)
0
+
0
+graph.add((flat,RDF.type,GT['Flat']))
0
+graph.add((flat,FOAF['based_near'],p))
0
+graph.add((p,RDFS.label,Literal(location)))
0
+graph.add((flat,FOAF['mbox'],e))
0
+graph.add((flat,FOAF['depiction'],image))
0
+graph.add((flat,DC['title'],Literal(title)))
0
+graph.add((flat,DC['description'],Literal(description)))
0
+
0
+print graph.serialize(format='rdf')

Comments

    No one has commented yet.