# Script overview: open the URL given as the first command-line argument,
# extract listing fields from the parsed HTML, geocode the location, build an
# RDF graph from those values and serialize it to the destination given as the
# second command-line argument.
#
# NOTE(review): this text looks like a diff-view extract (code lines carry a
# leading "+" and are separated by "0" lines) and appears to be incomplete:
# the imports of `sys` and `urllib`, the assignment of `html`, the `def` line
# of `clean`, and the definitions of `flat`, `p`, `e` and `i` are not visible
# here. Confirm everything below against the full file.
0
+from BeautifulSoup import BeautifulSoup
0
+from geopy import geocoders
0
+from rdflib import ConjunctiveGraph
0
+from rdflib import BNode, Literal, Namespace, URIRef
0
+from rdflib import plugin
0
+#print "Scrapping "+sys.argv[1]
0
# Open the URL passed as the first command-line argument.
+f = urllib.urlopen(sys.argv[1])
0
# Parse the page. NOTE(review): `html` is not assigned in the visible lines;
# presumably it is read from `f` - confirm.
+soup = BeautifulSoup(html)
0
# NOTE(review): the next two statements look like the body of the `clean`
# helper called further down (its `def` and `return` lines are not visible).
# They strip every space, then every newline, from `atom`.
+ t1 = ''.join(atom.rsplit(' '))
0
+ t2 = ''.join(t1.rsplit('\n'))
0
# Pull the listing fields out of the parsed document. Each lookup takes the
# first matching tag and then indexes into its children by fixed position, so
# any of these raises IndexError if the page layout differs.
+location = clean(soup('span','location')[0].contents[1])
0
+title = clean(soup('div',id="title")[0].contents[0].contents[0])
0
+description = soup('div',id="desc")[0].contents[0].contents[0].contents[0]
0
# Value of the first attribute on the third child of the 'email' span
# (presumably the href of a link - confirm).
+email1 = soup('span','email')[0].contents[2].attrs[0][1]
0
# Site-relative CGI links are turned into absolute URLs.
# NOTE(review): any `else` branch is not visible in this extract.
+if email1.startswith('/cgi-bin'):
0
+ email = "http://www.gumtree.com"+email1
0
# Image URL: site root plus the value of the first attribute on the second
# child of the 'images' div.
+ image = "http://www.gumtree.com"+soup('div',id="images")[0].contents[1].attrs[0][1]
0
+#tel = clean(soup('div',id="replyto")[0].contents[0].contents[3])
0
# NOTE(review): the string passed to geocoders.Google looks like an API key
# hard-coded in source; consider loading it from configuration instead.
+g = geocoders.Google('ABQIAAAAu0AMQcAkvqfViJpEeSH_-hT2yXp_ZAY8_ufC3CFXhHIE1NvwkxQ0_Z6CDgX2Q08wvAh1aYjckybfeA')
0
# Geocode the scraped location; the result is unpacked into a place value and
# a (lat, lng) pair.
+place, (lat,lng) = g.geocode(location)
0
+#print "Location: " + location
0
+#print "Title: " + title
0
+#print "Description: " + description
0
# Namespaces used for the types and predicates of the triples added below.
+RDF = Namespace("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
0
+RDFS = Namespace("http://www.w3.org/2000/01/rdf-schema#")
0
+GT = Namespace("http://purl.org/ontology/flat/")
0
+FOAF = Namespace("http://xmlns.com/foaf/0.1/")
0
+DC = Namespace("http://purl.org/dc/elements/1.1/")
0
+WGS = Namespace("http://www.w3.org/2003/01/geo/wgs84_pos#")
0
# Build the graph: `flat` is typed as GT:Flat and linked to the node `p`,
# which carries the geocoded label, the scraped location and the coordinates.
# NOTE(review): `flat`, `p`, `e` and `i` are not defined in the visible lines;
# presumably `e` and `i` wrap `email` and `image` - confirm.
+graph = ConjunctiveGraph()
0
+graph.add((flat,RDF.type,GT['Flat']))
0
+graph.add((flat,FOAF['based_near'],p))
0
+graph.add((p,RDFS.label,Literal(place)))
0
+graph.add((p,DC['title'],Literal(location)))
0
+graph.add((p,WGS['lat'],Literal(lat)))
0
+graph.add((p,WGS['long'],Literal(lng)))
0
+graph.add((flat,FOAF['mbox'],e))
0
+graph.add((flat,FOAF['depiction'],i))
0
+graph.add((flat,DC['title'],Literal(title)))
0
+graph.add((flat,DC['description'],Literal(description)))
0
# Serialize the graph with format 'rdf' to the destination given as the second
# command-line argument, and print whatever serialize() returns.
+print graph.serialize(destination=sys.argv[2],format='rdf')
Comments
No one has commented yet.