0
+from BeautifulSoup import BeautifulSoup
0
+from geopy import geocoders
0
+from rdflib import ConjunctiveGraph
0
+from rdflib import BNode, Literal, Namespace, URIRef
0
+from rdflib import plugin
0
+from rdflib.syntax.serializers import TurtleSerializer
0
+#print "Scrapping "+sys.argv[1]
0
# Constructor taking the URL of the page to scrape.
# NOTE(review): the body is not visible in this excerpt; presumably it stores
# `url` as self.url, which scrape() later opens -- TODO confirm.
+ def __init__(self,url) :
0
# Strip whitespace from a scraped text fragment `atom`: every space character
# is removed first, then every newline.
# NOTE(review): no return statement is visible in this excerpt, yet scrape()
# assigns the result of this call; presumably t2 is returned -- TODO confirm.
+ def __clean(self,atom):
0
+ t1 = ''.join(atom.rsplit(' '))
0
+ t2 = ''.join(t1.rsplit('\n'))
0
# Fetch the page at self.url, pull the advert fields out of the parsed HTML,
# geocode the advert's location, and print an RDF description of the flat.
#
# Parameters:
#   geolocation -- not referenced in the visible lines; presumably it guards
#                  the geocoding step -- TODO confirm.
#   geostring   -- text appended to the scraped location to build the
#                  geocoder query (default 'London UK').
#
# Sets self.location, self.title, self.description, self.email, self.image,
# self.place, self.lat and self.lng as it goes.
+ def scrape(self,geolocation=True, geostring='London UK') :
0
+ f = urllib.urlopen(self.url)
0
# NOTE(review): `html` is never assigned in the visible lines; presumably it
# is the content read from `f` -- TODO confirm.
+ soup = BeautifulSoup(html)
0
# The fields below are picked out by fixed positions in the parse tree
# (first match, then hard-coded child indexes).
+ self.location = self.__clean(soup('span','location')[0].contents[1])
0
+ self.title = self.__clean(soup('div',id="title")[0].contents[0].contents[0])
0
+ self.description = soup('div',id="desc")[0].contents[0].contents[0].contents[0]
0
# Value of the first attribute on the third child of the 'email' span.
+ email1 = soup('span','email')[0].contents[2].attrs[0][1]
0
# A value starting with '/cgi-bin' is site-relative, so the site host is
# prepended. NOTE(review): no other branch is visible here, so self.email
# may be unset for other values -- TODO confirm.
+ if email1.startswith('/cgi-bin'):
0
+ self.email = "http://www.gumtree.com"+email1
0
+ self.image = "http://www.gumtree.com"+soup('div',id="images")[0].contents[1].attrs[0][1]
0
+ #tel = clean(soup('div',id="replyto")[0].contents[0].contents[3])
0
# Geocoder query: scraped location followed by the caller-supplied region.
+ search = self.location + " " + geostring
0
# NOTE(review): the string passed here looks like a hard-coded API key;
# consider moving it out of the source -- TODO confirm.
+ g = geocoders.Google('ABQIAAAAu0AMQcAkvqfViJpEeSH_-hT2yXp_ZAY8_ufC3CFXhHIE1NvwkxQ0_Z6CDgX2Q08wvAh1aYjckybfeA')
0
# The result is unpacked as a place name plus a (lat, lng) pair.
+ self.place, (self.lat,self.lng) = g.geocode(search)
0
+ #print "Location: " + location
0
+ #print "Title: " + title
0
+ #print "Description: " + description
0
+ #print "Email: "+email
0
+ #print "Image: "+image
0
# Vocabularies used for the triples added below.
+ RDF = Namespace("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
0
+ RDFS = Namespace("http://www.w3.org/2000/01/rdf-schema#")
0
+ GT = Namespace("http://purl.org/ontology/flat/")
0
+ FOAF = Namespace("http://xmlns.com/foaf/0.1/")
0
+ DC = Namespace("http://purl.org/dc/elements/1.1/")
0
+ WGS = Namespace("http://www.w3.org/2003/01/geo/wgs84_pos#")
0
+ graph = ConjunctiveGraph()
0
# Subject nodes: the flat itself, plus the e-mail and image URLs as resources.
+ flat = URIRef("#flat")
0
+ e = URIRef(self.email)
0
+ i = URIRef(self.image)
0
+ graph.add((flat,RDF.type,GT['Flat']))
0
# NOTE(review): `p` is never assigned in the visible lines; presumably it is
# a node for the geocoded place (BNode is imported) -- TODO confirm.
+ graph.add((flat,FOAF['based_near'],p))
0
+ graph.add((p,RDFS.label,Literal(self.place)))
0
+ graph.add((p,DC['title'],Literal(self.location)))
0
+ graph.add((p,WGS['lat'],Literal(self.lat)))
0
+ graph.add((p,WGS['long'],Literal(self.lng)))
0
+ graph.add((flat,FOAF['mbox'],e))
0
+ graph.add((flat,FOAF['depiction'],i))
0
+ graph.add((flat,DC['title'],Literal(self.title)))
0
+ graph.add((flat,DC['description'],Literal(self.description)))
0
# NOTE(review): `file` is not assigned in the visible lines, so as written it
# would name the Python 2 builtin; presumably an output target is defined in
# elided code -- TODO confirm. The return value of serialize() is printed.
+ print graph.serialize(destination=file,format='xml')
0
+# fs = FlatScrap(sys.argv[1])
Comments
No one has commented yet.