public
Description: Small RDF Python scraper for Gumtree ads (includes geocoding)
Clone URL: git://github.com/moustaki/flatscrap.git
Search Repo:
email
moustaki (author)
Thu Mar 06 10:34:12 -0800 2008
commit  7287e66d88e9d5392567a2ebaa196c3920c3b89c
tree    88631b804ae66f44f2cd96f6f2efabd7ef59e038
parent  18801149edcbb9815f5c155694c0cb342efb9642
...
11
12
13
 
 
 
 
 
 
14
15
16
17
 
 
18
 
 
 
 
 
 
19
 
20
21
22
 
...
11
12
13
14
15
16
17
18
19
20
21
 
 
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
0
@@ -11,13 +11,27 @@
0
 
0
 soup = BeautifulSoup(html)
0
 
0
+def clean(atom):
0
+ t1 = ''.join(atom.rsplit(' '))
0
+ t2 = ''.join(t1.rsplit('\n'))
0
+ return t2
0
+
0
+
0
 # Now, let's scrap!
0
 
0
-location = soup('span','location')[0].contents[1][6:]
0
-title = soup('div',id="title")[0].contents[0].contents[0]
0
+location = clean(soup('span','location')[0].contents[1])
0
+title = clean(soup('div',id="title")[0].contents[0].contents[0])
0
 description = soup('div',id="desc")[0].contents[0].contents[0].contents[0]
0
+email1 = soup('span','email')[0].contents[2].attrs[0][1]
0
+if email1.startswith('/cgi-bin'):
0
+ email = "http://www.gumtree.com"+email1
0
+else :
0
+ email = email1
0
+#tel = clean(soup('div',id="replyto")[0].contents[0].contents[3])
0
 
0
+
0
 print location
0
 print title
0
 print description
0
+print email

Comments

    No one has commented yet.