
Commit

Update scraper.py
Feng-Gao committed Apr 15, 2019
1 parent d984d95 commit 286b63a
Showing 1 changed file with 9 additions and 9 deletions.
scraper.py (18 changes: 9 additions, 9 deletions)
@@ -27,7 +27,7 @@
 
 for i in range(index,max_index+1):
     url = base_url + str(i)
-    print(url)
+    #print(url)
 
     result = requests.get(url,headers=headers)
     soup = BeautifulSoup(result.content,features='lxml')
@@ -39,8 +39,8 @@
         package_url = "http://data.seoul.go.kr/dataList/"+p.dl.a['href']
         package_name = p.find(attrs={'class':'In_Titles'}).span.next.next.strip()
         package_topics = p.find(attrs={'class':'In_Titles'}).span.text.strip()
-        print(package_url)
-        print(package_name)
+        #print(package_url)
+        #print(package_name)
         try:
             imgs = p.find(attrs={'class':'In_Ico'}).find_all('img')
             format = []
@@ -70,7 +70,7 @@
             #note: tags may be split by an ASCII comma, a Chinese comma, or the Chinese '、'
             row = package_url+','+package_name+','+package_desc+','+package_org+','+package_topics\
             +','+package_tags+','+package_format+','+package_created+','+package_frequency+','+package_view+'\n'
-            print(row)
+            #print(row)
             package_dict = {
                 'today':today_date,
                 'url':package_url,
@@ -87,7 +87,7 @@
 
             }
             scraperwiki.sqlite.save(unique_keys=['today','name'],data=package_dict)
-            print('****************end---'+package_name+'---end****************')
+            #print('****************end---'+package_name+'---end****************')
         except Exception as ex:
             print(ex)
             print(package_url + ' problem occurs and will re-try')
@@ -101,8 +101,8 @@
     package_url = p['url']
     package_name = p['name']
     package_topics = p['topics']
-    print(package_url)
-    print(package_name)
+    #print(package_url)
+    #print(package_name)
     try:
         imgs = p.find(attrs={'class':'In_Ico'}).find_all('img')
         format = []
@@ -131,7 +131,7 @@
         #note: tags may be split by an ASCII comma, a Chinese comma, or the Chinese '、'
         row = package_url+','+package_name+','+package_desc+','+package_org+','+package_topics\
         +','+package_tags+','+package_format+','+package_created+','+package_frequency+','+package_view+'\n'
-        print(row)
+        #print(row)
         package_dict = {
             'today':today_date,
             'url':package_url,
@@ -148,7 +148,7 @@
 
         }
         scraperwiki.sqlite.save(unique_keys=['today','name'],data=package_dict)
-        print('****************end---'+package_name+'---end****************')
+        #print('****************end---'+package_name+'---end****************')
     except Exception as ex:
         print(ex)
         print(package_url + ' problem occurs and will re-try')
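Notes on the surrounding code, for readers of this diff:

The first hunk walks numbered listing pages by appending an index to a base URL and parsing each response with BeautifulSoup. A minimal sketch of that fetch-and-parse pattern, using a placeholder base_url, headers, and page range rather than the real values from scraper.py:

    import requests
    from bs4 import BeautifulSoup

    base_url = 'http://example.org/list?page='   # placeholder, not the real base_url
    headers = {'User-Agent': 'Mozilla/5.0'}      # placeholder headers

    for i in range(1, 3):
        url = base_url + str(i)
        result = requests.get(url, headers=headers)
        soup = BeautifulSoup(result.content, features='lxml')
        # listing entries would then be located with soup.find_all(...)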
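The #note comment in both row-building hunks flags that a package's tag string may be delimited by an ASCII comma, a fullwidth Chinese comma (,), or the ideographic comma (、). One way to normalize such a string is re.split with a character class; the helper name and sample input here are illustrative:

    import re

    def split_tags(raw):
        # split on ASCII ',', fullwidth ',', or ideographic '、', dropping empties
        return [t.strip() for t in re.split('[,,、]', raw) if t.strip()]

    print(split_tags('tag1,tag2,tag3、tag4'))  # ['tag1', 'tag2', 'tag3', 'tag4']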

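scraperwiki.sqlite.save upserts on the columns named in unique_keys, so unique_keys=['today','name'] keeps at most one row per package name per scrape date: saving again with the same pair replaces the earlier row instead of appending a duplicate. A sketch with placeholder field values:

    import scraperwiki

    record = {'today': '2019-04-15', 'name': 'sample-package', 'view': '10'}
    scraperwiki.sqlite.save(unique_keys=['today', 'name'], data=record)

    # same ('today', 'name') pair, so this replaces the row rather than adding one
    record['view'] = '12'
    scraperwiki.sqlite.save(unique_keys=['today', 'name'], data=record)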
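Finally, the except branches only log the failing URL with a 'will re-try' message, and the @@ -101,8 hunk then reads queued failures back as plain dicts (p['url'], p['name'], p['topics']). Note that inside that hunk's try block, p.find(...) is still called as if p were a BeautifulSoup element, which would fail on a plain dict unless p is reassigned in the elided lines. A sketch of the collect-then-retry shape; the scrape helper and the queueing step are hypothetical, since that code sits outside the hunks shown:

    failed = []

    def scrape(url):
        # hypothetical per-package scrape; raises on network or parse errors
        raise IOError('timeout on ' + url)

    for url in ['http://data.seoul.go.kr/dataList/1']:
        try:
            scrape(url)
        except Exception as ex:
            print(ex)
            print(url + ' problem occurs and will re-try')
            failed.append({'url': url, 'name': 'sample', 'topics': 'sample'})

    # the second pass mirrors the @@ -101,8 hunk: failures come back as dicts
    for p in failed:
        package_url = p['url']
        package_name = p['name']
        package_topics = p['topics']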