-
Notifications
You must be signed in to change notification settings - Fork 3
/
get_hateb_entry.py
111 lines (86 loc) · 3.16 KB
/
get_hateb_entry.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from google.appengine.api import urlfetch
import re
import datetime
import datastores
def get_counter():
    """Return the saved counter value as a string.

    Reads the first ``datastores.Counter`` entity. When the query yields
    nothing, a new entity with ``counter`` set to 0 is stored first and its
    value is returned.
    """
    stored = datastores.Counter.gql('').get()
    if stored:
        return str(stored.counter)

    # Nothing saved yet: persist a zeroed counter so later updates find it.
    stored = datastores.Counter()
    stored.counter = 0
    stored.put()
    return str(stored.counter)
def get_entrylist(url, sort='count', offset='0'):
    """Fetch one Hatena Bookmark entry-list page and parse its entries.

    Args:
        url: Value for the ``url`` query parameter. It is concatenated into
            the request URL as-is (no URL-encoding is applied).
        sort: Value for the ``sort`` query parameter.
        offset: Value for the ``of`` query parameter. It is concatenated
            into the request URL, so it must already be a string.

    Returns:
        A list with one dict per regex match, holding ``entry_url``,
        ``entry_title`` (decoded from UTF-8), ``bukuma_count`` (int) and
        ``hateb_added_date`` (datetime). When the response status is not
        200 the function returns None.
    """
    request_url = ''.join([
        'http://b.hatena.ne.jp/entrylist?sort=', sort,
        '&url=', url,
        '&of=', offset,
    ])
    response = urlfetch.fetch(request_url, deadline=10)
    if response.status_code != 200:
        # Callers receive None for any non-200 response.
        return None

    # Groups: entry path, title, count shown in the title attribute,
    # count shown in the link text, timestamp text.
    entry_re = re.compile(r'''<li class="users"> <strong><a href="/entry/(.*?)" title="はてなブックマーク - (.*?) \((.*?)ブックマーク\)">(.*?) users</a></strong></li>
<li class="timestamp">(.*?)</li>''', re.S)

    entries = []
    for match in entry_re.findall(response.content):
        entries.append({
            'entry_url': match[0],
            'entry_title': unicode(match[1], 'utf-8'),
            'bukuma_count': int(match[3]),
            'hateb_added_date': date_string_to_obj(match[4]),
        })
    return entries
def put_entry(row):
    """Store one parsed entry, or refresh its bookmark count if already stored.

    Args:
        row: Dict with the keys ``entry_url``, ``entry_title``,
            ``bukuma_count`` and ``hateb_added_date`` (an object exposing
            ``year`` and ``month``).

    An existing ``datastores.Entries`` record with the same ``entry_url`` is
    rewritten only when its ``bukuma_count`` differs from the row's. If no
    record exists, a new one is created with the remaining fields set to
    fixed initial values.
    """
    lookup = datastores.Entries.gql('WHERE entry_url = :url', url=row['entry_url'])
    existing = lookup.get()
    if existing:
        # Known entry: skip the write when the count has not changed.
        if existing.bukuma_count != row['bukuma_count']:
            existing.bukuma_count = row['bukuma_count']
            existing.put()
        return

    record = datastores.Entries()
    record.entry_url = row['entry_url']
    record.entry_title = row['entry_title']
    record.bukuma_count = row['bukuma_count']

    added_on = row['hateb_added_date']
    record.hateb_added_date = added_on
    record.year = str(added_on.year)
    record.season = get_season(added_on.month)

    # These text fields start out as empty strings on a new record.
    for blank_field in ('photo_url', 'description',
                        'category1_id', 'category1_name',
                        'category2_id', 'category2_name',
                        'category3_id', 'category3_name'):
        setattr(record, blank_field, '')

    record.tsukurepo_count = 0
    record.cookpad_checked_time = datetime.datetime(2011, 1, 1)
    record.put()
def put_counter(upcount=0):
    """Advance or reset the saved counter.

    Args:
        upcount: Amount to add to the saved counter. Any value that is not
            greater than 0 resets the counter to 0 instead.

    If no ``datastores.Counter`` entity exists yet, one is created starting
    at 0 (the same fallback ``get_counter`` uses) rather than failing with
    an AttributeError on the missing entity.
    """
    counter = datastores.Counter.gql('').get()
    if not counter:
        # The query returned no entity, so there is nothing to update yet.
        counter = datastores.Counter()
        counter.counter = 0

    if upcount > 0:
        counter.counter = counter.counter + upcount
    else:
        counter.counter = 0
    counter.put()
def date_string_to_obj(date_string):
    """Build a datetime from a fixed-position ``YYYY?MM?DD`` string.

    Characters 0-3 are read as the year, 5-6 as the month and 8-9 as the
    day. The characters at index 4 and 7, and anything from index 10
    onward, are ignored. The time of day of the result is midnight.
    """
    year_text = date_string[:4]
    month_text = date_string[5:7]
    day_text = date_string[8:10]
    return datetime.datetime(int(year_text), int(month_text), int(day_text))
def get_season(month):
    """Map a month number to a season name.

    3-5 give 'spring', 6-8 'summer', 9-11 'autumn'; every other value
    (including 12, 1 and 2) gives 'winter'.
    """
    season_table = (
        ((3, 4, 5), 'spring'),
        ((6, 7, 8), 'summer'),
        ((9, 10, 11), 'autumn'),
    )
    for months, season_name in season_table:
        if month in months:
            return season_name
    return 'winter'
if __name__ == "__main__":
    # Script entry point: fetch one page of the cookpad recipe entry list,
    # starting at the saved counter offset, store every entry that has at
    # least 5 bookmarks, then advance the counter by the number stored.
    import logging

    upcount = 0
    rows = get_entrylist('http://cookpad.com/recipe/', 'count', get_counter())
    if rows is None:
        # get_entrylist returns None when the HTTP status is not 200.
        # Iterating over None would raise TypeError, so report the failure
        # and leave the counter untouched; the next run retries this offset.
        logging.error('Hatena entry list request did not return HTTP 200; '
                      'counter left unchanged')
    else:
        for row in rows:
            # Cut the title at the ' [クックパッド]' marker when it is present.
            endpoint = row['entry_title'].find(u' [クックパッド]')
            if endpoint > -1:
                row['entry_title'] = row['entry_title'][0:endpoint]
            if row['bukuma_count'] >= 5:
                put_entry(row)
                upcount += 1
            else:
                # First entry below the threshold: stop, and reset the
                # counter to 0 (put_counter resets when upcount is 0).
                upcount = 0
                break
        put_counter(upcount)