-
Notifications
You must be signed in to change notification settings - Fork 0
/
CrawlUserStat.py
38 lines (31 loc) · 1 KB
/
CrawlUserStat.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
#!/usr/bin/python
#-*- coding: utf-8 -*-
import re
import urllib2
import logging
# Patterns are compiled once at import time instead of once per crawled user.
POST_COUNT_RE = re.compile(r'post-history">.*?\((\d+)\)</a>')
USER_NAME_RE = re.compile(r'name: \'(.*?)\'')
FAN_COUNT_RE = re.compile(r'#area=followers">.*?\((\d+)\)</a>')

USER_URL_TEMPLATE = "http://www.acfun.tv/u/%d.aspx#area=post-history"


def find_stat_matches(html):
    """Return every regex hit for the three per-user statistics.

    Args:
        html: Text of a user page.

    Returns:
        A ``(posts, names, fans)`` tuple of lists holding the captured
        strings in document order; a list is empty when its pattern does
        not occur in *html*.
    """
    return (
        POST_COUNT_RE.findall(html),
        USER_NAME_RE.findall(html),
        FAN_COUNT_RE.findall(html),
    )


def pick_last(matches):
    """Return the last element of *matches*, or None when it is empty.

    The script has always recorded the last hit of each pattern; doing it
    explicitly avoids relying on a leaked loop variable.
    """
    return matches[-1] if matches else None


def format_record(user_id, name, fans, posts):
    """Build the single output line describing one user.

    Args:
        user_id: Integer id of the user.
        name: Captured user name.
        fans: Captured follower count (string of digits).
        posts: Captured post count (string of digits).

    Returns:
        The record text, terminated by ``\\r\\n``.
    """
    return "<user id: %d><user name: %s><user fans: %s><user posts: %s>\r\n" % (
        user_id, name, fans, posts)


def main(start_id=1, stop_id=10000, out_path="user_info.txt"):
    """Fetch user pages and write one record per user to *out_path*.

    Args:
        start_id: First user id to request (inclusive).
        stop_id: Id at which to stop (exclusive).
        out_path: File that receives the records; it is overwritten.

    Ids whose page cannot be opened are logged and skipped.  Ids whose page
    lacks any of the three statistics are also logged and skipped, so a
    record never contains a value left over from a previously crawled user.
    """
    logging.basicConfig(level=logging.INFO)
    # "wb" keeps the explicit "\r\n" terminator untouched on every platform;
    # the with-block closes the file even if the crawl is interrupted.
    with open(out_path, "wb") as out:
        for user_id in range(start_id, stop_id):
            request = urllib2.Request(USER_URL_TEMPLATE % user_id)
            try:
                response = urllib2.urlopen(request)
            except Exception:
                # Deliberately broad: the crawl is best-effort, and any
                # failure to open the page is treated as a missing user.
                logging.info('invalid user id: %d', user_id)
                continue
            try:
                html = response.read()
            finally:
                response.close()

            posts, names, fans = find_stat_matches(html)
            # Echo every hit in the historical order: posts, names, fans.
            # Single-argument print(...) behaves the same as the print
            # statement under Python 2.
            for value in posts + names + fans:
                print(value)

            name = pick_last(names)
            fan_count = pick_last(fans)
            post_count = pick_last(posts)
            if name is None or fan_count is None or post_count is None:
                logging.info('incomplete stats for user id: %d', user_id)
                continue
            out.write(format_record(user_id, name, fan_count, post_count))


if __name__ == "__main__":
    main()