-
Notifications
You must be signed in to change notification settings - Fork 1
/
main.py
151 lines (120 loc) · 4.86 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
import time
import requests
import json
import pymongo
from bson.binary import Binary
import logging
'''
* @author ELEVEN28th
* @creat 2023-3-10
'''
# Log to app.log; filemode='w' truncates the log on every run, INFO and above.
logging.basicConfig(filename='app.log', filemode='w', format='%(asctime)s - %(levelname)s - %(message)s',
                    level=logging.INFO)
# NOTE(review): data_list is never used anywhere in this file — candidate for removal.
data_list = []
# Local MongoDB connection; crawl results are written to csgo_items.newcsgo_items.
client = pymongo.MongoClient("mongodb://localhost:27017/")
db = client["csgo_items"]
collection = db["newcsgo_items"]
def read_data():
with open('csgoItemsInfoFixed1.json', 'r') as f:
data = json.load(f)
global goods_read
goods_read = []
for items in data:
goods_read.append(items)
def _collect_stickers(stickers, context_url):
    """Download each sticker's image and return a list of sticker documents.

    Args:
        stickers: list of sticker dicts from the Buff API (``img_url``,
            ``name``, ``slot``, ``wear`` keys, all optional).
        context_url: the goods URL being crawled — used only in log messages.

    Returns:
        List of dicts with ``img_data`` (BSON Binary or None), ``name``,
        ``slot`` and ``wear``. Stickers whose image download fails are skipped.
    """
    collected = []
    for sticker in stickers:
        try:
            img_url = sticker.get('img_url', '')
            img_data = requests.get(img_url).content if img_url else None
            collected.append({
                'img_data': Binary(img_data) if img_data else None,
                'name': sticker.get('name', ''),
                'slot': sticker.get('slot', ''),
                'wear': sticker.get('wear', ''),
            })
        except requests.exceptions.RequestException as e:
            logging.exception(f"Request exception occurred while accessing sticker image : {context_url}.{e}")
            continue  # this sticker's request failed; keep the rest
    return collected


def buffcrawl_hi():
    """Crawl buff.163.com bill-order history for every goods id in ``goods_read``
    and persist the results into the module-level MongoDB ``collection``.

    Reads the session cookie from ``cookie.txt``; for each goods id fetches the
    bill-order list, downloads the inspection image and sticker images, and
    stores everything under ``container.<timestamp>`` in the document whose
    ``id`` matches the goods id.
    """
    with open('cookie.txt', 'r') as f:
        cookie_str = f.read().strip()
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:57.0) Gecko/20100101 Firefox/57.0'
    }
    Cookie = {
        'Cookie': cookie_str
    }
    proxy = {'http': 'http://113.124.86.24:9999'}
    read_data()
    orignal_url = 'https://buff.163.com/api/market/goods/bill_order?game=csgo&goods_id='
    data_to_save = []  # documents to flush to the database at the end
    timestamp = int(time.time())  # one timestamp for the whole crawl batch
    for goods_id in goods_read:  # iterate directly instead of range(len(...))
        all_id_url = orignal_url + str(goods_id)
        print(goods_id)
        try:
            html_response = requests.get(url=all_id_url, headers=headers, cookies=Cookie, proxies=proxy)
            html_items = html_response.json()['data']['items']
        except Exception as e:
            logging.error(f"Error occurred while getting HTML response for url: {all_id_url}. {e}")
            continue
        item_list = []  # all bill-order entries for the current goods id
        for item in html_items:
            info = item['asset_info']['info']
            inspection_img_url = info.get('inspect_url', '')
            try:
                # Download the inspection image ONCE (the original fetched it a
                # second time when building item_data, which both doubled
                # traffic and raised on an empty URL).
                inspection_img = requests.get(url=inspection_img_url).content if inspection_img_url else None
            except Exception as e:
                logging.error(f"Error occurred while getting inspection image for url: {inspection_img_url}. {e}")
                inspection_img = None
            print(inspection_img_url)
            stickers_info = _collect_stickers(info.get('stickers', []), all_id_url)
            item_list.append({
                'inspection_img': Binary(inspection_img) if inspection_img else None,
                'itemfloat': item['asset_info']['paintwear'],
                'paintseed': info['paintseed'],
                'price': item['price'],
                'transaction_time': item['transact_time'],
                'stickers': stickers_info,
            })
            print(all_id_url)
        data_to_save.append({
            'id': goods_id,
            'timestamp': timestamp,
            'data': item_list,
        })
    # Flush everything: one update per goods id, merging all of its entries
    # into a single $set document (the original passed a LIST of separate
    # $set dicts, which pymongo treats as an aggregation pipeline and which
    # requires MongoDB 4.2+; a single update document is equivalent here
    # because all entries for an id share the same timestamp key).
    for goods_id in {entry['id'] for entry in data_to_save}:
        set_fields = {}
        for entry in data_to_save:
            if entry['id'] == goods_id:
                set_fields['container.' + str(entry['timestamp'])] = entry['data']
        # upsert=True so ids without an existing document are created rather
        # than silently skipped. NOTE(review): confirm no caller relies on
        # pre-seeded documents only.
        collection.update_many({'id': goods_id}, {'$set': set_fields}, upsert=True)
    time.sleep(2)


if __name__ == '__main__':
    buffcrawl_hi()