@@ -43,6 +43,7 @@ class ResponseStatistics:
4343 error : int = 0
4444 redirect : int = 0
4545 ip_tables : defaultdict [str , int ] = field (default_factory = lambda : defaultdict (int ))
46+ user_agents : defaultdict [str , int ] = field (default_factory = lambda : defaultdict (int ))
4647
4748
4849
@@ -75,6 +76,7 @@ class ResponseTable(Base):
7576 error = Column (String , nullable = False )
7677 redirect = Column (String , nullable = False )
7778 ip_tables = Column (LargeBinary , nullable = False )
79+ user_agents = Column (LargeBinary , nullable = False )
7880
7981class StatusType (Enum ):
8082 SUCCESS = "success"
@@ -137,10 +139,11 @@ def add_file(cluster: str, storage: Optional[str], bytes: int):
137139 FILE_CACHE [key ].bytes += bytes
138140 FILE_CACHE [key ].hits += 1
139141
def add_response(ip: str, type: StatusType, user_agent: str):
    """Record one served response in the in-memory cache for the current hour.

    Increments three counters on the ``RESPONSE_CACHE`` entry keyed by
    ``get_hour()``: the per-IP hit count, the per-user-agent hit count, and
    the status counter whose attribute name equals ``type.value``.

    Args:
        ip: Client address used as the key in the entry's ``ip_tables``.
        type: Status of the response; its ``value`` is used as the attribute
            name of the counter to increment (e.g. ``"success"``).
        user_agent: Key used in the entry's ``user_agents`` table.
    """
    # NOTE(review): presumably RESPONSE_CACHE creates a fresh entry for an
    # hour it has not seen yet; that definition is not visible here - confirm.
    stats = RESPONSE_CACHE[get_hour()]

    stats.ip_tables[ip] += 1
    stats.user_agents[user_agent] += 1

    # The enum value doubles as the attribute name of the matching counter.
    counter_name = type.value
    setattr(stats, counter_name, getattr(stats, counter_name) + 1)
145148
146149def get_hour ():
@@ -178,7 +181,7 @@ def _commit_cluster(hour: int, cluster: str, hits: int, bytes: int):
178181 )
179182 return True
180183
181- def _commit_response (hour : int , ip_tables : defaultdict [str , int ], success : int = 0 , forbidden : int = 0 , redirect : int = 0 , not_found : int = 0 , error : int = 0 ):
184+ def _commit_response (hour : int , ip_tables : defaultdict [str , int ], user_agents : defaultdict [ str , int ], success : int = 0 , forbidden : int = 0 , redirect : int = 0 , not_found : int = 0 , error : int = 0 ):
182185 if ip_tables == {}:
183186 return False
184187 session = SESSION .get_session ()
@@ -187,7 +190,9 @@ def _commit_response(hour: int, ip_tables: defaultdict[str, int], success: int =
187190 if q .count () == 0 :
188191 session .add (r )
189192 origin_ip_tables : defaultdict [str , int ] = defaultdict (lambda : 0 )
193+ origin_user_agents : defaultdict [str , int ] = defaultdict (lambda : 0 )
190194 ip_tables_data : bytes = r .ip_tables # type: ignore
195+ user_agents_data : bytes = r .user_agents # type: ignore
191196 if ip_tables_data :
192197 try :
193198 input = utils .DataInputStream (pyzstd .decompress (ip_tables_data ))
@@ -198,16 +203,34 @@ def _commit_response(hour: int, ip_tables: defaultdict[str, int], success: int =
198203 except :
199204 logger .ttraceback ("database.error.unable.to.decompress.ip.tables" , ip_tables_data )
200205 origin_ip_tables .clear ()
206+ if user_agents_data :
207+ try :
208+ input = utils .DataInputStream (pyzstd .decompress (user_agents_data ))
209+ for _ in range (input .read_long ()):
210+ user_agent = input .read_string ()
211+ count = input .read_long ()
212+ origin_user_agents [user_agent ] = count
213+ except :
214+ logger .ttraceback ("database.error.unable.to.decompress.user.agents" , user_agents_data )
201215 for ip , count in ip_tables .items ():
202216 origin_ip_tables [ip ] += count
203- output = utils .DataOutputStream ()
204- output .write_long (len (origin_ip_tables ))
217+
218+ for user_agent , count in user_agents .items ():
219+ origin_user_agents [user_agent ] += count
220+ ip_tables_output = utils .DataOutputStream ()
221+ ip_tables_output .write_long (len (origin_ip_tables ))
205222 for ip , count in origin_ip_tables .items ():
206- output .write_string (ip )
207- output .write_long (count )
223+ ip_tables_output .write_string (ip )
224+ ip_tables_output .write_long (count )
225+ user_agents_output = utils .DataOutputStream ()
226+ user_agents_output .write_long (len (origin_user_agents ))
227+ for user_agent , count in origin_user_agents .items ():
228+ user_agents_output .write_string (user_agent )
229+ user_agents_output .write_long (count )
208230 q .update (
209231 {
210- 'ip_tables' : pyzstd .compress (output .getvalue ()), # type: ignore
232+ 'ip_tables' : pyzstd .compress (ip_tables_output .getvalue ()), # type: ignore
233+ 'user_agents' : pyzstd .compress (user_agents_output .getvalue ()), # type: ignore
211234 'success' : str (int (r .success ) + success ), # type: ignore
212235 'forbidden' : str (int (r .forbidden ) + forbidden ), # type: ignore
213236 'redirect' : str (int (r .redirect ) + redirect ), # type: ignore
@@ -242,7 +265,7 @@ def commit():
242265 _commit_cluster (cluster [0 ], cluster [1 ], value .hits , value .bytes )
243266
244267 for hour , value in response_cache .items ():
245- _commit_response (hour , value .ip_tables , value .success , value .forbidden , value .redirect , value .not_found , value .error )
268+ _commit_response (hour , value .ip_tables , value .user_agents , value . success , value .forbidden , value .redirect , value .not_found , value .error )
246269
247270 session .commit ()
248271 old_keys = []
@@ -263,10 +286,14 @@ def commit():
263286 RESPONSE_CACHE [hour ].not_found -= value .not_found
264287 RESPONSE_CACHE [hour ].error -= value .error
265288 ip_hits = 0
289+ user_agent_hits = 0
266290 for ip , hits in value .ip_tables .items ():
267291 RESPONSE_CACHE [hour ].ip_tables [ip ] -= hits
268292 ip_hits += RESPONSE_CACHE [hour ].ip_tables [ip ]
269- if RESPONSE_CACHE [hour ].success == RESPONSE_CACHE [hour ].forbidden == RESPONSE_CACHE [hour ].redirect == RESPONSE_CACHE [hour ].not_found == RESPONSE_CACHE [hour ].error == ip_hits == 0 :
293+ for user_agent , hits in value .user_agents .items ():
294+ RESPONSE_CACHE [hour ].user_agents [user_agent ] -= hits
295+ user_agent_hits += RESPONSE_CACHE [hour ].user_agents [user_agent ]
296+ if RESPONSE_CACHE [hour ].success == RESPONSE_CACHE [hour ].forbidden == RESPONSE_CACHE [hour ].redirect == RESPONSE_CACHE [hour ].not_found == RESPONSE_CACHE [hour ].error == ip_hits == user_agent_hits == 0 :
270297 old_keys .append (hour )
271298 for key in old_keys :
272299 del RESPONSE_CACHE [key ]
0 commit comments