55import secrets
66import threading
77from glob import glob
8+ from pathlib import Path
89from typing import Any , Dict , List , Optional
910
1011from app .atomic_io import atomic_write_json , path_lock
@@ -19,13 +20,23 @@ def _safe_owner(owner: str) -> str:
1920 return owner [:128 ]
2021
2122
def _resolve_under_base(base_dir: str, *parts: str) -> str:
    """Join *parts* onto *base_dir* and return the resolved path as a string.

    Both the base and the joined candidate go through ``Path.resolve()``
    before they are compared, so the containment check is made on the
    normalised absolute paths rather than on the raw input strings.

    Args:
        base_dir: Directory the result must stay inside.
        *parts: Path components appended to ``base_dir``.

    Returns:
        The resolved candidate path, as ``str``.

    Raises:
        ValueError: With message ``"path_outside_base_dir"`` when the
            resolved candidate is not the base itself or a descendant of it.
    """
    base = Path(base_dir).resolve()
    # An absolute component in ``parts`` makes joinpath() discard ``base``;
    # the relative_to() check below is what rejects that case.
    candidate = base.joinpath(*parts).resolve()
    try:
        candidate.relative_to(base)
    except ValueError as e:
        raise ValueError("path_outside_base_dir") from e
    return str(candidate)
31+
32+
def _owner_manifest_path(owner: str, base_dir: str = DEFAULT_DIR) -> str:
    """Return the path of the JSON manifest for *owner* inside *base_dir*.

    ``base_dir`` is created if it does not exist. The file name is the
    ``_safe_owner`` form of ``owner`` with a ``.json`` suffix, and the final
    path is produced by ``_resolve_under_base``.

    Args:
        owner: Owner identifier; passed through ``_safe_owner`` before use.
        base_dir: Directory that holds the manifests.

    Returns:
        The manifest path as returned by ``_resolve_under_base``.
    """
    os.makedirs(base_dir, exist_ok=True)
    return _resolve_under_base(base_dir, f"{_safe_owner(owner)}.json")
2536
2637
def _owner_files_dir(owner: str, base_dir: str = DEFAULT_DIR) -> str:
    """Return (creating it if needed) the blob directory for *owner*.

    The directory is ``<base_dir>/_files/<safe owner>``, where the last
    component is ``_safe_owner(owner)``. The path is built with
    ``_resolve_under_base`` before the directory is created.

    Args:
        owner: Owner identifier; passed through ``_safe_owner`` before use.
        base_dir: Root storage directory.

    Returns:
        The path of the owner's files directory.
    """
    path = _resolve_under_base(base_dir, "_files", _safe_owner(owner))
    os.makedirs(path, exist_ok=True)
    return path
3142
@@ -185,7 +196,8 @@ def batch_mutate_items(
185196
186197def write_text_blob (owner : str , source_id : str , text : str , base_dir : str = DEFAULT_DIR ) -> str :
187198 files_dir = _owner_files_dir (owner , base_dir )
188- path = os .path .join (files_dir , f"{ source_id } .txt" )
199+ safe_source_id = re .sub (r"[^a-zA-Z0-9._-]+" , "_" , (source_id or "" ).strip ())[:120 ] or "source"
200+ path = _resolve_under_base (files_dir , f"{ safe_source_id } .txt" )
189201 with _LOCK :
190202 parent = os .path .dirname (path )
191203 if parent :
@@ -195,18 +207,21 @@ def write_text_blob(owner: str, source_id: str, text: str, base_dir: str = DEFAU
195207 return path
196208
197209
def read_text_blob(path: str, base_dir: str = DEFAULT_DIR) -> str:
    """Return the UTF-8 text stored at *path*, or ``""`` on any failure.

    The path is resolved and must lie inside the resolved ``base_dir``;
    otherwise nothing is opened and ``""`` is returned.

    Args:
        path: Location of the text blob. A falsy value is treated as ``""``.
        base_dir: Directory the blob must reside in.

    Returns:
        The file contents, or an empty string when the path is outside
        ``base_dir`` or the file cannot be read or decoded.
    """
    try:
        resolved = Path(path or "").resolve()
        # relative_to() raises ValueError for a path outside base_dir; the
        # handler below turns that into the empty-string result.
        resolved.relative_to(Path(base_dir).resolve())
        with open(resolved, "r", encoding="utf-8") as f:
            return f.read()
    except Exception:
        # Deliberate best-effort read: every failure yields "".
        return ""
204218
205219
206220def write_binary_blob (owner : str , source_id : str , filename : str , data : bytes , base_dir : str = DEFAULT_DIR ) -> str :
207221 files_dir = _owner_files_dir (owner , base_dir )
222+ safe_source_id = re .sub (r"[^a-zA-Z0-9._-]+" , "_" , (source_id or "" ).strip ())[:120 ] or "source"
208223 safe = re .sub (r"[^a-zA-Z0-9._-]+" , "_" , (filename or "upload" ).strip ())[:180 ] or "upload"
209- path = os . path . join (files_dir , f"{ source_id } __{ safe } " )
224+ path = _resolve_under_base (files_dir , f"{ safe_source_id } __{ safe } " )
210225 with _LOCK :
211226 parent = os .path .dirname (path )
212227 if parent :
@@ -216,9 +231,13 @@ def write_binary_blob(owner: str, source_id: str, filename: str, data: bytes, ba
216231 return path
217232
218233
def remove_file(path: str, base_dir: str = DEFAULT_DIR) -> None:
    """Delete the file at *path* if it exists inside *base_dir*.

    This is a best-effort operation: a falsy ``path``, a path that resolves
    outside ``base_dir``, a missing file, or any error raised while removing
    it all result in a silent no-op.

    Args:
        path: File to remove.
        base_dir: Directory the file must reside in for removal to happen.
    """
    try:
        if not path:
            return
        resolved = Path(path).resolve()
        # relative_to() raises ValueError for a path outside base_dir, which
        # the handler below swallows, so nothing outside it is removed.
        resolved.relative_to(Path(base_dir).resolve())
        if os.path.exists(resolved):
            os.remove(resolved)
    except Exception:
        # Deliberate best-effort cleanup: failures are ignored.
        pass
0 commit comments