-
-
Notifications
You must be signed in to change notification settings - Fork 3.5k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: Normalize file names in sync module (#2661)
This commit adds a new utility function `remove_special_characters` to the `normalize.py` module in the `sync/utils` directory. The function removes special characters from file names by first decomposing the input string with Unicode NFD normalization and then using a regular expression to strip every character that is not a word character (letter, digit, or underscore), whitespace, or a period. The function is then used in the `list_files.py` module in the `sync/utils` directory to normalize the names of files retrieved from Google Drive and Azure Drive. This ensures that the file names are free of special characters, improving consistency and compatibility with other parts of the system. Co-authored-by: Stan Girard <stan@quivr.app>
- Loading branch information
1 parent
a04ceea
commit 8e5af2c
Showing
2 changed files
with
26 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
import unicodedata | ||
import re | ||
from logger import get_logger | ||
|
||
# Module-level logger obtained from the project's `get_logger` helper, keyed by this module's name.
logger = get_logger(__name__) | ||
|
||
def remove_special_characters(input):
    """Return ``input`` with special characters stripped out.

    The string is first decomposed with Unicode NFD normalization, then
    every character that is not a word character (``\\w``), whitespace
    (``\\s``), or a literal period is deleted. Because NFD splits an
    accented letter into its base letter plus a combining mark, and
    combining marks are not word characters, accents are dropped as well
    (e.g. ``"é"`` becomes ``"e"``). Punctuation such as hyphens is removed
    too, while periods (and therefore file extensions) are kept.

    Both the original and the cleaned value are logged at info level.

    Args:
        input: The text (typically a file name) to clean. The parameter
            name shadows the builtin but is kept so keyword callers
            continue to work.

    Returns:
        The cleaned string. If anything goes wrong (for example ``input``
        is not a string), the error is logged and ``input`` is returned
        unchanged.
    """
    try:
        # Decompose first so that combining marks become separate code
        # points the character filter below can discard.
        decomposed = unicodedata.normalize('NFD', input)
        normalized_string = re.sub(r'[^\w\s.]', '', decomposed)
        logger.info(f"Input: {input}, Normalized: {normalized_string}")
    except Exception as e:
        # Best-effort: never fail the caller over a name that cannot be
        # cleaned; hand the original value back instead.
        logger.error(f"Error removing special characters: {e}")
        return input
    else:
        return normalized_string