Skip to content

Commit

Permalink
feat: separate and proper (I hope so) handling of symlinks
Browse files Browse the repository at this point in the history
  • Loading branch information
F33RNI committed Feb 5, 2024
1 parent 4a11078 commit 42681d3
Show file tree
Hide file tree
Showing 8 changed files with 187 additions and 97 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@ dist/
build/
Test/
test.py
config.json
config.json
config_.json
163 changes: 107 additions & 56 deletions Backupper.py

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions GUIHandler.py
Original file line number Diff line number Diff line change
Expand Up @@ -503,6 +503,7 @@ def gui_set_enabled(self, enabled: bool = True) -> None:
self.groupBox_3.setEnabled(enabled)
self.le_save_to.setEnabled(enabled)
self.btn_save_to_browse.setEnabled(enabled)
self.cb_follow_symlinks.setEnabled(enabled)
self.cb_delete_data.setEnabled(enabled)
self.cb_delete_skipped.setEnabled(self._config_manager.get_config("delete_data") if enabled else False)
self.cb_create_empty_dirs.setEnabled(enabled)
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ Result:
3. The `🗑️` button will remove the current path from the input data (it will not be copied to the backup)
4. Select output (backup) directory by pressing `Browse` button or type path manually near _Where to save backup:_
5. Change settings if you need:
1. `Follow symlinks` - If set, during tree generation (before calculating checksums, deleting and copying files), symbolic links will be followed (if supported by the system) **_Default:_** `Unchecked`
1. `Follow symlinks` - If set, symbolic links will be followed (if supported by the system) during tree generation (before calculating checksums) and while copying files. **Without this flag, the symlinks themselves will be recreated inside the backup.** **_Default:_** `Unchecked`
2. `Delete entries from backup according to input data` - If set, those files inside the existing backup that don't exist in the input paths will be deleted. **Without this flag, files from the backup will never be deleted.** **_Default:_** `Checked`
1. `Delete skipped entries` - If set, files in skipped paths will also be deleted from the existing backup. Without this flag, if files exist even in the skipped paths, they will not be deleted. **_Default:_** `Unchecked`
3. `Create empty directories` - If set, all empty folders in the input paths will also be created in the backup. **Without this flag, only non-empty directories will be copied.** **_Default:_** `Checked`
Expand Down
49 changes: 37 additions & 12 deletions copy_entries.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
You should have received a copy of the GNU Affero General Public License along with this program.
If not, see <http://www.gnu.org/licenses/>.
"""

import logging
import multiprocessing
import os
Expand All @@ -36,17 +37,20 @@ def copy_entries(
checksums_input: Dict,
checksums_output: Dict,
output_dir: str,
follow_symlinks: bool,
stats_copied_ok_value: multiprocessing.Value,
stats_copied_error_value: multiprocessing.Value,
stats_created_dirs_ok_value: multiprocessing.Value,
stats_created_dirs_error_value: multiprocessing.Value,
stats_created_symlinks_value: multiprocessing.Value,
control_value: multiprocessing.Value or None = None,
logging_queue: multiprocessing.Queue or None = None,
) -> None:
"""Process body to copy input files and directories to the backup output_dir
Args:
filepaths_queue (multiprocessing.Queue): queue of non-skipped files to to copy (path relative to root, root dir)
filepaths_queue (multiprocessing.Queue): queue of non-skipped files and symlinks to copy
(path relative to root, root dir)
checksums_input (Dict): checksums of input files
checksums_output (Dict): checksums of output files
{
Expand All @@ -60,10 +64,12 @@ def copy_entries(
}
}
output_dir (str): path to the output (backup) directory
follow_symlinks (bool): False to copy symlinks themselves instead of referenced files
stats_copied_ok_value (multiprocessing.Value): counter of total successful copy calls
stats_copied_error_value (multiprocessing.Value): counter of total unsuccessful copy calls
stats_created_dirs_ok_value (multiprocessing.Value): counter of total successful mkdirs calls
stats_created_dirs_error_value (multiprocessing.Value): counter of total unsuccessful mkdirs calls
stats_created_symlinks_value (multiprocessing.Value): counter of total created symlinks
control_value (multiprocessing.Value or None, optional): value (int) to pause / cancel process
logging_queue (multiprocessing.Queue or None, optional): logging queue to accept logs
"""
Expand Down Expand Up @@ -136,20 +142,21 @@ def copy_entries(
if filepath_rel in checksums_input:
checksum_input = checksums_input[filepath_rel]["checksum"]

# Raise an error if no input checksum
if not checksum_input:
raise Exception(f"No checksum was calculated for {checksum_input}")

# Generate output absolute path
output_path_abs = os.path.join(output_dir, filepath_rel)

# Find output checksum
checksum_output = None
if filepath_rel in checksums_output:
checksum_output = checksums_output[filepath_rel]["checksum"]

# Generate output absolute path
output_path_abs = os.path.join(output_dir, filepath_rel)

# Skip if file exists and checksums are equal
if os.path.exists(output_path_abs) and checksum_output and checksum_output == checksum_input:
if (
os.path.exists(output_path_abs)
and checksum_input
and checksum_output
and checksum_output == checksum_input
):
continue

# Try to create directories if not exist
Expand All @@ -172,10 +179,28 @@ def copy_entries(
stats_created_dirs_error_value.value += 1
continue

# Copy symlink (create a new one)
if not follow_symlinks and os.path.islink(input_file_abs):
link_to = os.readlink(input_file_abs)

# Ignore if already exists
if (
os.path.exists(output_path_abs)
and os.path.islink(output_path_abs)
and os.readlink(output_path_abs) == link_to
):
continue

# Create symlink
os.symlink(link_to, output_path_abs)
with stats_created_symlinks_value.get_lock():
stats_created_symlinks_value.value += 1

# Copy file
shutil.copy(input_file_abs, output_path_abs)
with stats_copied_ok_value.get_lock():
stats_copied_ok_value.value += 1
else:
shutil.copy(input_file_abs, output_path_abs, follow_symlinks=follow_symlinks)
with stats_copied_ok_value.get_lock():
stats_copied_ok_value.value += 1

# Error occurred -> log error and increment error counter
except Exception as e:
Expand Down
13 changes: 10 additions & 3 deletions delete_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
You should have received a copy of the GNU Affero General Public License along with this program.
If not, see <http://www.gnu.org/licenses/>.
"""

import logging
import multiprocessing
import os
Expand Down Expand Up @@ -173,8 +174,14 @@ def delete_files(
if not delete_flag:
continue

# Remove only the link. If everything is ok, this must be 3rd. But it's here just in case
if os.path.islink(out_filepath_abs):
os.unlink(out_filepath_abs)
with stats_deleted_ok_value.get_lock():
stats_deleted_ok_value.value += 1

# Must be 1st
if tree_type == "files":
elif tree_type == "files":
# Delete as file
os.remove(out_filepath_abs)
with stats_deleted_ok_value.get_lock():
Expand All @@ -201,8 +208,8 @@ def delete_files(
with stats_deleted_ok_value.get_lock():
stats_deleted_ok_value.value += 1

# "unknown" Must be 3rd
# Idk what exactly we should do here, so first we delete it as a file and then as a directory
# "unknown" Must be 4th
# firstly we're trying to delete it as a file and then as a directory
else:
deleted = False

Expand Down
15 changes: 4 additions & 11 deletions gui.ui
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
<rect>
<x>0</x>
<y>0</y>
<width>474</width>
<width>519</width>
<height>757</height>
</rect>
</property>
Expand Down Expand Up @@ -77,7 +77,7 @@
<rect>
<x>0</x>
<y>0</y>
<width>448</width>
<width>493</width>
<height>112</height>
</rect>
</property>
Expand Down Expand Up @@ -148,7 +148,7 @@
<item>
<widget class="QCheckBox" name="cb_follow_symlinks">
<property name="toolTip">
<string>Follow symbolic links when parsing files</string>
<string>Follow symbolic links when parsing and copying files</string>
</property>
<property name="text">
<string>Follow symlinks</string>
Expand Down Expand Up @@ -514,14 +514,7 @@
</widget>
<widget class="QStatusBar" name="statusbar">
<property name="toolTip">
<string>S: Stage current / total stages
Fv: Files Viewed
Dv: Directories Viewed
C: Checksums calculated, errors
FDcp: Files and Directories copied, errors
FDdel: Files and Directories deleted, errors
Dcr: Directories created, errors
Cvld: Checksums validated, mismatches, not exist</string>
<string>&lt;html&gt;&lt;head/&gt;&lt;body&gt;&lt;p&gt;S: Stage current / total stages&lt;/p&gt;&lt;p&gt;Fv: Files Viewed&lt;/p&gt;&lt;p&gt;Dv: Directories Viewed&lt;/p&gt;&lt;p&gt;Sv: Symlinks Viewed&lt;/p&gt;&lt;p&gt;Uv: Unknown entries Viewed&lt;/p&gt;&lt;p&gt;C: Checksums calculated, errors&lt;/p&gt;&lt;p&gt;FDcp: Files and Directories copied, errors&lt;/p&gt;&lt;p&gt;FDdel: Files and Directories deleted, errors&lt;/p&gt;&lt;p&gt;Dcr: Directories created, errors&lt;/p&gt;&lt;p&gt;Scr: Symlinks created&lt;/p&gt;&lt;p&gt;Cvld: Checksums validated, mismatches, not exist&lt;/p&gt;&lt;/body&gt;&lt;/html&gt;</string>
</property>
</widget>
</widget>
Expand Down
38 changes: 25 additions & 13 deletions tree_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
You should have received a copy of the GNU Affero General Public License along with this program.
If not, see <http://www.gnu.org/licenses/>.
"""

import logging
import multiprocessing
import os
Expand All @@ -31,9 +32,10 @@
QUEUE_TIMEOUT = 2

# Definitions for parsed_queue
PATH_IS_FILE = 0
PATH_IS_DIR = 1
PATH_UNKNOWN = 2
PATH_FILE = 0
PATH_DIR = 1
PATH_SYMLINK = 2
PATH_UNKNOWN = 3


def tree_parser(
Expand All @@ -43,6 +45,8 @@ def tree_parser(
follow_symlinks: bool,
stats_tree_parsed_dirs: multiprocessing.Value,
stats_tree_parsed_files: multiprocessing.Value,
stats_tree_parsed_symlinks: multiprocessing.Value,
stats_tree_parsed_unknown: multiprocessing.Value,
control_value: multiprocessing.Value or None = None,
logging_queue: multiprocessing.Queue or None = None,
) -> None:
Expand All @@ -59,6 +63,8 @@ def tree_parser(
follow_symlinks (bool): True to follow symlinks, False to report them as symlinks without following them
stats_tree_parsed_dirs (multiprocessing.Value): counter of total successfully parsed directories
stats_tree_parsed_files (multiprocessing.Value): counter of total successfully parsed files
stats_tree_parsed_symlinks (multiprocessing.Value): counter of total successfully parsed symlinks
stats_tree_parsed_unknown (multiprocessing.Value): counter of total parsed unknown entries
control_value (multiprocessing.Value or None, optional): value (int) to pause / cancel process
logging_queue (multiprocessing.Queue or None, optional): logging queue to accept logs
"""
Expand Down Expand Up @@ -151,21 +157,27 @@ def tree_parser(

# Parse it
try:
# Ignore symlinks
if dir_or_file.is_symlink() and not follow_symlinks:
continue

# Ignore if found in skipped paths
if os.path.normpath(str(dir_or_file)) in skipped_entries_abs:
continue

# Find path relative to root
dir_or_file_rel = os.path.relpath(dir_or_file, root_dir)

# Increment symlinks counter
if dir_or_file.is_symlink():
with stats_tree_parsed_symlinks.get_lock():
stats_tree_parsed_symlinks.value += 1

# Symlink and not follow it -> just parse as symlink
if dir_or_file.is_symlink() and not follow_symlinks:
# (relative path, root dir path, PATH_..., is empty directory)
parsed_queue.put((dir_or_file_rel, root_dir, PATH_SYMLINK, False))

# Just file -> put to parsed queue
if dir_or_file.is_file():
elif dir_or_file.is_file():
# (relative path, root dir path, PATH_..., is empty directory)
parsed_queue.put((dir_or_file_rel, root_dir, PATH_IS_FILE, False))
parsed_queue.put((dir_or_file_rel, root_dir, PATH_FILE, False))

# Increment counter
with stats_tree_parsed_files.get_lock():
Expand All @@ -180,7 +192,7 @@ def tree_parser(
is_empty = False

# (relative path, root dir path, PATH_..., is empty directory)
parsed_queue.put((dir_or_file_rel, root_dir, PATH_IS_DIR, is_empty))
parsed_queue.put((dir_or_file_rel, root_dir, PATH_DIR, is_empty))

# Put again in recursion if not empty with the same root
if not is_empty:
Expand All @@ -195,9 +207,9 @@ def tree_parser(
# (relative path, root dir path, PATH_..., is empty directory)
parsed_queue.put((dir_or_file_rel, root_dir, PATH_UNKNOWN, False))

# Increment files counter ¯\_(ツ)_/¯
with stats_tree_parsed_files.get_lock():
stats_tree_parsed_files.value += 1
# Increment unknown counter
with stats_tree_parsed_unknown.get_lock():
stats_tree_parsed_unknown.value += 1

# Error occurred -> log error
except Exception as e:
Expand Down

0 comments on commit 42681d3

Please sign in to comment.