Skip to content

Commit

Permalink
Moved deduplication to mirror
Browse files Browse the repository at this point in the history
* Downloader is used for mirroring/offline, so deduplication doesn't make sense there.
* Download logic is much cleaner.
  • Loading branch information
tmuntaner committed Jan 10, 2018
1 parent 0409fea commit 4bec051
Show file tree
Hide file tree
Showing 25 changed files with 1,675 additions and 351 deletions.
6 changes: 4 additions & 2 deletions app/models/downloaded_file.rb
@@ -1,12 +1,14 @@
class DownloadedFile < ApplicationRecord

def self.add_file!(checksum_type, checksum, file_size, local_path)
return nil unless local_path.match?(/\.(rpm|drpm)$/)
# Creates a DownloadedFile row for an rpm/drpm file.
#
# Paths that do not end in ".rpm" or ".drpm" are ignored. If the insert
# raises ActiveRecord::RecordNotUnique the error is swallowed and nil is
# returned.
#
# @param checksum_type value stored in the checksum_type column
# @param checksum value stored in the checksum column
# @param file_size value stored in the file_size column
# @param local_path [String] path of the file on disk
# @return the result of DownloadedFile.create, or nil when the path is
#   skipped or the row already exists
def self.add_file(checksum_type, checksum, file_size, local_path)
  # end_with? tests the real end of the string; a `$` regexp anchor would
  # also accept a matching extension that is followed by a newline.
  return unless local_path.end_with?('.rpm', '.drpm')

  DownloadedFile.create(
    checksum_type: checksum_type,
    checksum: checksum,
    local_path: local_path,
    file_size: file_size
  )
rescue ActiveRecord::RecordNotUnique
  nil
end

def self.get_local_path_by_checksum(checksum_type, checksum)
Expand Down
2 changes: 1 addition & 1 deletion lib/rmt/deduplicator.rb
Expand Up @@ -26,7 +26,7 @@ def copy(src, dest)

def self.add_local(path, checksum_type, checksum)
file_size = File.size(path)
DownloadedFile.add_file!(checksum_type, checksum, file_size, path)
DownloadedFile.add_file(checksum_type, checksum, file_size, path)
end

def self.deduplicate(checksum_type, checksum_value, destination)
Expand Down
53 changes: 15 additions & 38 deletions lib/rmt/downloader.rb
Expand Up @@ -24,9 +24,7 @@ def initialize(repository_url:, local_path:, auth_token: nil, logger: nil)
end

def download(remote_file, checksum_type = nil, checksum_value = nil)
local_file = make_local_path(remote_file)
was_deduplicated = deduplicate(checksum_type, checksum_value, local_file)
return local_file if was_deduplicated
local_file = self.class.make_local_path(@local_path, remote_file)

request_fiber = Fiber.new do
make_request(remote_file, local_file, request_fiber, checksum_type, checksum_value)
Expand All @@ -46,17 +44,22 @@ def download_multi(files)
@hydra.run
end

# Builds the local destination for a remote file and makes sure its parent
# directory exists.
#
# @param root_path [String] directory the file is placed under
# @param remote_file [String] path relative to the repository; every ".."
#   sequence is replaced with "__" before joining
# @return [String] the joined local path (the file itself is not created)
def self.make_local_path(root_path, remote_file)
  sanitized = remote_file.gsub('..', '__')

  File.join(root_path, sanitized).tap do |target|
    FileUtils.mkdir_p(File.dirname(target))
  end
end

protected

def process_queue
# Skip over files that already exist
begin
queue_item = @queue.shift
return unless queue_item

remote_file = queue_item.location
local_file = make_local_path(remote_file)
end while (File.exist?(local_file) || deduplicate(queue_item[:checksum_type], queue_item[:checksum], local_file)) # rubocop:disable Lint/Loop
queue_item = @queue.shift
return unless queue_item
remote_file = queue_item.location
local_file = self.class.make_local_path(@local_path, remote_file)

# The request is wrapped into a fiber for exception handling
request_fiber = Fiber.new do
Expand All @@ -72,15 +75,6 @@ def process_queue
@hydra.queue(request_fiber.resume)
end

# Delegates to ::RMT::Deduplicator.deduplicate for +destination+ and logs
# the outcome.
#
# @return [Boolean] true when the deduplicator returns a truthy value;
#   false when it returns a falsy value or raises MismatchException
def deduplicate(checksum_type, checksum_value, destination)
  if ::RMT::Deduplicator.deduplicate(checksum_type, checksum_value, destination)
    @logger.info("→ #{File.basename(destination)}")
    true
  else
    false
  end
rescue ::RMT::Deduplicator::MismatchException => error
  @logger.debug("× File does not exist or has wrong filesize, deduplication ignored #{error.message}.")
  false
end

def make_request(remote_file, local_file, request_fiber, checksum_type = nil, checksum_value = nil)
uri = URI.join(@repository_url, remote_file)
uri.query = @auth_token if (@auth_token && uri.scheme != 'file')
Expand All @@ -106,26 +100,9 @@ def make_request(remote_file, local_file, request_fiber, checksum_type = nil, ch
FileUtils.mv(downloaded_file.path, local_file)
File.chmod(0o644, local_file)

add_local_to_deduplicator(local_file, checksum_type, checksum_value)
::RMT::Deduplicator.add_local(local_file, checksum_type, checksum_value)

@logger.info("↓ #{File.basename(local_file)}")
end

# Registers +path+ with ::RMT::Deduplicator.add_local on a best-effort basis:
# any StandardError is logged at debug level (message, then each backtrace
# line) instead of being propagated.
def add_local_to_deduplicator(path, checksum_type, checksum)
  begin
    ::RMT::Deduplicator.add_local(path, checksum_type, checksum)
  rescue StandardError => error
    # A failure to record the file is not treated as a download failure.
    @logger.debug error.message
    error.backtrace.each do |frame|
      @logger.debug frame
    end
  end
end

# Maps +remote_file+ to a path below @local_path, replacing every ".." with
# "__", and creates the parent directory before returning the path.
#
# @param remote_file [String] path relative to the repository
# @return [String] the local file path (the file itself is not created)
def make_local_path(remote_file)
  safe_relative = remote_file.gsub(/\.\./, '__')
  target = File.join(@local_path, safe_relative)
  FileUtils.mkdir_p(File.dirname(target))
  target
end

end
35 changes: 31 additions & 4 deletions lib/rmt/mirror.rb
Expand Up @@ -3,7 +3,8 @@

class RMT::Mirror

class RMT::Mirror::Exception < RuntimeError; end
# Error class namespaced under RMT::Mirror; a RuntimeError subclass with no
# additional behaviour.
class RMT::Mirror::Exception < RuntimeError
end

def initialize(mirroring_base_dir:, repository_url:, local_path:, mirror_src: false, auth_token: nil, logger: nil)
@mirroring_base_dir = mirroring_base_dir
Expand Down Expand Up @@ -118,16 +119,18 @@ def mirror_license
end

def mirror_data
root_path = File.join(@mirroring_base_dir, @local_path)
@downloader.repository_url = @repository_url
@downloader.local_path = File.join(@mirroring_base_dir, @local_path)
@downloader.local_path = root_path

@deltainfo_files.each do |filename|
parser = RMT::Rpm::DeltainfoXmlParser.new(
File.join(@repodata_dir, filename),
@mirror_src
)
parser.parse
@downloader.download_multi(parser.referenced_files)
to_download = parsed_files_after_dedup(root_path, parser.referenced_files)
@downloader.download_multi(to_download) unless to_download.empty?
end

@primary_files.each do |filename|
Expand All @@ -136,7 +139,8 @@ def mirror_data
@mirror_src
)
parser.parse
@downloader.download_multi(parser.referenced_files)
to_download = parsed_files_after_dedup(root_path, parser.referenced_files)
@downloader.download_multi(to_download) unless to_download.empty?
end
end

Expand All @@ -153,4 +157,27 @@ def replace_metadata
FileUtils.remove_entry(@repodata_dir)
end

private

# Asks ::RMT::Deduplicator to deduplicate +destination+ and reports whether
# it did so.
#
# Logs the destination's basename at info level on success. A
# MismatchException from the deduplicator is logged at debug level and
# reported as false.
#
# @return [Boolean] true if the deduplicator returned a truthy value
def deduplicate(checksum_type, checksum_value, destination)
  was_copied = ::RMT::Deduplicator.deduplicate(checksum_type, checksum_value, destination)
  @logger.info("→ #{File.basename(destination)}") if was_copied
  was_copied ? true : false
rescue ::RMT::Deduplicator::MismatchException => mismatch
  @logger.debug("× File does not exist or has wrong filesize, deduplication ignored #{mismatch.message}.")
  false
end

# Filters +referenced_files+ down to the entries that still have to be
# downloaded.
#
# An entry is dropped when its local path (built by
# ::RMT::Downloader.make_local_path under +root_path+) already exists, or
# when #deduplicate returns true for its checksum.
#
# @param root_path [String] directory the local paths are built under
# @param referenced_files [Array] entries responding to #location and
#   #[] with :checksum_type / :checksum
# @return [Array] the remaining entries, in their original order
def parsed_files_after_dedup(root_path, referenced_files)
  referenced_files.reject do |entry|
    target = ::RMT::Downloader.make_local_path(root_path, entry.location)
    File.exist?(target) || deduplicate(entry[:checksum_type], entry[:checksum], target)
  end
end

end
@@ -0,0 +1,4 @@
directory.yast
license.de.txt
license.ru.txt
license.txt
@@ -0,0 +1,2 @@
This program is distributed WITHOUT ANY WARRANTY; without even the implied
warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
@@ -0,0 +1,2 @@
This program is distributed WITHOUT ANY WARRANTY; without even the implied
warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
@@ -0,0 +1,2 @@
This program is distributed WITHOUT ANY WARRANTY; without even the implied
warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,36 @@
<?xml version="1.0" encoding="UTF-8"?>
<repomd xmlns="http://linux.duke.edu/metadata/repo" xmlns:rpm="http://linux.duke.edu/metadata/rpm">
<revision>1500454273</revision>
<data type="filelists">
<checksum type="sha256">837fb50abc9680b1e11e050901a56721855a5e854e85e46ceaad2c6816297e69</checksum>
<open-checksum type="sha256">a65752c29ed9e2921c1654327cae06cf58d0085ea76756ab240f2e7e42499654</open-checksum>
<location href="repodata/837fb50abc9680b1e11e050901a56721855a5e854e85e46ceaad2c6816297e69-filelists.xml.gz"/>
<timestamp>1500454273</timestamp>
<size>402</size>
<open-size>929</open-size>
</data>
<data type="deltainfo">
<checksum type="sha256">a546b430098b8a3fb7d65493a9ce608fafcb32f451d0ce8bf85410191f347cc3</checksum>
<open-checksum type="sha256">e71433e2373d61a2e19f335c7f634b9bcfb1d458bc912626dc9ddc5f3e124981</open-checksum>
<location href="repodata/a546b430098b8a3fb7d65493a9ce608fafcb32f451d0ce8bf85410191f347cc3-deltainfo.xml.gz"/>
<timestamp>1500454273</timestamp>
<size>451</size>
<open-size>877</open-size>
</data>
<data type="other">
<checksum type="sha256">2d12587a74d924bad597fd8e25b8955270dfbe7591e020f9093edbb4a0d04444</checksum>
<open-checksum type="sha256">e490571870df62fa9f96b9bf4f5904c6f49987288758cac5f4a5b16300a64e8f</open-checksum>
<location href="repodata/2d12587a74d924bad597fd8e25b8955270dfbe7591e020f9093edbb4a0d04444-other.xml.gz"/>
<timestamp>1500454273</timestamp>
<size>379</size>
<open-size>791</open-size>
</data>
<data type="primary">
<checksum type="sha256">abf421e45af5cd686f050bab3d2a98e0a60d1b5ca3b07c86cb948fc1abfa675e</checksum>
<open-checksum type="sha256">cceefe3857696a694110fc35bf14f8e2b77a1ed8aee1d422f0e99be20a3fc42f</open-checksum>
<location href="repodata/abf421e45af5cd686f050bab3d2a98e0a60d1b5ca3b07c86cb948fc1abfa675e-primary.xml.gz"/>
<timestamp>1500454273</timestamp>
<size>920</size>
<open-size>4399</open-size>
</data>
</repomd>
@@ -0,0 +1 @@
Dummy signature.
@@ -0,0 +1 @@
Dummy key.

0 comments on commit 4bec051

Please sign in to comment.