Skip to content

Commit

Permalink
Added crc32 as a hash method
Browse files Browse the repository at this point in the history
  • Loading branch information
Kataiser committed Jul 7, 2019
1 parent eb5271b commit 82a90cc
Show file tree
Hide file tree
Showing 7 changed files with 30 additions and 14 deletions.
Binary file modified docs/_build/doctrees/environment.pickle
Binary file not shown.
Binary file modified docs/_build/doctrees/guide/funcs.doctree
Binary file not shown.
22 changes: 15 additions & 7 deletions docs/_build/html/_modules/fast_package_file.html
Original file line number Diff line number Diff line change
Expand Up @@ -210,8 +210,8 @@ <h1>Source code for fast_package_file</h1><div class="highlight"><pre>

<span class="k">try</span><span class="p">:</span>
<span class="n">loc_data_bin</span> <span class="o">=</span> <span class="n">decomp_func_loc_data</span><span class="p">(</span><span class="n">loc_data_raw</span><span class="p">)</span> <span class="c1"># decompress</span>
<span class="k">except</span> <span class="n">zlib</span><span class="o">.</span><span class="n">error</span> <span class="k">as</span> <span class="n">zlib_error</span><span class="p">:</span>
<span class="k">raise</span> <span class="n">PackageDataError</span><span class="p">(</span><span class="s2">&quot;</span><span class="si">{}</span><span class="s2"> is corrupted or malformed (</span><span class="si">{}</span><span class="s2">)&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">__data_file_path</span><span class="p">,</span> <span class="n">zlib_error</span><span class="p">))</span>
<span class="k">except</span> <span class="p">(</span><span class="ne">OSError</span><span class="p">,</span> <span class="n">zlib</span><span class="o">.</span><span class="n">error</span><span class="p">)</span> <span class="k">as</span> <span class="n">gzip_error</span><span class="p">:</span>
<span class="k">raise</span> <span class="n">PackageDataError</span><span class="p">(</span><span class="s2">&quot;</span><span class="si">{}</span><span class="s2"> is corrupted or malformed (</span><span class="si">{}</span><span class="s2">)&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">__data_file_path</span><span class="p">,</span> <span class="n">gzip_error</span><span class="p">))</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">loc_data_bin</span> <span class="o">=</span> <span class="n">loc_data_raw</span>

Expand Down Expand Up @@ -260,15 +260,19 @@ <h1>Source code for fast_package_file</h1><div class="highlight"><pre>
<span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">file_loc_data</span><span class="p">)</span> <span class="o">==</span> <span class="mi">6</span><span class="p">:</span> <span class="c1"># hash info exists</span>
<span class="k">if</span> <span class="n">file_loc_data</span><span class="p">[</span><span class="mi">5</span><span class="p">]</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s1">&#39;md5 &#39;</span><span class="p">):</span>
<span class="n">hasher</span> <span class="o">=</span> <span class="n">hashlib</span><span class="o">.</span><span class="n">md5</span><span class="p">()</span>
<span class="n">hasher</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">data_file_out</span><span class="p">)</span>
<span class="n">hash_</span> <span class="o">=</span> <span class="n">hasher</span><span class="o">.</span><span class="n">hexdigest</span><span class="p">()</span>
<span class="k">elif</span> <span class="n">file_loc_data</span><span class="p">[</span><span class="mi">5</span><span class="p">]</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s1">&#39;sha256&#39;</span><span class="p">):</span>
<span class="n">hasher</span> <span class="o">=</span> <span class="n">hashlib</span><span class="o">.</span><span class="n">sha256</span><span class="p">()</span>
<span class="n">hasher</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">data_file_out</span><span class="p">)</span>
<span class="n">hash_</span> <span class="o">=</span> <span class="n">hasher</span><span class="o">.</span><span class="n">hexdigest</span><span class="p">()</span>
<span class="k">elif</span> <span class="n">file_loc_data</span><span class="p">[</span><span class="mi">5</span><span class="p">]</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s1">&#39;crc32 &#39;</span><span class="p">):</span>
<span class="n">hash_</span> <span class="o">=</span> <span class="n">zlib</span><span class="o">.</span><span class="n">crc32</span><span class="p">(</span><span class="n">data_file_out</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">raise</span> <span class="n">PackageDataError</span><span class="p">(</span><span class="s2">&quot;</span><span class="si">{}</span><span class="s2"> is corrupted or malformed (hash method seems to be &#39;</span><span class="si">{}</span><span class="s2">&#39;)&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">__data_file_path</span><span class="p">,</span> <span class="n">file_loc_data</span><span class="p">[</span><span class="mi">5</span><span class="p">][:</span><span class="mi">6</span><span class="p">]))</span>

<span class="n">hasher</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">data_file_out</span><span class="p">)</span>

<span class="k">if</span> <span class="n">hasher</span><span class="o">.</span><span class="n">hexdigest</span><span class="p">()</span> <span class="o">!=</span> <span class="n">file_loc_data</span><span class="p">[</span><span class="mi">5</span><span class="p">][</span><span class="mi">6</span><span class="p">:]:</span> <span class="c1"># confirm hash</span>
<span class="k">raise</span> <span class="n">PackageDataError</span><span class="p">(</span><span class="s2">&quot;</span><span class="si">{}</span><span class="s2"> is corrupted or malformed (&#39;</span><span class="si">{}</span><span class="s2">&#39; hash mismatch: </span><span class="si">{}</span><span class="s2"> != </span><span class="si">{}</span><span class="s2">)&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">__data_file_path</span><span class="p">,</span> <span class="n">file</span><span class="p">,</span> <span class="n">hasher</span><span class="o">.</span><span class="n">hexdigest</span><span class="p">(),</span> <span class="n">file_loc_data</span><span class="p">[</span><span class="mi">5</span><span class="p">][</span><span class="mi">6</span><span class="p">:]))</span>
<span class="k">if</span> <span class="n">hash_</span> <span class="o">!=</span> <span class="n">file_loc_data</span><span class="p">[</span><span class="mi">5</span><span class="p">][</span><span class="mi">6</span><span class="p">:]:</span> <span class="c1"># confirm hash</span>
<span class="k">raise</span> <span class="n">PackageDataError</span><span class="p">(</span><span class="s2">&quot;</span><span class="si">{}</span><span class="s2"> is corrupted or malformed (&#39;</span><span class="si">{}</span><span class="s2">&#39; hash mismatch: </span><span class="si">{}</span><span class="s2"> != </span><span class="si">{}</span><span class="s2">)&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">__data_file_path</span><span class="p">,</span> <span class="n">file</span><span class="p">,</span> <span class="n">hash_</span><span class="p">,</span> <span class="n">file_loc_data</span><span class="p">[</span><span class="mi">5</span><span class="p">][</span><span class="mi">6</span><span class="p">:]))</span>
<span class="k">else</span><span class="p">:</span> <span class="c1"># basically a cheap hash</span>
<span class="k">if</span> <span class="n">data_file_raw</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="o">!=</span> <span class="n">file_loc_data</span><span class="p">[</span><span class="mi">3</span><span class="p">]:</span> <span class="c1"># check if first byte matches</span>
<span class="k">raise</span> <span class="n">PackageDataError</span><span class="p">(</span><span class="s2">&quot;</span><span class="si">{}</span><span class="s2"> is corrupted or malformed (first byte of file &#39;</span><span class="si">{}</span><span class="s2">&#39; should be </span><span class="si">{}</span><span class="s2">, but was loaded as </span><span class="si">{}</span><span class="s2">)&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span>
Expand Down Expand Up @@ -362,7 +366,7 @@ <h1>Source code for fast_package_file</h1><div class="highlight"><pre>
<span class="sd"> :param compress: Whether to compress the package, either with ``comp_func`` or Gzip by default.</span>
<span class="sd"> :param keep_comp_threshold: 0 through 1 (default is 0.98). For each input file, if compression doesn&#39;t improve file size by this ratio, the file is instead stored uncompressed. Set to 1 to</span>
<span class="sd"> compress every file no matter what.</span>
<span class="sd"> :param hash_mode: The hash method to use to ensure file validity. Can be &quot;md5&quot; or &quot;sha256&quot;. If :py:class:`None` (the default), only the first and last bytes are compared.</span>
<span class="sd"> :param hash_mode: The hash method to use to ensure file validity. Can be &quot;md5&quot;, &quot;crc32&quot;, or &quot;sha256&quot;. If :py:class:`None` (the default), only the first and last bytes are compared.</span>
<span class="sd"> :param comp_func: A supplied compression function that takes :py:class:`bytes` and returns :py:class:`bytes`. Some recommendations: LZMA, LZMA2, Deflate, BZip2, Oodle, or Zstandard.</span>
<span class="sd"> :param crc32_paths: Store file paths as `crc32 &lt;https://en.wikipedia.org/wiki/Cyclic_redundancy_check&gt;`_ numbers. Useful for obfuscating file names and paths.</span>
<span class="sd"> :param progress_bar: Whether to show a progress bar (uses `tqdm &lt;https://github.com/tqdm/tqdm&gt;`_). If tqdm isn&#39;t installed, this is irrelevant.</span>
Expand Down Expand Up @@ -449,6 +453,10 @@ <h1>Source code for fast_package_file</h1><div class="highlight"><pre>
<span class="n">hasher</span> <span class="o">=</span> <span class="n">hashlib</span><span class="o">.</span><span class="n">sha256</span><span class="p">()</span>
<span class="n">hasher</span><span class="o">.</span><span class="n">update</span><span class="p">(</span><span class="n">input_file_data_raw</span><span class="p">)</span>
<span class="n">loc_data_save</span><span class="p">[</span><span class="n">file_path_out</span><span class="p">]</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="s1">&#39;sha256</span><span class="si">{}</span><span class="s1">&#39;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">hasher</span><span class="o">.</span><span class="n">hexdigest</span><span class="p">()))</span>
<span class="k">elif</span> <span class="n">hash_mode</span> <span class="o">==</span> <span class="s1">&#39;crc32&#39;</span><span class="p">:</span>
<span class="n">loc_data_save</span><span class="p">[</span><span class="n">file_path_out</span><span class="p">]</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="s1">&#39;crc32 </span><span class="si">{}</span><span class="s1">&#39;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">zlib</span><span class="o">.</span><span class="n">crc32</span><span class="p">(</span><span class="n">input_file_data_raw</span><span class="p">)))</span>
<span class="k">elif</span> <span class="n">hash_mode</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">KeyError</span><span class="p">(</span><span class="s2">&quot;&quot;&quot;&#39;hash_mode&#39; isn&#39;t &quot;md5&quot;, &quot;sha256&quot;, or &quot;crc32&quot; &quot;&quot;&quot;</span><span class="p">[:</span><span class="o">-</span><span class="mi">1</span><span class="p">])</span>

<span class="n">comp_func_loc_data</span> <span class="o">=</span> <span class="n">comp_func</span> <span class="k">if</span> <span class="p">(</span><span class="n">comp_func</span> <span class="ow">and</span> <span class="n">compress</span><span class="p">)</span> <span class="k">else</span> <span class="n">_gzip_compress_fix</span>
<span class="n">loc_data_save_json</span> <span class="o">=</span> <span class="n">json</span><span class="o">.</span><span class="n">dumps</span><span class="p">(</span><span class="n">loc_data_save</span><span class="p">,</span> <span class="n">separators</span><span class="o">=</span><span class="p">(</span><span class="s1">&#39;,&#39;</span><span class="p">,</span> <span class="s1">&#39;:&#39;</span><span class="p">),</span> <span class="n">sort_keys</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span><span class="o">.</span><span class="n">encode</span><span class="p">(</span><span class="s1">&#39;utf-8&#39;</span><span class="p">)</span> <span class="c1"># convert header to binary</span>
Expand Down
2 changes: 1 addition & 1 deletion docs/_build/html/guide/funcs.html
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ <h1>Function reference<a class="headerlink" href="#function-reference" title="Pe
<li><p><strong>compress</strong> – Whether to compress the package, either with <code class="docutils literal notranslate"><span class="pre">comp_func</span></code> or Gzip by default.</p></li>
<li><p><strong>keep_comp_threshold</strong> – 0 through 1 (default is 0.98). For each input file, if compression doesn’t improve file size by this ratio, the file is instead stored uncompressed. Set to 1 to
compress every file no matter what.</p></li>
<li><p><strong>hash_mode</strong> – The hash method to use to ensure file validity. Can be “md5” or “sha256”. If <code class="xref py py-class docutils literal notranslate"><span class="pre">None</span></code> (the default), only the first and last bytes are compared.</p></li>
<li><p><strong>hash_mode</strong> – The hash method to use to ensure file validity. Can be “md5”, “crc32”, or “sha256”. If <code class="xref py py-class docutils literal notranslate"><span class="pre">None</span></code> (the default), only the first and last bytes are compared.</p></li>
<li><p><strong>comp_func</strong> – A supplied compression function that takes <code class="xref py py-class docutils literal notranslate"><span class="pre">bytes</span></code> and returns <code class="xref py py-class docutils literal notranslate"><span class="pre">bytes</span></code>. Some recommendations: LZMA, LZMA2, Deflate, BZip2, Oodle, or Zstandard.</p></li>
<li><p><strong>crc32_paths</strong> – Store file paths as <a class="reference external" href="https://en.wikipedia.org/wiki/Cyclic_redundancy_check">crc32</a> numbers. Useful for obfuscating file names and paths.</p></li>
<li><p><strong>progress_bar</strong> – Whether to show a progress bar (uses <a class="reference external" href="https://github.com/tqdm/tqdm">tqdm</a>). If tqdm isn’t installed, this is irrelevant.</p></li>
Expand Down
18 changes: 13 additions & 5 deletions fast_package_file/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,15 +110,19 @@ def load_file(self, file: str) -> bytes:
if len(file_loc_data) == 6: # hash info exists
if file_loc_data[5].startswith('md5 '):
hasher = hashlib.md5()
hasher.update(data_file_out)
hash_ = hasher.hexdigest()
elif file_loc_data[5].startswith('sha256'):
hasher = hashlib.sha256()
hasher.update(data_file_out)
hash_ = hasher.hexdigest()
elif file_loc_data[5].startswith('crc32 '):
hash_ = zlib.crc32(data_file_out)
else:
raise PackageDataError("{} is corrupted or malformed (hash method seems to be '{}')".format(self.__data_file_path, file_loc_data[5][:6]))

hasher.update(data_file_out)

if hasher.hexdigest() != file_loc_data[5][6:]: # confirm hash
raise PackageDataError("{} is corrupted or malformed ('{}' hash mismatch: {} != {})".format(self.__data_file_path, file, hasher.hexdigest(), file_loc_data[5][6:]))
if hash_ != file_loc_data[5][6:]: # confirm hash
raise PackageDataError("{} is corrupted or malformed ('{}' hash mismatch: {} != {})".format(self.__data_file_path, file, hash_, file_loc_data[5][6:]))
else: # basically a cheap hash
if data_file_raw[0] != file_loc_data[3]: # check if first byte matches
raise PackageDataError("{} is corrupted or malformed (first byte of file '{}' should be {}, but was loaded as {})".format(
Expand Down Expand Up @@ -212,7 +216,7 @@ def build(directory: str, target: str, compress: bool = True, keep_comp_threshol
:param compress: Whether to compress the package, either with ``comp_func`` or Gzip by default.
:param keep_comp_threshold: 0 through 1 (default is 0.98). For each input file, if compression doesn't improve file size by this ratio, the file is instead stored uncompressed. Set to 1 to
compress every file no matter what.
:param hash_mode: The hash method to use to ensure file validity. Can be "md5" or "sha256". If :py:class:`None` (the default), only the first and last bytes are compared.
:param hash_mode: The hash method to use to ensure file validity. Can be "md5", "crc32", or "sha256". If :py:class:`None` (the default), only the first and last bytes are compared.
:param comp_func: A supplied compression function that takes :py:class:`bytes` and returns :py:class:`bytes`. Some recommendations: LZMA, LZMA2, Deflate, BZip2, Oodle, or Zstandard.
:param crc32_paths: Store file paths as `crc32 <https://en.wikipedia.org/wiki/Cyclic_redundancy_check>`_ numbers. Useful for obfuscating file names and paths.
:param progress_bar: Whether to show a progress bar (uses `tqdm <https://github.com/tqdm/tqdm>`_). If tqdm isn't installed, this is irrelevant.
Expand Down Expand Up @@ -299,6 +303,10 @@ def build(directory: str, target: str, compress: bool = True, keep_comp_threshol
hasher = hashlib.sha256()
hasher.update(input_file_data_raw)
loc_data_save[file_path_out].append('sha256{}'.format(hasher.hexdigest()))
elif hash_mode == 'crc32':
loc_data_save[file_path_out].append('crc32 {}'.format(zlib.crc32(input_file_data_raw)))
elif hash_mode:
raise KeyError("""'hash_mode' isn't "md5", "sha256", or "crc32" """[:-1])

comp_func_loc_data = comp_func if (comp_func and compress) else _gzip_compress_fix
loc_data_save_json = json.dumps(loc_data_save, separators=(',', ':'), sort_keys=True).encode('utf-8') # convert header to binary
Expand Down
Binary file modified test_dir/ref_crc32.data
Binary file not shown.
2 changes: 1 addition & 1 deletion tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ def build_references():
fast_package_file.build('docs_v1.0_testing', ref_list[4], hash_mode='sha256', compress=False)
fast_package_file.build('docs_v1.0_testing', ref_list[5], hash_mode='sha256')
fast_package_file.build('docs_v1.0_testing', ref_list[6], comp_func=lzma.compress)
fast_package_file.build('docs_v1.0_testing', ref_list[7], crc32_paths=True)
fast_package_file.build('docs_v1.0_testing', ref_list[7], hash_mode='crc32', crc32_paths=True)


ref_list = ['ref_uncompressed.data', 'ref_compressed.data', 'ref_md5_uncompressed.data', 'ref_md5_compressed.data', 'ref_sha256_uncompressed.data', 'ref_sha256_compressed.data',
Expand Down

0 comments on commit 82a90cc

Please sign in to comment.