Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
file_metadata: Add class GenericFile
The GenericFile is the basic scaffolding for all files that can be analyzed. It is a base class for other formats and also holds analysis methods which are applicable to all files. Fixes #5
- Loading branch information
1 parent
a167c16
commit 45eae38
Showing
2 changed files
with
87 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
# -*- coding: utf-8 -*- | ||
|
||
from __future__ import (division, absolute_import, unicode_literals, | ||
print_function) | ||
|
||
from file_metadata.utilities import PropertyCached | ||
|
||
|
||
class GenericFile(object):
    """
    Object corresponding to a single file. An abstract class that can be
    used for any mimetype/media-type (depending on the file itself). Holds
    the analysis machinery that is common to all types of files.

    Any class that inherits from this abstract class would probably want to
    set the ``mimetypes`` and override the ``analyze()`` or write their
    own ``analyze_*()`` methods depending on the file type and analysis
    routines that should be run.

    Inherits from ``object`` explicitly so that the class is new-style on
    Python 2 as well; the descriptor protocol used by ``PropertyCached``
    is only fully supported on new-style classes.

    :ivar mimetypes: Set of mimetypes (strings) applicable to this class
                     based on the official standard by IANA.
    """
    mimetypes = ()

    def __init__(self, fname):
        """
        Remember which file this object describes.

        :param fname: Name/path of the file. It is stored unchanged in
                      ``self.filename``; the file is not opened here.
        """
        self.filename = fname

    def analyze(self, prefix='analyze_', suffix='', methods=None):
        """
        Analyze the given file and create metadata information appropriately.

        Search and use all methods that have a name starting with
        ``prefix`` and ending with ``suffix`` (``analyze_*`` by default)
        and merge the dictionaries they return using ``.update()`` to get
        the cumulative set of metadata. Methods are called in the order
        they are listed, so on key collisions the later method wins.

        :param prefix:  Use only methods that have this prefix.
        :param suffix:  Use only methods that have this suffix.
        :param methods: A list of method names to choose from. If not given
                        (``None``), a sorted list of all attribute names of
                        the object is used. An explicitly empty list selects
                        no methods at all.
        :return:        A dict containing the cumulative metadata.
        """
        data = {}
        # Compare against None instead of relying on truthiness: an empty
        # list passed by the caller must mean "run nothing", not "run all".
        if methods is None:
            methods = sorted(dir(self))
        for method in methods:
            if method.startswith(prefix) and method.endswith(suffix):
                data.update(getattr(self, method)())
        return data

    @PropertyCached
    def metadata(self):
        """
        A python dictionary of all the metadata identified by analyzing
        the given file, as produced by ``analyze()`` with its default
        arguments. Access is mediated by ``PropertyCached`` (imported from
        ``file_metadata.utilities``), which presumably caches the result
        after the first access -- confirm against that helper.

        :return: All the metadata found about the given file.
        """
        return self.analyze()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
# -*- coding: utf-8 -*- | ||
|
||
from __future__ import (division, absolute_import, unicode_literals, | ||
print_function) | ||
|
||
from file_metadata.generic_file import GenericFile | ||
from tests import fetch_file, unittest | ||
|
||
|
||
class DerivedFile(GenericFile):
    """
    Minimal ``GenericFile`` subclass used as a fixture by the tests.

    It exposes two trivial analyzers and restricts ``analyze()`` to them.
    """

    def analyze(self):
        """
        Run only the analyzers whose name starts with ``analyze_test``.

        :return: The merged dict returned by ``GenericFile.analyze``.
        """
        test_prefix = 'analyze_test'
        merged = GenericFile.analyze(self, prefix=test_prefix)
        return merged

    def analyze_test1(self):
        """Return a fixed one-item dict for the first test analyzer."""
        result = {"test1": "test1"}
        return result

    def analyze_test2(self):
        """Return a fixed one-item dict for the second test analyzer."""
        result = {"test2": "test2"}
        return result
|
||
|
||
class DerivedFileTest(unittest.TestCase):
    """Checks that ``metadata`` merges the results of all test analyzers."""

    def setUp(self):
        """Create the unit under test from the ``ascii.txt`` fixture."""
        fixture_path = fetch_file('ascii.txt')
        self.uut = DerivedFile(fixture_path)

    def test_metadata(self):
        """``metadata`` must contain the entries of both analyzers."""
        expected = {'test1': 'test1',
                    'test2': 'test2'}
        self.assertEqual(self.uut.metadata, expected)