Skip to content

Commit

Permalink
file_metadata: Add class GenericFile
Browse files Browse the repository at this point in the history
The GenericFile is the basic scaffolding for all files that can
be analyzed. It is a base class for other formats and also holds
analysis methods which are applicable to all files.

Fixes #5
  • Loading branch information
AbdealiLoKo committed May 24, 2016
1 parent a167c16 commit 45eae38
Show file tree
Hide file tree
Showing 2 changed files with 87 additions and 0 deletions.
57 changes: 57 additions & 0 deletions file_metadata/generic_file.py
@@ -0,0 +1,57 @@
# -*- coding: utf-8 -*-

from __future__ import (division, absolute_import, unicode_literals,
print_function)

from file_metadata.utilities import PropertyCached


class GenericFile(object):
    """
    Object corresponding to a single file. An abstract class that can be
    used for any mimetype/media-type (depending on the file itself).
    Provides the basic scaffolding to analyze data common to all types of
    files.

    Any class that inherits from this abstract class would probably want to
    set the ``mimetypes`` and override the ``analyze()`` or write their
    own ``analyze_*()`` methods depending on the file type and analysis
    routines that should be run.

    :ivar mimetypes: Collection of mimetypes (strings) applicable to this
                     class based on the official standard by IANA. Empty
                     by default.
    :ivar filename:  The file name this object was constructed with.
    """
    # NOTE: inherits from ``object`` explicitly so that this is a new-style
    # class on Python 2 as well as Python 3.
    mimetypes = ()

    def __init__(self, fname):
        """
        :param fname: Name (path) of the file this object represents. It is
                      stored as-is; nothing is opened or validated here.
        """
        self.filename = fname

    def analyze(self, prefix='analyze_', suffix='', methods=None):
        """
        Analyze the given file and create metadata information appropriately.

        Search and use all methods that have a name starting with
        ``prefix`` and ending with ``suffix``, and merge the dictionaries
        they return using ``.update()`` to get the cumulative set of
        metadata. Methods are called in the order they are listed, so on
        duplicate keys the value from the later method wins.

        :param prefix:  Use only methods that have this prefix.
        :param suffix:  Use only methods that have this suffix.
        :param methods: A list of method names to choose from. If not given
                        (``None``), a sorted list of all attribute names of
                        the object is used. An empty list selects nothing.
        :return:        A dict containing the cumulative metadata.
        """
        # Compare against None rather than relying on truthiness: an
        # explicitly passed empty list must mean "run no methods", not
        # "run every method".
        if methods is None:
            methods = sorted(dir(self))

        data = {}
        for method in methods:
            if method.startswith(prefix) and method.endswith(suffix):
                # Each selected method is called without arguments and is
                # expected to return a dict.
                data.update(getattr(self, method)())
        return data

    @PropertyCached
    def metadata(self):
        """
        A python dictionary of all the metadata identified by analyzing
        the given file, as produced by ``analyze()`` with its default
        arguments.

        NOTE(review): caching/read-only behaviour comes from
        ``PropertyCached`` in ``file_metadata.utilities`` - confirm there.

        :return: All the metadata found about the given file.
        """
        return self.analyze()
30 changes: 30 additions & 0 deletions tests/generic_file_test.py
@@ -0,0 +1,30 @@
# -*- coding: utf-8 -*-

from __future__ import (division, absolute_import, unicode_literals,
print_function)

from file_metadata.generic_file import GenericFile
from tests import fetch_file, unittest


class DerivedFile(GenericFile):
    """
    Small ``GenericFile`` subclass used as a fixture by the tests. It
    defines two trivial analysis methods and narrows ``analyze()`` so that
    only those are picked up.
    """

    def analyze_test1(self):
        # Fixed single-entry result for the first test analysis routine.
        return {"test1": "test1"}

    def analyze_test2(self):
        # Fixed single-entry result for the second test analysis routine.
        return {"test2": "test2"}

    def analyze(self):
        # Delegate to the base implementation, but restrict the method
        # search to names beginning with ``analyze_test``.
        collected = GenericFile.analyze(self, prefix='analyze_test')
        return collected


class DerivedFileTest(unittest.TestCase):
    """
    Checks that ``metadata`` on a ``DerivedFile`` is the merged output of
    its ``analyze_test*`` methods.
    """

    def setUp(self):
        # ``fetch_file`` comes from the ``tests`` package; presumably it
        # resolves the named sample file to a local path - confirm there.
        sample = fetch_file('ascii.txt')
        self.uut = DerivedFile(sample)

    def test_metadata(self):
        # Both analysis methods contribute one key each to the result.
        expected = {'test1': 'test1', 'test2': 'test2'}
        self.assertEqual(self.uut.metadata, expected)

0 comments on commit 45eae38

Please sign in to comment.