Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
12 changed files
with
377 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
# Byte-compiled / optimized / DLL files | ||
__pycache__/ | ||
*.py[cod] | ||
|
||
# C extensions | ||
*.so | ||
|
||
# Distribution / packaging | ||
.Python | ||
env/ | ||
build/ | ||
develop-eggs/ | ||
dist/ | ||
downloads/ | ||
eggs/ | ||
.eggs/ | ||
lib/ | ||
lib64/ | ||
parts/ | ||
sdist/ | ||
var/ | ||
*.egg-info/ | ||
.installed.cfg | ||
*.egg | ||
|
||
# PyInstaller | ||
# Usually these files are written by a python script from a template | ||
# before PyInstaller builds the exe, so as to inject date/other infos into it. | ||
*.manifest | ||
*.spec | ||
|
||
# Installer logs | ||
pip-log.txt | ||
pip-delete-this-directory.txt | ||
|
||
# Unit test / coverage reports | ||
htmlcov/ | ||
.tox/ | ||
.coverage | ||
.coverage.* | ||
.cache | ||
nosetests.xml | ||
coverage.xml | ||
*,cover | ||
|
||
# Translations | ||
*.mo | ||
*.pot | ||
|
||
# Django stuff: | ||
*.log | ||
|
||
# Sphinx documentation | ||
docs/_build/ | ||
docs/_static | ||
docs/_templates | ||
|
||
# PyBuilder | ||
target/ | ||
|
||
# PyCharm | ||
.idea | ||
|
||
# Python decouple settings file | ||
settings.ini | ||
|
||
log/ | ||
py27/ | ||
py35/ | ||
flake8/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Marc Galofré |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
========== | ||
Change log | ||
========== | ||
|
||
0.1 (2017-06-21) | ||
---------------- | ||
|
||
* Initial release. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
Copyright (c) 2018 - APSL | ||
|
||
Permission is hereby granted, free of charge, to any person obtaining | ||
a copy of this software and associated documentation files (the | ||
"Software"), to deal in the Software without restriction, including | ||
without limitation the rights to use, copy, modify, merge, publish, | ||
distribute, sublicense, and/or sell copies of the Software, and to | ||
permit persons to whom the Software is furnished to do so, subject to | ||
the following conditions: | ||
|
||
The above copyright notice and this permission notice shall be | ||
included in all copies or substantial portions of the Software. | ||
|
||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE | ||
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION | ||
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION | ||
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
include AUTHORS | ||
include LICENSE | ||
include CHANGELOG.rst | ||
include README.rst |
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
# Package metadata. setup.py extracts these values with a regular expression
# (it does not import the package), so keep each one as a single-line
# `__name__ = 'value'` assignment at the start of a line.
__author__ = 'Marc Galofré'
__email__ = 'mgalofre@apsl.net'
__version__ = '0.1'
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
# -*- coding: utf-8 -*- | ||
import logging | ||
|
||
from six import string_types | ||
from django.conf import settings | ||
from bulk_update.helper import bulk_update | ||
from faker import Faker | ||
from multiprocessing import Pool | ||
|
||
# Name of the per-app module the management command looks for when
# autodiscovering anonymizers.
ANONYMIZER_MODULE_NAME = 'anonymizers'
# Number of instances per chunk handed to a worker in parallel mode; also the
# bulk_update batch size used when the caller does not pass one.
DEFAULT_CHUNK_SIZE = 50

logger = logging.getLogger(__name__)

# Module-level Faker instance built from the project's LANGUAGE_CODE. Falls
# back to Faker's default locale when construction raises AttributeError
# (presumably an unsupported locale or a missing setting -- TODO confirm).
try:
    faker = Faker(settings.LANGUAGE_CODE)
except AttributeError:
    faker = Faker()
|
||
|
||
class BaseAnonymizer:
    """Base class for anonymizers.

    Subclasses must define two class attributes:

    * ``model``: the model class whose instances are anonymized.
    * ``attributes``: an iterable of ``(field_name, replacer)`` pairs, where
      ``replacer`` is either a callable returning the new value or a string
      that is assigned as-is.
    """

    def __init__(self):
        """Terminate the process if ``model`` or ``attributes`` is missing."""
        if not (hasattr(self, 'model') and hasattr(self, 'attributes')):
            # Report at ERROR level: an INFO record is usually filtered out,
            # which would make the process exit without any explanation.
            logger.error('ERROR: Your anonymizer is missing the model or attributes definition!')
            # SystemExit is what the ``exit`` builtin raises, but it does not
            # depend on the ``site`` module being loaded.
            raise SystemExit(1)

    def get_query_set(self):
        """
        You can override this in your Anonymizer.
        :return: QuerySet
        """
        return self.model.objects.all()

    def get_allowed_value(self, replacer, model_instance, field_name):
        """Call ``replacer`` and fit its result into the target field.

        :param replacer: zero-argument callable producing the new value.
        :param model_instance: instance whose ``_meta`` describes the field.
        :param field_name: name of the field that will receive the value.
        :return: the generated value, cut to the field's ``max_length`` when
            the field defines one. ``None`` is returned untouched.
        """
        retval = replacer()
        max_length = model_instance._meta.get_field(field_name).max_length
        # ``None`` (e.g. for a nullable field) cannot be sliced; pass it on.
        if max_length and retval is not None:
            retval = retval[:max_length]
        return retval

    def _process_instances(self, instances):
        """Apply every configured replacer to every instance, in memory.

        :param instances: iterable of model instances; they are mutated.
        :return: tuple ``(instances, instance_count, replaced_field_count)``.
        :raises TypeError: if a replacer is neither a callable nor a string.
        """
        count_fields = 0
        count_instances = 0

        for model_instance in instances:
            for field_name, replacer in self.attributes:
                if callable(replacer):
                    replaced_value = self.get_allowed_value(replacer, model_instance, field_name)
                elif isinstance(replacer, string_types):
                    replaced_value = replacer
                else:
                    raise TypeError('Replacers need to be callables or Strings!')
                setattr(model_instance, field_name, replaced_value)
                count_fields += 1
            count_instances += 1
        return instances, count_instances, count_fields

    def _run_parallel(self, instances, parallel_processes):
        """Process ``instances`` in chunks using a pool of worker processes.

        :param instances: sliceable, sized collection of model instances.
        :param parallel_processes: number of worker processes to start.
        :return: tuple ``(processed_instances, instance_count,
            replaced_field_count)`` aggregated over all chunks.
        """
        count_instances = 0
        count_fields = 0
        instances_processed = []
        chunks = [instances[i:i + DEFAULT_CHUNK_SIZE] for i in range(0, len(instances), DEFAULT_CHUNK_SIZE)]
        pool = Pool(processes=parallel_processes)
        try:
            futures = [pool.apply_async(self._process_instances, (objs,)) for objs in chunks]
            for future in futures:
                # The instances modified by the worker come back as the result.
                instances_parallel, count_instances_parallel, count_fields_parallel = future.get()
                instances_processed += instances_parallel
                count_instances += count_instances_parallel
                count_fields += count_fields_parallel
        finally:
            # Always release the workers, even if a chunk raised.
            pool.close()
            pool.join()
        return instances_processed, count_instances, count_fields

    def run(self, batch_size=None, parallel_processes=0):
        """Anonymize the query set and write the changes with ``bulk_update``.

        :param batch_size: batch size passed to ``bulk_update``; defaults to
            ``DEFAULT_CHUNK_SIZE`` when ``None``.
        :param parallel_processes: worker processes to use; values of 1 or
            less run everything in the current process.
        :return: tuple ``(attribute_count, instance_count,
            replaced_field_count)``.
        """
        instances = self.get_query_set()
        batch_size = DEFAULT_CHUNK_SIZE if batch_size is None else int(batch_size)

        if parallel_processes > 1:
            instances_processed, count_instances, count_fields = self._run_parallel(instances, parallel_processes)
        else:
            instances_processed, count_instances, count_fields = self._process_instances(instances)

        bulk_update(instances_processed, update_fields=[attrs[0] for attrs in self.attributes],
                    batch_size=batch_size)

        return len(self.attributes), count_instances, count_fields
Empty file.
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,110 @@ | ||
# -*- coding: utf-8 -*- | ||
from importlib import import_module | ||
from importlib.util import find_spec | ||
import inspect | ||
import sys | ||
|
||
from django.conf import settings | ||
from django.core.management import BaseCommand | ||
|
||
from hattori.base import ANONYMIZER_MODULE_NAME, BaseAnonymizer | ||
|
||
|
||
class Command(BaseCommand):
    """Management command that discovers anonymizers and runs them."""

    help = 'This tool replaces real (user-)data of model instances in your database with mock data.'
    modules = None  # List of anonymizers modules. They can be placed in every app

    def add_arguments(self, parser):
        """Register the ``--app``, ``--model``, ``--batch-size`` and ``--parallel`` options."""
        parser.add_argument(
            '-a',
            '--app',
            help='Only anonymize the given app',
            dest="app",
            metavar="APP"
        )
        parser.add_argument(
            "-m",
            "--model",
            "--models",
            dest="models",
            help="Models to anonymize. Separate multiples by comma.",
            metavar="MODEL"
        )
        parser.add_argument(
            "-b",
            "--batch-size",
            dest="batch_size",
            help="batch size used in the bulk_update of the instances. Depends on the DB machine. Use 500 in vagrant.",
            metavar="BATCH_SIZE",
            type=int
        )
        parser.add_argument(
            "-p",
            "--parallel",
            dest="parallel",
            help="Number of parallel processes for parallel execution",
            metavar="PARALLEL",
            type=int,
            default=0
        )

    def handle(self, *args, **options):
        """Run every matching anonymizer and report the replacement counts."""
        models = None
        if options['models'] is not None:
            models = [m.strip() for m in options['models'].split(',')]

        # The anonymizers only fan out to worker processes for values above 1,
        # so only announce parallel mode when it is actually used.
        if options['parallel'] > 1:
            self.stdout.write('Running in parallel mode with {} concurrent processes'.format(options['parallel']))
        self.stdout.write('Autodiscovering anonymizers...')

        modules = self._autodiscover_module(ANONYMIZER_MODULE_NAME, app=options['app'])
        self.stdout.write('Found anonymizers for {} apps'.format(len(modules)))
        total_replacements_count = 0
        for module in modules:
            self.stdout.write('{}:'.format(module.__package__))
            anonymizers = self._get_app_anonymizers(module, models=models)

            if not anonymizers:
                self.stdout.write('- No anonymizers or skipped by --app or --model arguments')
                continue

            for anonymizer_class_name in anonymizers:
                anonymizer = getattr(module, anonymizer_class_name)()
                self.stdout.write('- {}'.format(anonymizer.model.__name__))
                # Start the anonymizing process
                fields, instances, replacements = anonymizer.run(options['batch_size'], options['parallel'])
                self.stdout.write('-- {} fields, {} model instances, {} total replacements'.format(
                    fields,
                    instances,
                    replacements
                ))
                total_replacements_count += replacements
        self.stdout.write(self.style.SUCCESS('DONE. Replaced {} values in total'.format(total_replacements_count)))

    def _autodiscover_module(self, module_name, app=None):
        """Import ``<app>.<module_name>`` for every app that provides it.

        :param module_name: name of the submodule to look for in each app.
        :param app: restrict the search to this app; all of
            ``settings.INSTALLED_APPS`` is searched when empty.
        :return: list of the imported submodules.
        :raises SystemExit: if an app itself cannot be imported.
        """
        apps_to_search = [app] if app else settings.INSTALLED_APPS

        modules = []
        for app_name in apps_to_search:
            try:
                app_module = import_module(app_name)
            except ImportError:
                self.stdout.write(self.style.ERROR('ERROR: Can not find app ' + app_name))
                raise SystemExit(1)
            # Plain modules (no ``__path__``) cannot contain a submodule.
            if not hasattr(app_module, '__path__'):
                continue
            full_name = '%s.%s' % (app_name, module_name)
            # find_spec() signals "not found" by returning None rather than by
            # raising ImportError, and it needs the fully qualified name.
            if find_spec(full_name) is None:
                continue
            modules.append(import_module(full_name))
        return modules

    def _get_app_anonymizers(self, module, models=None):
        """Return the names of the anonymizer classes defined in ``module``.

        Only classes that list ``BaseAnonymizer`` as a direct base are
        considered.

        :param module: an imported anonymizers module.
        :param models: optional list of model class names; when given, only
            anonymizers whose ``model.__name__`` is in the list are returned.
        :return: list of class names, in ``inspect.getmembers`` order.
        """
        anonymizers = []
        for name, cls in inspect.getmembers(module, inspect.isclass):
            if BaseAnonymizer not in cls.__bases__:
                continue
            if models and cls.model.__name__ not in models:
                continue
            anonymizers.append(name)
        return anonymizers
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
# -*- encoding: utf-8 -*- | ||
|
||
import os | ||
import re | ||
import codecs | ||
|
||
try: | ||
from setuptools import setup, find_packages | ||
except ImportError: | ||
from distutils.core import setup | ||
|
||
|
||
def get_version(package):
    """
    Return package version as listed in `__version__` in `__init__.py`.

    :param package: path to the package directory containing `__init__.py`.
    :return: the version string.
    :raises RuntimeError: if no `__version__` assignment is found.
    """
    init_path = os.path.abspath(os.path.join(package, '__init__.py'))
    # Use a context manager so the file handle is closed deterministically.
    with codecs.open(init_path, encoding='utf-8') as init_file:
        init_py = init_file.read()
    match = re.search("^__version__ = ['\"]([^'\"]+)['\"]", init_py, re.MULTILINE)
    if match is None:
        raise RuntimeError('Unable to find __version__ in %s' % init_path)
    return match.group(1)
|
||
|
||
def get_author(package):
    """
    Return package author as listed in `__author__` in `__init__.py`.

    :param package: path to the package directory containing `__init__.py`.
    :return: the author string.
    :raises RuntimeError: if no `__author__` assignment is found.
    """
    init_path = os.path.abspath(os.path.join(package, '__init__.py'))
    # Use a context manager so the file handle is closed deterministically.
    with codecs.open(init_path, encoding='utf-8') as init_file:
        init_py = init_file.read()
    match = re.search("^__author__ = ['\"]([^'\"]+)['\"]", init_py, re.MULTILINE)
    if match is None:
        raise RuntimeError('Unable to find __author__ in %s' % init_path)
    return match.group(1)
|
||
|
||
def get_email(package):
    """
    Return package email as listed in `__email__` in `__init__.py`.

    :param package: path to the package directory containing `__init__.py`.
    :return: the email string.
    :raises RuntimeError: if no `__email__` assignment is found.
    """
    init_path = os.path.abspath(os.path.join(package, '__init__.py'))
    # Use a context manager so the file handle is closed deterministically.
    with codecs.open(init_path, encoding='utf-8') as init_file:
        init_py = init_file.read()
    match = re.search("^__email__ = ['\"]([^'\"]+)['\"]", init_py, re.MULTILINE)
    if match is None:
        raise RuntimeError('Unable to find __email__ in %s' % init_path)
    return match.group(1)
|
||
|
||
def get_long_description():
    """
    Return the long description from the README.rst file.

    The file is looked up next to this script and decoded as UTF-8.

    :return: the full text of README.rst.
    """
    readme_path = os.path.join(os.path.dirname(__file__), 'README.rst')
    # Use a context manager so the file handle is closed deterministically.
    with codecs.open(readme_path, encoding='utf-8') as readme:
        return readme.read()
|
||
|
||
# Collect the distribution metadata first, then hand it to setup() in one go.
# Version, author and email are read from hattori/__init__.py so they are
# maintained in a single place.
_SETUP_KWARGS = {
    'name': 'django-hattori',
    'version': get_version('hattori'),
    'author': get_author('hattori'),
    'author_email': get_email('hattori'),
    'url': 'https://github.com/APSL/django-hattori',
    'packages': find_packages(exclude=['tests*']),
    'description': 'Command to anonymize sensitive data.',
    'long_description': get_long_description(),
    'install_requires': [
        'Django>=1.8',
        'django-bulk-update>=2.2.0',
        'Faker>=0.8.13',
        'six',
    ],
    'classifiers': [
        'Environment :: Web Environment',
        'Intended Audience :: Developers',
        'Programming Language :: Python',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.5',
        'Operating System :: OS Independent',
        'Topic :: Software Development'
    ],
    'include_package_data': True,
    'zip_safe': False,
}

setup(**_SETUP_KWARGS)