Skip to content
Permalink
Browse files

initial commit

  • Loading branch information...
tsurowiec committed Oct 9, 2018
0 parents commit a0271a5ffb4b610cbb40eaf2dfcafa63422cb742
Showing with 10,248 additions and 0 deletions.
  1. +19 −0 .env.dist
  2. +11 −0 .gitignore
  3. +47 −0 .travis.yml
  4. +20 −0 Dockerfile
  5. +53 −0 Makefile
  6. +174 −0 README.md
  7. +20 −0 behat.yml.dist
  8. +39 −0 bin/console
  9. +71 −0 composer.json
  10. +4,627 −0 composer.lock
  11. +10 −0 config/bundles.php
  12. +3 −0 config/packages/dev/routing.yaml
  13. +17 −0 config/packages/doctrine.yaml
  14. +5 −0 config/packages/doctrine_migrations.yaml
  15. +30 −0 config/packages/framework.yaml
  16. +15 −0 config/packages/old_sound_rabbit_mq.yaml
  17. +31 −0 config/packages/prod/doctrine.yaml
  18. +3 −0 config/packages/routing.yaml
  19. +17 −0 config/packages/snc_redis.yaml
  20. +4 −0 config/packages/test/framework.yaml
  21. +15 −0 config/packages/test/old_sound_rabbit_mq.yaml
  22. +3 −0 config/packages/test/routing.yaml
  23. +6 −0 config/packages/translation.yaml
  24. +3 −0 config/routes.yaml
  25. +91 −0 config/services.yaml
  26. +8 −0 config/services_test.yaml
  27. +2 −0 docker-compose.override.yml.dist
  28. +20 −0 docker-compose.test-mysql.yml
  29. +18 −0 docker-compose.test-postgres.yml
  30. +51 −0 docker-compose.yml
  31. +30 −0 features/Init/init.feature
  32. +111 −0 features/Run/Chunk/queueChunks.feature
  33. +39 −0 features/Run/Mask/faker.feature
  34. +39 −0 features/Run/Mask/hashify.feature
  35. +39 −0 features/Run/Mask/starify.feature
  36. +64 −0 features/Run/Refine/cascadeRefineOnlyToForeignSide.feature
  37. +54 −0 features/Run/Refine/notNullReference.feature
  38. +54 −0 features/Run/Refine/nullableReference.feature
  39. +56 −0 features/Run/Refine/refineExcluded.feature
  40. +41 −0 features/Run/Refine/selfReferenceNotNull.feature
  41. +42 −0 features/Run/Refine/selfReferenceNullable.feature
  42. +47 −0 features/Run/Subset/head.fails.feature
  43. +71 −0 features/Run/Subset/head.feature
  44. +71 −0 features/Run/Subset/range.feature
  45. +72 −0 features/Run/Subset/tail.feature
  46. +96 −0 features/Run/finishCommand.feature
  47. +54 −0 features/bootstrap/ChunkCounterContext.php
  48. +130 −0 features/bootstrap/CommandContext.php
  49. +41 −0 features/bootstrap/ConfigFileContext.php
  50. +241 −0 features/bootstrap/DatabaseContext.php
  51. +11 −0 features/bootstrap/bootstrap.php
  52. +4 −0 phpspec.yml
  53. +39 −0 public/index.php
  54. +25 −0 spec/Config/ColumnConfigFactorySpec.php
  55. +45 −0 spec/Config/ConfigFactorySpec.php
  56. +50 −0 spec/Config/Serializer/ConfigDenormalizerSpec.php
  57. +60 −0 spec/Config/Serializer/TableConfigDenormalizerSpec.php
  58. +30 −0 spec/Config/StrategyExtractorSpec.php
  59. +45 −0 spec/Config/TableConfigFactorySpec.php
  60. +41 −0 spec/Fogger/Mask/HashifyMaskSpec.php
  61. +35 −0 spec/Fogger/Mask/StarifyMaskSpec.php
  62. +61 −0 spec/Fogger/Serializer/TableDenormalizerSpec.php
  63. +101 −0 src/Command/FinishCommand.php
  64. +57 −0 src/Command/InitCommand.php
  65. +140 −0 src/Command/RunCommand.php
  66. +21 −0 src/Config/ColumnConfigFactory.php
  67. +31 −0 src/Config/ConfigFactory.php
  68. +45 −0 src/Config/ConfigLoader.php
  69. +28 −0 src/Config/Model/ColumnConfig.php
  70. +38 −0 src/Config/Model/Config.php
  71. +43 −0 src/Config/Model/TableConfig.php
  72. +32 −0 src/Config/Serializer/ConfigDenormalizer.php
  73. +33 −0 src/Config/Serializer/TableConfigDenormalizer.php
  74. +20 −0 src/Config/StrategyExtractor.php
  75. +29 −0 src/Config/TableConfigFactory.php
  76. +26 −0 src/DependencyInjection/Compiler/FoggerChunkWriterPass.php
  77. +24 −0 src/DependencyInjection/Compiler/FoggerMaskStrategyPass.php
  78. +24 −0 src/DependencyInjection/Compiler/FoggerSubsetStrategyPass.php
  79. +44 −0 src/Fogger/Data/ChunkConsumer.php
  80. +44 −0 src/Fogger/Data/ChunkCounter.php
  81. +55 −0 src/Fogger/Data/ChunkDivider.php
  82. +37 −0 src/Fogger/Data/ChunkError.php
  83. +48 −0 src/Fogger/Data/ChunkMessage.php
  84. +67 −0 src/Fogger/Data/ChunkProducer.php
  85. +35 −0 src/Fogger/Data/ChunkReader.php
  86. +39 −0 src/Fogger/Data/DataCopier.php
  87. +62 −0 src/Fogger/Data/Masker.php
  88. +32 −0 src/Fogger/Data/TableQuery.php
  89. +10 −0 src/Fogger/Data/Writer/ChunkWriterInterface.php
  90. +30 −0 src/Fogger/Data/Writer/ChunkWriterProvider.php
  91. +7 −0 src/Fogger/Data/Writer/Exception/ChunkWriterNotFound.php
  92. +76 −0 src/Fogger/Data/Writer/GenericInsertWriter.php
  93. +76 −0 src/Fogger/Data/Writer/MysqlInfileWriter.php
  94. +49 −0 src/Fogger/Mask/AbstractCachedMask.php
  95. +17 −0 src/Fogger/Mask/AbstractMask.php
  96. +9 −0 src/Fogger/Mask/Exception/UnknownMaskException.php
  97. +40 −0 src/Fogger/Mask/FakerMask.php
  98. +20 −0 src/Fogger/Mask/HashifyMask.php
  99. +10 −0 src/Fogger/Mask/MaskStrategyInterface.php
  100. +32 −0 src/Fogger/Mask/MaskStrategyProvider.php
  101. +20 −0 src/Fogger/Mask/StarifyMask.php
  102. +47 −0 src/Fogger/Recipe/MaskReplicator.php
  103. +35 −0 src/Fogger/Recipe/Recipe.php
  104. +55 −0 src/Fogger/Recipe/RecipeFactory.php
  105. +69 −0 src/Fogger/Recipe/RecipeTableFactory.php
  106. +31 −0 src/Fogger/Recipe/StrategyDefinition.php
  107. +67 −0 src/Fogger/Recipe/Table.php
  108. +58 −0 src/Fogger/Refine/RefineExecutor.php
  109. +84 −0 src/Fogger/Refine/Refiner.php
  110. +47 −0 src/Fogger/Schema/ForeignKeysExtractor.php
  111. +35 −0 src/Fogger/Schema/RelationGroups/GrouppedRelationColumns.php
  112. +37 −0 src/Fogger/Schema/RelationGroups/RelationColumn.php
  113. +68 −0 src/Fogger/Schema/RelationGroups/RelationsGroups.php
  114. +29 −0 src/Fogger/Schema/RelationGroupsFactory.php
  115. +78 −0 src/Fogger/Schema/SchemaManipulator.php
  116. +37 −0 src/Fogger/Serializer/TableDenormalizer.php
  117. +33 −0 src/Fogger/Subset/AbstractSubset.php
  118. +57 −0 src/Fogger/Subset/AbstratctHeadOrTailSubset.php
  119. +7 −0 src/Fogger/Subset/Exception/RequiredOptionMissingException.php
  120. +7 −0 src/Fogger/Subset/Exception/SortByColumnRequired.php
  121. +7 −0 src/Fogger/Subset/Exception/UnknownSubsetStrategyException.php
  122. +31 −0 src/Fogger/Subset/HeadSubset.php
  123. +20 −0 src/Fogger/Subset/NoSubset.php
  124. +41 −0 src/Fogger/Subset/RangeSubset.php
  125. +13 −0 src/Fogger/Subset/SubsetStrategyInterface.php
  126. +31 −0 src/Fogger/Subset/SubsetStrategyProvider.php
  127. +30 −0 src/Fogger/Subset/TailSubset.php
  128. +82 −0 src/Kernel.php
  129. +290 −0 symfony.lock
  130. 0 translations/.gitignore
@@ -0,0 +1,19 @@
###> symfony/framework-bundle ###
APP_ENV=prod
APP_SECRET=1b90fda83888e1852b735fdf9d37cf40
###< symfony/framework-bundle ###

###> doctrine/doctrine-bundle ###
# Please provide urls for your source and target databases
# SOURCE_DATABASE_URL=mysql://user:pass@source/source
# TARGET_DATABASE_URL=mysql://user:pass@target/target
###< doctrine/doctrine-bundle ###

###> php-amqplib/rabbitmq-bundle ###
# RABBITMQ_URL=amqp://user:pass@rabbit:5672
###< php-amqplib/rabbitmq-bundle ###

###> snc/redis-bundle ###
# passwords that contain special characters (@, %, :, +) must be urlencoded
# REDIS_URL=redis://redis
###< snc/redis-bundle ###
@@ -0,0 +1,11 @@
###> symfony/framework-bundle ###
/.env
/public/bundles/
/var/
/vendor/
###< symfony/framework-bundle ###

###> behat/symfony2-extension ###
behat.yml
###< behat/symfony2-extension ###
docker-compose.override.yml
@@ -0,0 +1,47 @@
language: php

env:
global:
- APP_ENV=test

matrix:
include:
- php: '7.2'
services:
- postgresql
- redis-server
- rabbitmq
before_script:
- psql -c 'create database suorce;' -U postgres
- psql -c 'create database target;' -U postgres
env:
- SOURCE_DATABASE_URL=pgsql://postgres@localhost/suorce
- TARGET_DATABASE_URL=pgsql://postgres@localhost/target
- RABBITMQ_URL=amqp://guest:guest@localhost
- REDIS_URL=redis://localhost
- php: '7.2'
services:
- mysql
- redis-server
- rabbitmq
before_script:
- mysql -e 'CREATE DATABASE suorce;'
- mysql -e 'CREATE DATABASE target;'
env:
- SOURCE_DATABASE_URL=mysql://root@localhost/suorce
- TARGET_DATABASE_URL=mysql://root@localhost/target
- RABBITMQ_URL=amqp://guest:guest@localhost
- REDIS_URL=redis://localhost


before_install:
- echo "memory_limit=-1" >> ~/.phpenv/versions/$(phpenv version-name)/etc/conf.d/travis.ini

install:
- composer install --no-progress --no-suggest --ansi;

script:
- bin/console cache:clear
- vendor/bin/phpspec run
- bin/console cache:clear
- vendor/bin/behat --format=progress;
@@ -0,0 +1,20 @@
# Image for the fogger console application, built on the official PHP 7.2.3 image.
FROM php:7.2.3

# Essentials
# Refresh the apt index, then install the libraries listed in buildDeps together with git, nano and wget.
# NOTE(review): buildDeps is only used within this RUN instruction; nothing is purged afterwards.
RUN apt-get update && buildDeps="libpq-dev libzip-dev libfreetype6-dev libjpeg62-turbo-dev libpng-dev openssh-server libxrender1 libfontconfig1 libxext6" && apt-get install -y $buildDeps git nano wget --no-install-recommends
# Configure gd with freetype/jpeg/png support, then compile and install the listed PHP extensions
# (MySQL and PostgreSQL PDO drivers, zip, bcmath, gd).
RUN docker-php-ext-configure gd --with-freetype-dir=/usr/include/ --with-jpeg-dir=/usr/include/ --with-png-dir=/usr/include/ && \
docker-php-ext-install pdo pdo_mysql pdo_pgsql zip bcmath gd

# Composer
# Download composer.phar and install it as an executable at /usr/bin/composer.
RUN wget https://getcomposer.org/composer.phar && mv composer.phar /usr/bin/composer && chmod +x /usr/bin/composer


# Create a world-writable /fogger directory.
# NOTE(review): presumably the mount point for the config file volume (the README example maps `.:/fogger`) - confirm.
RUN mkdir /fogger && chmod 777 /fogger
# The application source is copied to /app, which is also the working directory for the steps below.
COPY . /app
WORKDIR /app

# NOTE(review): the active install below includes dev dependencies; the --no-dev variant is left commented out.
#RUN composer install --no-dev
RUN composer install

# Every container run executes bin/console; with no arguments it falls back to --help.
ENTRYPOINT ["php", "bin/console"]
CMD ["--help"]
@@ -0,0 +1,53 @@
# Compose command lines. The base command layers docker-compose.override.yml on top of
# docker-compose.yml; the two test variants add a database-specific compose file on top of that.
DOCKER_COMPOSE = docker-compose -f docker-compose.yml -f docker-compose.override.yml
DOCKER_COMPOSE_TEST_MYSQL = ${DOCKER_COMPOSE} -f docker-compose.test-mysql.yml
DOCKER_COMPOSE_TEST_POSTGRES = ${DOCKER_COMPOSE} -f docker-compose.test-postgres.yml

# --- docker
# pull: fetch the images referenced by the compose files.
.PHONY: pull
pull:
${DOCKER_COMPOSE} pull

# start: bring the stack up in the background, then wait a fixed 30 seconds for the services to start.
.PHONY: start
start:
${DOCKER_COMPOSE} up -d
echo "waiting for services to start..."
sleep 30

# stop: stop the running containers (they are not removed).
.PHONY: stop
stop:
${DOCKER_COMPOSE} stop

# --- test
# test: run the full suite against MySQL first, then against PostgreSQL.
# $(MAKE) is used instead of a literal `make` so the sub-makes run with the same make
# binary and inherit command-line flags (-n, -k, -j jobserver, variable overrides).
.PHONY: test
test:
	$(MAKE) test-mysql
	$(MAKE) test-postgres

# test-mysql: start the MySQL test stack with no worker containers (--scale=worker=0),
# wait a fixed 16 seconds (presumably for the services to become ready), run behat and then
# phpspec inside the app container, and finally bring the regular stack back up while
# removing the containers that belong only to the test compose file.
.PHONY: test-mysql
test-mysql:
${DOCKER_COMPOSE_TEST_MYSQL} up -d --scale=worker=0
sleep 16
${DOCKER_COMPOSE_TEST_MYSQL} run --rm --entrypoint="php" app vendor/bin/behat --format=progress
${DOCKER_COMPOSE_TEST_MYSQL} run --rm --entrypoint="php" app vendor/bin/phpspec run
${DOCKER_COMPOSE} up -d --remove-orphans

# test-postgres: same sequence as test-mysql, using the PostgreSQL test compose file.
.PHONY: test-postgres
test-postgres:
${DOCKER_COMPOSE_TEST_POSTGRES} up -d --scale=worker=0
sleep 16
${DOCKER_COMPOSE_TEST_POSTGRES} run --rm --entrypoint="php" app vendor/bin/behat --format=progress
${DOCKER_COMPOSE_TEST_POSTGRES} run --rm --entrypoint="php" app vendor/bin/phpspec run
${DOCKER_COMPOSE} up -d --remove-orphans

# --- fogger
# init / run / finish: execute the matching fogger:* console command in a throwaway
# (--rm) app container.
.PHONY: init
init:
${DOCKER_COMPOSE} run --rm app fogger:init

.PHONY: run
run:
${DOCKER_COMPOSE} run --rm app fogger:run

.PHONY: finish
finish:
${DOCKER_COMPOSE} run --rm app fogger:finish
174 README.md
@@ -0,0 +1,174 @@
# *Fogger* - GDPR friendly database masker

## Purpose

*Fogger* is a tool that solves the problem of data privacy. When developers need to work with production
data but are obliged to comply with GDPR regulations they need a way to get the database copy with all the
sensitive data masked. And while you can always write your own, custom solution to the problem - **you
don't have to anymore** - with *fogger* you are covered.
Apart from masking data you can also subset or even exclude some tables. Don't worry about the relations with
foreign keys, *fogger* will refine the database so everything is clean and shiny.
You can configure various masking and subsetting strategies, and when what *fogger* has to offer is not enough - you
can easily extend it with your own strategies.

## How to use the docker image

*Fogger* requires docker environment, redis and rabbitMq services and two databases: source and target. You can
set up this stack using for example this docker-compose file:
```
version: '2.0'
services:
fogger:
image: tshio/fogger:latest
volumes:
- .:/fogger
environment:
SOURCE_DATABASE_URL: mysql://user:pass@source:3306/source
TARGET_DATABASE_URL: mysql://user:pass@target:3306/target
RABBITMQ_URL: amqp://user:pass@rabbit:5672
REDIS_URL: redis://redis
worker:
image: fogger-app:latest
environment:
SOURCE_DATABASE_URL: mysql://user:pass@source:3306/source
TARGET_DATABASE_URL: mysql://user:pass@target:3306/target
RABBITMQ_URL: amqp://user:pass@rabbit:5672
REDIS_URL: redis://redis
restart: always
command: rabbit:consumer --messages=200 fogger_data_chunks
redis:
image: redis:4
rabbit:
image: rabbitmq:3
environment:
RABBITMQ_DEFAULT_USER: user
RABBITMQ_DEFAULT_PASS: pass
source:
volumes:
- ./dump.sql:/docker-entrypoint-initdb.d/dump.sql
environment:
MYSQL_DATABASE: source
MYSQL_PASSWORD: pass
MYSQL_ROOT_PASSWORD: pass
MYSQL_USER: user
image: mysql:5.7
target:
environment:
MYSQL_DATABASE: target
MYSQL_PASSWORD: pass
MYSQL_ROOT_PASSWORD: pass
MYSQL_USER: user
image: mysql:5.7
```
Note:
- we are mapping a volume to fogger's and worker's `/fogger` directory - so the config file is accessible both in
the container and in our host filesystem
- we are importing database content from `dump.sql`

Of course you can modify and adjust the settings to your needs - for example - instead of importing database from
dump file you can pass the existing database url to `fogger` and `worker` containers in the env variables.

Now we can spin up the set-up by `docker-compose up -d`. If the database is huge and you want to speed up the process
you can spawn additional workers executing `docker-compose up -d --scale=worker=4` instead. Give it a few seconds for the
services to spin up, then you can start with *Fogger*:

*Fogger* gives you three CLI commands:

* `docker-compose run --rm fogger fogger:init` will connect to your source database and prepare a boilerplate
configuration file with the information on tables and columns in your database. This configuration file is a place
where you define which columns should be masked (and how) and which tables should be subsetted.
See [example config file](#example-config-file).

* `docker-compose run --rm fogger fogger:run` is the core command that will orchestrate the copying, masking and
subsetting of data. The actual copying will be done by background worker that can scale horizontally. Before `run`
is executed make sure that the config file has been modified to your needs. Available subset and mask strategies are
described below.

* `docker-compose run --rm fogger fogger:finish` will recreate indexes, refine database so that all the foreign key
constraints are still valid, and then recreate them as well. This command runs automatically after run so you
need to execute it only when you have stopped the `run` command with `ctrl-c`.

* it's done - the masked and subsetted data are in a target database. You can do whatever you please with it. For
example: `docker-compose exec target /usr/bin/mysqldump -u user --password=pass target > target.sql` will save the
dump of masked database in your filesystem.

### Example config file

```
tables:
posts:
columns:
title: { maskStrategy: starify, options: { length: 12 } }
body: { maskStrategy: faker, options: { method: "sentences" } }
subsetStrategy: tail
subsetOptions: { length: 1000 }
comments:
columns:
comment: { maskStrategy: faker, options: { method: "sentences" } }
users:
columns:
email: { maskStrategy: faker, options: { method: "safeEmail" } }
excludes:
- logs
```
This is an example of config file. The boilerplate based on your database schema will be generated for you by
`fogger:init`, all you have to do is fill in the mask strategies on the columns that you want masked and subset
strategies on the tables for which you only want a fraction of the rows.

For the clarity and readability of the config files, all the tables that will not be changed
can be omitted. They will be copied as they are. Similarly you can omit columns that are not to be masked.
Tables from the `excludes` section will exist in the target database, but will be empty.

### List of available strategies

#### Masking data

* hashify - will save the MD5 hash instead of data - you can pass optional argument: `template`

`email: { maskStrategy: "hashify", options: { template: "%s@example.com" } }`

* starify - will save the 10 stars instead of data - you can pass optional argument: `length` to override default 10

`email: { maskStrategy: "starify", options: { } }`

* faker - will use a marvelous [faker](https://github.com/fzaninotto/Faker) library. Pass the `method` of faker that
you want to use here as an option.

`email: { maskStrategy: "faker", options: { method: "safeEmail" } }`
`date: { maskStrategy: "faker", options: { method: "date", parameters: ["Y::m::d", "2017-12-31 23:59:59"] } }`

#### Subsetting data

* range - only copy those rows, where `column` is between `min` and `max`
```
subsetStrategy: range
subsetOptions: { column: "createdAt", min: "2018-01-01 00:00", max: "2018-01-31 23:59:59" }
```

* head and tail - only copy `length` first / last rows
```
subsetStrategy: head
subsetOptions: { length: 1000 }
```
or
```
subsetStrategy: tail
subsetOptions: { length: 1000 }
```

### Under the hood

If you are interested what really happens:

* source database schema without indices and foreign keys is copied to target
* data is divided into chunks (this includes query modification for subsetting). Chunks are processed by
background workers (using RabbitMQ)
* during copying sensitive data is substituted for masked version - in order to keep the substituted values
consistent, redis is used as a cache
* when all data is copied, *fogger* will recreate indices
* refining cleans up database removing (or setting to null) relations that point to excluded or subsetted table rows
* the last step is to recreate foreign keys

## Contributing

Feel free to contribute to this project! Just fork the code, make any updates and let us know!
@@ -0,0 +1,20 @@
default:
suites:
default:
contexts:
- ConfigFileContext:
- CommandContext:
kernel: '@kernel'
configFactory: '@App\Config\ConfigFactory'
configLoader: '@App\Config\ConfigLoader'
- DatabaseContext:
source: '@doctrine.dbal.source_connection'
target: '@doctrine.dbal.target_connection'
- ChunkCounterContext:
chunkCounter: '@App\Fogger\Data\ChunkCounter'

extensions:
Behat\Symfony2Extension:
kernel:
bootstrap: features/bootstrap/bootstrap.php
class: App\Kernel
@@ -0,0 +1,39 @@
#!/usr/bin/env php
<?php
// Console entry point: resolves the environment and debug flag, boots the application
// Kernel and hands the command line to the Symfony console Application.
use App\Kernel;
use Symfony\Bundle\FrameworkBundle\Console\Application;
use Symfony\Component\Console\Input\ArgvInput;
use Symfony\Component\Debug\Debug;
use Symfony\Component\Dotenv\Dotenv;
// Remove PHP's execution time limit for console runs.
set_time_limit(0);
require __DIR__.'/../vendor/autoload.php';
// Fail early with a readable message when the framework bundle is not installed.
if (!class_exists(Application::class)) {
throw new \RuntimeException('You need to add "symfony/framework-bundle" as a Composer dependency.');
}
// When APP_ENV is not provided by the environment, fall back to loading the project's
// .env file, which requires the symfony/dotenv component.
if (!isset($_SERVER['APP_ENV'])) {
if (!class_exists(Dotenv::class)) {
throw new \RuntimeException('APP_ENV environment variable is not defined. You need to define environment variables for configuration or add "symfony/dotenv" as a Composer dependency to load variables from a .env file.');
}
(new Dotenv())->load(__DIR__.'/../.env');
}
$input = new ArgvInput();
// --env / -e on the command line wins over APP_ENV; 'dev' is the last-resort default.
// The trailing `true` limits the lookup to options given before a `--` separator.
$env = $input->getParameterOption(['--env', '-e'], $_SERVER['APP_ENV'] ?? 'dev', true);
// Debug is taken from APP_DEBUG when set, otherwise it is on for every environment except
// 'prod'; passing --no-debug always switches it off.
$debug = (bool) ($_SERVER['APP_DEBUG'] ?? ('prod' !== $env)) && !$input->hasParameterOption('--no-debug', true);
if ($debug) {
// NOTE(review): umask is cleared in debug mode, presumably so generated cache/log files
// stay writable by other users - confirm.
umask(0000);
// The Debug component is optional; enable it only when it is installed.
if (class_exists(Debug::class)) {
Debug::enable();
}
}
$kernel = new Kernel($env, $debug);
$application = new Application($kernel);
$application->run($input);
Oops, something went wrong.

0 comments on commit a0271a5

Please sign in to comment.
You can’t perform that action at this time.
You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session.