# PngBin Creator

This notebook helps you convert a whole directory of files into a series of PngBin images and a corresponding metadata database file.

#### How to use:
- Modify the [**configurations**](#Configurations) as needed.
- In the menu bar, select Run > Run All Cells.
- Scroll to the end of this notebook to see the progress output.

## Configurations

---
Path to the directory containing the files you want to convert to PngBin images.
> It must exist and contain at least one file.  
> Any empty folders will be ignored.

In [None]:
# Directory whose files are packed into PngBin images (walked recursively by iter_file).
INPUT_DIR = 'myfiles'

---
Path to a directory that will store output PngBin images.
> If it doesn't exist, it will be created automatically.

In [None]:
# Directory that receives the generated image files; created during validation if missing.
OUTPUT_DIR = 'outputs/images'

---
Path to an output metadata database file.
> If it exists, new metadata will be added to it.  
> If it doesn't exist, it will be created automatically.

In [None]:
# SQLite database file that holds the "images", "files" and "urls" metadata tables.
META_PATH = 'outputs/meta.db'

---
Width and height of the output images.
> The product of `WIDTH` and `HEIGHT` must be divisible by 4, and both values must be greater than 0.

In [None]:
# Width and height handed to ChainWriter for every output image and stored in its "images" row.
WIDTH, HEIGHT = 2508, 2508

---
Filename format for output images.
> It must contain exactly one `%d` placeholder, which is replaced by the incremental number.  
> Tip: `%04d` zero-pads the number to 4 digits (e.g. IMAGE_0069.png).

In [None]:
# printf-style pattern; each output image is named `FN_FORMAT % image_id`.
FN_FORMAT = 'IMAGE_%02d.png'

#### Note on filename and incremental number.
Each output PngBin image is named according to `FN_FORMAT`, with `%d` replaced by the incremental number.  
Each filename is also used as the identifier when updating `urls` in the metadata database file.  
The incremental number is also used as the `id` of each image in the metadata database file.  
The incremental number starts at the largest `id` in the metadata database file plus 1.  
If the metadata database file contains no `id` (empty database), it starts at 1.

---

### Import

In [None]:
import os
import sqlite3
import itertools

try:
    from pngbin import ChainWriter
except ModuleNotFoundError:
    import sys
    sys.path.append(os.path.abspath('..'))
    from pngbin import ChainWriter

### Basic Configuration Validation

In [None]:
# Fail fast on an unusable configuration before anything is created on disk.
assert os.path.isdir(INPUT_DIR), 'INPUT_DIR must exist and be a directory.'
assert any(x[2] for x in os.walk(INPUT_DIR)), 'INPUT_DIR must contain at least one file.'
assert not os.path.exists(OUTPUT_DIR) or os.path.isdir(OUTPUT_DIR), 'OUTPUT_DIR must be a directory if it exists.'
assert not os.path.exists(META_PATH) or os.path.isfile(META_PATH), 'META_PATH must be a file if it exists.'
assert WIDTH * HEIGHT % 4 == 0, 'The multiple of WIDTH and HEIGHT must be divisible by 4.'
assert all(x > 0 for x in [WIDTH, HEIGHT]), 'WIDTH and HEIGHT must have value more then 0.'

# FN_FORMAT is applied as `FN_FORMAT % image_id`; check here that it accepts a
# single integer and yields a different name per id, instead of failing only
# after the output directory and database have been created.
try:
    _fn_ok = FN_FORMAT % 1 != FN_FORMAT % 2
except (TypeError, ValueError):
    _fn_ok = False
assert _fn_ok, 'FN_FORMAT must have exactly one %d placeholder.'

# OUTPUT_DIR is known to be either missing or a directory at this point.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# A bare filename such as 'meta.db' has an empty directory part, which means
# "current directory"; os.makedirs('') would raise FileNotFoundError.
_META_DIR = os.path.dirname(META_PATH)
if _META_DIR:
    os.makedirs(_META_DIR, exist_ok=True)

### Metadata Database Creation

In [None]:
# Schema script executed once when the metadata database is first created.
#   images - one row per output image: id, key, iv, width, height, name.
#   files  - one row per input file: offset, length, unique path, and the
#            id of the image it refers to (images_id).
#   urls   - unique url / images_id pairs; none of the code cells in this
#            notebook insert into it.
# NOTE(review): "files"."path" is already declared UNIQUE, so the separate
# unique index "idx_files_path" looks redundant - confirm before changing
# the schema.
_META_CREATE_SQL = """
CREATE TABLE "images" (
	"id"	INTEGER,
	"key"	BLOB NOT NULL,
	"iv"	BLOB NOT NULL,
	"width"	INTEGER NOT NULL,
	"height"	INTEGER NOT NULL,
	"name"	TEXT NOT NULL,
	PRIMARY KEY("id")
);
CREATE TABLE "files" (
	"offset"	INTEGER NOT NULL,
	"length"	INTEGER NOT NULL,
	"path"	TEXT NOT NULL UNIQUE,
	"images_id"	INTEGER NOT NULL,
	FOREIGN KEY("images_id") REFERENCES "images"("id")
);
CREATE TABLE "urls" (
	"url"	TEXT NOT NULL UNIQUE,
	"images_id"	INTEGER NOT NULL,
	FOREIGN KEY("images_id") REFERENCES "images"("id")
);
CREATE UNIQUE INDEX "idx_files_path" ON "files" (
	"path"
);
CREATE INDEX "idx_urls_images_id" ON "urls" (
	"images_id"
);
"""

# Create the schema when the database is new, then work out the first image id.
#
# A zero-byte file counts as new as well: SQLite opens it as an empty database,
# so without the schema the SELECT below would fail with "no such table".
new_meta = not os.path.exists(META_PATH) or os.path.getsize(META_PATH) == 0
conn = sqlite3.connect(META_PATH)
try:
    if new_meta:
        with conn:
            conn.executescript(_META_CREATE_SQL)

    # MAX(id) is NULL (None) for an empty "images" table; numbering then starts at 1.
    _max_id = conn.execute('SELECT MAX(id) FROM "images"').fetchone()[0]
    _START_ID = 1 if _max_id is None else _max_id + 1
finally:
    conn.close()

### Global dict and functions declaration

In [None]:
# Mutable state shared between the helper functions and the main cell: the
# description of the image currently being written ('fobj', 'width', 'height',
# 'id', 'name') and, once the main cell runs, the database connection ('conn').
D = {}


def iter_info():
    """Yield the description of one new output image after another, endlessly.

    Image ids count up from ``_START_ID``. For every id the image file is
    created inside ``OUTPUT_DIR`` in exclusive binary mode (``'xb'``), so an
    already existing file raises instead of being overwritten. The current
    image's details are published in ``D`` and its name is printed as
    progress output.

    Yields:
        dict: the ``'fobj'``, ``'width'`` and ``'height'`` entries of ``D``.
    """
    img_id = _START_ID
    while True:
        name = FN_FORMAT % img_id
        fobj = open(os.path.join(OUTPUT_DIR, name), 'xb')
        D['fobj'] = fobj
        D['width'] = WIDTH
        D['height'] = HEIGHT
        D['id'] = img_id
        D['name'] = name
        print(name)
        yield {key: D[key] for key in ('fobj', 'width', 'height')}
        # Resumed when the next image is requested: ChainWriter finishes the
        # image file on its own but leaves it open, so close it here.
        D['fobj'].close()
        img_id += 1

def on_writer_created(w):
    """Insert the ``images`` row for the image currently described by ``D``.

    Handed to ChainWriter as its third argument in the main cell.

    Args:
        w: object whose ``key`` and ``iv`` attributes are stored alongside
            the ``id``, ``width``, ``height`` and ``name`` entries of ``D``.
    """
    conn = D['conn']
    sql = (
        'INSERT INTO images (id, key, iv, width, height, name) '
        'VALUES (:id, :key, :iv, :width, :height, :name)'
    )
    # Work on a copy so the shared state is not modified; entries of D that
    # the statement does not name (such as 'fobj' and 'conn') come along unused.
    params = {**D, 'key': w.key, 'iv': w.iv}
    conn.execute(sql, params)

def iter_file(writer):
    """Yield the absolute path of every file under INPUT_DIR, recording each first.

    Just before a path is yielded, a row describing the file is inserted into
    the ``files`` table through ``D['conn']``: the writer's current position,
    the file's size and its path relative to INPUT_DIR, written with forward
    slashes.

    Args:
        writer: object exposing ``tell()``; its value at the time of the
            insert determines the stored offset.

    Yields:
        str: absolute path of the next input file.
    """
    abs_input_dir = os.path.abspath(INPUT_DIR)
    # Offsets are stored modulo this value - presumably the byte capacity of
    # one image (4 bytes per pixel); confirm against pngbin.
    image_size = WIDTH * HEIGHT * 4
    for parent, _, filenames in os.walk(abs_input_dir):
        for filename in filenames:
            abs_path = os.path.join(parent, filename)
            rel_path = os.path.relpath(abs_path, abs_input_dir)
            # Normalise only the platform's real separator. On POSIX a
            # backslash is an ordinary filename character and must be kept.
            if os.sep != '/':
                rel_path = rel_path.replace(os.sep, '/')
            D['conn'].execute(
                'INSERT INTO files (offset, length, path, images_id) '
                'VALUES (:offset, :length, :path, :images_id)',
                {
                    'offset': writer.tell() % image_size,
                    'length': os.path.getsize(abs_path),
                    'path': rel_path,
                    # No image has been announced before the first write, so
                    # fall back to the id the first image will receive.
                    # NOTE(review): if tell() sits exactly on an image
                    # boundary, D['id'] may still be the previous image's id
                    # - confirm against ChainWriter's behaviour.
                    'images_id': D.get('id', _START_ID),
                },
            )
            yield abs_path

def copy_stream(reader, writer, buffer_size=2**20):
    """Copy everything from ``reader`` to ``writer`` in fixed-size chunks.

    Args:
        reader: object with a ``read(size)`` method.
        writer: object with a ``write(data)`` method.
        buffer_size: maximum amount requested per ``read`` call
            (default 2**20).

    Copying stops at the first falsy value returned by ``read``.
    """
    chunk = reader.read(buffer_size)
    while chunk:
        writer.write(chunk)
        chunk = reader.read(buffer_size)

### Working the Magic

In [None]:
# Convert every input file, recording metadata in the database as we go.
D['conn'] = sqlite3.connect(META_PATH)
try:
    # The connection's context manager commits all inserts when the block
    # succeeds and rolls them back when it raises. The writer is entered
    # second, so it is finished first, before the commit.
    # NOTE(review): the second ChainWriter argument (True) presumably turns
    # on encryption, given the key/iv stored per image - confirm in pngbin.
    with D['conn'], ChainWriter(iter_info(), True, on_writer_created) as writer:
        for path in iter_file(writer):
            with open(path, 'rb') as f:
                copy_stream(f, writer)
    print('DONE!')
finally:
    D['conn'].close()
    if 'fobj' in D:
        # iter_info only closes an image when the next one is requested,
        # so the last image file is still open at this point.
        D['fobj'].close()