-
Notifications
You must be signed in to change notification settings - Fork 0
/
shrink_images.py
104 lines (80 loc) · 2.8 KB
/
shrink_images.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
"""Translates the original image files into a SQLite table of smaller images.
Presuming `sqlite_output.db3` is an empty, or otherwise uninitialized,
SQLite3 file, use as:
$ python shrink_images.py sqlite_output.db3
This scans for JPEGs in directories under `img/raw/`, as specified by the module
constant `_RAW_ROOT_DIR`, and shoves 'em in a four-column table named `slides`
in that destination DB file.
The four columns of `slides`, in order:
1. `collection`, the name of the slide deck this slide is drawn from
2. `filename`, the filename of the slide image file
3. `file_id_num`, the 1-indexed position of this image file in the sort order
of this collection's file names
4. `jpeg_base64`, the Base64 encoding of the image (JPEG format)
"""
import base64
import io
import pathlib
import sqlite3
import sys
from PIL import Image
# Root directory holding the raw images; main() globs `*/*.jpeg` beneath it,
# i.e. one sub-directory per collection.
_RAW_ROOT_DIR = 'img/raw/'
# DDL that creates the four-column `slides` table. main() runs it
# unconditionally, so the destination DB must not already contain `slides`.
_CREATE_TABLE_QUERY = """
CREATE TABLE slides(
collection text,
filename text,
file_id_num integer,
jpeg_base64 text
);
"""
# Parameterized insert of one slide row; the placeholders are bound, in order,
# to (collection, filename, file_id_num, jpeg_base64).
_INSERT_IMAGE_QUERY = """
INSERT INTO slides (collection, filename, file_id_num, jpeg_base64)
VALUES (?, ?, ?, ?)
"""
def process_image(img_path: pathlib.Path, width: int = 600) -> str:
    """Loads the given big JPEG and returns a shrunken, Base64 version.

    Args:
        img_path: Path of the source image file to load.
        width: Width, in pixels, of the shrunken image. The height is scaled
            by the same factor so the aspect ratio is preserved.

    Returns:
        The resized image, re-encoded as a JPEG and then Base64-encoded, as an
        ASCII `str`.
    """
    # Load and resize the image. The context manager closes the underlying
    # file handle as soon as the resized copy exists.
    with Image.open(img_path) as img_orig:
        w_orig, h_orig = img_orig.size
        # Clamp to one pixel so an extremely wide source image can never
        # produce a zero-height (invalid) target size.
        h_new = max(1, h_orig * width // w_orig)
        img_new = img_orig.resize((width, h_new), Image.Resampling.LANCZOS)

    # Serialize it into an in-memory "file":
    out_bytes_io = io.BytesIO()
    img_new.save(out_bytes_io, format='jpeg')

    # b64encode() returns bytes; decode so the result really is the `str`
    # promised by the signature (Base64 output is pure ASCII).
    return base64.b64encode(out_bytes_io.getvalue()).decode('ascii')
def dir_to_collection_name(dir_path: pathlib.Path) -> str:
    """Returns the human-readable collection name for a collection directory.

    Only the final component of `dir_path` is used. Each double underscore in
    it becomes a comma followed by a space, and each remaining single
    underscore becomes a space; e.g. `Paris__France_1970` becomes
    `Paris, France 1970`.

    Args:
        dir_path: Path of the directory holding one collection's images.

    Returns:
        The collection name derived from the directory's name.
    """
    # Replace the double underscores first; otherwise the single-underscore
    # pass would consume them and the commas would be lost.
    return dir_path.name.replace('__', ', ').replace('_', ' ')
def main():
    """Shrinks every raw JPEG and stores the results in the `slides` table.

    Reads the destination SQLite file name from the first command-line
    argument, creates the `slides` table in it, then inserts one row per image
    matching `*/*.jpeg` under `_RAW_ROOT_DIR`. Each insert is committed
    immediately, and a progress line is printed per image.

    Raises:
        SystemExit: If no destination DB file name was given.
    """
    # Fail with a usage hint rather than a bare IndexError, and do so before
    # any file is touched.
    if len(sys.argv) < 2:
        sys.exit('usage: python shrink_images.py <sqlite_output_db>')
    db_filename = sys.argv[1]

    raw_root_path = pathlib.Path(_RAW_ROOT_DIR)
    # Sorting the full paths groups the images by collection directory and
    # orders them by file name within each directory.
    img_paths = sorted(raw_root_path.glob('*/*.jpeg'))
    n = len(img_paths)

    conn = sqlite3.connect(db_filename)
    try:
        cur = conn.cursor()
        cur.execute(_CREATE_TABLE_QUERY)

        prev_coll = None
        file_id_num = 1
        for i, p in enumerate(img_paths, 1):
            small_jpeg_b64 = process_image(p)
            collection = p.parent.name.replace("__", ", ").replace("_", " ")
            # Restart the 1-indexed numbering whenever a new collection begins.
            if collection != prev_coll:
                file_id_num = 1

            insert_tuple = (
                collection,      # collection
                p.name,          # filename
                file_id_num,     # file_id_num
                small_jpeg_b64,  # jpeg_base64
            )
            cur.execute(_INSERT_IMAGE_QUERY, insert_tuple)
            # Commit per image so finished work survives a later failure.
            conn.commit()
            print(p, f'processed ({i} of {n})')

            file_id_num += 1
            prev_coll = collection
    finally:
        # Release the DB handle even if an image fails to process.
        conn.close()
# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()