# Script to convert generic TIFFs to pyramidal tiled TIFFs

See key post at https://legacy.imagemagick.org/discourse-server/viewtopic.php?t=20193 for background on using command-line ImageMagick to do the conversion.

In [None]:
import os
import pandas as pd

# Root folders: the original images are read from the first one and the
# converted copies are written under the second one.
source_image_root_directory = '/users/baskausj/gallery_digital_image_archive/'
destination_image_root_directory = '/users/baskausj/gallery_pyramidal_tiffs/'

# Load the table of image metadata
image_data_frame = pd.read_csv('/users/baskausj/github/vandycite/gallery_works/image_upload/image_dimensions.csv')

# Cast the size columns to integers and the subdirectory column to strings, one column at a time
for integer_column in ('kilobytes', 'height', 'width'):
    image_data_frame[integer_column] = image_data_frame[integer_column].astype(int)
image_data_frame['subdir'] = image_data_frame['subdir'].astype(str)

'''
# For testing purposes, just use the first few rows of the works metadata
test_rows = 4
image_data_frame = image_data_frame.head(test_rows).copy()
'''

# Preview the first rows of the table
image_data_frame.head()


In [None]:
# Filter only rows whose extension is a TIFF variant. The comparison is
# case-insensitive so that mixed-case spellings such as 'Tif' or 'Tiff' are
# not silently left out.
tiff_alt_extensions = ['tif', 'TIF', 'tiff', 'TIFF']
tiff_extensions_lower = {extension.lower() for extension in tiff_alt_extensions}

# astype(str) guards against a non-string column (e.g. missing values), which
# would otherwise make the .str accessor fail.
is_tiff = image_data_frame['extension'].astype(str).str.lower().isin(tiff_extensions_lower)
tiffs_frame = image_data_frame[is_tiff]
tiffs_frame.head()

In [None]:
# Basic conversion command is
# convert 1979.0342P.tif -define tiff:tile-geometry=256x256 -depth 8 ptif:1979.0342P_tiled.tif

import subprocess

# ImageMagick's error output is appended to this log file.
# Based on practical experience, most errors can be ignored.
conversion_log_path = os.path.expanduser('~/gallery_image_conversion_log.txt')

with open(conversion_log_path, 'a') as conversion_log:
    for _, image in tiffs_frame.iterrows():
        in_path = os.path.join(source_image_root_directory, image['subdir'], image['name'])

        # Create the destination subdirectory if it doesn't already exist
        out_directory = os.path.join(destination_image_root_directory, image['subdir'])
        os.makedirs(out_directory, exist_ok=True)
        out_path = os.path.join(out_directory, image['name'])
        print(image['name'])

        # Pass the arguments as a list (no shell involved) so that spaces, quotes,
        # and other shell metacharacters in file names are handed to ImageMagick
        # literally instead of being interpreted by the shell.
        command = [
            'convert', in_path,
            '-define', 'tiff:tile-geometry=256x256',
            '-depth', '8',
            'ptif:' + out_path,
        ]
        result = subprocess.run(command, stderr=conversion_log)

        # Keep going on failure (best effort), but make the failure visible
        if result.returncode != 0:
            print('  conversion exited with status', result.returncode, '- see', conversion_log_path)

print('done')

## Compressing TIFFs using LZW or deflate (a.k.a. zip)

See post at https://legacy.imagemagick.org/discourse-server/viewtopic.php?t=13484 for syntax.

Note: a test upload showed that using either compression method on a TIFF is fine with Commons. LZW is relatively ineffective and sometimes actually makes the file size larger.

In [None]:
# Basic conversion command is
# os.system('convert 1994.012_original.tif -compress lzw 1994.012.tif')

import os
import pandas as pd

# Folder holding the uncompressed originals and folder receiving the compressed copies
source_image_root_directory = '/users/baskausj/uncompressed/'
destination_image_root_directory = '/users/baskausj/compressed/'

# Load the list of images to process and preview its first rows
large_tiffs_csv_path = '/users/baskausj/github/vandycite/gallery_works/image_upload/large_tiffs.csv'
image_list_frame = pd.read_csv(large_tiffs_csv_path)
image_list_frame.head()

In [None]:
import subprocess

# Compression method passed to ImageMagick's -compress option:
# 'zip' (deflate) or 'lzw'
compression_method = 'zip'

# Make sure the destination folder exists so that ImageMagick can write into it
os.makedirs(destination_image_root_directory, exist_ok=True)

for _, image in image_list_frame.iterrows():
    in_path = source_image_root_directory + image['name']
    out_path = destination_image_root_directory + image['name']

    # Show the equivalent command line for reference
    print('convert "' + in_path + '" -compress ' + compression_method + ' "' + out_path + '"')

    # Pass the arguments as a list (no shell involved) so that spaces, quotes,
    # and other shell metacharacters in file names are passed through literally.
    result = subprocess.run(['convert', in_path, '-compress', compression_method, out_path])

    # Keep going on failure, but make the failure visible
    if result.returncode != 0:
        print('  compression exited with status', result.returncode)


Code to grab the filesize of the compressed files

In [None]:
import csv
def write_dicts_to_csv(table, filename, fieldnames):
    """Write a sequence of dictionaries to a UTF-8 CSV file with a header row.

    table: iterable of dictionaries, one per output row
    filename: path of the CSV file to write (opened in 'w' mode, so an existing file is replaced)
    fieldnames: column names, in output order, used for the header and to look up each row's values
    """
    with open(filename, 'w', newline='', encoding='utf-8') as output_file:
        dict_writer = csv.DictWriter(output_file, fieldnames=fieldnames)
        dict_writer.writeheader()
        dict_writer.writerows(table)

# Column names are listed explicitly (rather than taken from the first row) so
# that the report can still be written, header only, when there are no images.
fieldnames = ['name', 'original_kilobytes', 'compressed_kilobytes']

# Collect the size in kilobytes of each image before and after compression
out_images = []
for _, image in image_list_frame.iterrows():
    in_path = source_image_root_directory + image['name']
    out_path = destination_image_root_directory + image['name']

    out_images.append({
        'name': image['name'],
        'original_kilobytes': round(os.path.getsize(in_path) / 1024),
        'compressed_kilobytes': round(os.path.getsize(out_path) / 1024),
    })

write_dicts_to_csv(out_images, source_image_root_directory + 'image_dimensions.csv', fieldnames)

print('done')

Do an inner join with the original file information CSV

In [None]:
import pandas as pd

source_image_root_directory = '/users/baskausj/uncompressed/'
source_directory = '/users/baskausj/github/vandycite/gallery_works/image_upload/'

left_url = source_image_root_directory + 'image_dimensions_full.csv'
right_url = source_directory + 'images.csv'

# Read every column as text and keep empty cells as empty strings rather than NaN
left = pd.read_csv(left_url, na_filter=False, dtype=str)
right = pd.read_csv(right_url, na_filter=False, dtype=str)

# Inner join: keep only the rows whose local_filename appears in both tables
merged_frame = pd.merge(right, left, on='local_filename', how='inner')
merged_frame.to_csv(source_directory + 'oversized_images.csv', index=False)

# The preview goes last because a notebook cell only displays the value of
# its final expression.
merged_frame.head()