In [260]:
import json
import os
import re

from PIL import Image

In [261]:
with open('./data/book-test-set/integrated_api_responses.json', 'r', encoding="utf-8") as f:
    integrated_response = json.load(f)

In [262]:
def get_file_name(integrated_response_item):
    return integrated_response_item['book_cover']['image_name']

def get_top_title_google_books(integrated_response_item):
    first_title_query = list(integrated_response_item['google_book_data'])[0]
    first_query = integrated_response_item['google_book_data'][first_title_query]
    if 'items' in first_query:
        return first_query['items'][0]['volumeInfo']['title']
    else:
        return "No title found"

def get_top_title_web_annotation(integrated_response_item):
    title = integrated_response_item['book_cover']['web_entities'][0]['description']
    if title == "":
        title = "NO_TITLE"
    return title

def get_confidence(integrated_response_item):
    return integrated_response_item['book_cover']['web_entities'][0]['score']

def get_book_title_google_books(integrated_response_item):
    title = get_top_title_google_books(integrated_response_item)
    file = get_file_name(integrated_response_item)
    return file + ' -> ' + title

def get_book_title_web_annotation(integrated_response_item):
    title = get_top_title_web_annotation(integrated_response_item)
    file = get_file_name(integrated_response_item)
    confidence = get_confidence(integrated_response_item)
    return f'({confidence:.2f}) {file: >30} -> {title}'

In [263]:
# print(json.dumps(integrated_response[0], indent=4))

In [264]:
for item in integrated_response:
    print(get_book_title_web_annotation(item))

(1.32)     user1433_1806_book_1_3.jpg -> Percy Jackson & the Olympians: The Lightning Thief
(1.45)     user1433_1806_book_2_4.jpg -> The Outsiders
(1.11)     user1433_1806_book_3_5.jpg -> Mockingjay
(0.84)     user1433_1806_book_4_6.jpg -> Cinder
(0.38)     user1433_1806_book_5_8.jpg -> Font
(1.13)     user1433_1806_book_6_9.jpg -> Divergent
(1.50)    user1433_1806_book_7_10.jpg -> The Book Thief
(1.38)      user1436_577_book_1_5.jpg -> Pete the Cat: Five Little Pumpkins
(1.21)      user1436_577_book_2_6.jpg -> Un automne avec Pop
(0.42)      user1436_577_book_3_7.jpg -> NO_TITLE
(1.49)     user1437_1220_book_1_6.jpg -> Algebra 1: A Common Core Curriculum
(1.06)     user1437_1220_book_2_7.jpg -> Geometry
(0.42)     user1443_1809_book_1_7.jpg -> Line
(0.41)     user1443_1809_book_2_8.jpg -> Line
(0.41)     user1443_1809_book_3_9.jpg -> Line
(0.40)    user1443_1809_book_4_10.jpg -> Line
(0.43)    user1443_1809_book_5_11.jpg -> Line
(0.41)    user1443_1809_book_6_12.jpg -> Line
(1.41)   u

In [265]:
count = 0
for integrated_response_item in integrated_response:
    confidence = float(get_confidence(integrated_response_item))
    if confidence > 1.0:
        count += 1

print(count)

275


In [266]:
def clean_title(title):
    cleaned = re.sub(r'[^a-zA-Z0-9 ]', '', title)
    cleaned = re.sub(r'\s+', '_', cleaned)
    cleaned = cleaned.lower()
    return cleaned

def write_image_with_new_title(image_path, new_title, output_directory):
    image = Image.open(image_path)
    cleaned_title = clean_title(new_title) + '.jpg'
    output_path = os.path.join(output_directory, cleaned_title)
    print(f'Writing {output_path}')
    image.save(output_path, 'JPEG')

In [267]:
for item in integrated_response:
    file_name = get_file_name(item)
    title = get_top_title_web_annotation(item)
    write_image_with_new_title(os.path.join('./data/book-test-set/book', file_name),
                               title,
                               './data/book-test-set/new_titles')

Writing ./data/book-test-set/new_titles\percy_jackson_the_olympians_the_lightning_thief.jpg
Writing ./data/book-test-set/new_titles\the_outsiders.jpg
Writing ./data/book-test-set/new_titles\mockingjay.jpg
Writing ./data/book-test-set/new_titles\cinder.jpg
Writing ./data/book-test-set/new_titles\font.jpg
Writing ./data/book-test-set/new_titles\divergent.jpg
Writing ./data/book-test-set/new_titles\the_book_thief.jpg
Writing ./data/book-test-set/new_titles\pete_the_cat_five_little_pumpkins.jpg
Writing ./data/book-test-set/new_titles\un_automne_avec_pop.jpg
Writing ./data/book-test-set/new_titles\notitle.jpg
Writing ./data/book-test-set/new_titles\algebra_1_a_common_core_curriculum.jpg
Writing ./data/book-test-set/new_titles\geometry.jpg
Writing ./data/book-test-set/new_titles\line.jpg
Writing ./data/book-test-set/new_titles\line.jpg
Writing ./data/book-test-set/new_titles\line.jpg
Writing ./data/book-test-set/new_titles\line.jpg
Writing ./data/book-test-set/new_titles\line.jpg
Writing ./d