# Dataset downloading

In [None]:
import os

# Pixabay API key. Taken from the PIXABAY_API_KEY environment variable when it is set, so the
# secret does not have to be saved inside the notebook; otherwise replace the placeholder by hand.
PIXABAY_API_KEY = os.environ.get("PIXABAY_API_KEY", "<API_KEY>")
PIXABAY_QUERY = "<YOUR_THEME>"  # The theme of the pictures used as "pixels" (tiles)
PIXABAY_CATEGORY = "<CATEGORY>"  # backgrounds, fashion, nature, science, education, feelings, health, people, religion, places, animals, industry, computer, food, sports, transportation, travel, buildings, business, music
PIXABAY_IMAGE_TYPE = "<TYPE>"  # all, photo, illustration, vector
INPUT_IMAGE_PATH = "<INPUT>.jpg"  # Input JPG file name
# Size the input image is shrunk to; every pixel of that shrunk image becomes one tile, so with
# the values below the generated picture is (80 * 50) x (80 * 50) pixels.
OUTPUT_IMAGE_WISHED_DIMENSIONS = (80, 80)
OUTPUT_IMAGE_PIXELS_WIDTH = 50  # Side, in pixels, of each square tile

In [None]:
import json
import requests

# Endpoint of the Pixabay search API
api_url = "https://pixabay.com/api"
# Search parameters. "q" keeps the "+"-joined form because the download cell also uses it
# to build the tile file names.
parameters = {
    "key": PIXABAY_API_KEY,
    "q": PIXABAY_QUERY.replace(" ", "+"),
    "image_type": PIXABAY_IMAGE_TYPE,
    "category": PIXABAY_CATEGORY,
    "per_page": "200"
}

# Let requests build the query string so every value is URL-encoded (spaces, "&", accents...).
# The raw query is sent here: requests itself encodes spaces, whereas a literal "+" would be
# escaped and searched for verbatim.
response = requests.get(api_url, params={**parameters, "q": PIXABAY_QUERY}, timeout=30)
# Stop on an HTTP error (bad key, rate limit, invalid parameter...) instead of failing later
# with a JSON decoding error on the error body.
response.raise_for_status()
# Kept for inspection/debugging: the URL that was actually requested
request_url = response.url
json_response = response.json()
print(f'Found {json_response["totalHits"]} pictures about {parameters["q"]} on Pixabay')

In [None]:
import io
import json
import os
from time import sleep, time

import numpy as np
from PIL import Image
from tqdm import tqdm

# Folder holding the resized tiles for this query and this tile width
dataset_path = os.path.join(os.getcwd(), "dataset", f'{PIXABAY_QUERY.replace(" ", "_")}_{OUTPUT_IMAGE_PIXELS_WIDTH}')
# makedirs also creates the parent "dataset" folder, which a plain mkdir requires to exist
os.makedirs(dataset_path, exist_ok=True)

# That will contain the average RGB values for each image
rgb_values = {}

# Minimum duration of one download, to stay under 100 calls per minute for the Pixabay API
MIN_SECONDS_PER_DOWNLOAD = 0.66
tile_size = (OUTPUT_IMAGE_PIXELS_WIDTH, OUTPUT_IMAGE_PIXELS_WIDTH)

per_page = int(parameters["per_page"])
# Ceiling division: a hit count that is an exact multiple of per_page must not add an extra
# page, since requesting a page past the last one is answered with an error
number_of_pages = -(-json_response["totalHits"] // per_page)

# For each result page
for page in range(1, number_of_pages + 1):
    # Built from a copy so the shared parameters dict is left untouched; requests URL-encodes
    # the values (the raw query is sent, requests turns its spaces into "+")
    page_response = requests.get(
        api_url,
        params={**parameters, "q": PIXABAY_QUERY, "page": page},
        timeout=30,
    )
    page_response.raise_for_status()
    hits = page_response.json()["hits"]
    # For each image from that page (the last page usually holds fewer than per_page hits)
    for result in tqdm(hits, desc=f"Page {page}/{number_of_pages}", total=len(hits)):
        image_path = os.path.join(dataset_path, f'{parameters["q"]}-{result["id"]}.jpg')
        needs_download = not os.path.isfile(image_path)
        start = time()
        try:
            if needs_download:
                # Download the image
                image_response = requests.get(result["largeImageURL"],
                                              headers={"User-Agent": "Chrome"},
                                              timeout=30)
                image_response.raise_for_status()
                # Convert to RGB so the tile can always be written as JPEG and always has
                # three channels, then resize it to a square of wished width and save it
                raw_image = Image.open(io.BytesIO(image_response.content))
                raw_image = raw_image.convert("RGB").resize(tile_size)
                raw_image.save(image_path)
            else:
                # Re-use the tile saved by a previous run
                with Image.open(image_path) as cached_image:
                    raw_image = cached_image.convert("RGB")
        except (OSError, requests.RequestException) as error:
            # A single broken download or unreadable file must not abort the whole dataset
            tqdm.write(f"Skipping {image_path}: {error}")
            continue
        finally:
            # Throttle after every download attempt, successful or not
            if needs_download:
                elapsed = time() - start
                if elapsed < MIN_SECONDS_PER_DOWNLOAD:
                    sleep(MIN_SECONDS_PER_DOWNLOAD - elapsed)
        # Get the average R, G and B values (one mean per channel over all pixels)
        channel_means = np.asarray(raw_image).reshape(-1, 3).mean(axis=0)
        rgb_values[image_path] = tuple(float(channel) for channel in channel_means)

In [None]:
# Write the mean RGB value for all images.
# The file is rewritten on every run: rgb_values is rebuilt from scratch by the download cell,
# so keeping an older file would leave it out of sync with the dataset folder.
out_file_name = os.path.join(dataset_path, "rgb_mean_values.json")
# ensure_ascii=False emits non-ASCII path characters as-is, so the encoding is set explicitly
# instead of relying on the platform default
with open(out_file_name, "w", encoding="utf-8") as handler:
    json.dump(rgb_values, handler, indent=2, sort_keys=True, ensure_ascii=False)

# Image generation

In [None]:
from PIL import Image, UnidentifiedImageError
from IPython.display import display

# Load the picture to reproduce and shrink it to the wished dimensions.
# It is forced to RGB so that every pixel has exactly three values, like the (R, G, B) means
# it is compared against (a grayscale, CMYK or RGBA input would not).
with Image.open(INPUT_IMAGE_PATH) as source_image:
    portrait = source_image.convert("RGB").resize(OUTPUT_IMAGE_WISHED_DIMENSIONS)
display(portrait)

In [None]:
from scipy.spatial import KDTree

def get_closer_rgb(rgb_values, target_rgb, number_of_target=1, wished_index=0):
    """
    Returns the path to the image with one of the closest RGB values to the target RGB

    :param rgb_values: mapping of image path -> (R, G, B) values
    :param target_rgb: the three values (R, G, B) to get close to
    :param number_of_target: number of nearest neighbours to look for
    :param wished_index: rank of the neighbour to return among those found (0 is the closest)
    :return: the key of rgb_values ranked wished_index among the nearest neighbours
    :raises ValueError: if rgb_values is empty
    :raises IndexError: if wished_index is beyond the neighbours actually found
    """
    if not rgb_values:
        raise ValueError("rgb_values is empty: there is no image to choose from")
    # Get all the Pixabay images paths and RGB values
    images_paths = list(rgb_values.keys())
    images_values = np.array(list(rgb_values.values()))
    # Get the closer Pixabay images regarding RGB wished values
    kdtree = KDTree(images_values)
    _, point = kdtree.query(target_rgb, number_of_target)
    # query() returns a bare scalar when a single neighbour is asked for; make it indexable
    candidates = np.atleast_1d(point)
    # When fewer images exist than neighbours asked for, query() pads the result with the
    # out-of-range index len(data); drop that padding
    candidates = candidates[candidates < len(images_paths)]
    try:
        chosen = candidates[wished_index]
    except IndexError:
        raise IndexError(
            f"wished_index {wished_index} is out of range: only {len(candidates)} neighbour(s) found"
        ) from None
    return images_paths[chosen]

In [None]:
import numpy as np

# Number of nearest tiles requested for every pixel, so that an unreadable tile can be
# replaced by the next closest one
CANDIDATES_PER_PIXEL = 10
tile_size = (OUTPUT_IMAGE_PIXELS_WIDTH, OUTPUT_IMAGE_PIXELS_WIDTH)

# Transform this image into a numpy array; forcing RGB guarantees three values per pixel,
# like the (R, G, B) means stored in rgb_values
portrait_array = np.array(portrait.convert("RGB"))
new_portrait_array = []

# Many pixels map to the same tile: keep each decoded tile in memory instead of re-opening
# its file, and remember the files that could not be read
tile_cache = {}
unreadable_tiles = set()

# For each pixel of the image
for row in tqdm(portrait_array, total=len(portrait_array)):
    row_tiles = []
    for pixel in row:
        # Get the closest readable image to the current pixel, trying candidates in order
        for wished_index in range(CANDIDATES_PER_PIXEL):
            closest_image_path = get_closer_rgb(rgb_values, pixel, CANDIDATES_PER_PIXEL, wished_index)
            if closest_image_path in unreadable_tiles:
                continue
            if closest_image_path not in tile_cache:
                try:
                    with Image.open(closest_image_path) as tile_image:
                        # Same mode and size for every tile, otherwise the arrays cannot be
                        # stacked; tiles saved at tile_size are left at that size
                        tile_cache[closest_image_path] = np.asarray(
                            tile_image.convert("RGB").resize(tile_size))
                except OSError:
                    # Missing, truncated or unidentified file: try the next closest tile
                    unreadable_tiles.add(closest_image_path)
                    continue
            row_tiles.append(tile_cache[closest_image_path])
            break
        else:
            raise RuntimeError(
                f"None of the {CANDIDATES_PER_PIXEL} closest tiles could be read for pixel {tuple(pixel)}")
    # Add the new line to the new image
    new_portrait_array.append(np.hstack(row_tiles))
# Vertical combination of the lines
combined_output_array = np.vstack(new_portrait_array)
combined_output_image = Image.fromarray(combined_output_array)
# Save the image next to the input one; splitext only strips the extension, so dots elsewhere
# in the path ("./me.jpg", "../me.jpg", "my.photo.jpg") are preserved
output_stem = os.path.splitext(INPUT_IMAGE_PATH)[0]
combined_output_image.save(
    f"{output_stem}_{PIXABAY_QUERY.replace(' ', '_')}_w{OUTPUT_IMAGE_PIXELS_WIDTH}_{OUTPUT_IMAGE_WISHED_DIMENSIONS[0]}x{OUTPUT_IMAGE_WISHED_DIMENSIONS[1]}.jpg"
)