# Explore cartoon images

In [None]:
# Figure size passed to matplotlib when displaying a single image.
IM_SIZE = (15,15)

# Figure size passed to matplotlib for every histogram plot.
HIST_SIZE = (12, 3)
# Number of buckets each color channel is split into by reduce_color_space.
BUCKET_NB = 8

In [None]:
# NOTE(review): presumably moves to the repository root so that
# `from scripts import config` resolves — confirm against the repo layout.
%cd ../..

In [None]:
import colorsys
import os
import pickle
import random
from collections import Counter
from typing import Dict, List, Optional, Tuple

import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
from tqdm import tqdm

from scripts import config

In [None]:
def show_images(image_paths: List[str], nb_images: int = 5):
    """Display a random selection of images, one matplotlib figure per image.

    Args:
        image_paths: Paths of the candidate images. The list is not modified.
        nb_images: Maximum number of images to display. When fewer paths are
            available, each of them is shown once.
    """
    # Sample instead of shuffling in place: the caller's list keeps its order,
    # and asking for more images than available no longer raises IndexError.
    nb_to_show = max(0, min(nb_images, len(image_paths)))
    for image_path in random.sample(image_paths, nb_to_show):
        image = plt.imread(image_path)
        plt.figure(figsize=IM_SIZE)
        plt.imshow(image)
        plt.axis("off")
        plt.show()

In [None]:
def reduce_color_space(rgb: List[int], bucket_nb: Optional[int] = None):
    """Quantize a color by snapping each channel to the center of its bucket.

    Args:
        rgb: Sequence whose first three items are the red, green and blue
            channel values. Any further item (e.g. alpha) is ignored.
        bucket_nb: Number of buckets per channel. Defaults to the
            module-level ``BUCKET_NB``.

    Returns:
        An ``(r, g, b)`` tuple of quantized channel values.

    Raises:
        ValueError: If ``bucket_nb`` is not between 1 and 256.
    """
    if bucket_nb is None:
        bucket_nb = BUCKET_NB
    if not 1 <= bucket_nb <= 256:
        raise ValueError(f"bucket_nb must be between 1 and 256, got {bucket_nb}")

    bucket_size = 256 // bucket_nb
    half_bucket = bucket_size // 2
    last_bucket = bucket_nb - 1

    def quantize(channel):
        # When bucket_nb does not divide 256, the highest values would land in
        # an extra bucket whose center exceeds 255; fold them into the last one.
        bucket = min(channel // bucket_size, last_bucket)
        return bucket * bucket_size + half_bucket

    return (quantize(rgb[0]), quantize(rgb[1]), quantize(rgb[2]))

def hexencode(rgb):
    """Format the first three channels of ``rgb`` as a ``#rrggbb`` string.

    Each channel is converted with ``int`` before being written as
    two-digit, zero-padded lowercase hexadecimal.
    """
    channels = tuple(int(rgb[index]) for index in range(3))
    return '#%02x%02x%02x' % channels

def plot_color_histogram(all_colors: Dict[Tuple[int, int, int], int], nb_colors: int = -1):
    """Draw a bar chart of color frequencies, each bar painted in its own color.

    Args:
        all_colors: Mapping from an ``(r, g, b)`` color to its number of
            occurrences.
        nb_colors: When positive, only the ``nb_colors`` most frequent colors
            are drawn; otherwise every color is drawn.
    """
    # Most frequent colors first, so that the optional cut keeps the top ones.
    colors_to_show = sorted(all_colors.items(), key=lambda item: item[1], reverse=True)
    if nb_colors > 0:
        colors_to_show = colors_to_show[:nb_colors]
    # Lay the kept colors out along the x axis in HSV order (hue first).
    colors_to_show.sort(key=lambda item: colorsys.rgb_to_hsv(*item[0]))

    plt.figure(figsize=HIST_SIZE)
    if colors_to_show:
        heights = [count for _, count in colors_to_show]
        # Encode each color once and draw all bars in a single call instead of
        # issuing one plt.bar call per color.
        bar_colors = [hexencode(color) for color, _ in colors_to_show]
        plt.bar(range(len(heights)), heights, color=bar_colors, edgecolor=bar_colors)
    ax = plt.gca()
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)

def compute_plot_color_histogram(image_paths: List[str], frac: float = 1, nb_colors: int = -1):
    """Count the quantized colors of a random subset of images and plot them.

    Args:
        image_paths: Paths of the candidate images.
        frac: Fraction of ``image_paths`` to sample (``random.sample`` raises
            ``ValueError`` if this asks for more images than available).
        nb_colors: Forwarded to ``plot_color_histogram``.

    Returns:
        A dict mapping each quantized ``(r, g, b)`` color to its pixel count
        summed over the sampled images.
    """
    image_paths = random.sample(image_paths, int(frac*len(image_paths)))
    all_colors_counter = Counter()

    for image_path in tqdm(image_paths):
        # The context manager releases the underlying file deterministically.
        with Image.open(image_path) as im:
            # Force RGB so grayscale / palette images also yield color tuples
            # that reduce_color_space can index.
            rgb_im = im.convert("RGB")
        w, h = rgb_im.size
        # maxcolors=w*h: an image cannot hold more distinct colors than pixels,
        # so getcolors never returns None here.
        for n, c in rgb_im.getcolors(w*h):
            # Several exact colors fall into the same bucket: their counts must
            # be added up. Building a dict from the pairs would keep only the
            # last count seen for each bucket.
            all_colors_counter[reduce_color_space(c)] += n

    all_colors = dict(all_colors_counter)

    plot_color_histogram(all_colors, nb_colors)

    return all_colors

In [None]:
def hsv2rgb(h,s,v):
    """Convert an HSV color to an ``(r, g, b)`` tuple of integers.

    ``colorsys.hsv_to_rgb`` does the conversion; each resulting channel is
    then multiplied by 255 and rounded.
    """
    red, green, blue = colorsys.hsv_to_rgb(h, s, v)
    return (round(red * 255), round(green * 255), round(blue * 255))

def plot_hsv_histogram(all_colors: Dict[str, Dict[float, int]]):
    """Draw one bar chart per HSV channel, each bar painted to depict its level.

    Args:
        all_colors: Mapping from a channel name (``"hue"``, ``"saturation"``
            or ``"value"``) to a dict mapping a channel level to its count.
            Levels are divided by 256 to build the bar colors.

    Raises:
        ValueError: If a non-empty histogram is given under another name.
    """

    def get_rgb_color(hsv: int, mode: str):
        """Return the (r, g, b) color used to paint the bar of level ``hsv``."""
        level = hsv/256
        if mode == 'hue':
            c = (level, 1, 1)
        elif mode == 'saturation':
            # NOTE(review): value 0.95 presumably keeps low-saturation bars
            # from being pure white — confirm.
            c = (0, level, 0.95)
        elif mode == 'value':
            c = (0, 0, max(0, level-0.05))
        else:
            # Explicit error instead of an UnboundLocalError on `c`.
            raise ValueError(f"Unknown HSV mode: {mode!r}")
        return tuple(round(i * 255) for i in colorsys.hsv_to_rgb(*c))

    for mode, all_colors_from_mode in all_colors.items():
        # Bars are laid out by increasing level.
        levels = sorted(all_colors_from_mode)
        heights = [all_colors_from_mode[level] for level in levels]
        bar_colors = [hexencode(get_rgb_color(level, mode)) for level in levels]

        plt.figure(figsize=HIST_SIZE)
        if levels:
            # One call draws every bar instead of one plt.bar call per level.
            plt.bar(range(len(levels)), heights, color=bar_colors, edgecolor=bar_colors)
        ax = plt.gca()
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
        plt.title(f"Histogram of {mode}", fontsize=18)

def compute_plot_hsv_histogram(image_paths: List[str], frac: float = 1):
    """Count hue, saturation and value levels over a random subset of images.

    Args:
        image_paths: Paths of the candidate images.
        frac: Fraction of ``image_paths`` to sample (``random.sample`` raises
            ``ValueError`` if this asks for more images than available).

    Returns:
        A dict with the keys ``"hue"``, ``"saturation"`` and ``"value"``, each
        mapping a channel level to its pixel count summed over the sampled
        images. The three histograms are also plotted.
    """
    modes = ["hue", "saturation", "value"]

    image_paths = random.sample(image_paths, int(frac*len(image_paths)))
    all_colors_counter = {mode: Counter() for mode in modes}

    for image_path in tqdm(image_paths):
        # The context manager releases the underlying file deterministically.
        with Image.open(image_path) as im:
            # Build the pixel array once per image rather than once per channel.
            hsv_pixels = np.asarray(im.convert('HSV'))
        for channel, mode in enumerate(modes):
            unique, counts = np.unique(hsv_pixels[:, :, channel], return_counts=True)
            # tolist() yields plain Python ints, so the returned (and later
            # pickled) histograms do not carry numpy scalar objects.
            all_colors_counter[mode].update(dict(zip(unique.tolist(), counts.tolist())))

    all_colors = {k: dict(v) for k, v in all_colors_counter.items()}

    plot_hsv_histogram(all_colors)

    return all_colors

## Movie by movie

In [None]:
def show_images_from_movie(movie: str, nb_images: int = 5):
    """Display random images taken from one movie's folder.

    The candidates are every entry of the ``movie`` sub-folder of
    ``config.CARTOONS_FOLDER``; ``nb_images`` is forwarded to ``show_images``.
    """
    movie_dir = os.path.join(config.CARTOONS_FOLDER, movie)
    image_paths = []
    for file_name in os.listdir(movie_dir):
        image_paths.append(os.path.join(movie_dir, file_name))
    return show_images(image_paths, nb_images)

In [None]:
# Sub-folder of config.CARTOONS_FOLDER whose images are previewed.
MOVIE = "Luca"
# Number of randomly picked images to display.
NB_IMAGES = 20

show_images_from_movie(MOVIE, NB_IMAGES)

In [None]:
def get_color_histogram_one_movie(movie: str, frac: float = 1, nb_colors: int = -1):
    """Compute and plot the color histogram of one movie's images.

    The images are every entry of the ``movie`` sub-folder of
    ``config.CARTOONS_FOLDER``. ``frac`` and ``nb_colors`` are forwarded to
    ``compute_plot_color_histogram``, whose result is returned.
    """
    movie_dir = os.path.join(config.CARTOONS_FOLDER, movie)
    file_names = os.listdir(movie_dir)
    image_paths = [os.path.join(movie_dir, file_name) for file_name in file_names]
    histogram = compute_plot_color_histogram(image_paths, frac, nb_colors)
    return histogram

In [None]:
# Sub-folder of config.CARTOONS_FOLDER whose color histogram is computed.
MOVIE = "BABYBOSS"
# Fraction of the movie's images that is randomly sampled.
FRAC = 0.2
# A non-positive value plots every color instead of only the most frequent ones.
NB_COLORS = -1

get_color_histogram_one_movie(MOVIE, FRAC, NB_COLORS)
# NOTE(review): the trailing `pass` presumably keeps the notebook from echoing
# the returned histogram dict as the cell output.
pass

In [None]:
def get_hsv_histograms_one_movie(movie: str, frac: float = 1):
    """Compute and plot the HSV histograms of one movie's images.

    The images are every entry of the ``movie`` sub-folder of
    ``config.CARTOONS_FOLDER``. ``frac`` is forwarded to
    ``compute_plot_hsv_histogram``, whose result is returned.
    """
    movie_dir = os.path.join(config.CARTOONS_FOLDER, movie)
    image_paths = [
        os.path.join(movie_dir, entry)
        for entry in os.listdir(movie_dir)
    ]
    return compute_plot_hsv_histogram(image_paths, frac)

In [None]:
# Sub-folder of config.CARTOONS_FOLDER whose HSV histograms are computed.
MOVIE = "BABYBOSS"
# Fraction of the movie's images that is randomly sampled.
FRAC = 0.01

get_hsv_histograms_one_movie(MOVIE, FRAC)
# NOTE(review): the trailing `pass` presumably keeps the notebook from echoing
# the returned histogram dict as the cell output.
pass

## Histograms etc. for each movie

In [None]:
# Number of randomly picked images to display across all movies.
NB_IMAGES = 50

# Gather the image paths of every movie listed in config.MOVIES.
all_images = []
for movie in config.MOVIES:
    # The movie's value is used as its folder name under config.CARTOONS_FOLDER.
    movie_name = movie.value
    path = os.path.join(config.CARTOONS_FOLDER, movie_name)
    all_images.extend([os.path.join(path, name) for name in os.listdir(path)])

show_images(all_images, nb_images=NB_IMAGES)

In [None]:
# Fraction of each movie's images used when a histogram has to be computed.
FRAC = 1
# A non-positive value plots every color instead of only the most frequent ones.
NB_COLORS = -1

# Histograms are cached in a pickle file so they are computed only once.
# NOTE: pickle.load can execute arbitrary code; only load a cache file that
# was produced by this notebook.
try:
    with open(config.HISTOGRAMS_PATH, "rb") as f:
        all_histograms = pickle.load(f)
except FileNotFoundError:
    # First run: there is no cache yet, so start from an empty one.
    all_histograms = {}

for movie in config.MOVIES:
    movie_name = movie.value
    print(f"\n\nProcessing {movie_name}")
    if movie_name not in all_histograms:
        all_histograms[movie_name] = {}

    if "color" not in all_histograms[movie_name]:
        all_histograms[movie_name]["color"] = get_color_histogram_one_movie(movie_name, FRAC, NB_COLORS)
        # Save right after each computation so an interruption loses at most
        # the histogram in progress; nothing is rewritten on a cache hit.
        with open(config.HISTOGRAMS_PATH, "wb") as f:
            pickle.dump(all_histograms, f)
    else:
        plot_color_histogram(all_histograms[movie_name]["color"], NB_COLORS)

    if "hsv" not in all_histograms[movie_name]:
        all_histograms[movie_name]["hsv"] = get_hsv_histograms_one_movie(movie_name, FRAC)
        with open(config.HISTOGRAMS_PATH, "wb") as f:
            pickle.dump(all_histograms, f)
    else:
        plot_hsv_histogram(all_histograms[movie_name]["hsv"])


In [None]:
# A non-positive value plots every color instead of only the most frequent ones.
NB_COLORS = -1

# NOTE: pickle.load can execute arbitrary code; only load a cache file that
# was produced by this notebook.
with open(config.HISTOGRAMS_PATH, "rb") as f:
    all_histograms = pickle.load(f)

# The cache is filled under `movie.value`, so it must be read back with the
# same key: `movie.name` raises KeyError whenever an enum member's name
# differs from its value.
colors_all_movies = dict(sum([Counter(all_histograms[movie.value]["color"]) for movie in config.MOVIES], Counter()))
plot_color_histogram(colors_all_movies, NB_COLORS)

# Sum the per-movie histograms channel by channel.
h_all_movies = dict(sum([Counter(all_histograms[movie.value]["hsv"]["hue"]) for movie in config.MOVIES], Counter()))
s_all_movies = dict(sum([Counter(all_histograms[movie.value]["hsv"]["saturation"]) for movie in config.MOVIES], Counter()))
v_all_movies = dict(sum([Counter(all_histograms[movie.value]["hsv"]["value"]) for movie in config.MOVIES], Counter()))
plot_hsv_histogram({"hue": h_all_movies, "saturation": s_all_movies, "value": v_all_movies})

## With images

In [None]:
# Number of randomly picked images to display.
NB_IMAGES = 5

# Every entry of config.PICTURES_FOLDER is a candidate.
path = config.PICTURES_FOLDER
all_images = [os.path.join(path, name) for name in os.listdir(path)]
show_images(all_images, NB_IMAGES)

In [None]:
# Fraction of the images used when a histogram has to be computed.
FRAC = 1
# A non-positive value plots every color instead of only the most frequent ones.
NB_COLORS = -1

path = config.PICTURES_FOLDER
all_images = [os.path.join(path, name) for name in os.listdir(path)]

# Histograms are cached in a pickle file so they are computed only once.
# NOTE: pickle.load can execute arbitrary code; only load a cache file that
# was produced by this notebook.
try:
    with open(config.HISTOGRAMS_PATH, "rb") as f:
        all_histograms = pickle.load(f)
except FileNotFoundError:
    # First run: there is no cache yet, so start from an empty one.
    all_histograms = {}

if "pictures" not in all_histograms:
    all_histograms["pictures"] = {}

# Compute each histogram only when it is missing from the cache; otherwise
# just plot the cached version.
if "color" not in all_histograms["pictures"]:
    hist_colors = compute_plot_color_histogram(all_images, FRAC, NB_COLORS)
    all_histograms["pictures"]["color"] = hist_colors
else:
    plot_color_histogram(all_histograms["pictures"]["color"], NB_COLORS)

if "hsv" not in all_histograms["pictures"]:
    hist_hsv = compute_plot_hsv_histogram(all_images, FRAC)
    all_histograms["pictures"]["hsv"] = hist_hsv
else:
    plot_hsv_histogram(all_histograms["pictures"]["hsv"])

with open(config.HISTOGRAMS_PATH, "wb") as f:
    pickle.dump(all_histograms, f)