```
Copyright 2021 Twitter, Inc.
SPDX-License-Identifier: Apache-2.0
```

# Gender Gaze Analysis

* This notebook prepares a dataset for gender gaze analysis. 
* It selects up to `MAX_FOUND` images for each gender value in the dataset.
* The selected images' saliency maps are stored in the folder `./gender_gaze/annotations/{GENDER}` (inside the data directory) with the same name as the image. 
* Each image's salient segment regions are saved in a file with the suffix `_regions` (e.g. `<image_name>_regions.jpg`).
* Once the images are generated, inspect the saliency map images to assess whether the most salient point is on the face, and use the `_regions` files to check whether any non-face area is detected as a salient region.

In [1]:
import logging
import shlex
import subprocess
import sys
from collections import namedtuple
from pathlib import Path

import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.collections import PatchCollection
from matplotlib.patches import Rectangle

# Configure the root logger with an ERROR threshold, so messages below ERROR
# severity are not emitted.
logging.basicConfig(level=logging.ERROR)

In [2]:
import platform

BIN_MAPS = {"Darwin": "mac", "Linux": "linux"}

HOME_DIR = Path("../").expanduser()

try:
    import google.colab
    ! pip install pandas scikit-learn scikit-image statsmodels requests dash
    ! [[ -d image-crop-analysis ]] || git clone https://github.com/twitter-research/image-crop-analysis.git
    HOME_DIR = Path("./image-crop-analysis").expanduser()
    IN_COLAB = True
except:
    IN_COLAB = False

sys.path.append(str(HOME_DIR / "src"))
bin_dir = HOME_DIR / Path("./bin")
bin_path = bin_dir / BIN_MAPS[platform.system()] / "candidate_crops"
model_path = bin_dir / "fastgaze.vxm"
data_dir = HOME_DIR / Path("./data/")
data_dir.exists()

True

In [3]:
# Read the tab-separated dataset index from the data directory and preview its
# first rows.
dataset_path = data_dir / "dataset.tsv"
df = pd.read_csv(dataset_path, sep="\t")
df.head()

Unnamed: 0,human,image,sex_or_gender,ethnic_group,url,local_path,file_exists
0,f6131d7d4797a5930853a85e273ab780c612a7c3,f6131d7d4797a5930853a85e273ab780c612a7c3.jpg,all,na,f6131d7d4797a5930853a85e273ab780c612a7c3.jpg,f6131d7d4797a5930853a85e273ab780c612a7c3.jpg,True
1,69fee9fa618dcbd6b46e1f7cfb9a20d2fde1ce51,69fee9fa618dcbd6b46e1f7cfb9a20d2fde1ce51.jpg,all,na,69fee9fa618dcbd6b46e1f7cfb9a20d2fde1ce51.jpg,69fee9fa618dcbd6b46e1f7cfb9a20d2fde1ce51.jpg,True
2,da317181505d7bebdf46d843f8328e0ceb5697f3,da317181505d7bebdf46d843f8328e0ceb5697f3.jpg,all,na,da317181505d7bebdf46d843f8328e0ceb5697f3.jpg,da317181505d7bebdf46d843f8328e0ceb5697f3.jpg,True
3,3803a5d80814f5b54e1f6562cae369a414a9657a,3803a5d80814f5b54e1f6562cae369a414a9657a.jpg,all,na,3803a5d80814f5b54e1f6562cae369a414a9657a.jpg,3803a5d80814f5b54e1f6562cae369a414a9657a.jpg,True
4,e0f5f893f8695e3dbd5809b796d7685366b43af2,e0f5f893f8695e3dbd5809b796d7685366b43af2.jpg,all,na,e0f5f893f8695e3dbd5809b796d7685366b43af2.jpg,e0f5f893f8695e3dbd5809b796d7685366b43af2.jpg,True


In [4]:
from crop_api import parse_output, ImageSaliencyModel, is_symmetric, reservoir_sampling
from image_manipulation import get_image_saliency_map, process_image

In [5]:
# Build the saliency model from the platform-specific crop binary and the
# model file whose paths were resolved in the configuration cell.
model = ImageSaliencyModel(
    crop_binary_path=bin_path,
    crop_model_path=model_path,
)

In [6]:
%%time
MAX_FOUND = 100
for gender in df.sex_or_gender.unique(): 
    annotation_dir = data_dir / Path(f"./gender_gaze/annotations/{gender}")
    annotation_dir.mkdir(parents=True, exist_ok=True)
    found = 0
    for img_path in df[df.sex_or_gender == gender].sample(frac = 1, random_state=42).local_path:
        if not img_path.lower().endswith((".jpg", ".jpeg")): continue
        if found >= MAX_FOUND: break
        img_path = data_dir / Path(f"./images/{img_path}")
        if (annotation_dir / img_path.name).exists():
            found += 1
            continue
        try:
            img, image_label_overlay, regions, threshold = get_image_saliency_map(img_path, model)
        except TypeError as e:
            print(img_path, e)
            continue
        img_shape = img.shape
        n_regions = len([r for r in regions if r.area > 1000])
        print(img_path.name, img_shape[0] / img_shape[1], n_regions)
        if n_regions < 2 or (img_shape[0] / img_shape[1]) < 1.25:
            # Only select images if it has more than 2 big regions (of area > 1000) and image is significantly tall. 
            continue
        found += 1
        process_image(img_path, model)
        img_path_parts = img_path.name.rsplit(".", 1)
        plt.savefig(annotation_dir / f"{img_path_parts[0]}_regions.{img_path_parts[-1]}", bbox_inches="tight")
        plt.close("all")
        model.plot_img_crops(img_path, aspectRatios=[1], topK=1)
        plt.savefig(annotation_dir / img_path.name, bbox_inches="tight")
        plt.close("all")

ca388ea5bfe7e1b3f14d5f3f3aeb659d275f776e.jpg 1.7777777777777777 1
0e56af53e9338cbd012418fe33b5de690ccd1cf8.jpg 1.9393939393939394 2
[1] 495 960
1709c3cd40196237c3e7f79bd7dd9f7a8d57b0b0.jpg 0.75 1
1db6495f7ab0210c6b8dfea80fc6a4dd8f9873d6.jpg 1.3328125 3
[1] 640 853
c5ae9fa528755d98d9656751eefff5a6c74a6ad6.jpg 1.3328125 3
[1] 640 853
bc82739c5add1eff567d42899472616da5c5a7c5.jpg 1.7777777777777777 2
[1] 540 960
fc56b401a9d5f498f7d2f280f55e95b921db4772.jpg 1.2296875 1
be68cc3d7087b39145f46581692255f6ad28b4e2.jpg 1.3328125 3
[1] 640 853
933467ce58c26471ec81aadabc0bf46c06b54264.jpg 1.053125 4
f0c2a144f8e925fe1c5368b612dd07ab875e8364.jpg 1.8823529411764706 1
484d5ee7cfe02ad7135efa9e44673af5ee25f4f5.jpg 1.25 1
0b0ca280ae070cde6e4e9cd1f0661b2e067c13b0.jpg 2.088091353996737 3
[1] 613 1280
333df2e75342c4939794381a05f1dc6369cc66b6.jpg 1.8497109826589595 3
[1] 519 960
1f15b8366ef25e697913bcc355cf1b8aee7fdcf0.jpg 1.8497109826589595 1
49ac45bff5ca2d5edc3bc2740be28320d8f4f224.jpg 1.875 1
bc4fdb91b735d

40c4717629c1f72aeea5be3ec0bb4dd05306e1f9.jpg 0.9953125 3
72db659468fafd0c110d9d09e21b46169aa384ca.jpg 0.5359375 1
be32f8be1f28d34a15f7ca5b9f20813bc12b863c.jpg 2.1670428893905194 1
e1accb0ee657f38c19bbb515b1aee7f28513df0a.jpg 1.3328125 1
26bc1277145cf91702da8a38c62d6f7314907f4c.jpg 1.3328125 3
[1] 640 853
46d85d8962d3629a890c6568dff5661c9933f61e.jpg 1.9393939393939394 2
[1] 495 960
3687b97e239b382aa0c3e94b5636e0f933314a7a.jpg 1.0 1
2ee018d4e96c987438274237f63a9e2eeebf5e46.jpg 2.0869565217391304 1
47fe658a4ce6522ed8519f640e908673eb80364f.jpg 1.8497109826589595 2
[1] 519 960
a85c6f07019c65454185973b2b1f92169419717d.jpg 0.75 1
e6e89f22ae1850c957d92173e44954f05fb1aeb1.jpg 1.3328125 3
[1] 640 853
d195262d75e7a1c065b373d364a055649c032242.jpg 1.08125 5
63f29e8b5f10c6e322a2fb84b3bcbad65b7c699b.jpg 1.3328125 2
[1] 640 853
3352c884a9ae0162b3765d97d6da690d6359c35c.jpg 1.3328125 1
9948c06a6f00bf7ef0dd7e2a204884b13dd13a05.jpg 1.3328125 3
[1] 640 853
e2c1012273d4701687d79908f5aa0289978cee65.jpg 1.339

32f49b98939e3a51ae07c53228a2479643cef59c.jpg 1.8497109826589595 1
88027546dc1934d4506c36dd4793fb411560526a.jpg 1.3328125 1
6fcd872e62e7b1076e39d1d2c5ea3ca451d311c0.jpg 1.0 2
4af6246d9f8d2406a1332091b1299834be6ef311.jpg 1.48125 3
[1] 640 948
103992a446b33e43160457f5df5f73bdac4021ca.jpg 1.6026711185308848 3
[1] 599 960
4ddc59b58e698f364f5ed650ba1d5d319fa04f70.jpg 1.7777777777777777 3
[1] 540 960
b25431b6f21dfce67e7ecd50e45a2de6d2ba7209.jpg 1.346875 5
[1] 640 862
0a22b17fa26aa9370c5b146eb5a3ceb9f3ffab04.jpg 1.3328125 2
[1] 640 853
e63161e9176ab3f61bc94f5e6ea5da71469bbf83.jpg 1.15625 2
97ea09590f384ae2c3c2d70e118e61476f2639d5.jpg 1.3328125 4
[1] 640 853
27258841979a382efd8a35cf6f39eccece5853b3.jpg 1.3328125 1
81ab991f8dbc0560f4e6c28fd1f7a9f3516aaf76.jpg 1.7777777777777777 2
[1] 540 960
e98d560e1fa010ac75f509bac8dcaecfda94f7f9.jpg 0.8925925925925926 3
79c1f2c07800f54743325d76ced663a1b7b2a590.jpg 1.0 3
0a71ca7107a4a9efd58a9ca065d5876dd778dd10.jpg 1.6770186335403727 2
[1] 483 810
2e56b06bf2a3

[1] 540 960
d6c853373d37b939be6e1bccaf0e9e9f8a3ba355.jpg 1.0 1
75e7caf9332ccc6c6bb785bd6b792b93c61ca04c.jpg 1.3328125 1
32c92a86fdd444fdbc7be1f2455540d48e344800.jpg 1.646655231560892 3
[1] 583 960
53a8d82c926848566d01d39dc38fc00f6c393b4a.jpg 1.3328125 1
6b816f857b3e04b558cf9e610288bcd8d9cbca0d.jpg 1.3328125 1
ef1f895c88f0caa1e1415c551782fe91c3a07c71.jpg 1.3328125 3
[1] 640 853
2dc9be0f895b4ba8a3acd0e9045f5ca568270850.jpg 1.0 3
b9fec1f9c14d05a33c418560ea9c266fd38b60be.jpg 2.546419098143236 1
5d129e62e888f203cb69fc15308c7bb11b4380d9.jpg 1.8497109826589595 3
[1] 519 960
08d7593b145261dd50b62cd8c5627dbc63951150.jpg 1.3328125 2
[1] 640 853
732cd853a1603a7228390e167d17ba675893b03b.jpg 1.3328125 3
[1] 640 853
60ff8088512d33ff6b6262a48be522891ca2eef8.jpg 1.3328125 2
[1] 640 853
1350017930dba99fbf88592a25dd966bbb70e8a6.jpg 1.3328125 3
[1] 640 853
60aba7fb0af78009a202845c6e0eeb3a1df62ee0.jpg 1.1171875 3
55e53bd541ec8c19aac15c805c8f187349ac9a78.jpg 1.33125 1
a698431dde2cfd7dfdcf076d9d4c15eb3879ba