In [2]:
import clip
import torch
from PIL import Image

device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load("ViT-B/32", device=device)

In [13]:
image_path = "../image-text/try.png"
image = preprocess(Image.open(image_path)).unsqueeze(0).to(device)

labels = ["an asian woman", "a white woman", "a black woman", "a latino woman"]
text = clip.tokenize(labels).to(device)

with torch.no_grad():
    image_features = model.encode_image(image)
    text_features = model.encode_text(text)

    logits_per_image, _ = model(image, text)
    probabilities = logits_per_image.softmax(dim=-1).cpu().numpy()

max_idx = probabilities.argmax()
print(f"The image most likely contains: {labels[max_idx]} with a confidence of {probabilities[0][max_idx]:.2f}")

The image most likely contains: an asian woman with a confidence of 0.73


In [4]:
import pandas as pd
file_path = "../fairface/fairface_label_val.csv"
data = pd.read_csv(file_path)

age_prompts = data['age'].unique()
gender_prompts = data['gender'].unique()
race_prompts = data['race'].unique()

age_prompts, gender_prompts, race_prompts

(array(['3-9', '50-59', '30-39', '20-29', 'more than 70', '40-49', '10-19',
        '60-69', '0-2'], dtype=object),
 array(['Male', 'Female'], dtype=object),
 array(['East Asian', 'White', 'Latino_Hispanic', 'Southeast Asian',
        'Black', 'Indian', 'Middle Eastern'], dtype=object))

In [5]:
import os
from sklearn.metrics import accuracy_score

age_prompts = ['3-9', '50-59', '30-39', '20-29', 'more than 70', '40-49', '10-19', '60-69', '0-2']
gender_prompts = ['Male', 'Female']
race_prompts = ['East Asian', 'White', 'Latino_Hispanic', 'Southeast Asian', 'Black', 'Indian', 'Middle Eastern']

csv_file_path = "../fairface/fairface_label_val.csv"
data = pd.read_csv(csv_file_path)

In [14]:
# data = data[:10]

In [15]:
from tqdm import tqdm

true_genders = []
true_races = []
predicted_genders = []
predicted_races = []

for index, row in tqdm(data.iterrows(), total=len(data), desc="Processing data"):

    image_path = os.path.join("../fairface/fairface-img-margin025-trainval/", row['file'])
    print(image_path)
    image = preprocess(Image.open(image_path)).unsqueeze(0).to(device)
    image_features = model.encode_image(image)
    
    # Encode image and text
    with torch.no_grad():

        # Gender
        text_gender = clip.tokenize(gender_prompts).to(device)
        text_features = model.encode_text(text_gender)
        logits_gender = (image_features @ text_features.T).softmax(dim=-1)
        predicted_genders.append(gender_prompts[logits_gender.argmax().item()])
        
        # Race
        text_race = clip.tokenize(race_prompts).to(device)
        text_features = model.encode_text(text_race)
        logits_race = (image_features @ text_features.T).softmax(dim=-1)
        predicted_races.append(race_prompts[logits_race.argmax().item()])
    
    # Append true labels
    true_genders.append(row['gender'])
    true_races.append(row['race'])


# Calculate accuracy
gender_accuracy = accuracy_score(true_genders, predicted_genders)
race_accuracy = accuracy_score(true_races, predicted_races)

print(f"Gender Prediction Accuracy: {gender_accuracy:.2%}")
print(f"Race Prediction Accuracy: {race_accuracy:.2%}")

Processing data:   0%|          | 0/10 [00:00<?, ?it/s]

../fairface/fairface-img-margin025-trainval/val/1.jpg


Processing data:  10%|█         | 1/10 [00:01<00:12,  1.34s/it]

../fairface/fairface-img-margin025-trainval/val/2.jpg


Processing data:  20%|██        | 2/10 [00:02<00:10,  1.31s/it]

../fairface/fairface-img-margin025-trainval/val/3.jpg


Processing data:  30%|███       | 3/10 [00:03<00:08,  1.15s/it]

../fairface/fairface-img-margin025-trainval/val/4.jpg


Processing data:  40%|████      | 4/10 [00:04<00:06,  1.12s/it]

../fairface/fairface-img-margin025-trainval/val/5.jpg


Processing data:  50%|█████     | 5/10 [00:05<00:05,  1.11s/it]

../fairface/fairface-img-margin025-trainval/val/6.jpg


Processing data:  60%|██████    | 6/10 [00:06<00:04,  1.08s/it]

../fairface/fairface-img-margin025-trainval/val/7.jpg


Processing data:  70%|███████   | 7/10 [00:07<00:03,  1.04s/it]

../fairface/fairface-img-margin025-trainval/val/8.jpg


Processing data:  80%|████████  | 8/10 [00:08<00:02,  1.02s/it]

../fairface/fairface-img-margin025-trainval/val/9.jpg


Processing data:  90%|█████████ | 9/10 [00:09<00:01,  1.05s/it]

../fairface/fairface-img-margin025-trainval/val/10.jpg


Processing data: 100%|██████████| 10/10 [00:10<00:00,  1.09s/it]

Gender Prediction Accuracy: 90.00%
Race Prediction Accuracy: 40.00%



