In [1]:
%load_ext autoreload
%autoreload 2

In [177]:
import json
import terra
import pandas as pd
from meerkat.contrib.visual_genome import read_visual_genome_dps
from domino.data.visual_genome import get_dps, ATTRIBUTE_GROUPS
from domino.slices.correlation import induce_correlation
import numpy as np

In [3]:
# Load the Visual Genome panels via get_dps() and unpack the four returned
# values (presumably images, attributes, objects and relationships, judging
# by the names — TODO confirm against domino.data.visual_genome).
image_dp, attr_dp, object_dp, rel_dp = get_dps()

In [4]:
# Keep only objects that have at least one annotated color attribute.
has_color = attr_dp["attribute"].isin(ATTRIBUTE_GROUPS["colors"])
colored_objects = attr_dp["object_id"][has_color]
in_colored = np.isin(object_dp["object_id"], colored_objects)
dp = object_dp.lz[in_colored]

# Binary target: 1 where the object's synset name is "car.n.01", else 0.
is_car = dp["syn_name"].isin(["car.n.01"])
dp["target"] = is_car.values.astype(int)

# Binary correlate: 1 where the object id carries a "red" annotation, else 0.
is_red_annotation = attr_dp["attribute"].isin(["red"])
red_objects = attr_dp["object_id"][is_red_annotation]
dp["correlate"] = np.isin(dp["object_id"], red_objects).astype(int)

In [37]:
# Count the entries of object_dp["object_id"] that also appear in attr_objects.
# NOTE(review): attr_objects is first assigned in a later cell of this notebook
# (where it ends up bound to rows of object_dp, not ids), so this cell cannot
# run on a fresh kernel executed top to bottom — confirm which ids were meant.
np.isin(object_dp["object_id"], attr_objects).astype(int).sum()

6647

In [42]:
# Ids of every object that carries a "brick" attribute annotation.
is_brick = attr_dp["attribute"].isin(["brick"])
brick_ids = attr_dp["object_id"][is_brick]

# Bind attr_objects to the rows of object_dp whose id is in that set.
row_mask = np.isin(object_dp["object_id"], brick_ids).astype(bool)
attr_objects = object_dp.lz[row_mask]

# Ten most frequent synset names among those rows.
synset_counts = attr_objects["syn_name"].data.value_counts()
synset_counts.head(10)

building.n.01    1940
                 1402
wall.n.01        1354
sidewalk.n.01     346
house.n.01        152
chimney.n.01      132
tower.n.01        128
walk.n.05         102
land.n.04          85
street.n.01        75
Name: syn_name, dtype: int64

In [145]:
# Keep only objects annotated with at least one attribute from the selected group.
attr_group = "gender"
objects = attr_dp["object_id"][
    attr_dp["attribute"].isin(ATTRIBUTE_GROUPS[attr_group])
]
dp = object_dp.lz[np.isin(object_dp["object_id"], objects)]

# Target column: 1 where the object's synset name is "building.n.01", else 0.
# NOTE(review): a building target combined with the "gender" attribute group
# looks unusual — confirm this pairing is intended.
dp["target"] = dp["syn_name"].isin(["building.n.01"]).values.astype(int)

# Correlate column: 1 where the object id carries a "male" annotation, else 0.
male_objects = attr_dp["object_id"][
    attr_dp["attribute"].isin(["male"])
]
dp["correlate"] = np.isin(dp["object_id"], male_objects).astype(int)

In [148]:
# Join object_dp with attr_dp on "object_id" and re-bind attr_dp to the result;
# later cells read both "attribute" and "syn_name" from attr_dp.
# NOTE(review): the cell overwrites its own input, so running it a second time
# merges the already-merged panel again — restart the kernel before re-running.
attr_dp = object_dp.merge(right=attr_dp, on="object_id")

In [183]:
# Get correlation slices.
#
# One challenge with using the attributes in Visual Genome is that annotators
# are free to label whatever attributes they choose. So, if an object isn't
# labeled with an attribute, it doesn't necessarily mean that it doesn't have
# that attribute – the annotator may have just chosen not to mention it. In
# other words, it's clear when the attribute is present, but unclear when it's
# not. We address this by forming groups of mutually exclusive attributes,
# e.g. {"long", "short"} or {"blue", "green", "red"}. The assumption we then
# make is that if any one of the attributes in a group is labeled for an
# object, then the rest of the attributes in that group are False.

# Attribute groups with fewer annotated rows than this are skipped entirely.
MIN_GROUP_ROWS = 20000
# An (attribute, syn_name) pair is kept only if it occurs more often than this.
MIN_PAIR_COUNT = 300

dfs = []
for name, group in ATTRIBUTE_GROUPS.items():
    # Rows of attr_dp whose attribute belongs to the current group.
    group_dp = attr_dp.lz[attr_dp["attribute"].isin(group)]

    if len(group_dp) < MIN_GROUP_ROWS:
        continue

    pairs = group_dp[["attribute", "syn_name"]].to_pandas()
    # Rows whose synset name is the empty string are excluded from the counts.
    pairs = pairs[pairs["syn_name"] != ""]
    counts = pairs.value_counts()
    # Naming the count column explicitly gives "count" whether or not the
    # value_counts() result already carries a name.
    frequent = counts[counts > MIN_PAIR_COUNT].reset_index(name="count")
    frequent["group"] = name
    dfs.append(frequent)

# TODO: for each frequent (attribute, syn_name) pair, add binary "target" and
# "correlate" columns to the group's panel and pass it to induce_correlation
# (attr_a="target", attr_b="correlate", mu_a=0.1, mu_b=0.1, n=10000). An earlier
# draft of that step sat behind an unconditional `continue` and never ran.

# One row per frequent pair; each group's rows keep their own 0-based index.
df = pd.concat(dfs)


In [191]:
# Show only the rows of df that belong to the "pose" attribute group.
pose_rows = df["group"] == "pose"
df[pose_rows]

Unnamed: 0,attribute,syn_name,count,group
0,standing,man.n.01,2549,pose
1,standing,person.n.01,1921,pose
2,sitting,man.n.01,1476,pose
3,walking,person.n.01,1340,pose
4,walking,man.n.01,1314,pose
5,standing,woman.n.01,960,pose
6,sitting,person.n.01,948,pose
7,sitting,woman.n.01,886,pose
8,standing,people.n.01,843,pose
9,walking,people.n.01,782,pose


In [188]:
# Number of rows in df per attribute group, indexed by the "group" column.
# (Indexing df with itself is not a valid boolean mask and raises.)
df.groupby("group").size()

group
activity     12
colors      517
darkness     11
height       10
length       11
material     32
pose         23
size         43
dtype: int64

In [68]:
# Join common_object_dp with attr_dp on "object_id" and display the result.
# NOTE(review): common_object_dp is not assigned anywhere in the visible
# notebook, so this cell relies on kernel state from a cell that is no longer
# present — it will fail on a fresh top-to-bottom run.
# NOTE(review): an earlier cell already re-bound attr_dp to an object/attribute
# merge; merging object columns in again here may duplicate them — confirm.
dp = common_object_dp.merge(attr_dp, on="object_id")
dp

In [72]:
# Copy the "attribute" and "syn_name" columns of dp into a pandas DataFrame
# and display it.
# NOTE(review): this re-binds df, which an earlier cell used for the per-group
# frequent-pair table; cells that display that table must run before this one.
df = dp[["attribute", "syn_name"]].to_pandas()
df

Unnamed: 0,attribute,syn_name
0,green,clock.n.01
1,tall,clock.n.01
2,sidewalk,street.n.01
3,grey,gym_shoe.n.01
4,off,headlight.n.01
...,...,...
2055329,large,bus.n.01
2055330,green,bus.n.01
2055331,crystal clear,sky.n.01
2055332,blue,sky.n.01


In [93]:
# Occurrence count of every (syn_name, attribute) combination, largest first.
pair_sizes = df.groupby(["syn_name", "attribute"]).size()
ranked_pairs = pair_sizes.sort_values(ascending=False).reset_index()

# Drop combinations whose syn_name is the empty string. The index assigned by
# reset_index() above is kept, so gaps remain where rows were removed.
has_synset = ranked_pairs["syn_name"] != ""
object_attr_pairs = ranked_pairs[has_synset]

In [100]:
# Display the (syn_name, attribute) pair counts held in object_attr_pairs.
object_attr_pairs

Unnamed: 0,syn_name,attribute,0
5,sky.n.01,blue,17691
6,cloud.n.01,white,16761
8,grass.n.01,green,11151
10,tree.n.01,green,10870
11,leaf.n.01,green,10690
...,...,...,...
201027,house.n.01,baby blue,1
201028,house.n.01,attractive,1
201029,house.n.01,at water's edge,1
201030,house.n.01,at background,1


In [None]:
# Display object_dp for inspection.
object_dp