In [1]:
import pandas as pd
import plotly.express as px
import networkx as nx
from tqdm import tqdm

In [2]:
train_annot_path = "../data/epic-kitchens-100-annotations/EPIC_100_train.pkl"
test_annot_path = "../data/epic-kitchens-100-annotations/EPIC_100_validation.pkl"

verb_classes = pd.read_csv("../data/epic-kitchens-100-annotations/EPIC_100_verb_classes.csv", index_col=0)
noun_classes = pd.read_csv("../data/epic-kitchens-100-annotations/EPIC_100_noun_classes.csv", index_col=0)

In [3]:
verb_classes.key

id
0         take
1          put
2         wash
3         open
4        close
        ...   
92     prepare
93        bake
94        mark
95        bend
96    unfreeze
Name: key, Length: 97, dtype: object

In [4]:
train_df = pd.read_pickle(train_annot_path)
train_df.head()

Unnamed: 0_level_0,participant_id,video_id,narration_timestamp,start_timestamp,stop_timestamp,start_frame,stop_frame,narration,verb,verb_class,noun,noun_class,all_nouns,all_noun_classes
narration_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
P01_01_0,P01,P01_01,00:00:01.089,00:00:00.14,00:00:03.37,8,202,open door,open,3,door,3,[door],[3]
P01_01_1,P01,P01_01,00:00:02.629,00:00:04.37,00:00:06.17,262,370,turn on light,turn-on,6,light,114,[light],[114]
P01_01_10,P01,P01_01,00:00:23.340,00:00:24.97,00:00:26.20,1498,1572,open drawer,open,3,drawer,8,[drawer],[8]
P01_01_100,P01,P01_01,00:07:57.919,00:07:59.75,00:08:00.88,28785,28852,take cup,take,0,cup,13,[cup],[13]
P01_01_101,P01,P01_01,00:08:00.020,00:08:01.47,00:08:02.21,28888,28932,open cupboard,open,3,cupboard,3,[cupboard],[3]


In [5]:
train_df["verb_class_name"] = train_df["verb_class"].map(verb_classes.key)
train_df["noun_class_name"] = train_df["noun_class"].map(noun_classes.key)
train_df["action"] = "(" + train_df["verb_class_name"] + ", " + train_df["noun_class_name"] + ")"
train_df.head()

Unnamed: 0_level_0,participant_id,video_id,narration_timestamp,start_timestamp,stop_timestamp,start_frame,stop_frame,narration,verb,verb_class,noun,noun_class,all_nouns,all_noun_classes,verb_class_name,noun_class_name,action
narration_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
P01_01_0,P01,P01_01,00:00:01.089,00:00:00.14,00:00:03.37,8,202,open door,open,3,door,3,[door],[3],open,cupboard,"(open, cupboard)"
P01_01_1,P01,P01_01,00:00:02.629,00:00:04.37,00:00:06.17,262,370,turn on light,turn-on,6,light,114,[light],[114],turn-on,light,"(turn-on, light)"
P01_01_10,P01,P01_01,00:00:23.340,00:00:24.97,00:00:26.20,1498,1572,open drawer,open,3,drawer,8,[drawer],[8],open,drawer,"(open, drawer)"
P01_01_100,P01,P01_01,00:07:57.919,00:07:59.75,00:08:00.88,28785,28852,take cup,take,0,cup,13,[cup],[13],take,cup,"(take, cup)"
P01_01_101,P01,P01_01,00:08:00.020,00:08:01.47,00:08:02.21,28888,28932,open cupboard,open,3,cupboard,3,[cupboard],[3],open,cupboard,"(open, cupboard)"


In [6]:
verbs = verb_classes.key
nouns = noun_classes.key


def get_cooccurrence_matrix(df, verbs, nouns):
    cooccurrence = pd.DataFrame(index=verbs, columns=nouns)
    cooccurrence = cooccurrence.fillna(0)

    for _, row in tqdm(df.iterrows(), total=len(df), desc="Building cooccurrence matrix", unit=" row(s)"):
        cooccurrence.at[row["verb_class_name"], row["noun_class_name"]] += 1

    return cooccurrence


cooccurrence = get_cooccurrence_matrix(
    df=train_df,
    verbs=verbs,
    nouns=nouns,
)

Building cooccurrence matrix: 100%|██████████| 67217/67217 [00:03<00:00, 21105.09 row(s)/s]


In [7]:
fig = px.imshow(
    cooccurrence,
    y=cooccurrence.index,
    x=cooccurrence.columns,
    title="Co-occurrence of verb-noun pairs in training set",
    width=1200,
    height=1200,
    color_continuous_scale=["white", "purple"],
    aspect="auto",
    labels={"x": "Nouns", "y": "Verbs"},
)
fig.write_image("../res/coocc/png/matrix.png", scale=2.0)
fig.show()

In [8]:
cooccurrence.index

Index(['take', 'put', 'wash', 'open', 'close', 'insert', 'turn-on', 'cut',
       'turn-off', 'pour', 'mix', 'move', 'remove', 'throw', 'dry', 'shake',
       'scoop', 'adjust', 'squeeze', 'peel', 'empty', 'press', 'flip', 'turn',
       'check', 'scrape', 'fill', 'apply', 'fold', 'scrub', 'break', 'pull',
       'pat', 'lift', 'hold', 'eat', 'wrap', 'filter', 'look', 'unroll',
       'sort', 'hang', 'sprinkle', 'rip', 'spray', 'cook', 'add', 'roll',
       'search', 'crush', 'stretch', 'knead', 'divide', 'set', 'feel', 'rub',
       'soak', 'brush', 'sharpen', 'drop', 'drink', 'slide', 'water', 'gather',
       'attach', 'turn-down', 'coat', 'transition', 'wear', 'measure',
       'increase', 'unscrew', 'wait', 'lower', 'form', 'smell', 'use', 'grate',
       'screw', 'let-go', 'finish', 'stab', 'serve', 'uncover', 'unwrap',
       'choose', 'lock', 'flatten', 'switch', 'carry', 'season', 'unlock',
       'prepare', 'bake', 'mark', 'bend', 'unfreeze'],
      dtype='object', name='key'

In [9]:
dimensions = ["verb_class_name", "noun_class_name"]

verbs = train_df["verb_class_name"]
nouns = train_df["noun_class_name"]

verbs = verbs[::-1]
nouns = nouns[::-1]

df = pd.DataFrame({"Verbs": verbs, "Nouns": nouns})
df

Unnamed: 0_level_0,Verbs,Nouns
narration_id,Unnamed: 1_level_1,Unnamed: 2_level_1
P37_103_9,break,chicken
P37_103_8,break,chicken
P37_103_73,pour,water
P37_103_72,take,pan
P37_103_71,turn-off,tap
...,...,...
P01_01_101,open,cupboard
P01_01_100,take,cup
P01_01_10,open,drawer
P01_01_1,turn-on,light


In [10]:
train_df["verb_class_name"].value_counts()

verb_class_name
take        14848
put         12225
wash         6927
open         4870
close        3483
            ...  
mark            3
bend            2
unfreeze        2
season          2
bake            1
Name: count, Length: 97, dtype: int64

In [11]:
import os


interesting_verbs = [
    "turn-off",
    "turn-on",
    "take",
    "put",
    "wash",
    "close",
    "open"
]

output_path = "../res/coocc"
formats = ["png", "pdf", "svg"]
for form in formats:
    os.makedirs(os.path.join(output_path, form), exist_ok=True)

for verb in interesting_verbs:
    fig = px.parallel_categories(
        train_df.loc[train_df["verb_class_name"] == verb],
        dimensions=["verb_class_name", "noun_class_name"],
        height=800,
        width=1000,
        labels={"verb_class_name": "Verb", "noun_class_name": "Noun"},
        title=f"Action co-occurrence in EK-100 training set for verb <b>{verb}</b>",
    )
    # fig.update_traces(dimensions=[None, {"categoryorder": "category descending"}])
    fig.show()

    fig.write_image(os.path.join(output_path, "png", f"{verb}_cooccurrence.png"), scale=2.0)
    fig.write_image(os.path.join(output_path, "pdf", f"{verb}_cooccurrence.pdf"))
    fig.write_image(os.path.join(output_path, "svg", f"{verb}_cooccurrence.svg"))

## Print co-occurrences V->N and N->V

In [27]:
cooccurrence

key,tap,spoon,plate,cupboard,knife,pan,lid,bowl,drawer,sponge,...,pillow,pen,face,plum,whiskey,door:kitchen,tape,camera,cd,extract:vanilla
key,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
take,2,886,833,7,802,499,483,533,5,529,...,0,0,0,1,0,0,0,0,1,0
put,1,685,773,4,635,507,600,504,8,376,...,0,1,0,1,0,0,0,0,0,0
wash,19,416,549,3,490,396,163,252,0,374,...,0,0,0,0,0,0,0,0,0,0
open,9,0,6,1342,2,2,105,4,936,0,...,0,0,0,0,0,0,0,0,0,0
close,5,0,1,919,0,3,98,2,643,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
prepare,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
bake,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
mark,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
bend,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [32]:
v_to_n = cooccurrence.div(cooccurrence.sum(axis=1), axis=0)
n_to_v = cooccurrence.div(cooccurrence.sum(axis=0), axis=1)

In [33]:
v_to_n

key,tap,spoon,plate,cupboard,knife,pan,lid,bowl,drawer,sponge,...,pillow,pen,face,plum,whiskey,door:kitchen,tape,camera,cd,extract:vanilla
key,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
take,0.000135,0.059671,0.056102,0.000471,0.054014,0.033607,0.032530,0.035897,0.000337,0.035628,...,0.0,0.000000,0.0,0.000067,0.0,0.0,0.0,0.0,0.000067,0.0
put,0.000082,0.056033,0.063231,0.000327,0.051943,0.041472,0.049080,0.041227,0.000654,0.030757,...,0.0,0.000082,0.0,0.000082,0.0,0.0,0.0,0.0,0.000000,0.0
wash,0.002743,0.060055,0.079255,0.000433,0.070738,0.057168,0.023531,0.036379,0.000000,0.053992,...,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0
open,0.001848,0.000000,0.001232,0.275565,0.000411,0.000411,0.021561,0.000821,0.192197,0.000000,...,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0
close,0.001436,0.000000,0.000287,0.263853,0.000000,0.000861,0.028137,0.000574,0.184611,0.000000,...,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
prepare,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0
bake,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0
mark,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0
bend,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0


noun_class_name
tap               3617
plate             2457
spoon             2351
cupboard          2312
knife             2210
pan               2178
lid               1737
bowl              1688
drawer            1639
glass             1534
sponge            1523
hand              1434
fridge            1220
fork              1085
cup               1057
cloth              998
bottle             981
board:chopping     928
onion              925
bag                919
container          910
dough              884
liquid:washing     857
hob                848
spatula            841
box                835
meat               758
package            725
water              689
pot                642
cheese             608
oil                562
colander           532
salt               527
tray               513
food               511
bin                509
bread              497
potato             483
top                453
jar                451
tomato             450
paper             

In [75]:
n = 40
top_n_verbs = train_df["verb_class_name"].value_counts()[:n]
top_n_nouns = train_df["noun_class_name"].value_counts()[:n]
data = v_to_n.loc[top_n_verbs.index, top_n_nouns.index]
fig = px.imshow(
    data,
    width=max(20 * n, 1000),
    height=max(20 * n, 1000),
    title="Normalised verb-to-noun co-occurrence matrix",
    color_continuous_scale=["white", "coral"],
    labels={"x": "Nouns", "y": "Verbs"},
)
fig.update_xaxes(tickmode='array', tickvals=data.columns, ticktext=data.columns)
fig.update_yaxes(tickmode='array', tickvals=data.index, ticktext=data.index)

fig.show()

fig.write_image(f"../res/coocc/png/top{n}_v_to_n.png")