In [299]:
import numpy as np
import pandas as pd
from os import path
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import glob
from bs4 import BeautifulSoup
import requests
import random
import pickle
import re
import spacy
import json
from spacy import displacy
import collections
from collections import Counter
from collections import OrderedDict
from operator import itemgetter
from matplotlib.lines import Line2D
from tqdm.notebook import tqdm as tqdm_notebook
from transformers import DistilBertModel, DistilBertTokenizer, logging
from itertools import islice
import itertools
import torch
import sys
import os
import networkx as nx
from networkx.drawing.nx_agraph import graphviz_layout
from netgraph import Graph
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors

In [2]:
# Environment settings
# Set before the tokenizer is created below.
os.environ["TOKENIZERS_PARALLELISM"] = "false"
# spaCy pipeline used by every parsing step further down (`nlp(...)`).
nlp = spacy.load('en_core_web_trf')
# Only report errors from the transformers library.
logging.set_verbosity_error()
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
# Use the first CUDA device when available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Make the project-local modules importable (web_crawler, predict_model, build_features).
sys.path.insert(0, '../src/models/')
sys.path.insert(0, '../src/features/')

In [3]:
# Own functions
from web_crawler import web_crawler
from predict_model import loadBERT
from predict_model import SpanPredictor as classify

In [4]:
# Model
# Load the saved weights through the project helper `loadBERT`.
# NOTE(review): `model` is rebound to `load_simBERT()` in the COMPARISON section,
# so cells that need this model must run before that cell.
model = loadBERT("../models/", 'saved_weights_inf_FIXED_boot_beta80.pt')

CPU Success


In [11]:
def text_preparation(species, text):
    """Normalise one raw description sentence before it is parsed.

    Every mention of the species name (the full name, or any leading prefix of
    it) is replaced by ``'the species'``, a fixed list of regex clean-ups is
    applied, and the sentence is terminated with a period and capitalised.

    Args:
        species: Species name as a whitespace-separated string.
        text: Raw sentence.

    Returns:
        The cleaned sentence. ``str.capitalize`` upper-cases the first
        character and lower-cases the rest.
    """
    cleaners = [(r'(?<!\d)\.(?!\d)', ' '),
                (r'\s×\s', ' times '),
                (r'\s+c\s+', ' '),
                (r'â\x80\x93', ' to '),
                (r'\xa0', ' '),
                (r'\x97', ''),
                (r'\s{2,}', ' '),
                (r'(\D)(\.)', r'\1 '),
                (r'(\d)(\.)(\D)', r'\1 \3'),
                (r'(long,)', r'long and'),
                (r'(wide,)', r'wide and'),
               ]

    species_parts = species.split()
    # Leading prefixes of the name, longest first, so the full name is
    # replaced before its shorter prefixes.
    candidates = [' '.join(species_parts[:end])
                  for end in range(len(species_parts), 0, -1)]
    for candidate in candidates:
        # Literal replacement: the name is data, not a regex. Treating it as a
        # pattern made names with brackets fail to compile or match the wrong text.
        text = text.replace(candidate, 'the species')
    for cleaner, replacement in cleaners:
        text = re.sub(cleaner, replacement, text)
    text = f'{text.strip()}.'
    return text.capitalize()


def dict_sentence(t):
    """Build a nested dict of the dependency subtree below token ``t``.

    Each kept child maps to the dict built recursively from its own children.
    A child (and everything below it) is dropped when its dependency label,
    part-of-speech tag or lemma is on one of the skip lists.
    """
    skipped_deps = (
        'det', 'cc', 'punct',
        'poss',
        'nmod', 'ccomp',  # Gives too many errors
        'conj',
        'prt',  # Verb things
    )
    skipped_pos = ('DET', 'PUNCT', 'PART', 'ADV', 'SCONJ', 'PRON')
    skipped_lemmas = ('on', 'of', 'as', 'from', 'by', 'onto')

    subtree = {}
    for child in t.children:
        if child.dep_ in skipped_deps:
            continue
        if child.pos_ in skipped_pos:
            continue
        if child.lemma_ in skipped_lemmas:
            continue
        subtree[child] = dict_sentence(child)
    return subtree

def undict_to_tuples(d, acc=None):
    """Yield every root-to-leaf key path of a nested dict as a list.

    Args:
        d: Nested dict whose leaves are empty dicts.
        acc: Path prefix accumulated so far; callers normally omit it.

    Yields:
        One list of keys per leaf. An empty ``d`` yields ``acc`` itself.
    """
    # A fresh list per call: a shared mutable default would be handed to the
    # caller for an empty dict and could then be modified between calls.
    if acc is None:
        acc = []
    if d == {}:
        yield acc
    else:
        for k, v in d.items():
            yield from undict_to_tuples(v, acc + [k])
            
def undict_to_pairs(d):
    """Yield ``(parent, child)`` key pairs for every edge of a nested dict.

    All pairs for a key's direct children come first, followed by the pairs
    found deeper inside that key's sub-dict.
    """
    for parent, subtree in d.items():
        yield from ((parent, kid) for kid in subtree)
        yield from undict_to_pairs(subtree)

def dict_sentence_parent(t):
    """Return the nested child dict of ``t``'s first ancestor, without ``t``.

    Only applies when ``t`` is a subject or relative clause
    (``nsubj``, ``nsubjpass``, ``relcl``); otherwise ``None`` is returned.

    Returns:
        The dict produced by ``dict_sentence`` for the first ancestor with the
        entry for ``t`` removed, or ``None`` when ``t`` has another dependency
        label or no ancestor.
    """
    if t.dep_ not in ('nsubj', 'nsubjpass', 'relcl'):
        return None
    parent = next(iter(t.ancestors), None)
    if parent is None:
        return None
    parent_dict = dict_sentence(parent)
    # `dict_sentence` may already have filtered `t` out (e.g. by POS tag), so
    # drop it only when present instead of raising KeyError.
    parent_dict.pop(t, None)
    return parent_dict

def update_nested_dict(main_dict, new_dict):
    """Merge ``new_dict`` into ``main_dict`` one level deep, in place.

    For every key of ``new_dict`` the inner dict is merged into the inner dict
    stored under the same key in ``main_dict`` (created empty when missing).
    Returns ``main_dict``.
    """
    for key in new_dict:
        if key not in main_dict:
            main_dict[key] = {}
        main_dict[key].update(new_dict[key])
    return main_dict

def extract_compounds(t, doc):
    """Return the spans that end at ``t``, from shortest to longest.

    If ``t`` is itself a compound modifier, its first ancestor is used as the
    head instead. A child contributes a span start when it precedes the head
    and is either a ``compound`` dependent or has a lemma in ``compound_list``.

    Args:
        t: Head token, or a compound modifier of the head.
        doc: Sliceable document containing ``t``.

    Returns:
        A list of ``doc[start : head.i + 1]`` slices. The first is the head
        alone and the last starts at the left-most qualifying child.
    """
    if t.dep_ == 'compound':
        t = next(t.ancestors)
    # Parenthesised on purpose: without it `and` binds tighter than `or`, so
    # compound children located after the head passed the filter and produced
    # empty slices.
    indices = [child.i for child in t.children
               if (child.dep_ == 'compound' or child.lemma_ in compound_list)
               and child.i < t.i]
    indices.append(t.i)
    indices.sort(reverse=True)
    return [doc[idx : t.i + 1] for idx in indices]
            
            
def check_existance(t):
    """Return the glossary main part that token ``t`` belongs to, if any.

    A token qualifies only when its lemma is not in ``compound_list`` and its
    POS tag is ``NOUN`` or ``PROPN``. When several glossary entries list the
    lemma, the last one in glossary order is returned.

    Returns:
        The matching glossary key, or ``None``.
    """
    # These checks do not depend on the glossary entry, so do them once
    # instead of once per glossary key.
    if t.lemma_ in compound_list or t.pos_ not in ('NOUN', 'PROPN'):
        return None
    lemma = t.lemma_.lower().strip()
    item = None
    for mainpart, variants in glossary.items():
        if lemma in variants:
            item = mainpart
    return item


def clean_compounds(item_list, doc):
    """Drop compound modifiers from an item list, always keeping the first item.

    Every item after the first is removed when its token (the span root for
    spans) has a lemma in ``compound_list`` or the ``compound`` dependency.

    Args:
        item_list: Non-empty list of spaCy tokens and/or spans.
        doc: Unused; kept so existing callers keep working.

    Returns:
        A new list with the surviving items, in their original form.
    """
    new_item_list = [item_list[0]]
    for item_orig in item_list[1:]:
        if isinstance(item_orig, spacy.tokens.token.Token):
            token = item_orig
        else:
            token = item_orig.root
        if token.lemma_ in compound_list or token.dep_ == 'compound':
            continue
        # The previous NOUN branch computed a compound span that was never
        # used and then appended the same item as the fallback branch.
        new_item_list.append(item_orig)
    return new_item_list

def clean_measurements(info_list):
    """Split item lists holding several numeric tokens into one list per number.

    Lists with at most one ``NUM`` token are passed through unchanged. In lists
    with more, each pair of directly adjacent ``NUM`` tokens produces two
    output lists: the items collected so far followed by the first number, and
    the same items followed by the second number.

    Args:
        info_list: Iterable of lists whose items are spaCy tokens or spans.

    Returns:
        A new list of item lists.
    """
    new_item_list = []
    for item_list in info_list:
        
        # dep_ labels of the Token items tagged NUM; only their count is used.
        nums = [t.dep_ for t in item_list if type(t) == spacy.tokens.token.Token if t.pos_ == 'NUM']
        if len(nums) > 1:
            # Items collected so far; prepended to every emitted number.
            temp = []
            # Walk (item, next item) pairs; the last item is only ever seen as `future`.
            for item, future in zip(item_list, item_list[1:]):        
                if type(item) == spacy.tokens.span.Span:
                    temp.append(item)
                # NOTE(review): `future.pos_` is read even when `future` may be a Span — confirm Spans never follow a NUM token here.
                elif item.pos_ == 'NUM' and future.pos_ == 'NUM':
                    new_item_list.append(temp + [item])
                    new_item_list.append(temp + [future])
                else:
                    temp.append(item)
            # NOTE(review): a list with several NUM tokens but no two adjacent ones adds nothing to the output — confirm this is intended.
        else:
            new_item_list.append(item_list)
    return new_item_list


def clean_prepositions_of(info_list):
    """Expand items that govern an "of" phrase into the full span.

    For each item (the span root for spans), if it has a child with lemma
    ``'of'`` and that child has a ``pobj`` child, the item is replaced by the
    span from the item to that object, inclusive. Other items are kept as-is.

    Args:
        info_list: Iterable of lists of spaCy tokens and/or spans.

    Returns:
        A new list of item lists.
    """
    new_info_lists = []
    for info in info_list:
        new_item_list = []
        for item_orig in info:
            if isinstance(item_orig, spacy.tokens.token.Token):
                item = item_orig
            else:
                item = item_orig.root
            prep_of = next((t for t in item.children if t.lemma_ == 'of'), None)
            child = None
            if prep_of is not None:
                child = next((t for t in prep_of.children if t.dep_ == 'pobj'), None)
            if child is not None:
                # Slice the token's own document rather than a notebook-level
                # `doc` variable, which may belong to another sentence.
                new_item_list.append(item.doc[item.i : child.i + 1])
            else:
                new_item_list.append(item_orig)
        new_info_lists.append(new_item_list)
    return new_info_lists


def clean_prepositions_on(info_list):
    """Expand items that govern an "on" phrase into the full span.

    For each item (the span root for spans), if it has a child with lemma
    ``'on'`` and that child has a ``pobj`` child, the item is replaced by the
    span from the item to that object, inclusive. Other items are kept as-is.

    Args:
        info_list: Iterable of lists of spaCy tokens and/or spans.

    Returns:
        A new list of item lists.
    """
    new_info_lists = []
    for info in info_list:
        new_item_list = []
        for item_orig in info:
            if isinstance(item_orig, spacy.tokens.token.Token):
                item = item_orig
            else:
                item = item_orig.root
            prep = next((t for t in item.children if t.lemma_ == 'on'), None)
            child = None
            if prep is not None:
                child = next((t for t in prep.children if t.dep_ == 'pobj'), None)
            if child is not None:
                # Slice the token's own document rather than a notebook-level
                # `doc` variable, which may belong to another sentence.
                new_item_list.append(item.doc[item.i : child.i + 1])
            else:
                new_item_list.append(item_orig)
        new_info_lists.append(new_item_list)
    return new_info_lists

def clean_prepositions_in(info_list):
    """Truncate every item list at its first item whose lemma is ``'in'``.

    The ``'in'`` item and everything after it are dropped; lists without such
    an item are copied unchanged.
    """
    return [
        list(itertools.takewhile(lambda item: not (item.lemma_ == 'in'), info))
        for info in info_list
    ]


def clean_prepositions_with(info_list):
    """Remove every item whose lemma is ``'with'`` from each item list.

    Unlike the "in" clean-up, the items after a ``'with'`` are kept.
    """
    return [
        [item for item in info if not (item.lemma_ == 'with')]
        for info in info_list
    ]
        
def create_relation(part, compounds, item_list):
    """Turn consecutive items of ``item_list`` into ``(sub, relation, obj)`` triples.

    Args:
        part: Main part name; the relation label is ``f'{part} temp'``.
        compounds: Non-empty list of compound triples. The last element of the
            last triple replaces any subject whose lemma occurs among the
            flattened triple elements.
        item_list: Ordered items exposing ``lemma_``.

    Returns:
        One triple per consecutive pair, skipping pairs where the object text
        is contained in the subject text.
    """
    # Last element of the last compound triple.
    compound = compounds[-1][-1]
    # Flatten the triples into a single list of their elements.
    flat_compounds = [element for triple in compounds for element in triple]
    relation = f'{part} temp'

    triples = []
    for sub, obj in zip(item_list[:-1], item_list[1:]):
        if sub.lemma_ in flat_compounds:
            sub = compound
        # Plain strings (e.g. the substituted compound) have no `lemma_`; use
        # them as-is instead of hiding every error behind a bare `except`.
        obj_text = getattr(obj, 'lemma_', obj)
        sub_text = getattr(sub, 'lemma_', sub)
        if obj_text in sub_text:
            continue
        triples.append((sub, relation, obj))
    return triples
    
def extract_triples(doc):
    """Extract (subject, relation, object) triples from a parsed sentence.

    For every token that `check_existance` maps to a glossary main part, the
    function records the part/compound hierarchy as `has_main_part` /
    `has_sub_part` triples, collects the dependency subtree around the widest
    compound, runs the clean-up helpers on the resulting key paths, and turns
    each path of more than one item into triples via `create_relation`.

    Returns:
        The triples in first-seen order with exact duplicates removed.
    """
    # Speed up the extraction
    # NOTE(review): `previous` and `AIKEs_list` are never read in this function.
    previous = []
    AIKEs_list = []
    triples = []
    for t in doc:
        part = check_existance(t)
        #print(part, t)
        if part:
            # Create temp list for storing compounds
            compounds_temp = []
            compounds_temp.append(('species', 'has_main_part', part.capitalize()))           
            compound = part.capitalize()
            #print(compound)
            # Chain the spans from `extract_compounds`: each one becomes a sub part of the previous one.
            for new_compound in extract_compounds(t, doc):
                if type(compound) == str:
                    compounds_temp.append((compound, 'has_sub_part', new_compound.lemma_))
                else:
                    compounds_temp.append((compound.lemma_, 'has_sub_part', new_compound.lemma_))
                compound = new_compound
                #print(compound)
            # Reset T
            # (rebinds the loop variable to the root of the last span; the `for` loop itself is unaffected)
            t = compound.root
            # Get child dict
            child_dict = {compound: dict_sentence(t)}
            # Get parent dict
            parent_dict = {compound: dict_sentence_parent(t)}
            # Update if exists
            if parent_dict[compound]:
                # Add dicts together
                sentence_dict = update_nested_dict(child_dict, parent_dict)
                #print(sentence_dict)
            else:
                sentence_dict = child_dict
            # List dict into tuples
            info_lists = list(undict_to_tuples(sentence_dict))
            info_lists = clean_measurements(info_lists)
            info_lists = clean_prepositions_of(info_lists)
            info_lists = clean_prepositions_in(info_lists)
            info_lists = clean_prepositions_on(info_lists)
            info_lists = clean_prepositions_with(info_lists)

            #print('end', t, info_lists)
            for info in info_lists:
                # Skip no info
                if len(info) <= 1:
                    continue
                info = clean_compounds(info, doc)
                # The hierarchy triples are re-added for every path; duplicates are removed on return.
                triples.extend(compounds_temp)
                triples.extend(create_relation(part, compounds_temp, info))
            
    # dict.fromkeys de-duplicates while keeping first-seen order.
    return list(dict.fromkeys(triples))

def clean_triples(kn):
    """Convert triples of tokens/spans/strings into de-duplicated string triples.

    Strings are kept unchanged (including their case); any other element is
    replaced by its lower-cased ``lemma_``.

    Args:
        kn: Iterable of ``(sub, rel, obj)`` triples.

    Returns:
        Unique string triples in first-seen order.
    """
    def as_text(element):
        return element if isinstance(element, str) else element.lemma_.lower()

    # The old `sub.capitalize()` call for 'has_main_part' discarded its result
    # and is dropped: downstream code matches the lower-case 'species' subject.
    kn_cleaned = [(as_text(sub), as_text(rel), as_text(obj)) for (sub, rel, obj) in kn]

    # dict.fromkeys keeps the order deterministic (a set does not), matching
    # the de-duplication used by `extract_triples`.
    return list(dict.fromkeys(kn_cleaned))

def knowledge_graph_subset(species, parts, kn_cleaned):
    """Return the node names that take part in the relations of ``parts``.

    Args:
        species: Name substituted for the placeholder subject ``'species'``.
        parts: One part name or a list of them; each selects the relation
            ``f'{part.lower()} temp'``. A falsy value selects every triple,
            like the unfiltered case of ``create_weighted_graph``. It used to
            raise ``NameError``.
        kn_cleaned: Iterable of ``(sub, rel, obj)`` string triples.

    Returns:
        The unique subjects and objects of the selected triples, in no
        particular order.
    """
    if parts:
        if type(parts) != list:
            parts = [parts]
        relations = {f'{part.lower()} temp' for part in parts}
    else:
        relations = None  # no filter

    values = set()
    for (sub, rel, obj) in kn_cleaned:
        if relations is not None and rel not in relations:
            continue
        values.add(species if sub == 'species' else sub)
        values.add(obj)
    return list(values)

def possible_parts(species, data):
    """List the objects of all ``has_main_part`` triples stored for ``species``."""
    main_parts = []
    for _sub, rel, obj in data[species]:
        if rel == 'has_main_part':
            main_parts.append(obj)
    return main_parts

def create_weighted_graph(species, data, parts=False,
                          start=0, end=9999,
                          k=.08, scale=0.3, iterations=50, size=1.5,
                          log=True, seed=333,
                          species_traits=False):
    """Prepare layout and styling data for drawing a species' triple graph.

    Args:
        species: Key into `data`; also replaces the placeholder subject 'species'.
        data: Mapping from species name to a sequence of (sub, rel, obj) triples.
        parts: Optional part name or list of names. When given, only edges
            labelled f'{part.lower()} temp', or touching a node named in
            `parts`, are kept.
        start, end: Slice bounds applied to the triple sequence.
        k, iterations, seed, scale: Passed to `nx.spring_layout`.
        size: Divisor applied to every node size.
        log, species_traits: Not used in the function body.

    Returns:
        Tuple (pos, nodes, node_size, node_color, node_labels, edge_labels).
        `node_color` only contains an entry for the species node.
    """
    
    # Init variables
    # NOTE(review): `correct_parts` and `colors` are never read below.
    baseparts, traits, source, relation, target, correct_parts = ([] for i in range(6))
    colors, node_size, node_color = ({} for i in range(3))
       
    # Extract the data
    for (sub, rel, obj) in data[species][start:end]:
        #print((sub, rel, obj))
        if sub == 'species':
            sub = species
        source.append(sub), relation.append(rel), target.append(obj), 
        if rel == 'has_main_part':
            baseparts.append(obj)
        if rel == 'has_sub_part':
            traits.append(obj)
        
    # Fit data into DF
    kg_df = pd.DataFrame({'source':source, 'target':target, 'edge':relation})
    # Subset if parts
    if parts:
        if type(parts) != list:
            parts = [parts]
        relations = [f'{part.lower()} temp' for part in parts]
        #relations += ['has_main_part', 'has_sub_part']
        
        kg_df = kg_df[kg_df['edge'].isin(relations) | 
                      kg_df['target'].isin(parts) |
                      kg_df['source'].isin(parts)]
    
    #print(kg_df)
    
    # Extract the node names
    nodes = [(source, target) for source, target in zip(kg_df['source'].values, kg_df['target'].values)]
    # Init a graph
    # (undirected; every DataFrame column other than source/target becomes an edge attribute)
    G=nx.from_pandas_edgelist(kg_df, "source", "target", edge_attr=True, create_using=nx.Graph())
    # Extract the labels
    node_labels = {node : node for node in G}
    edge_labels = dict(zip(list(zip(kg_df.source, kg_df.target)), kg_df['edge'].tolist()))
    
    #print(node_labels)
    
    # Fill in the dict
    # Sizes: species 3/size, main parts 1.3/size, sub parts 1.2/size, everything else 1/size.
    for node in node_labels:
        if node == species:
            node_color[node] = 'white' 
            node_size[node] = 3/size
        else: 
            if node in baseparts: node_size[node] = 1.3/size
            elif node in traits: node_size[node] = 1.2/size
            else: node_size[node] = 1/size   
    
    # Determine position            
    pos = nx.spring_layout(G, k=k, iterations=iterations, seed=seed, scale=scale, center=(0,0), dim=2)
    
    return pos, nodes, node_size, node_color, node_labels, edge_labels

In [5]:
# Load Glossary
# Maps every glossary term to its accepted forms: the term itself and its lemma.
glossary = collections.defaultdict(list)

# NOTE: pickle can execute arbitrary code while loading; only open trusted files.
# The context manager closes the file handle, which `pickle.load(open(...))` left open.
with open('../data/glossaries/birds.pkl', 'rb') as glossary_file:
    items = pickle.load(glossary_file)
items += [
    'underpart', 'underside',
    'chest', 'band',
    'coloration',
    'body', 'breast',
    'male', 'female',
    'margin', 
    'plume', 'skin', 'wash',
    'mask',
    'legs', 'beak', 'head', 'wingspan', 'eye', 'forecrown', 'underpart',

]

for item in tqdm_notebook(items):
    doc = nlp(item)
    glossary[item].append(item)
    # Singular
    glossary[item].append(doc[0:].lemma_)
    

  0%|          | 0/1190 [00:00<?, ?it/s]

In [9]:
# Lemmas treated as modifiers rather than parts: `check_existance` never
# reports them as a part, `extract_compounds` folds them into the compound
# span, and `clean_compounds` drops them from item lists.
# Some entries are repeated (e.g. 'main', 'outer', 'median'); this is harmless
# for the membership tests above.
compound_list = [
    'fertile', 'sterile',
    'male', 'female', 'bisexual', 'hermaphroditic', 
    'basal', 'developed', 'definitive', 'prebasic', 'juvenile', 'new',
    'primary', 'secondary', 'main',
    'upper', 'lower', 'greater', 'dorsal', 'alternate', 'lesser', 'apex', 'outer', 'less', 'main', 'median', 'frontal',
    'central', 'outermost', 'outer', 'inner', 'uppermost', 'median', 'dorsal', 'central', 'lateral',
    'young', 'mature', 'individual', 'old',
    'opposite', 'single', 'paired', 'malar',
    'winter', 'summer', 'autumn', 'spring',
    'breeding',
    
]

# NOTE(review): `rubbish_list` is not referenced by any visible cell.
rubbish_list = [
    '.', ',', '-', '..', '...', '', 
]

In [10]:
# Unit and dimension words that mark a measurement.
# The correctly spelled 'millimeter' / 'millimetre' are added. The misspelled
# variants are kept so any existing lookups keep working.
measurements_list = [
    'mm', 'cm', 'm', 'km',
    'milimeter', 'centimeter', 'meter', 'kilometer',
    'milimetre', 'centimetre', 'metre', 'kilometre',
    'millimeter', 'millimetre',
    'inch', 'foot', 'yard', 'mile',
    'wide', 'long', 'broad', 'tall',
    'length', 'form',
]

In [70]:
# birds descriptions
# NOTE: pickle can execute arbitrary code while loading; only open trusted files.
with open('../data/description/04_TRAIN_0000000-0002000_BIRDS.pkl', 'rb') as f:
    data = pickle.load(f)
# Open the text file
location = "../data/external/birds_matched.txt"
with open(location) as f:
    lines = f.readlines()

# Parallel lists: CUB_list[i] and BOW_list[i] name the same bird.
BOW_list = []
CUB_list = []
for line in lines[1:]:  # the first line is skipped
    line = line.strip()
    _, CUB, BOW = line.split(';')
    BOW_list.append(BOW)  
    CUB_list.append(CUB)
    
# Hand-written description. Every sentence needs its own trailing comma:
# adjacent string literals are concatenated, which previously merged four
# sentences into a single list entry.
data['Cape Starling'] = [
    "Fairly large, short-tailed glossy starling with rather uniform appearance.",
    "Head is glossy blue, blacker on ear-coverts.",
    "upperparts blue-green with strong iridescence",
    "wing blue-green, dark blue spots at tips of some median and greater coverts.",
    "distinct bronzy-purple epaulet, primaries P6-P9 strongly indented on inner webs",
    "tail glossy blue-green.",
    "throat and upper breast have blue iridescence.",
    "lower breast, belly and undertail-coverts with greener gloss.",
    "iris bright orange-yellow.",
    "bill and legs black.", 
    "Sexes alike.",
    "Juvenile is dull-plumaged, with matt black underparts, iris initially grey.",
    "at three months iris dull yellow, acquiring adult colour after six months",
]

# Keep only the birds that appear in the matched list.
data_cleaned = collections.defaultdict(list)
for bird in data.keys():
    if bird in BOW_list:
        data_cleaned[bird] = data[bird]

In [26]:
# NOTE(review): `description_dict` is created here but not filled in this cell.
description_dict = collections.defaultdict(list)
triples_dict = collections.defaultdict(list)

# Get sentences
for bird in tqdm_notebook(list(data_cleaned.keys())[0:]):
    # Extract triples
    kn_data = []
    for text in data_cleaned[bird]:
        text = text_preparation(bird, text)
        # `doc` is a notebook-level name; `clean_prepositions_of` and
        # `clean_prepositions_on` read it as a global, so it must be the
        # document currently being processed.
        doc = nlp(text)
        kn_data.extend(extract_triples(doc))
    # One cleaned, de-duplicated triple list per bird.
    triples_dict[bird] = clean_triples(kn_data)

  0%|          | 0/200 [00:00<?, ?it/s]

In [27]:
# with open(f'../data/description/triples_CUB_dataset.pkl', 'wb') as f:
#         pickle.dump(triples_dict, f)  

In [None]:
# Reload the cached triples instead of re-running the extraction.
# NOTE: pickle can execute arbitrary code while loading; only open trusted files.
with open('../data/description/triples_CUB_dataset.pkl', 'rb') as f:
    triples_dict = pickle.load(f)

In [33]:
#triples_dict['Eared Grebe']

In [36]:
# Read the CUB class list
location = "../data/external/CUB_200_2011/CUB_200_2011/classes.txt"
with open(location) as f:
    lines = list(f)

# Pattern matching "<digits> <digits>." (removed from each line below)
regex = r'\d+\s\d+\.'
strip_prefix = re.compile(regex).sub
# Strip the prefix and trailing whitespace, and turn underscores into spaces
CUB = [strip_prefix('', raw).rstrip().replace('_', ' ') for raw in lines]

In [40]:
# Read the CUB attribute definitions
location = "../data/external/CUB_200_2011/attributes.txt"
with open(location) as f:
    lines = f.readlines()

# Second whitespace-separated field of each line is "<part>::<adjective>"
attribute_list = []
for line in lines:
    line = line.strip()
    attributes = line.split()[1].split('::')
    part, adjective = attributes[0], attributes[1]
    attribute_list.append((part, adjective))

# One row per attribute, indexed from 1
df_attributes = pd.DataFrame(
    attribute_list,
    columns=['Part', 'Adj'],
    index=np.arange(1, len(attribute_list) + 1),
)

In [69]:
# Open the text file
location = "../data/external/CUB_200_2011/CUB_200_2011/attributes/image_attribute_labels.txt"
with open(location) as f:
    lines = f.readlines()
# Open the text file
location = "../data/external/CUB_200_2011/CUB_200_2011/images.txt"
with open(location) as f:
    img_ids = f.readlines()

# (Part, Adj) per attribute id, built once instead of two DataFrame lookups
# per row. This also avoids the deprecated positional `Series[0]` access.
attribute_by_id = dict(zip(df_attributes.index,
                           zip(df_attributes['Part'], df_attributes['Adj'])))

# Collect all present parts per class
CUB_dict = collections.defaultdict(list)
# Loop over lines
for line in tqdm_notebook(lines):
    # Named `fields` so the description dict `data` from the loading cell is
    # not overwritten by this loop.
    fields = line.split()
    # Skip non present
    if not int(fields[2]):
        continue
    # Skip uncertain things
    if int(fields[3]) < 4:
        continue
    # Otherwise append
    # The first three characters of the image path give the 1-based class number.
    image_line = img_ids[int(fields[0]) - 1]
    class_idx = int(image_line.split()[1][0:3]) - 1
    CUB_dict[CUB[class_idx]].append(attribute_by_id[int(fields[1])])

  0%|          | 0/3677856 [00:00<?, ?it/s]

#df_CUB = pd.DataFrame.from_dict(CUB_dict, orient='index')

# COMPARISON

In [256]:
# NOTE(review): these imports sit mid-notebook; moving them to the import cell
# at the top would make the dependencies visible on a fresh run.
from predict_model import load_simBERT
from build_features import similarity_matrix as vector_values
from sklearn.metrics.pairwise import cosine_similarity

# Rebinds `model`: from here on it is the similarity model, not the one loaded by `loadBERT` earlier.
model = load_simBERT()

In [370]:
CUB_retok = collections.defaultdict(list)
#CUB_retok = dict()


for idx, bird in enumerate(tqdm_notebook(CUB_list[0:])):

    # Attribute pairs ordered from most to least frequent
    temp = Counter(CUB_dict[bird])
    numbers = OrderedDict(temp.most_common())
    passed = []

    # Keep only the first (most frequent) value seen for each part
    for (part, obj) in numbers.keys():
        if part not in passed:
            sub = part.replace('has_', '').replace('_', ' ')
            obj = obj.replace('_', ' ')
            CUB_retok[BOW_list[idx]].append((sub, obj))
            passed.append(part)

with open(f'../data/CUB-COLABVERSION_v2.pkl', 'wb') as f:
    pickle.dump(CUB_retok, f)

  0%|          | 0/200 [00:00<?, ?it/s]

In [373]:
# PARTS
# First word of every CUB attribute name, unique and in first-seen order
first_words = (
    re.sub(" .*", "", sub)
    for bird in BOW_list
    for (sub, _) in CUB_retok[bird]
)
parts = list(dict.fromkeys(first_words))

# Replace each word by the lemma of its parsed text
parts = [nlp(part)[0:].lemma_ for part in parts]

In [246]:
# Main part -> substrings that are looked up in the triple relations and the
# CUB attribute names by the comparison cell below.
parts_dict = {
    'eye': ['eye', 'iris'],
    'underpart': ['underpart'],
    'head': ['head']
}

In [273]:
bird = BOW_list[30]

# NOTE(review): `parts_OWN` and `items_CUB` are reset for every main part, so
# after the loop they only hold the matches for the last key of `parts_dict`.
for MainPart in parts_dict.keys():
    SubParts = parts_dict[MainPart]
    parts_OWN = []
    items_CUB = []
    for SubPart in SubParts:
        # Own triples are matched on the relation label, CUB pairs on the attribute name.
        parts_OWN += [(sub, rel, obj) for (sub, rel, obj) in triples_dict[bird] if SubPart in rel.lower()]
        items_CUB += [(sub, obj) for (sub, obj) in CUB_retok[bird] if SubPart in sub.lower()]        
        
    # De-duplicate; the resulting order is not defined.
    parts_OWN = list(set(parts_OWN))
    items_CUB = list(set(items_CUB))

In [377]:
{part : [part] for part in parts}

{'bill': ['bill'],
 'eye': ['eye'],
 'belly': ['belly'],
 'wing': ['wing'],
 'shape': ['shape'],
 'upperpart': ['upperpart'],
 'primary': ['primary'],
 'breast': ['breast'],
 'nape': ['nape'],
 'size': ['size'],
 'forehead': ['forehead'],
 'underpart': ['underpart'],
 'back': ['back'],
 'throat': ['throat'],
 'crown': ['crown'],
 'tail': ['tail'],
 'under': ['under'],
 'leg': ['leg'],
 'upper': ['upper'],
 'head': ['head']}

In [274]:
# Render every CUB (attribute, value) pair as a short sentence.
string1 = ""
for (sub, obj) in items_CUB:
    # Accumulate into `string1`; the original appended to an undefined `string`.
    string1 += f'{sub.capitalize()} is {obj}. '

In [269]:
string1

'Forehead color is grey. Forehead color is buff. Forehead color is white. Forehead color is red. Forehead color is brown. Head pattern is unique pattern. Head pattern is eyeline. Head pattern is capped. Head pattern is plain. Forehead color is orange. Head pattern is malar. Forehead color is black. Head pattern is eyering. '

In [270]:
string3

'Forehead color is black. Forehead color is grey. Head pattern is crested. Forehead color is white. Head pattern is striped. Forehead color is brown. Head pattern is unique pattern. Head pattern is malar. Head pattern is eyeline. Forehead color is pink. Head pattern is eyering. Head pattern is eyebrow. Forehead color is orange. Forehead color is yellow. Head pattern is capped. '

In [275]:
string4

'Head pattern is eyebrow. Forehead color is grey. Forehead color is yellow. Forehead color is white. Head pattern is crested. Head pattern is striped. Forehead color is brown. Head pattern is unique pattern. Head pattern is eyeline. Head pattern is eyering. Head pattern is plain. Forehead color is buff. Head pattern is malar. Forehead color is black. Head pattern is capped. Forehead color is orange. '

In [253]:
# Render every extracted triple as a short "<Subject> is <object>. " sentence.
string2 = "".join(
    f'{sub.capitalize()} is {obj}. '
    for (sub, rel, obj) in parts_OWN
)


In [254]:
string2

'Head is grey. Head is sepia. Head is uniform. Head is fresh. Head is upperpart. Margin is faint. Head is deep. Tip is brown. Head is breast. Head is black. Breast is red. Margin is buffy. Head is gray. Head is tip. Brown is wood. Head is feather. Head is margin. Tip is pale. '

In [276]:
# NOTE(review): `string3` and `string4` are not assigned in any visible cell;
# this only runs if they were left in the kernel by earlier, since-changed cells.
doc1_tok = vector_values([string1, string3, string4, string2], model=model)
# Pairwise cosine similarity between the four texts (rows/columns follow the list order above).
matrix = cosine_similarity(doc1_tok, doc1_tok)

In [277]:
matrix

array([[0.9999998 , 0.9887011 , 0.99068123, 0.8513702 ],
       [0.9887011 , 0.9999997 , 0.9944166 , 0.8453908 ],
       [0.99068123, 0.9944166 , 1.0000004 , 0.85203004],
       [0.8513702 , 0.8453908 , 0.85203004, 1.0000001 ]], dtype=float32)

In [224]:
# NOTE(review): the position of a part inside `parts` differs between the
# recorded outputs of this notebook, so index 11 may not always name the same part.
part = parts[11]

for bird in BOW_list[60:61]:
    # Get items
    # Own triples match when the part occurs in the relation, subject or object.
    items_OWN = [(sub, obj) for (sub, rel, obj) in triples_dict[bird] 
                 if (part in rel.lower())
                 or (part in sub.lower())
                 or (part in obj.lower())
                 ]
    # CUB pairs match on the attribute name only.
    items_CUB = [(sub, obj) for (sub,      obj) in CUB_retok[bird] if part in sub.lower()]
    
    # De-duplicate; the resulting order is not defined.
    items_CUB = list(set(items_CUB))
    items_OWN = list(set(items_OWN))

In [225]:
items_OWN

[]

In [213]:
items_CUB

[('primary color', 'iridescent'),
 ('primary color', 'grey'),
 ('primary color', 'brown'),
 ('primary color', 'white'),
 ('primary color', 'black'),
 ('primary color', 'buff')]

In [214]:
items_OWN

[]

'Savannah Sparrow'

In [149]:
parts

['underpart',
 'breast',
 'head',
 'throat',
 'eye',
 'bill',
 'forehead',
 'under',
 'nape',
 'belly',
 'tail',
 'primary',
 'leg',
 'crown',
 'wing',
 'upperpart',
 'back',
 'upper',
 'size',
 'shape']

In [215]:
triples_dict[bird] 

[('plume', 'plume temp', 'auricular'),
 ('feather', 'feather temp', 'large'),
 ('appear', 'bills temp', 'deep'),
 ('at', 'plates temp', 'end of incubation'),
 ('species', 'has_main_part', 'Wings'),
 ('body molt', 'molts temp', 'overall'),
 ('size', 'has_sub_part', 'body size'),
 ('nape', 'plume temp', 'neck'),
 ('plate', 'plates temp', 'rictal'),
 ('species', 'has_main_part', 'Legs'),
 ('bill of breed condition bird', 'bills temp', 'orange'),
 ('species', 'has_main_part', 'Chins'),
 ('orange', 'bills temp', 'tip'),
 ('wing covert', 'wings temp', 'gray'),
 ('like', 'crest temp', 'quail'),
 ('Body', 'has_sub_part', 'size'),
 ('cm', 'wingspan temp', '40–50'),
 ('Eye', 'has_sub_part', 'eye'),
 ('rectrix', 'rectrices temp', '14'),
 ('bill', 'bills temp', 'oppose'),
 ('gray', 'wings temp', 'dark'),
 ('gray', 'plumage temp', 'sooty'),
 ('to', 'bills temp', 'gray'),
 ('plate', 'plates temp', 'subnasal'),
 ('Eye', 'has_sub_part', 'plume'),
 ('Crest', 'has_sub_part', 'ornament'),
 ('alternate pl

In [None]:
# NOTE(review): scratch cell. None of `action`, `collection`, `collectionA`,
# `collectionB`, `doActionA` or `doActionB` is defined in any visible cell.
# As written, the two `elif` branches do not test `action`, so when
# `collection == 'collectionB'` both doActionA and doActionB are called —
# confirm whether an `action == ...` check is missing there.
if(
    action == 'actionA' and
    collection == 'collectionA'
):
    doActionA(collectionA)
elif(
    collection == 'collectionB'
):
    doActionA(collectionB)
if(
    action == 'actionB' and
    collection == 'collectionA'
):
    doActionB(collectionA)
elif(
    collection == 'collectionB'
):
    doActionB(collectionB)