In [1]:
import sys
sys.path.insert(0, "/notebooks/pipenv")
sys.path.insert(0, "/notebooks/nebula3_vlm")
sys.path.insert(0, "/notebooks/nebula3_database")
sys.path.insert(0, "/notebooks/")
import os
import math
import random
import bisect
import pickle
import time
import numpy as np


In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import urllib
import subprocess
import re
import tempfile
import itertools
import torch
import spacy
# import amrlib
# import penman

from typing import List, Tuple
from operator import itemgetter 
from transformers import AutoModel, AutoTokenizer, AutoModelForSequenceClassification, BertTokenizer, BertForSequenceClassification
from database.arangodb import DatabaseConnector
from config import NEBULA_CONF
from movie_db import MOVIE_DB


In [3]:
class PIPELINE:
    """Bundle of the configuration values and database handles used by this notebook.

    Attributes set by ``__init__``:
        db_host:  value returned by ``NEBULA_CONF().get_database_host()``.
        database: value returned by ``NEBULA_CONF().get_playground_name()``.
        gdb:      a ``DatabaseConnector`` instance.
        db:       result of ``gdb.connect_db(database)``; other cells run their
                  AQL queries through ``db.aql.execute``.
    """

    def __init__(self):
        settings = NEBULA_CONF()
        host = settings.get_database_host()
        playground = settings.get_playground_name()
        connector = DatabaseConnector()

        self.db_host = host
        self.database = playground
        self.gdb = connector
        self.db = connector.connect_db(playground)

# Notebook-wide handles; the functions defined in later cells read these globals directly.
pipeline = PIPELINE()
mdb = MOVIE_DB()
# NOTE(review): this import lives below the notebook's import cells; consider moving it up there.
from vlm.clip_api import CLIP_API
clip=CLIP_API('vit')
# Collection names. In the visible cells only two are used:
#   s2_compatibility_collection_name -> read by optimize_scene and by the "FOR doc IN ..." query
#   s2_with_compat_collection_name   -> default target collection of run_pipeline
s2_collection_name = 's2_pipeline_after_gpt'
s2_results_orig_collection_name = 's2_pipeline_optim_orig'
s2_results_relaxed_collection_name = 's2_pipeline_optim_relaxed'
s2_compatibility_collection_name = 's2_pipeline_compatibility_scores'
s2_with_compat_collection_name = 's2_pipeline_compatibility_results'
s2_with_compat_collection_name2 = 's2_pipeline_compatibility_results2'


In [4]:
def flatten(lst):
    """Concatenate the items of every iterable in ``lst`` into one list (one level deep)."""
    flat = []
    for inner in lst:
        flat.extend(inner)
    return flat

def compute_batch_scores(video_emb: torch.Tensor, texts: List[str], normalize=True, **kwargs) -> List[float]:
    """Score each text against a single video embedding.

    The texts are encoded in one call to ``clip.clip_batch_encode_text`` (extra
    keyword arguments are forwarded to it). The video embedding is expanded to
    the shape of that batch, multiplied element-wise with it and summed over
    dim 1, i.e. one dot product per text.

    Args:
        video_emb: embedding that ``expand_as`` can broadcast to the text batch.
        texts: sentences to score.
        normalize: accepted for interface compatibility; it is not read here.
        **kwargs: passed through to ``clip.clip_batch_encode_text``.

    Returns:
        The per-text scores as produced by ``.cpu().numpy()`` (a NumPy array,
        despite the ``List[float]`` annotation).
    """
    text_embs = clip.clip_batch_encode_text(texts, **kwargs)
    tiled_video = video_emb.expand_as(text_embs)
    per_text = (tiled_video * text_embs).sum(dim=1)
    return per_text.cpu().numpy()


def compute_concat_score(image_emb: torch.Tensor, texts: List[str], join_on=',') -> float:
    """Score an image embedding against all ``texts`` merged into one caption.

    Each text is stripped, loses one trailing ``'.'`` if present, and is
    followed by ``join_on`` plus a space. Texts that are empty after this
    clean-up are skipped. The merged caption is printed, stripped of outer
    whitespace, encoded with ``mdmmt.encode_text`` and multiplied with
    ``image_emb`` via ``torch.matmul``.

    Args:
        image_emb: embedding to compare the caption against.
        texts: sentence fragments to merge, in order.
        join_on: separator appended after every fragment (the final fragment
            keeps its separator; only the trailing space is stripped).

    Returns:
        Whatever ``torch.matmul`` yields for the two embeddings.
    """
    pieces = []
    for raw in texts:
        fragment = raw.strip()
        # endswith() is safe on an empty string, unlike indexing its last character.
        if fragment.endswith('.'):
            fragment = fragment[:-1]
        if not fragment:
            # A blank fragment would only add a dangling separator to the caption.
            continue
        pieces.append(fragment + join_on + ' ')
    combined_text = "".join(pieces)
    print("Combined: "+combined_text)
    # NOTE(review): `mdmmt` is not defined in the visible part of this notebook; the only
    # encoder created here is `clip`. Confirm where `mdmmt` comes from before calling this.
    return torch.matmul(image_emb, mdmmt.encode_text(combined_text.strip()))

In [5]:
class SimilarityManager:
    """Token-level similarity between two texts, using a spaCy pipeline."""

    # Parts of speech that are kept when the source text has more than one token.
    CONTENT_POS = ('NOUN', 'ADJ', 'ADV', 'VERB', 'PROPN', 'ADP')

    def __init__(self):
        self.nlp = spacy.load('en_core_web_lg')

    def similarity(self, src, target):
        """Return how well the tokens of ``src`` are matched by ``target``.

        Both texts are run through ``self.nlp``. Every retained source token is
        scored by its best ``token.similarity`` against any target token, and
        the mean of those best scores is returned.

        A source token is retained when its ``pos_`` is in ``CONTENT_POS``;
        a single-token source is always retained regardless of its tag.

        Args:
            src: text whose tokens must be matched.
            target: text the tokens are matched against.

        Returns:
            The mean best-match similarity, or ``0.0`` when there is nothing to
            compare (``target`` has no tokens, or no source token is retained).
            Returning a number here keeps NaN out of downstream statistics.
        """
        src_doc = self.nlp(src)
        target_doc = self.nlp(target)
        if len(target_doc) == 0:
            # max() over an empty target would raise ValueError.
            return 0.0

        filter_by_pos = len(src_doc) > 1
        best_matches = [
            max(token.similarity(other) for other in target_doc)
            for token in src_doc
            if not filter_by_pos or token.pos_ in self.CONTENT_POS
        ]
        if not best_matches:
            # np.mean([]) would be NaN (plus a RuntimeWarning).
            return 0.0
        return np.mean(best_matches)
        
# Notebook-wide SimilarityManager instance. Constructing one loads the spaCy model
# (see SimilarityManager.__init__), so it is created a single time here.
smanager = SimilarityManager()


In [6]:
def softmax(x):
    """Softmax of ``x`` along axis 0 (the whole vector for 1-D input).

    The maximum along axis 0 is subtracted before exponentiating. This leaves
    the result unchanged mathematically but prevents ``exp`` from overflowing
    to ``inf`` (and the quotient from becoming NaN) for large inputs.

    Args:
        x: array-like with at least one dimension.

    Returns:
        Array of the same shape as ``x`` whose entries along axis 0 sum to 1.
        An empty input yields an empty array.
    """
    values = np.asarray(x, dtype=float)
    if values.size == 0:
        return np.exp(values)
    exps = np.exp(values - np.max(values, axis=0))
    return exps / np.sum(exps, axis=0)
def normalize(x):
    """Standardise ``x`` to zero mean and unit standard deviation.

    When the standard deviation is below ``1e-5`` the values are treated as
    constant and an array of ones with the shape of ``x`` is returned instead
    (so ``x`` must expose ``.shape``).
    """
    min_spread = 0.00001
    spread = np.std(x)
    if spread < min_spread:
        return np.ones(x.shape)
    centred = x - np.mean(x)
    return centred / spread


In [7]:
def powerset(iterable):
    """Iterate over every non-empty combination of the items in ``iterable``.

    Combinations are produced as tuples, grouped by size from 1 up to the
    number of items; the empty combination is not included.
    """
    items = list(iterable)
    per_size = [itertools.combinations(items, size) for size in range(1, len(items) + 1)]
    return itertools.chain(*per_size)


def optimize_sents(emb_video, experts, sents, compat_scores, use_ordered_scores=False):
    """Choose the subset of candidate sentences with the highest combined score.

    Every non-empty subset of ``sents`` (see ``powerset``) is scored as

        1.3 * coverage + 1.0 * similarity + 1.0 * compatibility

    where, for a subset ``state`` (list of sentence indices):
      * coverage      - per expert, the best normalised ``smanager.similarity``
                        to any sentence in the subset, averaged over experts
                        (0.0 when ``experts`` is empty);
      * similarity    - mean of the normalised ``compute_batch_scores`` values;
      * compatibility - mean of ``compat_scores`` for the subset, or of their
                        normalised ranks when ``use_ordered_scores`` is true.

    The search is exhaustive, i.e. 2**len(sents) - 1 subsets are evaluated.

    Args:
        emb_video: video embedding, passed to ``compute_batch_scores``.
        experts: expert strings the chosen sentences should cover.
        sents: candidate sentences.
        compat_scores: one compatibility score per candidate sentence.
        use_ordered_scores: use the rank of each compatibility score instead of
            its raw value.

    Returns:
        ``(chosen, mean_score)``: the chosen sentences as a list (in their
        original order) and the mean of their raw ``compute_batch_scores`` values.

    Raises:
        ValueError: if ``sents`` is empty (there is no subset to choose).
    """
    if len(sents) == 0:
        raise ValueError("optimize_sents needs at least one candidate sentence")

    # Weights of the three score components.
    theta_similarity = 1.
    theta_coverage = 1.3
    theta_compat = 1.

    compat_scores = np.array(compat_scores)
    # argsort of argsort yields each score's rank (0 = lowest score).
    order_scores = normalize(compat_scores.argsort().argsort())
    print(order_scores)

    orig_similarity = compute_batch_scores(emb_video, sents)
    candidates_similarity = normalize(orig_similarity)

    # coverage_matrix[i][j]: similarity of expert i to sentence j, normalised per expert.
    # Uses the notebook-wide `smanager` instead of building a new SimilarityManager
    # (and reloading the spaCy model) on every call.
    coverage_matrix = np.full((len(experts), len(sents)), np.nan)
    for i, expert in enumerate(experts):
        for j, sent in enumerate(sents):
            coverage_matrix[i][j] = smanager.similarity(expert, sent)
        coverage_matrix[i] = normalize(coverage_matrix[i])

    def get_expert_coverage(state):
        # An expert is covered as well as its best-matching sentence in the subset.
        return coverage_matrix[:, state].max(axis=1)

    def get_state_coverage(state) -> float:
        if coverage_matrix.shape[0] == 0:
            # No experts: the mean over experts would be NaN and make every
            # subset score NaN, so the coverage term contributes nothing.
            return 0.0
        return np.mean(get_expert_coverage(state))

    def get_score(state: List[int]) -> float:
        if not state:
            return 0
        coverage_score = get_state_coverage(state)
        similarity_score = candidates_similarity[state].mean().item()
        if use_ordered_scores:
            compat_score = order_scores[state].mean().item()
        else:
            compat_score = compat_scores[state].mean().item()
        return theta_coverage*coverage_score + theta_similarity*similarity_score + theta_compat*compat_score

    candidate_subsets = [list(subset) for subset in powerset(range(len(sents)))]
    subset_scores = [get_score(subset) for subset in candidate_subsets]
    best_cand = candidate_subsets[int(np.argmax(subset_scores))]
    print("Best candidates:")
    print(best_cand)
    # Always a list, whatever the number or type of the selected sentences.
    rc = [sents[idx] for idx in best_cand]
    return rc, orig_similarity[best_cand].mean()


def optimize_scene(doc,mat=None, emb_video=None, **kwargs):
    """Run ``optimize_sents`` on every sentence group of one scene document.

    Args:
        doc: mapping with ``'movie_id'``, ``'scene_element'`` and
            ``'sentences'`` (a list of candidate-sentence groups).
        mat: accepted for interface compatibility; it is not read here.
        emb_video: optional precomputed video embedding. When ``None`` it is
            computed with ``clip.clip_encode_video(movie_id, scene_element)``.
        **kwargs: forwarded to ``optimize_sents`` (e.g. ``use_ordered_scores``).

    Returns:
        ``(rc_sents, mean_scores)``: two lists with one entry per sentence
        group, holding the two values returned by ``optimize_sents``.
    """
    mid = doc['movie_id']
    elem = doc['scene_element']
    if emb_video is None:
        # Only encode the video when the caller did not supply an embedding.
        emb_video = clip.clip_encode_video(mid,elem)
    all_sents = doc['sentences']

    # Experts are stored per category in the 's2_clsmdc' collection; all categories are pooled.
    experts_doc = mdb.get_scene_from_collection(mid,elem,'s2_clsmdc')
    experts = flatten(experts_doc['experts'].values())

    compat_doc = mdb.get_scene_from_collection(mid,elem,s2_compatibility_collection_name)
    all_compat_scores = compat_doc['compat_scores']

    rc_sents = []
    mean_scores = []
    # Indexed access (rather than zip) so a missing compat group still raises IndexError.
    for i in range(len(all_sents)):
        best_sents, mean_score = optimize_sents(emb_video,experts,all_sents[i],all_compat_scores[i], **kwargs)
        rc_sents.append(best_sents)
        mean_scores.append(mean_score)

    return rc_sents, mean_scores
    
def run_pipeline(all_docs, target_collection_name=s2_with_compat_collection_name, **kwargs):
    """Optimise every scene in ``all_docs`` and store the result in the database.

    A scene is skipped when ``mdb.get_scene_from_collection`` already returns a
    truthy document for it in ``target_collection_name``. Otherwise
    ``optimize_scene`` is run and a document with the keys ``movie_id``,
    ``scene_element``, ``sentences`` and ``mean_scores`` is inserted.

    Args:
        all_docs: iterable of scene documents (each needs ``'movie_id'``,
            ``'scene_element'`` and whatever ``optimize_scene`` reads).
        target_collection_name: collection the results are written to.
        **kwargs: forwarded to ``optimize_scene``.
    """
    for doc in all_docs:
        mid = doc['movie_id']
        elem = doc['scene_element']
        if mdb.get_scene_from_collection(mid,elem,target_collection_name):
            print("Results already exist for {}/{}".format(mid,elem))
            continue
        print("Going forward with {}/{}".format(mid,elem))

        rc_sents, sim_scores = optimize_scene(doc,**kwargs)
        rc_doc = {
            'movie_id': mid,
            'scene_element': elem,
            'sentences': rc_sents,
            # Plain floats: NumPy scalars cannot be serialised as a bind variable.
            'mean_scores': [float(score) for score in sim_scores],
        }
        # Bind variables instead of formatting the dict's repr into the query text:
        # sentences containing quotes or non-JSON values would otherwise produce invalid AQL.
        # `@@collection` is AQL's syntax for a collection bind parameter.
        # NOTE(review): assumes pipeline.db is a python-arango database handle — confirm.
        pipeline.db.aql.execute(
            "INSERT @doc INTO @@collection",
            bind_vars={'doc': rc_doc, '@collection': target_collection_name},
        )

In [8]:
# Fetch every document of the compatibility-scores collection.
query = 'FOR doc IN {} RETURN doc'.format(s2_compatibility_collection_name)
cursor = pipeline.db.aql.execute(query)
# NOTE(review): the sort key is the formatted string "movie_id/scene_element", so scene
# elements are ordered as text within a movie (the saved outlier listing shows 10 before 9).
all_docs = sorted(list(cursor), key=lambda x:"{}/{}".format(x['movie_id'],x['scene_element']))
# Distinct movie ids; built from a set, so their order is not tied to all_docs.
movies = list(set([x['movie_id'] for x in all_docs]))
# movie_id -> that movie's documents, sorted by their 'scene_element' value.
all_movies = {}

for mid in movies:
    print("mid is {}".format(mid))
    # `story` is reset for every movie but is not read in the visible cells.
    story = []
    elements = sorted([x for x in all_docs if x['movie_id'] == mid],key=lambda y:y['scene_element'])
    all_movies[mid] = elements

mid is Movies/222510575
mid is Movies/222510324
mid is Movies/222510253
mid is Movies/222510046
mid is Movies/222509820
mid is Movies/222509945
mid is Movies/222510189
mid is Movies/222509634
mid is Movies/222510448
mid is Movies/222510810
mid is Movies/222509871
mid is Movies/222511095
mid is Movies/222510951
mid is Movies/222510692
mid is Movies/222509721
mid is Movies/222510136
mid is Movies/222511030
mid is Movies/222510403


In [33]:
def detect_outliers(doc, num_sigma=5.0, abs_threshold=0.07):
    """Find, per sentence group, the compatibility scores far from the group mean.

    Args:
        doc: mapping whose ``'compat_scores'`` entry is a list of score lists
            (one list per sentence group).
        num_sigma: number of standard deviations used as the cut-off. Only
            applied when ``abs_threshold`` is ``None``.
        abs_threshold: absolute cut-off on ``|score - group mean|``. The default
            of 0.07 is the fixed value this function has always used; pass
            ``None`` to switch to the ``num_sigma`` rule.

    Returns:
        A list with one integer index array per group, holding the positions
        whose deviation from the group mean is strictly above the cut-off.
    """
    outliers = []
    for group in doc['compat_scores']:
        scores = np.array(group)
        deviation = np.abs(scores - scores.mean())
        if abs_threshold is None:
            limit = num_sigma * scores.std()
        else:
            limit = abs_threshold
        outliers.append(np.where(deviation > limit)[0])
    return outliers
    

In [34]:
# Collect the (movie_id, scene_element) of every document with at least one flagged score.
# NOTE(review): as called here detect_outliers compares deviations against its fixed 0.07
# cut-off, so num_sigma=3.5 does not influence the result.
all_outliers = []
for doc in all_docs:    
    rc = detect_outliers(doc,num_sigma=3.5)
    if len(flatten(rc)) > 0:
        all_outliers.append((doc['movie_id'],doc['scene_element']))

# Number of documents with flagged scores.
len(all_outliers)

4

In [35]:
# Show the collected (movie_id, scene_element) pairs.
all_outliers

[('Movies/222510575', 10),
 ('Movies/222510575', 9),
 ('Movies/222510692', 9),
 ('Movies/222510951', 2)]

In [36]:
# Inspect the first flagged scene: per sentence group, the indices detect_outliers reports.
mid, elem = all_outliers[0]
# NOTE(review): `elem` is a scene_element value but is used as a list position here; this
# presumably relies on each movie's scene elements being 0..n-1 with none missing — confirm.
doc = all_movies[mid][elem]
list(enumerate(detect_outliers(doc,num_sigma=3.5)))

[(0, array([0, 1, 4, 6, 7])),
 (1, array([2, 8])),
 (2, array([1, 6, 7, 8, 9])),
 (3, array([0, 6])),
 (4, array([8])),
 (5, array([6])),
 (6, array([1])),
 (7, array([2, 7])),
 (8, array([4, 8])),
 (9, array([7]))]

In [28]:
# Pair each sentence of group `i` with its compatibility score, for the currently selected `doc`.
# NOTE(review): this cell's execution count (28) is lower than that of the cells above it, so
# the saved output may have been produced with a different `doc` — re-run to confirm.
i=0
list(enumerate(zip(doc['sentences'][i],doc['compat_scores'][i])))

[(0,
  ('a group of ladies holding guns are next to the conductor on the moon',
   0.0722657975344407)),
 (1,
  ('An old black and white photo of a group of men standing next to a rocket with a conductor with a hat and flag',
   0.0722622288529042)),
 (2,
  ('an old black and white photo of a group of men standing next to cannon and iron cannon, and ladies holding guns in the background in the sky',
   0.07226160822643043)),
 (3,
  ('cannon operators on a ladder  are shooting at ladies holding guns, hanging on sticks',
   0.0722655268854197)),
 (4,
  ('an old black and white photo of a group of men standing next to a rocket with the flag and guns, and ladder',
   0.07224343993955765)),
 (5,
  ('A group of ladies holding guns and iron cannon are standing on the moon.',
   0.07226539909893068)),
 (6,
  ('an old black and white photo of a group of men standing next to a rocket, with cannon operator on a ladder under clouds',
   0.07226088673452333)),
 (7,
  ('an old black and white photo 