In [1]:
import sys
sys.path.insert(0, "/notebooks/pipenv")
sys.path.insert(0, "/notebooks/nebula3_vlm")
sys.path.insert(0, "/notebooks/nebula3_database")
sys.path.insert(0, "/notebooks/")
import os
import math
import random
import bisect
import pickle
import time
import numpy as np


In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import urllib
import subprocess
import re
import tempfile
import itertools
import torch
import spacy
# import amrlib
# import penman

from typing import List, Tuple
from operator import itemgetter 
from transformers import AutoModel, AutoTokenizer, AutoModelForSequenceClassification, BertTokenizer, BertForSequenceClassification
from database.arangodb import DatabaseConnector
from config import NEBULA_CONF
from movie_db import MOVIE_DB


In [3]:
class PIPELINE:
    """Holds the configuration values and the database handle used by the cells below."""

    def __init__(self):
        """Read the host and database name from NEBULA_CONF, then connect to that database."""
        nebula_conf = NEBULA_CONF()
        # Connection settings are taken from the project configuration object.
        self.db_host = nebula_conf.get_database_host()
        self.database = nebula_conf.get_playground_name()
        # Create the connector first, then use it to connect to the configured database.
        self.gdb = DatabaseConnector()
        self.db = self.gdb.connect_db(self.database)

# Notebook-wide objects referenced by the functions and cells below.
pipeline = PIPELINE()
mdb = MOVIE_DB()
# NOTE(review): this import sits outside the import cells at the top of the notebook.
from vlm.clip_api import CLIP_API
# 'vit' is forwarded to CLIP_API as-is; presumably it selects the ViT backbone -- TODO confirm.
clip=CLIP_API('vit')

In [4]:
def flatten(lst):
    """Concatenate the items of every iterable in ``lst`` into one flat list (one level deep)."""
    flat = []
    for inner in lst:
        flat.extend(inner)
    return flat

def compute_batch_scores(video_emb: torch.Tensor, texts: List[str], normalize=True, **kwargs) -> List[float]:    
    """Score every text against one video embedding.

    The texts are encoded in a single batch by the notebook-level ``clip`` object
    (extra keyword arguments are forwarded to ``clip.clip_batch_encode_text``).
    Each score is the element-wise product of the video embedding and the text
    embedding, summed over dim 1.

    NOTE(review): ``normalize`` is accepted but never used by this function.
    NOTE(review): the value returned is the NumPy array produced by
    ``.cpu().numpy()``, not a Python list as the annotation suggests.
    """
    text_embs = clip.clip_batch_encode_text(texts, **kwargs)
    # Expand the video embedding to the shape of the text batch, multiply, reduce over dim 1.
    per_text = torch.sum(video_emb.expand_as(text_embs) * text_embs, dim=1)
    return per_text.cpu().numpy()


def compute_concat_score(image_emb: torch.Tensor, texts: List[str], join_on=',') -> float:
    """Score one embedding against the concatenation of several texts.

    Each text is stripped, loses one trailing '.', and is followed by
    ``join_on`` plus a space. The pieces are concatenated, printed, stripped
    once more and encoded; the result is ``torch.matmul`` of ``image_emb`` and
    that text embedding.

    Args:
        image_emb: embedding to compare against the combined text.
        texts: text fragments to combine; blank fragments are ignored.
        join_on: separator appended after every fragment.

    Returns:
        Whatever ``torch.matmul`` yields for the two embeddings (a tensor, not
        a Python float, despite the annotation).
    """
    pieces = []
    for raw in texts:
        fragment = raw.strip()
        if not fragment:
            # A blank fragment used to raise IndexError on ``t[-1]``; it adds nothing, so skip it.
            continue
        if fragment.endswith('.'):
            fragment = fragment[:-1]
        pieces.append(fragment + join_on + ' ')
    combined_text = "".join(pieces)
    print("Combined: "+combined_text)
    # The original called ``mdmmt.encode_text``, but no ``mdmmt`` object is defined in this
    # notebook (NameError on every call). The notebook's CLIP text encoder is used instead.
    # NOTE(review): confirm CLIP is the intended encoder for this score.
    return torch.matmul(image_emb, clip.clip_encode_text(combined_text.strip()))

In [5]:
def transform_concept(c):
    """Return the alphabetic stem of a concept label, dropping an optional numeric suffix.

    A label made of ASCII letters, optionally followed by '-' and/or digits
    (e.g. ``"look-01"``), is reduced to its letters (``"look"``). Any other
    label is returned unchanged.

    The character class was previously ``[a-zA-z]``; the range ``A-z`` also
    covers ``[``, ``\\``, ``]``, ``^``, ``_`` and the backtick, so labels
    containing those characters were accepted as "letters" by accident.
    """
    match = re.match(r"^([a-zA-Z]+)-?(\d*)$", c)
    return match.group(1) if match else c

class ConceptManager:
    """Namespace for concept-grounding helpers."""

    def __init__(self):
        pass

    @staticmethod
    def ground_concept(concept):
        """Ground ``concept`` by delegating to ``transform_concept``.

        Declared as a static method: the function takes no ``self``, so without
        the decorator it could only be called on the class and raised TypeError
        when called on an instance. Both call styles work now.
        """
        return transform_concept(concept)

In [6]:
class SimilarityManager:
    """Token-level similarity between two texts, computed with a spaCy pipeline."""

    # Part-of-speech tags that are scored when the source has more than one token.
    CONTENT_POS = ('NOUN', 'ADJ', 'ADV', 'VERB', 'PROPN')

    def __init__(self):
        """Load the 'en_core_web_lg' spaCy pipeline."""
        self.nlp = spacy.load('en_core_web_lg')

    def similarity(self, src, target):
        """Mean, over the scored tokens of ``src``, of the best similarity to any token of ``target``.

        For a multi-token ``src`` only tokens whose ``pos_`` is in ``CONTENT_POS``
        are scored; a single-token ``src`` is always scored. If that filter
        leaves nothing, every token of ``src`` is scored instead, extending the
        single-token exemption so the result is never the mean of an empty list.

        Returns:
            The mean best-match similarity, or ``0.0`` when ``src`` or
            ``target`` has no tokens. Previously an empty ``target`` raised
            ValueError and an empty selection returned NaN.
        """
        s1 = self.nlp(src)
        s2 = self.nlp(target)
        if len(s1) == 0 or len(s2) == 0:
            return 0.0

        scored = list(s1)
        if len(s1) > 1:
            content = [w for w in s1 if w.pos_ in self.CONTENT_POS]
            if content:
                scored = content

        best_matches = [max(w.similarity(x) for x in s2) for w in scored]
        return np.mean(best_matches)
        
# Notebook-level SimilarityManager instance (constructing it loads the spaCy pipeline).
# NOTE(review): SubsetOptimization.__init__ builds its own SimilarityManager rather than reusing this one.
smanager = SimilarityManager()


In [76]:
def softmax(x):
    """Softmax over the first axis of ``x``, returned as a float64 array.

    The maximum is subtracted before exponentiating. This leaves the result
    mathematically unchanged but stops ``exp`` from overflowing to inf (and the
    ratio from becoming NaN) for large inputs. An empty input is returned as an
    empty array.
    """
    values = np.asarray(x, dtype=np.float64)
    if values.size == 0:
        return values
    exps = np.exp(values - values.max(axis=0))
    return exps / exps.sum(axis=0)


def normalize(x):
    """Standardise ``x`` to zero mean and unit (population) standard deviation.

    When the standard deviation is zero (constant or single-element input) an
    all-zero array is returned instead of dividing by zero and producing NaN.
    An empty input is returned as an empty array.
    """
    values = np.asarray(x, dtype=np.float64)
    if values.size == 0:
        return values
    centered = values - np.mean(values)
    spread = np.std(values)
    if spread == 0:
        return np.zeros_like(centered)
    return centered / spread

class SubsetOptimization:
    """Simulated-annealing search for a subset of candidate sentences.

    A *state* is a sorted list of indices into ``candidates_strings``. Its score
    blends two terms (see ``get_score``):

    * coverage: how well the state's sentences cover the ``experts`` strings
      according to ``coverage_matrix[expert, candidate]``;
    * similarity: the mean of ``candidates_similarity`` (scores returned by
      ``compute_batch_scores`` for the video embedding) over the state.
    """

    def __init__(self, video_emb, experts: List, candidates_strings: List[str], coverage_matrix = None):
        """Store the hyper-parameters, score all candidates and build the coverage matrix.

        Args:
            video_emb: embedding handed to ``compute_batch_scores``.
            experts: strings that the selected sentences should cover.
            candidates_strings: candidate sentences to choose from.
            coverage_matrix: optional precomputed matrix indexed
                ``[expert, candidate]``; when omitted every entry is computed
                with ``concept_similarity``.
        """
        self.video_emb = video_emb
        self.initial_temp = 10
        self.final_temp = .001
        self.alpha = 0.01
        self.theta = 0.9                        # weight of similarity vs. coverage in get_score
        self.theta1 = 0.5                       # weight of similarity vs. coverage in per-candidate fit scores
        self.remove_prob_temprature = 10.
        self.expert_to_cover_temp = 1.
        self.candidate_to_add_cover_temp = 10.
        self.candidate_to_add_vlm_temp = 10.
        self.reset_every = 5000
        self.experts = experts
        self.coverage_threshold = 3.0
        self.candidates_strings = candidates_strings
        print("Computing batch similarity...")
        self.candidates_similarity = compute_batch_scores(self.video_emb, self.candidates_strings)
        print("Done")
        self.opt_results = []                   # (state, score) for every accepted move
        self.smanager = SimilarityManager()

        if coverage_matrix is not None:
            self.coverage_matrix = coverage_matrix
        else:
            self.coverage_matrix = np.full([len(self.experts), len(self.candidates_strings)], np.nan)
            for i in range(len(experts)):
                for j in range(len(candidates_strings)):
                    self.coverage_matrix[i][j]=self.concept_similarity(self.experts[i],self.candidates_strings[j])
        # A state may hold at most 1.5 sentences per expert.
        self.max_size = int(len(self.experts)*1.5)

    def concept_similarity(self, concept, sent):
        """Similarity between an expert string and a sentence, via ``SimilarityManager.similarity``."""
        return self.smanager.similarity(concept,sent)

    def get_coverage(self,i,j):
        """Return ``coverage_matrix[i][j]``, computing and caching it first if the entry is NaN."""
        if np.isnan(self.coverage_matrix[i][j]):
            self.coverage_matrix[i][j] = self.concept_similarity(self.experts[i],self.candidates_strings[j])
        return self.coverage_matrix[i][j]

    def get_expert_coverage(self,state):
        """For each expert, the sum of its coverage by the candidates in ``state``.

        (An earlier variant used the per-expert maximum instead of the sum.)
        """
        return self.coverage_matrix[:,state].sum(axis=1)

    def get_state_coverage(self,state) -> float:
        """Mean over experts of ``get_expert_coverage(state)``."""
        return np.mean(self.get_expert_coverage(state))

    def get_score(self, state: List[int]) -> float:
        """Objective value of ``state``: ``(1-theta)*coverage + theta*mean similarity``; 0 for an empty state."""
        if not state:
            return 0
        coverage_score = self.get_state_coverage(state)
        similarity_score = self.candidates_similarity[state].mean().item()
        return (1-self.theta)*coverage_score + self.theta*similarity_score

    def prob_to_remove(self, state):
        """Probability of removing (rather than adding) a sentence.

        It is the state coverage relative to ``coverage_threshold``, clipped to
        [0, 1] and raised to the 4th power, so removal becomes likely only once
        the state is close to the coverage threshold.
        """
        cover = self.get_state_coverage(state)
        relative_cover = min(max(cover / self.coverage_threshold,0),1)
        return np.power(relative_cover,4)

    def candidate_to_add_probs(self, expert_to_cover, anti_state):
        """Sampling distribution over ``anti_state`` (indices not in the state) for the add move.

        Candidates are favoured when they cover ``expert_to_cover`` well and
        when their similarity score is high; both terms are standardised and
        weighted by their temperatures before the softmax.
        """
        if len(anti_state) == 1:
            # Standardising a single value is undefined (zero spread); the choice is forced anyway.
            return np.ones(1)
        add_cover = normalize(self.coverage_matrix[expert_to_cover][anti_state])
        add_similarity = normalize(self.candidates_similarity[anti_state])
        probs = softmax(add_cover*self.candidate_to_add_cover_temp + add_similarity*self.candidate_to_add_vlm_temp)
        return probs

    def _fit_scores(self, indices):
        """Per-candidate fit: ``(1-theta1)*mean coverage over experts + theta1*similarity``."""
        s_score = self.candidates_similarity[indices]
        s_coverage = self.coverage_matrix.mean(axis=0)[indices]
        return (1-self.theta1)*s_coverage+self.theta1*s_score

    # state here is assumed (and guaranteed on return) to be -sorted-
    def get_candidate(self, state: List[int]) -> List[int]:
        """Propose a neighbour of ``state`` by removing or adding one sentence.

        * Empty state: a single random candidate.
        * State at (or above) ``max_size``: drop the sentence with the lowest fit score.
        * Otherwise remove with probability ``prob_to_remove(state)`` (the
          sentence is sampled, favouring low fit scores); else pick an expert,
          favouring poorly covered ones, and add a sentence sampled from
          ``candidate_to_add_probs``.

        The input list is not modified; a new sorted list is returned.
        """
        if not state:
            return [random.randint(0,len(self.candidates_strings)-1)]

        rc = state.copy()
        s = np.array(state)
        s_fitscore = self._fit_scores(s)

        # ">=" (not "==") so that an oversized initial state is trimmed as well.
        if len(state) >= self.max_size:
            print("Maximum state size, removing")
            idx = np.argmin(s_fitscore)
            del rc[idx]
            return rc

        remove_sentence = random.random()<self.prob_to_remove(state)

        in_state = set(state)
        anti_state = [i for i in range(len(self.candidates_strings)) if i not in in_state]
        if not anti_state:
            # Every candidate is already selected, so there is nothing to add; removal is the only move.
            remove_sentence = True

        if remove_sentence:             # We decide to remove a sentence from the set
            probs = softmax(-s_fitscore*self.remove_prob_temprature)
            idx = np.random.multinomial(1,probs).argmax()
            del rc[idx]
        else:                           # Add a sentence from the outside
            # Pick an expert to try and cover, favouring the least covered ones.
            expert_coverage = self.get_expert_coverage(s)
            probs = softmax(-expert_coverage*self.expert_to_cover_temp)
            expert_to_cover = np.random.multinomial(1,probs).argmax()
            probs = self.candidate_to_add_probs(expert_to_cover,np.array(anti_state))
            idx_to_add = np.random.multinomial(1,probs).argmax()
            # insort keeps the state sorted.
            bisect.insort(rc,anti_state[idx_to_add])

        return rc

    def temp_schedule(self,i):
        """Temperature at iteration ``i``: piecewise-linear between fixed knots.

        Constant at the first knot's value before it, linearly interpolated
        between consecutive knots, and ``final_temp`` from the last knot on.
        """
        schedule = [(5000,0.5), (15000,0.1), (25000,0.01), (35000,0.005), (45000,self.final_temp)]
        if i<schedule[0][0]:
            return schedule[0][1]
        if i>=schedule[-1][0]:
            return schedule[-1][1]
        for (start, start_val), (end, end_val) in zip(schedule, schedule[1:]):
            if i<end:
                return ((i-start)/(end-start))*(end_val-start_val)+start_val
        return schedule[-1][1]          # not reachable given the guards above

    def get_scored_permutations(self, k):
        """Score every ordered selection of ``k`` candidate indices (exhaustive; small inputs only)."""
        # The original read ``self.candidates``, an attribute that is never set.
        n = len(self.candidates_strings)
        return [(x,self.get_score(list(x))) for x in itertools.permutations(range(n),k)]

    def reset(self):
        """Best state among the accepted moves so far, or ``[]`` when none was recorded."""
        if not self.opt_results:
            return []
        return max(self.opt_results,key=lambda x:x[1])[0]

    def simulated_annealing(self, initial_state, clear_prev = False, reset_every = None):
        """Run the annealing loop and return the best state found.

        Args:
            initial_state: sorted list of candidate indices to start from.
            clear_prev: drop ``opt_results`` of earlier runs before starting.
            reset_every: every this many iterations the best state so far is
                proposed as the next candidate; defaults to ``self.reset_every``.

        Returns:
            The highest-scoring accepted state, or the current state if no
            move was ever accepted.
        """
        current_temp = self.initial_temp
        i = 0
        if clear_prev:
            self.opt_results = []
        if not reset_every:
            reset_every = self.reset_every

        # Start by initializing the current state with the initial state
        current_state = initial_state
        curr_score = self.get_score(initial_state)

        while current_temp > self.final_temp:
            if i % reset_every == 0 and i>0:
                next_cand = self.reset()
                print("Reset to state: {}".format(next_cand))
            else:
                next_cand = self.get_candidate(current_state)
            next_score = self.get_score(next_cand)

            score_diff = next_score - curr_score

            if score_diff > 0:
                # A better solution is always accepted.
                move = True
            else:
                # A worse (or equal) solution is accepted with probability e^(score_diff/temp).
                move = random.uniform(0, 1) < math.exp(score_diff / current_temp)
            if move:
                current_state = next_cand
                curr_score = next_score
                self.opt_results.append((current_state,curr_score))
            # Update the temperature from the schedule.
            current_temp = self.temp_schedule(i)
            i += 1
            if i % 1000 == 0:
                print("i: {}".format(i))

        # Without any accepted move there is no recorded result to pick from.
        return self.reset() if self.opt_results else current_state



In [8]:
def optimize_scene(doc,mat=None):
    """Select the best subset of a document's candidate sentences for its scene.

    Args:
        doc: mapping with 'movie_id', 'scene_element' and 'combined_sentences'.
        mat: optional precomputed coverage matrix forwarded to
            ``SubsetOptimization``; its columns must follow the order of the
            de-duplicated sentences (first occurrence order).

    Returns:
        ``(sentences, optimizer)``: the selected sentences and the
        ``SubsetOptimization`` instance. When there is nothing to optimise the
        result is ``([], None)``, so callers can always unpack two values.
    """
    scene = mdb.get_scene_from_collection(doc['movie_id'],doc['scene_element'],'s1_lsmdc')
    if not scene:
        print("No scene found for {}/{}, aborting!".format(doc['movie_id'],doc['scene_element']))
        return [], None
    experts = flatten(scene['experts'].values())
    # dict.fromkeys removes duplicates while keeping first-seen order; list(set(...)) gave an
    # order that depends on string hashing, so a precomputed ``mat`` could not be aligned reliably.
    sents = list(dict.fromkeys(doc['combined_sentences']))
    if not sents:
        print("Empty list of sentences, aborting!")
        return [], None
    emb_video = clip.clip_encode_video(doc['movie_id'],doc['scene_element'])
    optim = SubsetOptimization(emb_video, experts, sents, coverage_matrix=mat)
    best_state = optim.simulated_annealing([], reset_every=15000)
    coverage = list(zip(optim.experts, optim.get_expert_coverage(best_state)))
    print("Coverage:")
    print(coverage)
    print("Similarity:")
    print("Mean of result: {}".format(optim.candidates_similarity[best_state].mean()))
    # Index explicitly: itemgetter(*idx) returns a bare string for a single index (which list()
    # would split into characters) and raises for an empty selection.
    selected = [optim.candidates_strings[i] for i in best_state]
    return selected, optim
    



In [9]:
# Fetch every document of the s1_pipeline_results_phase2 collection.
query = 'FOR doc IN s1_pipeline_results_phase2 RETURN doc'
cursor = pipeline.db.aql.execute(query)
# Materialise the cursor into a list so the documents can be indexed and iterated repeatedly.
all_docs = list(cursor)


In [10]:
# Sanity check: encode a small batch of sentences; the next cell inspects the row norms.
# Single-sentence variant kept for reference:
# rc = clip.clip_batch_encode_text(["This is the first sentence"])
rc = clip.clip_batch_encode_text(["This is the first sentence", "I see an elephant", "One last drink to go please"])

In [11]:
# Norm of each row of the batch text embeddings (the recorded output is ~1 per row).
rc.norm(dim=1)

tensor([0.9995, 1.0010, 1.0010], device='cuda:0', dtype=torch.float16)

In [12]:
# Same check for the single-text encoder: norm of one text embedding.
rc1 = clip.clip_encode_text("This is the first sentence")
rc1.norm()

tensor(1., device='cuda:0', dtype=torch.float16)

In [13]:
# Pick one document to experiment with and encode its scene element with the CLIP video encoder.
doc = all_docs[1]
mid = doc['movie_id']
elem = doc['scene_element']

rc2 = clip.clip_encode_video(mid,elem)

http://ec2-18-159-140-240.eu-central-1.compute.amazonaws.com:7000/static/development/1038_The_Great_Gatsby_00_56_48_259-00_56_50_126.mp4
23.976023976023978
Movie info: {'arango_id': 'Movies/114207499', 'description': '1038_The_Great_Gatsby_00_56_48_259-00_56_50_126', 'fps': 23, 'width': 1920, 'height': 1080, 'last frame': 300, 'movie_id': 'd5d5f3307bef4d23aedb07f6c1f65b20', 'mdfs': [[2, 24, 46]], 'scene_elements': [[0, 48]]}
fn path: /tmp/file.mp4
/tmp/file.mp4
Scene:  0


In [77]:
# Run the optimisation on the selected document and unpack the (sentences, optimizer) pair it returns.
rc, optim_local = optimize_scene(doc)

http://ec2-18-159-140-240.eu-central-1.compute.amazonaws.com:7000/static/development/1038_The_Great_Gatsby_00_56_48_259-00_56_50_126.mp4
23.976023976023978
Movie info: {'arango_id': 'Movies/114207499', 'description': '1038_The_Great_Gatsby_00_56_48_259-00_56_50_126', 'fps': 23, 'width': 1920, 'height': 1080, 'last frame': 300, 'movie_id': 'd5d5f3307bef4d23aedb07f6c1f65b20', 'mdfs': [[2, 24, 46]], 'scene_elements': [[0, 48]]}
fn path: /tmp/file.mp4
/tmp/file.mp4
Scene:  0
Computing batch similarity...
Done
i: 1000
i: 2000
i: 3000
i: 4000
i: 5000
i: 6000
i: 7000
i: 8000
i: 9000
i: 10000
i: 11000
i: 12000
i: 13000
i: 14000
i: 15000
Reset to state: [17, 43, 46, 92, 102]
i: 16000
i: 17000
i: 18000
i: 19000
i: 20000
i: 21000
i: 22000
i: 23000
i: 24000
i: 25000
i: 26000
i: 27000
i: 28000
i: 29000
i: 30000
Reset to state: [17, 43, 46, 92, 102]
i: 31000
i: 32000
i: 33000
i: 34000
i: 35000
i: 36000
i: 37000
i: 38000
i: 39000
i: 40000
i: 41000
i: 42000
i: 43000
i: 44000
i: 45000
Reset to state: [

In [78]:
# Display the selected sentences together with how many were selected.
rc, len(rc)

(['The man in the suit and tie is looking tense as he sits next to a woman in a green jacket.',
  'The man in the suit and tie sits and holds a cup of coffee while a woman in a green jacket stands next to him.',
  'The man in the suit and tie sits and holds a cup of coffee as he looks at the woman in the green jacket.',
  'The man in the suit and tie holds a cup of coffee and looks at the green jacket with flowers on it.',
  "The man in the suit and tie looks tense as he holds a cup of coffee in his hand. He's standing next to a woman who's wearing a green jacket."],
 5)

In [79]:
# Compare the optimiser's selection with the 5 candidates that have the highest similarity score.
ind = optim_local.candidates_similarity.argsort()[-5:]   # argsort is ascending, so these are the top 5
cands = np.array(optim_local.candidates_strings)[ind]
scores = optim_local.candidates_similarity[ind]
all_top = list(zip(cands,scores))
# Prints: membership of each selected sentence in the top 5, number of candidates, mean and max similarity.
print([x in list(cands) for x in rc], len(optim_local.candidates_strings), optim_local.candidates_similarity.mean(), optim_local.candidates_similarity.max())
print(optim_local.candidates_similarity[ind])

[False, True, True, True, False] 108 0.2147 0.26
[0.2448 0.2455 0.2462 0.257  0.26  ]


In [None]:
# Optimise every document that has no stored result yet and persist the selected sentences.
# The original cell referenced an undefined ``nre`` object; the lookups and the insert now go
# through ``mdb`` and ``pipeline.db``, the objects the rest of the notebook uses for them.
for doc in all_docs:
    mid = doc['movie_id']
    elem = doc['scene_element']
    existing = mdb.get_scene_from_collection(mid,elem,'s1_pipeline_results_final')
    if existing:
        print("Results already exist for {}/{}".format(mid,elem))
        continue
    print("Going forward with {}/{}".format(mid,elem))
    result = optimize_scene(doc)
    # optimize_scene returns (sentences, optimizer) on success; only the sentences are stored
    # (the optimizer object is not serialisable into a document).
    sentences = result[0] if isinstance(result, tuple) else result
    rc_doc = {
        'movie_id': mid,
        'scene_element': elem,
        'sentences': sentences
    }
    # Bind the document as a variable instead of formatting the Python dict repr into the query:
    # sentences contain quotes and apostrophes that would otherwise corrupt the AQL text.
    query = "INSERT @doc INTO s1_pipeline_results_final"
    cursor = pipeline.db.aql.execute(query, bind_vars={'doc': rc_doc})