In [1]:
import os
import glob
import pandas as pd
import lpips
import math
import plotly.express as px
import numpy as np
from natsort import natsorted
import cv2
from PIL import Image
import torchvision.transforms as transforms
import scipy.linalg as linalg

In [2]:
# LPIPS perceptual-distance models, one per backbone ("trunk").
# loss_fn_alex is the one called by get_steady_state_probabilities_ifs;
# loss_fn_vgg is not referenced by any of the cells visible in this notebook.
loss_fn_alex = lpips.LPIPS(net='alex')
loss_fn_vgg = lpips.LPIPS(net='vgg')

Setting up [LPIPS] perceptual loss: trunk [alex], v[0.1], spatial [off]




Loading model from: /Users/imrankabir/Desktop/research/vqa_accessibility/Dashboard-For-VQA/venv/lib/python3.9/site-packages/lpips/weights/v0.1/alex.pth
Setting up [LPIPS] perceptual loss: trunk [vgg], v[0.1], spatial [off]




Loading model from: /Users/imrankabir/Desktop/research/vqa_accessibility/Dashboard-For-VQA/venv/lib/python3.9/site-packages/lpips/weights/v0.1/vgg.pth


In [3]:
# Root folder holding the per-model prediction CSVs (read by get_st_p_ent).
# NOTE(review): this is a machine-specific absolute path; adjust before re-running elsewhere.
data_path = '/Users/imrankabir/Desktop/research/vqa_accessibility/Dashboard-For-VQA/Dashboard Data'

# Frame images live in the "Images" sub-folder of the data root.
images_dir = os.path.join(data_path, 'Images')

In [4]:
# Per-channel mean and standard deviation applied by normalize_image
# (the values match the widely used ImageNet RGB statistics).
mean=[0.485, 0.456, 0.406]
std=[0.229, 0.224, 0.225]

# torchvision transform that turns an image array into a torch tensor.
transform_f = transforms.ToTensor()

def normalize_image(in_img):
    """Standardise an image with the module-level ``mean`` and ``std``.

    The input is converted to a float32 array, then ``mean`` is subtracted
    and the result is divided by ``std`` (both broadcast over the last axis).

    Args:
        in_img: array-like image whose last axis has one entry per channel.
            # assumes values are already scaled to [0, 1] - the visible caller
            # divides by 255 before calling; confirm for other callers.

    Returns:
        The standardised pixel array.
    """
    as_float = np.asarray(in_img).astype('float32')
    centered = as_float - mean
    return centered / std

In [5]:
def _load_frame_tensor(img_dir, vid_n, seg_n, frame_idx):
    """Read one frame from disk and prepare it as input for the LPIPS model."""
    frame_path = os.path.join(
        img_dir, f'video-{vid_n}-segment-{seg_n}-frame-{frame_idx}.jpeg'
    )
    # The context manager closes the underlying file as soon as the pixels are copied.
    with Image.open(frame_path) as frame_file:
        rgb = np.array(frame_file.convert('RGB')) / 255
    resized = cv2.resize(
        normalize_image(rgb), (64, 64), interpolation=cv2.INTER_LINEAR
    ).astype(np.float32)
    # unsqueeze(0) adds the leading batch dimension.
    return transform_f(resized).unsqueeze(0)


def get_steady_state_probabilities_ifs(pred, img_dir, vid_n, seg_n):
    """Steady-state distribution of a frame-similarity-weighted Markov chain.

    ``pred`` is transposed so that each row is the prediction vector of one
    frame; every distinct row is a state. For each pair of consecutive frames
    the LPIPS (AlexNet) distance ``d`` between the two images is computed, and
    the row of the previous state receives ``d`` on its self-transition and
    ``1 - d`` on the transition to the current state. Rows are then normalised
    and the stationary distribution is obtained by solving ``pi (P - I) = 0``
    with the last equation replaced by ``sum(pi) = 1``.

    Args:
        pred: 2-D numpy array with one column per frame.
        img_dir: directory containing
            ``video-{vid_n}-segment-{seg_n}-frame-{i}.jpeg`` files.
        vid_n: video identifier used in the frame file names.
        seg_n: segment identifier used in the frame file names.

    Returns:
        1-D numpy array with one probability per unique state, ordered like
        ``np.unique(pred.T, axis=0)``. Empty when ``pred`` has no frames.

    Raises:
        numpy.linalg.LinAlgError: if the linear system is singular.
    """
    pred = pred.T
    if pred.shape[0] == 0:
        # No frames means no states; previously this crashed on co_eff[-1].
        return np.array([])

    unq_st = np.unique(pred, axis=0)
    unq_st_str = [''.join(str(ch) for ch in state) for state in unq_st]

    # Rows are "from" states, columns are "to" states.
    transition_matrix = pd.DataFrame(0.0, index=unq_st_str, columns=unq_st_str)

    prev_tensor = None
    for f in range(1, pred.shape[0]):
        s_now = ''.join(str(ch) for ch in pred[f])
        s_prev = ''.join(str(ch) for ch in pred[f - 1])

        # Each frame is decoded once: the "now" tensor of this step is the
        # "prev" tensor of the next one.
        if prev_tensor is None:
            prev_tensor = _load_frame_tensor(img_dir, vid_n, seg_n, f - 1)
        now_tensor = _load_frame_tensor(img_dir, vid_n, seg_n, f)

        d = loss_fn_alex(now_tensor, prev_tensor).detach().numpy()[0, 0, 0, 0]

        # Single-step .loc updates: chained indexing (df[col][row] += x) writes
        # to a temporary object under pandas Copy-on-Write and would leave the
        # matrix at zero.
        transition_matrix.loc[s_prev, s_prev] += d
        transition_matrix.loc[s_prev, s_now] += 1 - d

        prev_tensor = now_tensor

    # Row-normalise; the epsilon keeps all-zero rows (states never left) at zero.
    row_sums = transition_matrix.sum(axis=1)
    transition_probs = transition_matrix.div(row_sums + 1e-15, axis=0).to_numpy()

    # Solve pi (P - I) = 0 subject to sum(pi) = 1: transpose so pi is the
    # unknown column vector, then overwrite the last equation with the
    # normalisation constraint.
    co_eff = (transition_probs - np.identity(transition_probs.shape[0])).T
    co_eff[-1] = 1.0
    const = np.zeros(co_eff.shape[0])
    const[-1] = 1.0

    return np.linalg.solve(co_eff, const)

In [6]:
def get_steady_state_probabilities(h_m):
    """Empirical frequency of every distinct state in a prediction matrix.

    ``h_m`` is converted to an array and transposed; each row of the result
    is treated as one state observation. For every unique row the function
    counts how many rows equal it and divides by the total number of rows.

    Args:
        h_m: 2-D array-like; after transposition each row is one observation.

    Returns:
        1-D numpy array of relative frequencies, ordered like
        ``np.unique(observations, axis=0)``.
    """
    observations = np.array(h_m).T
    n_observations = observations.shape[0]

    frequencies = [
        np.count_nonzero(np.all(observations == state, axis=-1)) / n_observations
        for state in np.unique(observations, axis=0)
    ]
    return np.array(frequencies)


def calculate_entropy(ss_probs):
    """Normalised Shannon entropy (base 2) of a probability vector.

    The entropy ``-sum(p * log2(p))`` is divided by ``log2(len(ss_probs))``,
    so a uniform distribution yields 1.0.

    Args:
        ss_probs: sequence of probabilities. Non-positive entries contribute
            nothing to the sum. This covers exact zeros as well as the tiny
            negative values a linear solve can produce through round-off,
            which would otherwise make ``math.log2`` raise ``ValueError``.

    Returns:
        The normalised entropy; 0.0 when there are fewer than two entries.
    """
    n_states = len(ss_probs)
    if n_states <= 1:
        # log2(1) == 0 would make the normalisation divide by zero.
        return 0.0

    tot_ss_ent = 0.0
    for prb in ss_probs:
        if prb > 0:
            tot_ss_ent = tot_ss_ent - prb * math.log2(prb)

    return tot_ss_ent / math.log2(n_states)

In [7]:
def get_st_p_ent(vid, seg, img_path, objs, model):
    """Compute steady-state probabilities and entropies for one video segment.

    Reads ``{data_path}/{model}/video-{vid}-segment-{seg}.csv``, transposes it,
    uses the first transposed row as (lower-cased) column names, restricts the
    columns to ``objs`` (missing ones are filled with ``'0'``) and transposes
    back before passing the array to the two steady-state estimators.

    Args:
        vid: video identifier used in the CSV and frame file names.
        seg: segment identifier used in the CSV and frame file names.
        img_path: directory containing the frame images for this segment.
        objs: list of object names to keep, in the desired order.
        model: name of the sub-folder of ``data_path`` holding the CSVs.

    Returns:
        Tuple ``(p_steady, ent, p_steady_ifs, ent_ifs)``: the frequency-based
        steady-state probabilities and their entropy, followed by the
        image-similarity-weighted probabilities and their entropy.
    """
    pred_file = os.path.join(
        data_path,
        f'{model}/video-{vid}-segment-{seg}.csv'
    )
    pred_df = pd.read_csv(pred_file).transpose()
    pred_df.columns = pred_df.iloc[0]
    pred_df = pred_df.iloc[1:]
    pred_df.columns = [name.lower() for name in pred_df.columns]
    # NOTE(review): the original cell had a dangling `if` (no condition, no
    # body) at this point, which is a SyntaxError. It was removed so the cell
    # parses - confirm that no filtering step was meant to go here.
    pred_df = pred_df.reindex(columns=objs).fillna('0').transpose()

    pred_arr = np.array(pred_df)

    # Use the caller-supplied image directory rather than the global one.
    p_steady_ifs = get_steady_state_probabilities_ifs(pred_arr, img_path, vid, seg)
    ent_ifs = calculate_entropy(p_steady_ifs)

    p_steady = get_steady_state_probabilities(pred_arr)
    ent = calculate_entropy(p_steady)

    return p_steady, ent, p_steady_ifs, ent_ifs

In [8]:
# Video and segment identifiers passed to get_st_p_ent in the cells below.
v_ = 2
s_ = 1

# Object vocabulary for COCO-style predictions.
# A comma was missing after 'stop sign', which silently concatenated it with
# 'bench' into the single bogus label 'stop signbench'.
coco_common_obj = ['person', 'bicycle', 'car', 'motorcycle', 'bus', 'traffic signals', 'fire hydrant', 'stop sign',
                   'bench', 'dog', 'chair', 'vegetation']

# A second object vocabulary; not referenced by any cell visible in this notebook.
pfb_common_obj = ['road', 'sidewalk', 'tree', 'vegetation', 'building', 'fence', 'traffic signals',
                  'fire hydrant', 'chair', 'trash on roads', 'trash bins', 'person', 'car', 'motorcycle',
                  'bus']

# Maps a raw tag (key) to the canonical object label used in this analysis
# (value). Several tags collapse onto the same label, e.g. 'man',
# 'pedestrian' and 'biker' all become 'person'.
ram_obj_map = {
    'chair': 'chair',
    'pillar': 'pillar',
    'table': 'table',
    'person': 'person',
    'man': 'person',
    'building': 'building',
    'city street': 'road',
    'curb': 'curb',
    'pavement': 'sidewalk',
    'road': 'road',
    'car': 'car',
    'snow': 'snow',
    'doorway': 'sloped driveway',
    'elevator': 'elevator',
    'rail': 'train tracks',
    'stair': 'stairs',
    'cane': 'white cane',
    'door': 'flush door',
    'fence': 'fence',
    'barrier': 'barrier post',
    'bench': 'bench',
    'sign': 'sign',
    'bin': 'trash bins',
    'pole': 'pole',
    'street vendor': 'street vendor',
    'blind': 'person with a disability',
    'dog': 'dog',
    'escalator': 'escalator',
    'street sign': 'sign post',
    'bus stop': 'bus stop',
    'railway station': 'train platform',
    'tree': 'tree',
    'traffic light': 'traffic signals',
    'tree trunk': 'tree',
    'recycling bin': 'trash bins',
    'train track': 'train tracks',
    'pedestrian': 'person',
    'bus': 'bus',
    'city bus': 'bus',
    'tour bus': 'bus',
    'wall': 'wall',
    'elevator door': 'elevator',
    'bicycle': 'bicycle',
    'crosswalk': 'crosswalk',
    'decker bus': 'bus',
    'motorcycle': 'motorcycle',
    'motorcyclist': 'person',
    'biker': 'person',
    'motorbike': 'motorcycle',
    'warning sign': 'sign',
    'hydrant': 'fire hydrant',
    'school bus': 'bus',
    'vegetation': 'vegetation',
    'fountain': 'fountain'
}

# Distinct canonical labels, sorted so the column order (and therefore the
# order of states and probabilities downstream) is the same on every kernel
# run; a bare list(set(...)) depends on string hash randomisation.
ram_com_obj = sorted(set(ram_obj_map.values()))

print(len(ram_com_obj))

39


In [9]:
# Steady-state probabilities and entropies for the 'BLIP' predictions:
# first line is the frequency-based estimate, second the image-similarity-weighted one.
model_ = 'BLIP'
p_s, e_, p_s_i, e_i = get_st_p_ent(v_, s_, images_dir, ram_com_obj, model_)
print(p_s, e_)
print(p_s_i, e_i)

[0.1875     0.02083333 0.02083333 0.02083333 0.02083333 0.02083333
 0.02083333 0.02083333 0.02083333 0.02083333 0.02083333 0.02083333
 0.02083333 0.04166667 0.02083333 0.02083333 0.02083333 0.02083333
 0.02083333 0.02083333 0.02083333 0.02083333 0.02083333 0.02083333
 0.02083333 0.04166667 0.02083333 0.04166667 0.02083333 0.04166667
 0.02083333 0.02083333 0.02083333 0.02083333 0.02083333 0.02083333] 0.9330763782720773
[ 0.30461255 -0.          0.02713445  0.01981485 -0.         -0.
  0.0328566  -0.          0.021557    0.03153359  0.02516728 -0.
 -0.          0.04254843  0.03399764  0.0240102   0.02180371  0.0325708
  0.02595691  0.03384455 -0.         -0.         -0.         -0.
 -0.          0.04606739  0.04061     0.04204952 -0.          0.04825959
  0.03649742  0.0332447   0.02127787  0.0217299  -0.          0.03285506] 0.7646642432775831


In [11]:
# Same computation for the 'GPV-1' predictions
# (frequency-based result first, image-similarity-weighted result second).
model_ = 'GPV-1'
p_s, e_, p_s_i, e_i = get_st_p_ent(v_, s_, images_dir, ram_com_obj, model_)
print(p_s, e_)
print(p_s_i, e_i)

[0.0625     0.02083333 0.02083333 0.02083333 0.02083333 0.02083333
 0.02083333 0.04166667 0.02083333 0.02083333 0.02083333 0.02083333
 0.02083333 0.04166667 0.02083333 0.02083333 0.02083333 0.04166667
 0.02083333 0.02083333 0.02083333 0.02083333 0.02083333 0.02083333
 0.02083333 0.02083333 0.02083333 0.02083333 0.02083333 0.02083333
 0.02083333 0.02083333 0.02083333 0.04166667 0.02083333 0.02083333
 0.02083333 0.02083333 0.02083333 0.02083333 0.02083333 0.02083333] 0.9864470617566723
[ 0.09878198  0.05248973  0.03274158  0.0314793   0.03711434 -0.
  0.04208139  0.06598613  0.03898865  0.03435076 -0.          0.05248774
 -0.          0.06521239  0.03299874  0.05155746 -0.          0.0714435
 -0.         -0.         -0.          0.03343162 -0.          0.03072981
 -0.         -0.         -0.         -0.          0.06297991 -0.
  0.03369976 -0.         -0.          0.07484327 -0.         -0.
 -0.         -0.         -0.         -0.         -0.          0.05660194] 0.7848963529233669


In [12]:
# Same computation for the 'RAM' predictions
# (frequency-based result first, image-similarity-weighted result second).
model_ = 'RAM'
p_s, e_, p_s_i, e_i = get_st_p_ent(v_, s_, images_dir, ram_com_obj, model_)
print(p_s, e_)
print(p_s_i, e_i)

[0.66666667 0.04166667 0.04166667 0.02083333 0.02083333 0.02083333
 0.0625     0.02083333 0.02083333 0.02083333 0.02083333 0.04166667] 0.5655760724651602
[0.66962948 0.03068688 0.04943406 0.02161422 0.02075942 0.02310563
 0.06199445 0.02179995 0.02180388 0.0219827  0.02028933 0.03689999] 0.562771966998245
