In [1]:
import os
import glob
import pandas as pd
import lpips
import math
import plotly.express as px
import numpy as np
from natsort import natsorted
import cv2
from PIL import Image
import torchvision.transforms as transforms
import scipy.linalg as linalg

In [2]:
# Instantiate the LPIPS perceptual-distance models (AlexNet and VGG trunks).
# Only loss_fn_alex is referenced by the cells shown in this notebook;
# loss_fn_vgg is created but not used in the visible code.
loss_fn_alex = lpips.LPIPS(net='alex')
loss_fn_vgg = lpips.LPIPS(net='vgg')

Setting up [LPIPS] perceptual loss: trunk [alex], v[0.1], spatial [off]




Loading model from: /Users/imrankabir/Desktop/research/vqa_accessibility/Dashboard-For-VQA/venv/lib/python3.9/site-packages/lpips/weights/v0.1/alex.pth
Setting up [LPIPS] perceptual loss: trunk [vgg], v[0.1], spatial [off]




Loading model from: /Users/imrankabir/Desktop/research/vqa_accessibility/Dashboard-For-VQA/venv/lib/python3.9/site-packages/lpips/weights/v0.1/vgg.pth


In [3]:
# Root folder of the dashboard data. Set the VQA_DASHBOARD_DATA environment
# variable to run the notebook on another machine; the default is the original
# author's local checkout.
data_path = os.environ.get(
    'VQA_DASHBOARD_DATA',
    '/Users/imrankabir/Desktop/research/vqa_accessibility/Dashboard-For-VQA/Dashboard Data'
)
# Frame images live in the "Images" sub-folder of the data root.
images_dir = os.path.join(data_path, 'Images')

In [4]:
# Per-channel mean and standard deviation used by normalize_image below.
# NOTE(review): these look like the commonly used ImageNet RGB statistics —
# confirm that is the intended normalisation for the LPIPS input.
mean=[0.485, 0.456, 0.406]
std=[0.229, 0.224, 0.225]

# Converter applied to each preprocessed frame array before it is handed to
# the LPIPS model.
transform_f = transforms.ToTensor()

def normalize_image(in_img):
    """Standardise an image array with the module-level ``mean`` and ``std``.

    The input is converted to a float32 array, then ``mean`` is subtracted
    and the result divided by ``std``; both are broadcast against the
    trailing axis of the image.
    """
    as_float = np.asarray(in_img).astype('float32')
    centered = as_float - mean
    return centered / std

In [5]:
def _load_frame_tensor(img_dir, vid_n, seg_n, frame_idx):
    """Load one frame from disk and preprocess it for the LPIPS model.

    The frame is read as RGB, scaled to [0, 1], standardised with
    ``normalize_image``, resized to 64x64 and converted with ``transform_f``;
    a leading batch dimension is added before returning.
    """
    frame_path = os.path.join(
        img_dir, f'video-{vid_n}-segment-{seg_n}-frame-{frame_idx}.jpeg'
    )
    # The context manager closes the underlying file as soon as it is decoded.
    with Image.open(frame_path) as frame:
        rgb = np.array(frame.convert('RGB')) / 255
    resized = cv2.resize(
        normalize_image(rgb), (64, 64), interpolation=cv2.INTER_LINEAR
    ).astype(np.float32)
    return transform_f(resized).unsqueeze(0)


def _lpips_frame_distance(img_dir, vid_n, seg_n, frame_idx):
    """Return the AlexNet-LPIPS distance between frame_idx and frame_idx - 1."""
    img0 = _load_frame_tensor(img_dir, vid_n, seg_n, frame_idx)
    img1 = _load_frame_tensor(img_dir, vid_n, seg_n, frame_idx - 1)
    # NOTE(review): the LPIPS README describes inputs scaled to [-1, 1]; here
    # frames are mean/std standardised instead — confirm this is intended.
    # Converted to a Python float so the accumulation below runs in float64.
    return float(loss_fn_alex(img0, img1).detach().numpy()[0, 0, 0, 0])


def get_steady_state_probabilities_ifs(pred, img_dir, vid_n, seg_n):
    """Steady-state probabilities of a Markov chain weighted by frame similarity.

    Each column of ``pred`` (one frame) is turned into a state string by
    concatenating its entries. For every consecutive frame pair the LPIPS
    distance ``d`` between the two images is computed; ``d`` is added to the
    self-transition of the previous state and ``1 - d`` to the transition
    from the previous state to the current one. Rows are normalised and the
    stationary distribution is obtained by solving ``pi (P - I) = 0`` with the
    last equation replaced by ``sum(pi) = 1``.

    Parameters
    ----------
    pred : np.ndarray
        2-D array with one row per object and one column per frame.
    img_dir : str
        Directory containing ``video-{vid_n}-segment-{seg_n}-frame-{i}.jpeg``.
    vid_n, seg_n :
        Video and segment identifiers used to build the frame file names.

    Returns
    -------
    np.ndarray
        One probability per unique state, in the row order returned by
        ``np.unique(pred.T, axis=0)``.

    Raises
    ------
    numpy.linalg.LinAlgError
        If the resulting linear system is singular.
    """
    pred = pred.T
    unq_st = np.unique(pred, axis=0)
    unq_st_str = [''.join(str(ch) for ch in x) for x in unq_st]
    state_index = {state: i for i, state in enumerate(unq_st_str)}
    n_states = len(unq_st_str)

    # Rows are "from" states, columns are "to" states. A plain array with
    # explicit [row, col] indexing replaces the former DataFrame updated via
    # chained indexing (df[col][row] += x), which does not write back to the
    # frame when pandas Copy-on-Write is active.
    transition_matrix = np.zeros((n_states, n_states))

    for f in range(1, pred.shape[0]):
        i_now = state_index[''.join(str(ch) for ch in pred[f])]
        i_prev = state_index[''.join(str(ch) for ch in pred[f - 1])]

        d = _lpips_frame_distance(img_dir, vid_n, seg_n, f)

        transition_matrix[i_prev, i_prev] += d
        transition_matrix[i_prev, i_now] += 1.0 - d

    # Row-normalise; the epsilon keeps rows without outgoing transitions
    # (all zeros) from dividing by zero.
    transition_matrix /= transition_matrix.sum(axis=1, keepdims=True) + 1e-15

    co_eff = (transition_matrix - np.identity(n_states)).T
    # Replace the last balance equation with the normalisation constraint.
    co_eff[-1] = 1.0
    const = np.zeros(n_states)
    const[-1] = 1.0

    return np.linalg.solve(co_eff, const)

In [6]:
def get_steady_state_probabilities(h_m):
    """Empirical probability of each unique state (column) of ``h_m``.

    ``h_m`` is transposed so that every row is one frame's state; the
    relative frequency of each distinct row is returned.

    Parameters
    ----------
    h_m : array-like
        2-D data with one column per frame.

    Returns
    -------
    np.ndarray
        Frequencies in the row order produced by ``np.unique(..., axis=0)``.
    """
    hm = np.array(h_m).T
    # return_counts gives the occurrences of each unique row in one pass,
    # instead of re-scanning the whole array once per unique state.
    _, counts = np.unique(hm, axis=0, return_counts=True)
    return counts / hm.shape[0]


def calculate_entropy(ss_probs):
    """Shannon entropy (base 2) of ``ss_probs`` normalised by ``log2(len)``.

    Returns 0.0 when there are fewer than two probabilities. Entries that are
    zero or negative contribute nothing to the sum: a linear solve can return
    values such as -1e-17 instead of 0 through floating-point round-off, and
    ``math.log2`` would raise ``ValueError`` on them. NaN entries still
    propagate to the result.
    """
    n_states = len(ss_probs)
    if n_states <= 1:
        return 0.0

    tot_ss_ent = 0
    for prb in ss_probs:
        if prb <= 0:
            # p * log2(p) -> 0 as p -> 0; also guards against tiny negatives.
            continue
        tot_ss_ent = tot_ss_ent + (-prb * math.log2(prb))

    # Divide by the maximum possible entropy so the result lies in [0, 1]
    # for a valid distribution.
    return tot_ss_ent / math.log2(n_states)

In [7]:
def get_st_p_ent(vid, seg, img_path, objs, model):
    """Compute steady-state probabilities and entropies for one video segment.

    Reads ``{data_path}/{model}/video-{vid}-segment-{seg}.csv``, transposes it,
    uses the first transposed row as column labels, keeps only the columns in
    ``objs`` (missing ones are filled with ``'0'``) and transposes back so
    that rows are objects.

    Parameters
    ----------
    vid, seg :
        Video and segment identifiers used in the CSV and frame file names.
    img_path : str
        Directory containing the frame images for the similarity-weighted
        estimate.
    objs : list of str
        Object labels to keep, in the order they should appear.
    model : str
        Name of the sub-folder of ``data_path`` holding the model's CSVs.

    Returns
    -------
    tuple
        ``(p_steady, ent, p_steady_ifs, ent_ifs)``: frequency-based
        probabilities and their entropy, followed by the image-similarity
        weighted probabilities and their entropy.
    """
    pred_file = os.path.join(
        data_path,
        f'{model}/video-{vid}-segment-{seg}.csv'
    )
    pred_df = pd.read_csv(pred_file).transpose()
    pred_df.columns = pred_df.iloc[0]
    pred_df = pred_df.iloc[1:]
    pred_df = pred_df.reindex(columns=objs).fillna('0').transpose()

    # Materialise once; neither estimator mutates the array it receives.
    predictions = np.array(pred_df)

    # Use the caller-supplied image directory rather than the notebook-level
    # images_dir global, which the img_path argument was previously ignored
    # in favour of.
    p_steady_ifs = get_steady_state_probabilities_ifs(predictions, img_path, vid, seg)
    ent_ifs = calculate_entropy(p_steady_ifs)

    p_steady = get_steady_state_probabilities(predictions)
    ent = calculate_entropy(p_steady)

    return p_steady, ent, p_steady_ifs, ent_ifs

In [8]:
# Video and segment identifiers of the clip analysed by the cells below;
# they are passed to get_st_p_ent and end up in the CSV / frame file names.
v_ = 2
s_ = 1

# Object labels to keep from each model's prediction CSV. Labels missing from
# a CSV are filled with '0' by get_st_p_ent.
objs_ = [
    'Car', 'Person', 'Curb', 'Tree', 'Barrier Post'
]


In [9]:
# Evaluate the 'BLIP' predictions for the configured video segment.
model_ = 'BLIP'
p_s, e_, p_s_i, e_i = get_st_p_ent(v_, s_, images_dir, objs_, model_)
# First line: frequency-based state probabilities and their normalised entropy.
print(p_s, e_)
# Second line: LPIPS-weighted steady-state probabilities and their normalised entropy.
print(p_s_i, e_i)

[0.1875     0.02083333 0.02083333 0.04166667 0.0625     0.1875
 0.02083333 0.29166667 0.16666667] 0.8344157297354335
[0.13042024 0.0263023  0.01614984 0.04423116 0.07952279 0.24763723
 0.02174713 0.24342108 0.19056823] 0.844704382064666


In [10]:
# Evaluate the 'GPV-1' predictions for the same video segment.
model_ = 'GPV-1'
p_s, e_, p_s_i, e_i = get_st_p_ent(v_, s_, images_dir, objs_, model_)
# First line: frequency-based state probabilities and their normalised entropy.
print(p_s, e_)
# Second line: LPIPS-weighted steady-state probabilities and their normalised entropy.
print(p_s_i, e_i)

[0.08333333 0.04166667 0.02083333 0.0625     0.10416667 0.08333333
 0.16666667 0.04166667 0.04166667 0.04166667 0.3125    ] 0.8738809887518927
[0.06469828 0.04355108 0.01075841 0.06911155 0.10472037 0.10283319
 0.17364232 0.03000038 0.03639326 0.04142975 0.32286142] 0.8523952496012203
