In [None]:
import glob
import os
import shutil

import torch
from torchvision.models import resnet50, ResNet50_Weights, resnet152, ResNet152_Weights, AlexNet_Weights, VGG16_BN_Weights
from torchvision.models import alexnet, vgg16_bn
from torchvision.models.feature_extraction import get_graph_node_names
from torchvision.models.feature_extraction import create_feature_extractor
from PIL import Image
import numpy as np
import cv2
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader,Dataset
from skimage.metrics import structural_similarity as ssim
import matplotlib.pyplot as plt
from tqdm import tqdm
import pandas as pd

print(torch.__version__)

In [None]:
# ResNet backbones initialised with the IMAGENET1K_V2 weight enums;
# both are wrapped into feature extractors in a later cell.
m_50, m_152 = (
    resnet50(weights=ResNet50_Weights.IMAGENET1K_V2),
    resnet152(weights=ResNet152_Weights.IMAGENET1K_V2),
)

In [None]:
# Build AlexNet and VGG16-BN from the installed torchvision, the same way the
# ResNets are built. Going through torch.hub with the 'v0.10.0' tag pinned a
# release that predates the `weights=` enum API and needed network access to
# fetch the hub repository; the constructors below accept the enums directly.
m_alex = alexnet(weights=AlexNet_Weights.IMAGENET1K_V1)
m_vgg16_bn = vgg16_bn(weights=VGG16_BN_Weights.IMAGENET1K_V1)
# train_nodes, eval_nodes = get_graph_node_names(m_vgg16_bn)
# m_vgg16_bn

In [None]:
# Graph node tapped in each backbone. Every extractor exposes its tapped
# activation under the single key 'out'.
return_nodes_resnet = {
    'avgpool': 'out',
}
# NOTE(review): 'classifier.5' is the sixth module of each model's classifier
# head (0-based index 5); confirm with get_graph_node_names that this is the
# intended layer for both architectures.
return_node_alex = {
    'classifier.5': 'out'
}
return_node_vgg = {
    'classifier.5': 'out'
}

# Use the first GPU when one is attached, otherwise fall back to the CPU so
# the notebook does not crash on a kernel without an accelerator.
feature_device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

model_50 = create_feature_extractor(m_50, return_nodes=return_nodes_resnet).to(feature_device)
model_152 = create_feature_extractor(m_152, return_nodes=return_nodes_resnet).to(feature_device)
model_alex = create_feature_extractor(m_alex, return_nodes=return_node_alex).to(feature_device)
model_vgg16_bn = create_feature_extractor(m_vgg16_bn, return_nodes=return_node_vgg).to(feature_device)

# The extractors are only used for inference: switch them to eval mode once,
# here, so dropout and batch-norm are deterministic for every consumer.
for _extractor in (model_50, model_152, model_alex, model_vgg16_bn):
    _extractor.eval()

In [None]:
# Per-channel normalisation statistics (the standard ImageNet values).
mean, std = [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]

# Preprocessing applied to every frame before it is fed to a backbone:
# convert to a tensor, resize to 224x224, then normalise per channel.
_preprocess_steps = [
    transforms.ToTensor(),
    transforms.Resize((224, 224)),
    transforms.Normalize(mean, std),
]
transform_norm = transforms.Compose(_preprocess_steps)

In [None]:
def mse(imageA, imageB):
    """Return the mean squared error between two images.

    Both inputs are cast to float before subtracting, so unsigned integer
    images do not wrap around. The summed squared difference is divided by
    the number of pixels of ``imageA`` (rows x columns, i.e. its first two
    axes).
    """
    delta = imageA.astype("float") - imageB.astype("float")
    squared_total = np.sum(delta ** 2)
    pixel_count = float(imageA.shape[0] * imageA.shape[1])
    return squared_total / pixel_count

def compare_images(imageA, imageB):
    """Return ``(mse, ssim)`` for a pair of images.

    The first element comes from :func:`mse`; the second from
    ``skimage.metrics.structural_similarity`` (imported in this file as
    ``ssim``), called with its default arguments.
    """
    return mse(imageA, imageB), ssim(imageA, imageB)

In [None]:
# Table of precomputed measures; later cells filter it by its 'video' and
# 'segment' columns and read its 'similarity-score' column.
csv = "/kaggle/input/elene-videos/measures.csv"
df_measure = pd.read_csv(filepath_or_buffer=csv)
df_measure.head(n=5)

In [None]:
# fl = "/kaggle/input/vqa-video-gt/video_14_segment_4.csv"
# df_gt = pd.read_csv(fl)
# df_gt.dropna(how='all', axis=1, inplace=True)
# df_gt.dropna(how='all', axis=0, inplace=True)
# df_gt_columns = df_gt.columns[1:]

# df_gt

In [None]:
csv_folder = "/kaggle/input/elene-videos/GPV-1 Answers (Combined In Segments)-20230401T085540Z-001/GPV-1 Answers (Combined In Segments)"
gt_folder = "/kaggle/input/vqa-video-gt"

# final_dict[video][segment] -> dict of equally long per-frame-pair series.
final_dict = {}

# One entry per backbone. The key doubles as the series name in final_dict,
# so the insertion order here fixes the column order of the exported CSVs.
feature_models = {
    'resnet-50': model_50,
    'resnet-152': model_152,
    'alexnet': model_alex,
    'vgg16-bn': model_vgg16_bn,
}
for _feature_model in feature_models.values():
    # Inference only; set once instead of on every forward pass.
    _feature_model.eval()


def _cosine(vec_a, vec_b):
    """Return the cosine similarity of two 1-D tensors as a Python float."""
    return torch.nn.functional.cosine_similarity(vec_a, vec_b, dim=0, eps=1e-8).item()


def _adjacent_column_cosine(table, columns):
    """Return the cosine similarity between each pair of neighbouring columns.

    ``columns`` is the ordered sequence of column labels of ``table`` to
    compare; the result has ``len(columns) - 1`` entries (empty when fewer
    than two columns are given).
    """
    scores = []
    for left, right in zip(columns[:-1], columns[1:]):
        vec_left = torch.FloatTensor(list(table[left]))
        vec_right = torch.FloatTensor(list(table[right]))
        scores.append(_cosine(vec_left, vec_right))
    return scores


def _load_rgb(path):
    """Read an image file into an H x W x 3 uint8 RGB array and close the file."""
    with Image.open(path) as handle:
        # convert("RGB") is a no-op for RGB files and makes greyscale/other
        # modes usable with the 3-channel normalisation.
        return np.array(handle.convert("RGB"))


def _embed(model, batch):
    """Run ``batch`` (a single-image batch) through ``model`` and return a 1-D feature vector."""
    with torch.no_grad():
        # Follow the model's own device rather than assuming a specific GPU.
        model_device = next(model.parameters()).device
        # flatten() drops the batch axis (batch size is 1) and any singleton
        # spatial axes, so every backbone yields a plain vector.
        return model(batch.to(model_device))["out"].flatten()


for vid_count in range(1, 17):
    parent_video = str(vid_count).zfill(3)
    video_folder = f"/kaggle/input/elene-videos/accss_videos_elena-20230320T061114Z-001/accss_videos_elena/video_{parent_video}/trimmed_video"

    # Entries look like 'keyframes_<video_name>.mp4'; strip the prefix and the
    # extension to recover <video_name>.
    k_f_folds = ["_".join(k_i.split('_')[1:]).replace('.mp4', '') for k_i in os.listdir(video_folder) if 'keyframes' in k_i]

    for video_name in k_f_folds:
        segment = int(video_name.split('_')[-1].replace('seg', '').replace('.mp4', ''))
        print(f"Video: {vid_count}, Segment: {segment}")

        # Segments without ground truth are skipped; check first so nothing
        # else is read or computed for them.
        gt_file = os.path.join(
            gt_folder,
            f"video_{vid_count}_segment_{segment}.csv"
        )
        if not os.path.exists(gt_file):
            continue

        key_frame_paths = os.path.join(
            video_folder,
            f'keyframes_{video_name}.mp4'
        )

        # Probe indices 0..N (N = number of directory entries) and keep the
        # files that exist. Iterating over the integer index keeps the frames
        # in numeric order, which a plain directory listing would not.
        n_candidates = len(os.listdir(key_frame_paths)) + 1
        candidates = (
            os.path.join(key_frame_paths, f'{video_name}_{x}.jpeg')
            for x in range(n_candidates)
        )
        images = [path for path in candidates if os.path.exists(path)]
        n_pairs = max(len(images) - 1, 0)

        # Model answers: similarity between neighbouring columns, skipping
        # the first column.
        csv_file = os.path.join(
            csv_folder,
            f"video-{vid_count}-segment-{segment}.csv"
        )
        df = pd.read_csv(csv_file)
        out_feat_cos = _adjacent_column_cosine(df, df.columns[1:])

        # Ground truth: drop fully empty columns/rows, then compare
        # neighbouring columns the same way.
        df_gt = pd.read_csv(gt_file)
        df_gt = df_gt.dropna(how='all', axis=1).dropna(how='all', axis=0)
        gt_feat_cos = _adjacent_column_cosine(df_gt, df_gt.columns[1:])

        df_measure_slice = df_measure.loc[(df_measure['video'] == vid_count) & (df_measure['segment'] == segment)]
        out_feat = list(df_measure_slice['similarity-score'])

        # Every series must have one value per adjacent frame pair. Checked
        # before the expensive image pass so a mismatch fails fast.
        assert len(out_feat) == n_pairs, (
            f"video {vid_count} segment {segment}: {len(out_feat)} similarity scores "
            f"for {n_pairs} frame pairs"
        )
        assert len(gt_feat_cos) == n_pairs, (
            f"video {vid_count} segment {segment}: {len(gt_feat_cos)} ground-truth pairs "
            f"for {n_pairs} frame pairs"
        )

        # Single pass over the frames: each image is read and embedded once
        # per backbone and compared with the frame before it.
        similarities = {name: [] for name in feature_models}
        out_ssim = []
        previous = None
        for path in tqdm(images):
            rgb = _load_rgb(path)
            batch = transform_norm(rgb).float().unsqueeze(0)
            features = {name: _embed(model, batch) for name, model in feature_models.items()}
            # PIL decodes to RGB, so the RGB->grey conversion is the one that
            # applies the channel weights correctly.
            gray = cv2.cvtColor(rgb, cv2.COLOR_RGB2GRAY)

            if previous is not None:
                prev_features, prev_gray = previous
                for name in feature_models:
                    similarities[name].append(_cosine(prev_features[name], features[name]))
                # The MSE half of the comparison is not part of the results.
                _, ssim_score = compare_images(prev_gray, gray)
                out_ssim.append(ssim_score)
            previous = (features, gray)

        final_dict.setdefault(vid_count, {})[segment] = {
            'network': [f'frame-{f}' for f in range(1, len(out_feat)+1)],
            'gt-feature': gt_feat_cos,
            'gpv-feature': out_feat,
            'gpv-feature-cos': out_feat_cos,
            **similarities,
            'ssim': out_ssim
        }


In [None]:
# Start from empty output folders. Only the folders this notebook owns are
# removed, not everything in the working directory, and the cell can be
# re-run safely.
for out_dir in ('./csv', './csv_t', './plot', './plot_corr'):
    if os.path.isdir(out_dir):
        shutil.rmtree(out_dir)
    os.makedirs(out_dir, exist_ok=True)

In [None]:
import json

# Dump the nested video -> segment -> series mapping as indented JSON.
with open("video_correlation_data.json", 'w') as out_file:
    serialized = json.dumps(final_dict, indent=4)
    out_file.write(serialized)

In [None]:
# Transposed export: one CSV per (video, segment) in which every series is a
# row; the header row is omitted.
for vid, segments in final_dict.items():
    for seg, data in segments.items():
        df = pd.DataFrame(data).transpose()
        df.to_csv(f'csv_t/video-{vid}-segment-{seg}.csv', header=False)

In [None]:
# Column-wise export: one CSV per (video, segment) in which every series is a
# column and every row is one adjacent-frame pair.
for vid, segments in final_dict.items():
    for seg, data in segments.items():
        df = pd.DataFrame(data)
        df.to_csv(f'csv/video-{vid}-segment-{seg}.csv')

In [None]:
# for vid in final_dict.keys():
#     for seg in final_dict[vid].keys():
#         data = final_dict[vid][seg]
#         fig = plt.figure(figsize=(12, 8))
#         linestyle_dict = {
#              'solid': 'solid',      # Same as (0, ()) or '-'
#              'dotted': 'dotted',    # Same as (0, (1, 1)) or ':'
#              'dashed': 'dashed',    # Same as '--'
#              'dashdot': 'dashdot',
#
#              'loosely dotted':        (0, (1, 10)),
#              'dotted':                (0, (1, 1)),
#              'densely dotted':        (0, (1, 1)),
#              'long dash with offset': (5, (10, 3)),
#              'loosely dashed':        (0, (5, 10)),
#              'dashed 2':              (0, (5, 5)),
#              'densely dashed':        (0, (5, 1)),
#
#              'loosely dashdotted':    (0, (3, 10, 1, 10)),
#              'dashdotted':            (0, (3, 5, 1, 5)),
#              'densely dashdotted':    (0, (3, 1, 1, 1)),
#
#              'dashdotdotted':         (0, (3, 5, 1, 5, 1, 5)),
#              'loosely dashdotdotted': (0, (3, 10, 1, 10, 1, 10)),
#              'densely dashdotdotted': (0, (3, 1, 1, 1, 1, 1))
#         }
#
#         legends = ["resnet-50", 'resnet-152', 'alexnet', 'vgg16-bn', 'SSIM', 'gpv-feature', 'gt-feature']
#
#         plt.plot(data['network'], data['resnet-50'], color='green', linestyle=linestyle_dict['solid'], marker='o', markerfacecolor='green', markersize=0)
#         plt.plot(data['network'], data['resnet-152'], color='blue', linestyle=linestyle_dict['dotted'], marker='o', markerfacecolor='blue', markersize=0)
#         plt.plot(data['network'], data['alexnet'], color='orange', linestyle=linestyle_dict['dashed'], marker='o', markerfacecolor='orange', markersize=0)
#         plt.plot(data['network'], data['vgg16-bn'], color='red', linestyle=linestyle_dict['dashdot'], marker='o', markerfacecolor='red', markersize=0)
#         plt.plot(data['network'], data['ssim'], color='gray', linestyle=linestyle_dict['dashdot'], marker='o', markerfacecolor='red', markersize=0)
#         plt.plot(data['network'], data['gpv-feature-cos'], color='black', linestyle=linestyle_dict['densely dashdotted'], marker='o', markerfacecolor='black',
#                  markersize=0, linewidth=3)
#         plt.plot(data['network'], data['gt-feature'], color='purple', linestyle=linestyle_dict['densely dashdotdotted'], marker='o', markerfacecolor='purple',
#                  markersize=0, linewidth=3)
#
#         plt.legend(legends, bbox_to_anchor=(1.00, 1.00), ncol=1)
#         plt.xticks(rotation = 90)
#         plt.title(f'video-{vid}-segment-{seg}.png')
#
#         plt.ylim([0, 1.0])
#
#         plt.tight_layout()
#         plt.savefig(f'plot/video-{vid}-segment-{seg}.png')
#         plt.show()

In [None]:
# ! pip install kaleido

In [None]:
# ! conda install -c conda-forge python-kaleido -y
# ! pip uninstall kaleido
# ! pip install -q condacolab
# import condacolab
# condacolab.install()

In [None]:
# import plotly.graph_objects as go
# import plotly.express as px
#
# for vid in final_dict.keys():
#     for seg in final_dict[vid].keys():
#         data = final_dict[vid][seg]
#
#         layout = dict(plot_bgcolor='white',
#                       title=f'video-{vid}-segment-{seg}.png',
#                       width=500,
#                       height=500,
#                       margin=dict(t=30, l=30, r=30, b=30),
#                       xaxis=dict(title='gt Feature Similarity',
#                                  range=[0.0, 1.0],
#                                  linecolor='#d9d9d9',
#                                  showgrid=False,
#                                  mirror=True),
#                       yaxis=dict(title='GPV Feature Similarity',
#                                  range=[0.0, 1.0],
#                                  linecolor='#d9d9d9',
#                                  showgrid=False,
#                                  mirror=True))
#
#         fig_tmp = px.scatter(x=data['gt-feature'],
#                           y=data['gpv-feature'],
#                           trendline='ols')
#
#         fig = go.Figure(data=fig_tmp.data, layout=layout)
#
#         fig.show()
#         # fig.write_image(f'plot_corr/video-{vid}-segment-{seg}-corr.png')
#         img_bytes = fig.to_image(format="png")
#
#         with open(f'plot_corr/video-{vid}-segment-{seg}-corr.png', 'wb') as f:
#             f.write(img_bytes)