#### **Import Libraries**

In [None]:
import os
import json
import torch
import numpy as np
from PIL import Image
from tqdm import tqdm
from lavis.models import load_model_and_preprocess

In [None]:
# Work from the dataset folder so that the relative paths used by the later
# cells ('./distilled_keyframe', './blip/...') resolve against it.
%cd /content/drive/MyDrive/AIC-Minitest/dataset

#### **Parsing Data Path**

In [None]:
def parse_data_path(feature_dir='./keyframe'):
    """Index the entries found two levels below ``feature_dir``.

    Every entry of ``feature_dir`` is treated as a "part" directory and is
    listed in turn. Each entry inside a part is keyed by the text that
    precedes the first ``.`` in its name.

    Args:
        feature_dir: Root directory to scan.

    Returns:
        A nested dict ``{part_name: {entry_id: path}}`` where ``path`` is
        ``f'{feature_dir}/{part_name}/{entry_name}'``. Parts and entries are
        inserted in sorted name order.
    """
    index = {}
    for part_name in sorted(os.listdir(feature_dir)):
        part_dir = f'{feature_dir}/{part_name}'
        # If two entries share the same id, the later one (in sorted order) wins.
        index[part_name] = {
            entry_name.split('.')[0]: f'{part_dir}/{entry_name}'
            for entry_name in sorted(os.listdir(part_dir))
        }
    return index

In [None]:
# Build the {part: {video_id: path}} index for everything under
# './distilled_keyframe'; the inference loop below iterates over this mapping.
all_video_paths = parse_data_path(feature_dir='./distilled_keyframe')

#### **BLIP2-ViT-L Model**

In [None]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Load the LAVIS "blip2_feature_extractor" (pretrain_vitL variant) in eval mode
# together with its image and text preprocessors, placed on `device`.
model, vis_processors, txt_processors = load_model_and_preprocess(
    name="blip2_feature_extractor",
    model_type="pretrain_vitL",
    is_eval=True,
    device=device,
)

In [None]:
def encode_images(image_paths):
    """Encode images with the loaded feature extractor.

    Each image is converted to RGB, preprocessed with
    ``vis_processors["eval"]`` and passed through
    ``model.extract_features(..., mode="image")``. The projected embeddings
    (``image_embeds_proj``) are L2-normalised along the last dimension.

    Args:
        image_paths: Iterable of image file paths, in the order in which they
            should be indexed.

    Returns:
        A tuple ``(id2image_fps, video_features)``:

        * ``id2image_fps`` maps the 0-based position of every image to its
          path.
        * ``video_features`` is a flat list of 1-D ``float32`` NumPy vectors.
          One vector is appended per row of an image's embedding (after the
          leading batch dimension is squeezed), so a single image may
          contribute several consecutive vectors.
    """
    id2image_fps = {}
    video_features = []
    # Inference only: never record an autograd graph for these forward passes.
    with torch.no_grad():
        for image_index, image_path in enumerate(image_paths):
            id2image_fps[image_index] = image_path
            # The context manager releases the file handle as soon as the
            # pixels have been decoded into the converted copy.
            with Image.open(image_path) as raw_image:
                rgb_image = raw_image.convert("RGB")
            image = vis_processors["eval"](rgb_image).unsqueeze(0).to(device)
            image_features = model.extract_features({"image": image}, mode="image")
            embeds = image_features.image_embeds_proj
            # Out-of-place division so the tensor owned by the model output is
            # left untouched.
            embeds = embeds / embeds.norm(dim=-1, keepdim=True)
            # One device-to-host copy per image instead of one per row.
            rows = embeds.squeeze(dim=0).cpu().numpy().astype(np.float32)
            video_features.extend(row.flatten() for row in rows)
    return id2image_fps, video_features

#### **Inference**

In [None]:
def sorted_by_id(keyframe_paths):
    """Return the keyframe paths ordered by the integer in their file name.

    The id of a path is the part of its file name (the text after the last
    ``/``) that precedes the first ``.``, parsed with ``int``. Paths that
    share the same id keep their relative input order.

    Args:
        keyframe_paths: Iterable of ``/``-separated path strings.

    Returns:
        A new list containing the same paths in ascending id order.

    Raises:
        ValueError: If a file name does not start with an integer id.
    """
    def numeric_id(path):
        filename = path.rsplit('/', 1)[-1]
        stem = filename.partition('.')[0]
        return int(stem)

    return sorted(keyframe_paths, key=numeric_id)

In [None]:
# Output roots, relative to the current working directory:
#   id2image_save_dir - per-video JSON files mapping image index -> image path
#   feature_save_dir  - per-video .npy feature arrays
id2image_save_dir='./blip/blip2-vitL/id2image'
feature_save_dir="./blip/blip2-vitL/features"
# exist_ok=True keeps the cell idempotent and avoids the check-then-create
# race of testing os.path.exists() first; it also matches how the per-part
# directories are created in the inference loop.
os.makedirs(id2image_save_dir, exist_ok=True)
os.makedirs(feature_save_dir, exist_ok=True)

In [None]:
# Encode the keyframes of every video and write, per video:
#   {feature_save_dir}/{part}/{video_id}.npy   - the extracted feature vectors
#   {id2image_save_dir}/{part}/{video_id}.json - image index -> keyframe path
for video_part, video_path_dict in all_video_paths.items():
    # The output folders depend only on the part, so create them once per part
    # rather than once per video.
    part_feature_dir = f'{feature_save_dir}/{video_part}'
    part_id2image_dir = f'{id2image_save_dir}/{video_part}'
    os.makedirs(part_feature_dir, exist_ok=True)
    os.makedirs(part_id2image_dir, exist_ok=True)

    for video_id, video_id_path in tqdm(video_path_dict.items(), desc=f'Encoding Part {video_part}'):
        # os.listdir returns names in arbitrary order; sort them by numeric id
        # so the image indices follow keyframe order.
        keyframe_image_paths = [video_id_path + '/' + keyframe_name for keyframe_name in os.listdir(video_id_path)]
        sorted_keyframe_image_paths = sorted_by_id(keyframe_image_paths)
        id2image_fps, video_features = encode_images(sorted_keyframe_image_paths)

        np.save(f'{part_feature_dir}/{video_id}.npy', video_features)

        # json.dump writes the integer indices as string keys.
        with open(f'{part_id2image_dir}/{video_id}.json', 'w') as f:
            json.dump(id2image_fps, f, ensure_ascii=False, indent=4)