In [None]:
# Install dependency
%pip install git+https://github.com/openai/CLIP.git

In [None]:
# Import modules
import glob
import os
from io import BytesIO

import clip
import numpy as np
import requests
import torch
from PIL import Image
from tqdm import tqdm

# Parse data path

In [None]:
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError

# Least-privilege OAuth scope: this notebook only lists Drive folders and reads
# keyframe files, so read-only access is sufficient. Passed as a list, which is
# the form the OAuth flow documents for its `scopes` argument.
SCOPES = ['https://www.googleapis.com/auth/drive.readonly']
# Path to the OAuth client-secrets JSON consumed by the installed-app flow below.
CLIENT_SECRET = '../credentials.json'
# Drive ID of the root folder whose children are enumerated as video folders.
keyframes_dir_id = '1bqJG0CRIIuVIib3pBcA2k8iiRyWlwmq9'

# Run the interactive installed-app OAuth flow through a local server on port
# 8502, then build the Drive v3 client used by every listing call.
flow = InstalledAppFlow.from_client_secrets_file(CLIENT_SECRET, SCOPES)
creds = flow.run_local_server(port=8502)
service = build('drive', 'v3', credentials=creds)

def list_folder_contents(folder_id):
    """List all files and folders within a specified Google Drive folder.

    The Drive ``files.list`` endpoint returns results one page at a time, so
    this follows ``nextPageToken`` until the listing is exhausted instead of
    stopping after the first page.

    Args:
        folder_id: Drive ID of the parent folder whose direct, non-trashed
            children are listed.

    Returns:
        A list of dicts holding the ``id``, ``name`` and ``mimeType`` of each
        child. If the API raises ``HttpError`` the error is printed and the
        entries collected so far (possibly none) are returned.
    """
    query = f"'{folder_id}' in parents and trashed = false"
    files = []
    page_token = None  # None requests the first page
    try:
        while True:
            results = service.files().list(
                q=query,
                fields="nextPageToken, files(id, name, mimeType)",
                pageSize=1000,
                pageToken=page_token,
            ).execute()
            files.extend(results.get('files', []))
            page_token = results.get('nextPageToken')
            if not page_token:
                # No further pages: the listing is complete.
                break
    except HttpError as error:
        print(f"An error occurred: {error}")
    return files

# Nested index built below: {video folder name: {sub-folder name: [keyframe URLs]}}
all_keyframe_paths = {}

for video_entry in list_folder_contents(keyframes_dir_id):
    video_name = video_entry['name']
    # Keep only entries named like video folders; '.DS_Store' is skipped explicitly.
    wanted_video = video_name != '.DS_Store' and video_name.startswith('Video')
    if not wanted_video:
        continue

    per_video = all_keyframe_paths[video_name] = {}

    for sub_entry in list_folder_contents(video_entry['id']):
        subfolder_name = sub_entry['name']
        # Skip '.DS_Store' and any sub-folder whose name contains '_reduced'.
        if subfolder_name == '.DS_Store' or '_reduced' in subfolder_name:
            continue

        urls = per_video[subfolder_name] = []
        # Record one URL per JPEG entry, built from the file's Drive ID.
        urls.extend(
            f'https://drive.google.com/uc?id={item["id"]}'
            for item in list_folder_contents(sub_entry['id'])
            if item['mimeType'] in ('image/jpeg', 'image/jpg') and item['name'] != '.DS_Store'
        )

print(all_keyframe_paths)

# Model

In [None]:
##### Load Model #####
# Use the GPU when PyTorch reports CUDA as available; otherwise fall back to CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(device)
# clip.load returns the model together with its matching image preprocessing transform.
model, preprocess = clip.load("ViT-B/16", device=device)

In [None]:
bs = 4  # number of keyframes encoded per forward pass
save_dir = './CLIP_features'
request_timeout = 30  # seconds; keeps one stalled download from hanging the whole run
# Browser-like User-Agent sent with every keyframe download (built once, not per image).
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}

# exist_ok avoids the exists()/mkdir() race and also creates missing parents.
os.makedirs(save_dir, exist_ok=True)


def _load_keyframe(image_url):
    """Download one keyframe and return it as a preprocessed tensor.

    Args:
        image_url: URL of the keyframe image to fetch.

    Returns:
        The output of ``preprocess`` with a leading batch dimension added via
        ``unsqueeze(0)``, or ``None`` when the download fails, the response is
        not an image, or the image cannot be decoded. The reason is printed in
        each of those cases.
    """
    try:
        response = requests.get(
            image_url, headers=headers, allow_redirects=True, timeout=request_timeout
        )
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"Error downloading image {image_url}: {e}")
        return None

    # A non-image content type means the response carries no decodable image; skip it.
    content_type = response.headers.get('content-type', '')
    if 'image' not in content_type.lower():
        print(f"Skipping non-image URL: {image_url}")
        return None

    try:
        # The context manager releases the decoder's resources once preprocessing is done.
        with Image.open(BytesIO(response.content)) as image:
            return preprocess(image).unsqueeze(0)
    except Exception as e:
        print(f"Error processing image {image_url}: {e}")
        return None


for key, video_keyframe_paths in all_keyframe_paths.items():
    os.makedirs(os.path.join(save_dir, key), exist_ok=True)

    for video_id in tqdm(sorted(video_keyframe_paths.keys())):
        video_feats = []
        video_keyframe_path = video_keyframe_paths[video_id]
        for i in range(0, len(video_keyframe_path), bs):
            # Support batchsize inferencing: load up to `bs` keyframes, dropping failures.
            loaded = (_load_keyframe(image_url) for image_url in video_keyframe_path[i:i+bs])
            images = [tensor for tensor in loaded if tensor is not None]

            if not images:
                print(f"No valid images for {video_id}")
                continue

            batch = torch.cat(images).to(device)

            with torch.no_grad():
                image_feats = model.encode_image(batch)
                # L2-normalise each feature vector along the last dimension.
                image_feats /= image_feats.norm(dim=-1, keepdim=True)

            # One float32 row per successfully loaded keyframe, in input order.
            video_feats.extend(image_feats.detach().cpu().numpy().astype(np.float32))

        # Saved as a single float32 array; it is empty when no keyframe could be loaded.
        np.save(os.path.join(save_dir, key, f'{video_id}.npy'), np.array(video_feats, dtype=np.float32))