In [1]:
# Instal dependency
%pip install open_clip_torch

In [2]:
# Import module
import os
import open_clip
import torch
import numpy as np
from PIL import Image
from tqdm import tqdm
import requests
from io import BytesIO

# Parse data path

In [None]:
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError

SCOPES = 'https://www.googleapis.com/auth/drive'
CLIENT_SECRET = '../credentials.json'
keyframes_dir_id = '1bqJG0CRIIuVIib3pBcA2k8iiRyWlwmq9'

flow = InstalledAppFlow.from_client_secrets_file(CLIENT_SECRET, SCOPES)
creds = flow.run_local_server(port=8502)
service = build('drive', 'v3', credentials=creds)

def list_folder_contents(folder_id):
    """List all files and folders within a specified Google Drive folder."""
    query = f"'{folder_id}' in parents and trashed = false"
    files = []
    try:
        results = service.files().list(q=query, fields="files(id, name, mimeType)").execute()
        files = results.get('files', [])
    except HttpError as error:
        print(f"An error occurred: {error}")
    return files

all_keyframe_paths = {}

# Get video folders inside the keyframes directory
video_folders = list_folder_contents(keyframes_dir_id)
for video_folder in video_folders:
    # Skip .DS_Store and non-video folders
    if video_folder['name'] == '.DS_Store' or not video_folder['name'].startswith('Video'):
        continue
    
    video_name = video_folder['name']
    all_keyframe_paths[video_name] = {}
    
    # Get subfolders (like L00, L01)
    subfolders = list_folder_contents(video_folder['id'])
    for subfolder in subfolders:
        # Skip .DS_Store and reduced folders
        if subfolder['name'] == '.DS_Store' or '_reduced' in subfolder['name']:
            continue
        
        subfolder_name = subfolder['name']
        all_keyframe_paths[video_name][subfolder_name] = []
        
        # Get jpg files in the subfolder
        keyframe_files = list_folder_contents(subfolder['id'])
        for keyframe_file in keyframe_files:
            if keyframe_file['mimeType'] in ['image/jpeg', 'image/jpg'] and keyframe_file['name'] != '.DS_Store':
                file_url = f'https://drive.google.com/uc?id={keyframe_file["id"]}'
                all_keyframe_paths[video_name][subfolder_name].append(file_url)

print(all_keyframe_paths)

Please visit this URL to authorize this application: https://accounts.google.com/o/oauth2/auth?response_type=code&client_id=519645059480-4oq7d1en6p4t5mc6v5qmpba5mohejvef.apps.googleusercontent.com&redirect_uri=http%3A%2F%2Flocalhost%3A8502%2F&scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive&state=IKqTv6iTdCF3IatIvCzu6XFo9NNrOq&access_type=offline
{'Video0': {'L00': ['https://drive.google.com/uc?id=1mpgGOcc8zjzp95jA0HbYqVm4MuaUJMxU', 'https://drive.google.com/uc?id=1RreIw-OPCFyBPIn_cAc7awecW_2THyDi', 'https://drive.google.com/uc?id=1-2NVov5eIsl9shi1sjWoD-G1UyPSVXT5', 'https://drive.google.com/uc?id=174StS9TVZ2ssMmnuv3bvXduPp6MUdtVB', 'https://drive.google.com/uc?id=1KlKvw55SXn3ORZi5_arWrmmRsShPXuUA', 'https://drive.google.com/uc?id=1dbREXf-uL83C3HkkBnC-9wXdE4U5p4O-', 'https://drive.google.com/uc?id=1JR5fz-ipOeP6c0Usf-Q1WXPVXGEziGl7', 'https://drive.google.com/uc?id=1oqFT7uNLVbyf9AlVE5-EnR3rORRRia2f', 'https://drive.google.com/uc?id=17-6ej1-jctDzrhvfpftePWtnC7_8CxVC', 'https://drive

# Model

In [4]:
##### Load Model #####
device = "cuda" if torch.cuda.is_available() else "cpu"
print(device)
model, _, preprocess = open_clip.create_model_and_transforms('ViT-L-14', device=device, pretrained='datacomp_xl_s13b_b90k')

cpu


  checkpoint = torch.load(checkpoint_path, map_location=map_location)


In [5]:
bs = 4
save_dir = './CLIPv2_features'
if not os.path.exists(save_dir):
    os.mkdir(save_dir)

for key, video_keyframe_paths in all_keyframe_paths.items():
    video_ids = sorted(video_keyframe_paths.keys())
    
    if not os.path.exists(os.path.join(save_dir, key)):
        os.mkdir(os.path.join(save_dir, key))
    
    for video_id in tqdm(video_ids):
        video_feats = []
        video_keyframe_path = video_keyframe_paths[video_id]
        for i in range(0, len(video_keyframe_path), bs):
            # Support batchsize inferencing
            images = []
            image_urls = video_keyframe_path[i:i+bs]
            for image_url in image_urls:
                # Download image from Google Drive with headers
                headers = {
                    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
                }
                response = requests.get(image_url, headers=headers, allow_redirects=True)
                
                # Verify content type
                content_type = response.headers.get('content-type', '')
                if 'image' not in content_type.lower():
                    print(f"Skipping non-image URL: {image_url}")
                    continue
                
                try:
                    image = Image.open(BytesIO(response.content))
                    image = preprocess(image).unsqueeze(0)
                    images.append(image)
                except Exception as e:
                    print(f"Error processing image {image_url}: {e}")
                    continue
            
            if not images:
                print(f"No valid images for {video_id}")
                continue
            
            images = torch.cat(images).to(device)

            with torch.no_grad():
                image_feats = model.encode_image(images)
            image_feats /= image_feats.norm(dim=-1, keepdim=True)

            for b in range(image_feats.shape[0]):
                video_feats.append(image_feats[b].detach().cpu().numpy().astype(np.float32).flatten())
        
        if video_feats:
            np.save(f'{save_dir}/{key}/{video_id}.npy', video_feats)
        else:
            print(f"No features extracted for {key}/{video_id}")

100%|██████████| 1/1 [00:47<00:00, 47.91s/it]
100%|██████████| 1/1 [00:26<00:00, 26.13s/it]
100%|██████████| 1/1 [04:32<00:00, 272.30s/it]
100%|██████████| 1/1 [02:34<00:00, 154.21s/it]
100%|██████████| 1/1 [04:55<00:00, 295.22s/it]
100%|██████████| 1/1 [04:19<00:00, 259.58s/it]
