In [None]:
import json

In [None]:
# Read the annotation JSON at `path` into `annotations`.
# NOTE(review): this is an absolute, machine-specific path; a later cell reloads
# `annotations` from cfg.dataset.train_path — confirm whether both are needed.
path = "/data/SSD/flickr30k/annotations/train.json"
# Use a context manager so the file handle is closed as soon as parsing finishes
# instead of staying open until the garbage collector reclaims it.
with open(path, "r") as annotation_file:
    annotations = json.load(annotation_file)

In [None]:
import json
import os
import warnings
from datetime import datetime
import numpy as np
import random
import matplotlib.pyplot as plt

import hydra
import torch
import torch.nn as nn
import torch.nn.functional as F
import transformers
from omegaconf import DictConfig, OmegaConf
from torch.optim.lr_scheduler import (
    CosineAnnealingLR,
)
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm
from transformers import AutoTokenizer, AutoImageProcessor

# Import local packages
from src.data.imp_datamodule import (
    CDC_train,
    CDC_test,
    EmbeddingManager,
    FolderManager,
)
from src.metric.loss import CosineLoss, MeanSquareLoss
from src.models.cdc import CDC
from src.models.components.clustering import Clustering
from src.utils import EmbeddingManager, FolderManager, evalrank

# Setup
# Process-wide switches: restrict transformers logging to errors, ignore all
# Python warnings, and export TOKENIZERS_PARALLELISM=false for this process.
transformers.logging.set_verbosity_error()
warnings.filterwarnings(action="ignore")
os.environ.update(TOKENIZERS_PARALLELISM="false")

# Use the CUDA device when torch reports one as available; otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [5]:
# Load the run configuration and build the model plus the CLIP image processor
# and tokenizer. Both preprocessing objects come from the same checkpoint, so the
# name is defined once to keep them from drifting apart.
CLIP_CHECKPOINT = "openai/clip-vit-base-patch32"

cfg = OmegaConf.load("configs/flickr30k.yaml")
model = CDC()
preprocess = AutoImageProcessor.from_pretrained(CLIP_CHECKPOINT)
tokenizer = AutoTokenizer.from_pretrained(CLIP_CHECKPOINT)

# Initialize FolderManager
# NOTE(review): FolderManager and EmbeddingManager are imported from both
# src.data.imp_datamodule and src.utils; the later import (src.utils) is the one
# bound here — confirm that is the intended implementation.
folder_manager = FolderManager(base_log_dir=cfg.dataset.log_path)

# Initialize experiment
# NOTE(review): the run name is hard-coded; presumably load_experiment returns the
# directories of an already existing run — confirm against FolderManager.
init_dir, plot_dir = folder_manager.load_experiment(
    "20240626_010207_flickr30k-merge-label-test"
)

# Initialize embedding manager
# Read the training annotations through a context manager so the file handle is
# closed right after parsing rather than left open.
with open(cfg.dataset.train_path) as train_file:
    annotations = json.load(train_file)
embedding_manager = EmbeddingManager(
    annotations,
    embedding_dim=512,
    chunk_size=10000,
    hdf5_dir=init_dir,
    load_existing=True,
)