# Diversity Evaluation on Text Modality

In [4]:
import torch
from datasets import load_dataset
from dcscore_function import DCScore

# parameter settings
# Maps a short encoder alias to the checkpoint path handed to DCScore.
# The 'path/to/...' values are placeholders; replace them with real paths.
model_path_dict = {
    'bert': 'path/to/bert-base-uncased',
    'simcse': 'path/to/unsup-simcse-bert-base-uncased',
    'llama2-7b': 'path/to/llama-2-7b-hf',
    'gpt2': 'path/to/gpt2',
    'bge': 'path/to/bge-large-en-v1.5',
    'sen_bert': 'path/to/all-mpnet-base-v2',
}
model_name = 'sen_bert'
model_path = model_path_dict[model_name]
# torch device type strings are lowercase: "CPU" is rejected by torch.device().
# NOTE(review): `device` is not passed to DCScore in this cell; confirm whether
# DCScore selects its own device or should receive this value.
device = "cuda" if torch.cuda.is_available() else "cpu"
batch_size = 128
tau = 1  # forwarded to calculate_dcscore_by_embedding; presumably a temperature, TODO confirm
kernel_type = 'cs'  # presumably selects a cosine-similarity kernel, TODO confirm

# evaluated dataset
# Three short sentences used as a toy input.
text_list = ['who are you', 'I am fine', 'good job']

# dcscore class
dcscore_evaluator = DCScore(model_path)

# get embedding
# get_embedding returns a 3-tuple; `n` and `d` are presumably the sample count
# and embedding dimension (verify against dcscore_function).
embeddings, n, d = dcscore_evaluator.get_embedding(text_list, batch_size=batch_size)

# calculate dcscore based on embedding
dataset_dcscore = dcscore_evaluator.calculate_dcscore_by_embedding(
    embeddings, kernel_type=kernel_type, tau=tau
)


# Diversity Evaluation on Visual Modality

In [1]:
import torch
from datasets import load_dataset
from dcscore_function import DCScore
from ImageFilesDataset import ImageFilesDataset

# parameter settings
model_name = 'dinov2'
# torch device type strings are lowercase: "CPU" is rejected by torch.device().
# NOTE(review): `device` is not passed to DCScore in this cell; confirm whether
# DCScore selects its own device or should receive this value.
device = "cuda" if torch.cuda.is_available() else "cpu"
batch_size = 128
tau = 1  # forwarded to calculate_dcscore_by_embedding; presumably a temperature, TODO confirm
kernel_type = 'cs'  # presumably selects a cosine-similarity kernel, TODO confirm
# Passed as `n` to ImageFilesDataset below. The name is misspelled ("sampel")
# but is kept unchanged in case other cells reference it.
sampel_num = 4

# evaluated dataset
img_pth = './demo_images'
# NOTE(review): name='None' is the string "None", not the None object; confirm
# that this is what ImageFilesDataset expects.
dataset = ImageFilesDataset(
    img_pth,
    name='None',
    extension='JPEG',
    n=sampel_num,
    conditional=False,
)

# dcscore class
# Here DCScore receives the model alias itself rather than a checkpoint path.
dcscore_evaluator = DCScore(model_name)

# get embedding
# get_embedding returns a 3-tuple; `n` and `d` are presumably the sample count
# and embedding dimension (verify against dcscore_function).
embeddings, n, d = dcscore_evaluator.get_embedding(dataset, batch_size=batch_size)

# calculate dcscore based on embedding
dataset_dcscore = dcscore_evaluator.calculate_dcscore_by_embedding(
    embeddings, kernel_type=kernel_type, tau=tau
)


Using cache found in /root/.cache/torch/hub/facebookresearch_dinov2_main
                                                                         

In [2]:
# Show the value returned by calculate_dcscore_by_embedding in the previous cell.
dataset_dcscore

4.0