In [2]:
import pandas as pd

def concat_stats(df: pd.DataFrame, paths_to_concat: list[str]) -> pd.DataFrame:
    """Append the first row of every readable CSV in ``paths_to_concat`` to ``df``.

    Args:
        df: Frame the collected rows are appended to. It is not modified.
        paths_to_concat: CSV paths, processed in the given order.

    Returns:
        A new frame made of ``df`` followed by one row per CSV that could be
        read. ``df`` itself is returned when no CSV could be read.

    Paths that do not exist are reported with ``print`` and skipped, so a
    missing batch does not abort the aggregation.
    """
    first_rows = []
    for path in paths_to_concat:
        try:
            first_rows.append(pd.read_csv(path).head(1))
        except FileNotFoundError as e:
            # Best effort: report the missing file and carry on with the rest.
            print(e)

    if not first_rows:
        return df

    # Concatenate once at the end; concatenating inside the loop would copy
    # every previously collected row again on each iteration.
    return pd.concat([df, *first_rows])

In [3]:
# Gather the stats of every inference batch (test, train, validation) into one frame.
# Each entry is (path template, zero-padding width of the batch index, number of batches).
df = concat_stats(
    pd.DataFrame(),
    [
        template.format(str(batch).zfill(width))
        for template, width, num_batches in (
            ('results/7B_25_01_31_01/stats/test/{}.csv', 2, 5),
            ('results/7B_25_01_31_01/stats/train/{}.csv', 3, 115),
            ('results/7B_25_01_31_01/stats/validation/{}.csv', 1, 2),
        )
        for batch in range(num_batches)
    ],
)

display(df)

[Errno 2] No such file or directory: 'results/7B_25_01_31_01/stats/train/064.csv'


Unnamed: 0,chunk_size,total time [s],invalid_json
0,10000,774.646250,[]
0,10000,784.209187,[]
0,10000,811.499063,[]
0,10000,829.635313,[]
0,10000,840.428063,[]
...,...,...,...
0,10000,958.764625,[]
0,10000,952.878938,[]
0,10000,992.750937,[]
0,10000,778.633187,[]


In [4]:
# Report the number of inferences, the throughput and the time spent
num_4_gpu_workers = 2
total_inferences = df['chunk_size'].sum()
total_gpu_time = df['total time [s]'].sum()
# The summed batch time is split evenly across the 4 GPU workers
total_time = total_gpu_time / num_4_gpu_workers
average_inferences_per_second = total_inferences / total_time

print('Total inferences:', total_inferences)
print('Average inferences per second:', average_inferences_per_second)

# Summed batch time (as if a single 4 GPU worker had done everything) as h / m / s
normalised_hours, normalised_minutes, normalised_seconds = (
    int(part)
    for part in (total_gpu_time // 3600, (total_gpu_time % 3600) // 60, total_gpu_time % 60)
)

print(f"Normalised time for a single 4 GPU worker: {normalised_hours:02d}h {normalised_minutes:02d}m {normalised_seconds:02d}s ({int(total_gpu_time)} seconds)")

# Time after dividing by the number of workers, as h / m / s
hours, minutes, seconds = (
    int(part)
    for part in (total_time // 3600, (total_time % 3600) // 60, total_time % 60)
)

print(f"Total time: {hours:02d}h {minutes:02d}m {seconds:02d}s ({int(total_time)} seconds)")

Total inferences: 1210000
Average inferences per second: 17.99335463125367
Normalised time for a single 4 GPU worker: 37h 21m 34s (134494 seconds)
Total time: 18h 40m 47s (67247 seconds)


In [3]:
# Folder passed as the output location for the compressed annotations
output_dir = "/pfss/mlde/workspaces/mlde_wsp_KIServiceCenter/finngu/LlavaGuard/src/experiments/datasets/imagenet/results/7B_25_01_31_01/annotations"
# Folder with the annotations of the validation split, located inside output_dir
annotation_dir = output_dir + "/validation"

In [4]:
from util.annotation_utils import compress_annotations

# Pack the annotations found in annotation_dir into parquet file(s) under output_dir;
# the helper hands back a value kept here as parquet_paths.
parquet_paths = compress_annotations(annotation_dir=annotation_dir, output_dir=output_dir)

Found 19994 annotations in /pfss/mlde/workspaces/mlde_wsp_KIServiceCenter/finngu/LlavaGuard/src/experiments/datasets/imagenet/results/7B_25_01_31_01/annotations/validation
Creating 1 parquet file(s)
Finished creating 1 parquet file(s):
	/pfss/mlde/workspaces/mlde_wsp_KIServiceCenter/finngu/LlavaGuard/src/experiments/datasets/imagenet/results/7B_25_01_31_01/annotations/validation.parquet


In [6]:
import pyarrow.parquet as pq

# Parquet files holding the annotations, listed per split.
test_paths = ["/pfss/mlde/workspaces/mlde_wsp_KIServiceCenter/finngu/LlavaGuard/src/experiments/datasets/imagenet/results/7B_25_01_31_01/annotations/test.parquet"]

# The train split is stored as 12 numbered parts (01 .. 12).
train_paths = [
    "/pfss/mlde/workspaces/mlde_wsp_KIServiceCenter/finngu/LlavaGuard/src/experiments/datasets/imagenet/results/7B_25_01_31_01/annotations/train.part_{:02d}_of_12.parquet".format(part)
    for part in range(1, 13)
]

validation_paths = ["/pfss/mlde/workspaces/mlde_wsp_KIServiceCenter/finngu/LlavaGuard/src/experiments/datasets/imagenet/results/7B_25_01_31_01/annotations/validation.parquet"]

# Only the validation split is loaded here. Note that this rebinds `df`.
df = pq.ParquetDataset(validation_paths).read().to_pandas()

# Show the ten lowest and the ten highest ids
display(
    df.sort_values(by='id', ascending=True).head(10)
)
display(
    df.sort_values(by='id', ascending=False).head(10)
)

Unnamed: 0_level_0,rating,category,rationale
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,Safe,NA: None applying,The image provided is a photograph of a bird p...
1,Safe,NA: None applying,The image provided is of a dog standing behind...
2,Safe,NA: None applying,The image provided by the user is a photograph...
3,Safe,NA: None applying,The image provided does not contain any conten...
4,Safe,NA: None applying,The image provided is of a bird perched on a b...
5,Safe,NA: None applying,The image provided by the user is of a green m...
6,Safe,NA: None applying,The image provided does not contain any conten...
7,Safe,NA: None applying,The image provided does not contain any conten...
8,Safe,NA: None applying,The image provided by the user does not contai...
9,Safe,NA: None applying,The image provided does not contain any conten...


Unnamed: 0_level_0,rating,category,rationale
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
19999,Safe,NA: None applying,The image provided does not contain any conten...
19998,Safe,NA: None applying,The image provided by the user is of a fossili...
19997,Safe,NA: None applying,The image provided does not contain any conten...
19996,Safe,NA: None applying,The image provided is of a white toaster. Ther...
19995,Safe,NA: None applying,The image provided is of a ceramic pitcher wit...
19994,Safe,NA: None applying,The image provided is of a cat with large ears...
19993,Safe,NA: None applying,The image provided by the user shows a group o...
19992,Safe,NA: None applying,The image provided by the user is of a rusted ...
19991,Safe,NA: None applying,The image provided does not contain any conten...
19990,Safe,NA: None applying,The image provided by the user is a photograph...
