In [None]:
%pip install torch torchvision pandas scikit-learn open_clip_torch[training]

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
import os

# CSV locations shared by the cells below (relative to the working directory).
INDEX_CSV = "index.csv"                    # existing manual captions
TRAIN_ORIGINAL_CSV = "train_original.csv"  # training split, written before augmentation
AUGMENTED_TRAIN = "train.csv"              # training set read back after augmentation
VAL_CSV = "val.csv"                        # held-out validation split


In [None]:
# Load the caption index and hold out 20% of the rows for validation.
df = pd.read_csv(INDEX_CSV)
print(f"Total samples: {len(df)}")

# A fixed random_state keeps the split identical across re-runs.
train_df, val_df = train_test_split(df, test_size=0.2, random_state=42)
print(f"Train samples: {len(train_df)} | Validation samples: {len(val_df)}")

# Preview the raw index. This must be the cell's last expression: a bare
# expression in the middle of a cell is evaluated but never displayed.
df.head()


In [None]:
# Write both splits to disk without the pandas index column.
train_df.to_csv(path_or_buf=TRAIN_ORIGINAL_CSV, index=False)
val_df.to_csv(path_or_buf=VAL_CSV, index=False)
print("Saved {} and {}".format(TRAIN_ORIGINAL_CSV, VAL_CSV))

# Read the files back so the column listing below reflects what is actually on disk.
df_train = pd.read_csv(filepath_or_buffer=TRAIN_ORIGINAL_CSV)
df_val = pd.read_csv(filepath_or_buffer=VAL_CSV)

# Column names of the source index and of the two saved splits, for comparison.
print("Columns in  index.csv: ", list(df.columns))
print("Columns in train_original.csv:", list(df_train.columns))
print("Columns in val.csv:", list(df_val.columns))

In [None]:
print("installing local llm (llama-cpp-python) dependencies..")
%pip install llama-cpp-python huggingface_hub --quiet
print("Dependencies done")

In [None]:
import os

import torch.cuda

# Pick the compute device. torch.cuda.is_available is a function and has to be
# *called*: the bare function object is always truthy, which would select
# "cuda" even on machines without a usable GPU.
use_gpu = "cuda" if torch.cuda.is_available() else "cpu"
print(use_gpu)

# CUDA_PATH is printed for diagnostics only; it is None when the variable is unset.
CUDA_PATH = os.environ.get("CUDA_PATH")
print(CUDA_PATH)

In [None]:
#manual augment (type synonyms for captions)
#print("\n--- Running Manual Synonym Augmentation ---")
# !python text_augment.py --augmentation_method manual --num_aug_per_original 5

#llm augmentation
print("\n--- Running LLM-based Augmentation ---")
!python text_augment.py --augmentation_method llm --num_aug_per_original 3

train_augmented_df = pd.read_csv(AUGMENTED_TRAIN)
print(f"Final train.csv has {len(train_augmented_df)} entries.")
print(train_augmented_df.head())

In [None]:
!python -m open_clip_train.main \
    --train-data train.csv \
    --val-data val.csv \
    --csv-img-key filepath \
    --csv-caption-key caption \
    --csv-separator "," 
    --model ViT-B-32 \
    --device use_gpu \
    --pretrained openai \
    --batch-size 32 \
    --lr 1e-4 \
    --epochs 10 \
    --warmup 500 \
    --workers 4\
    --aug-cfg "use_timm=True auto_augment='rand-m9-n2-mstd0.5' re_prob=0.25 re_count=1" 