In [1]:
import torch
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.datasets import CIFAR10
from transformers import AutoTokenizer, TrainingArguments, Trainer, CLIPProcessor, CLIPModel
from peft import get_peft_model, LoraConfig, TaskType, RandLoRAConfig
from peft.tuners.lora import LoraLayer

from datasets import Dataset
import numpy as np
from math import floor

ImportError: cannot import name 'RandLoRAConfig' from 'peft' (/opt/anaconda3/envs/guyb_fml/lib/python3.11/site-packages/peft/__init__.py)

In [2]:
import os
import shutil

def create_split(dtd_root, split_file, split_name):
    with open(os.path.join(dtd_root, "labels", split_file), "r") as f:
        lines = [line.strip() for line in f]

    for rel_path in lines:
        class_name, filename = rel_path.split("/")
        src = os.path.join(dtd_root, "images", class_name, filename)
        dst = os.path.join(dtd_root, split_name, class_name, filename)
        os.makedirs(os.path.dirname(dst), exist_ok=True)
        if not os.path.exists(dst):
            shutil.copy2(src, dst)

# Set this to your actual dataset path
dtd_root = "/home/guyb/Documents/data/dtd"

create_split(dtd_root, "train1.txt", "train")
create_split(dtd_root, "val1.txt", "val")

print("✅ DTD train/val directories created successfully.")


✅ DTD train/val directories created successfully.


In [10]:
import os
import subprocess
import zipfile
import shutil
import requests
from tqdm import tqdm
from sklearn.model_selection import train_test_split

# Correct path for training script compatibility
base_path = os.path.expanduser("~/Documents/data")  # This is where the model expects it

# Ensure base folders exist
os.makedirs(base_path, exist_ok=True)

# Config
download_url = "https://zenodo.org/record/7711810/files/EuroSAT_RGB.zip"
zip_path = base_path + "/EuroSAT_RGB.zip"
extract_to = base_path + "/EuroSAT_raw"
dataset_root = os.path.join(extract_to, "EuroSAT_RGB")
split_output = base_path + "/EuroSAT_splits"
val_ratio = 0.2
seed = 42

# Step 1: Download if not already present
if not os.path.exists(zip_path):
    print("Downloading EuroSAT_RGB.zip...")
    response = requests.get(download_url, stream=True)
    total_size = int(response.headers.get('content-length', 0))
    with open(zip_path, "wb") as f, tqdm(
        desc="Downloading",
        total=total_size,
        unit="B",
        unit_scale=True,
        unit_divisor=1024,
    ) as bar:
        for chunk in response.iter_content(chunk_size=1024):
            if chunk:
                f.write(chunk)
                bar.update(len(chunk))
else:
    print("✅ EuroSAT_RGB.zip already exists.")

# Step 2: Extract
if not os.path.exists(dataset_root):
    print("Extracting...")
    subprocess.run(["unzip", "-oq", zip_path, "-d", extract_to], check=True)
else:
    print("✅ Dataset already extracted.")

# Step 3: Split and organize
for split in ['train', 'val']:
    os.makedirs(os.path.join(split_output, split), exist_ok=True)

for class_name in os.listdir(dataset_root):
    full_class_path = os.path.join(dataset_root, class_name)
    if not os.path.isdir(full_class_path):
        continue

    images = os.listdir(full_class_path)
    train_imgs, val_imgs = train_test_split(images, test_size=val_ratio, random_state=seed)

    for split, img_list in zip(["train", "val"], [train_imgs, val_imgs]):
        class_output = os.path.join(split_output, split, class_name)
        os.makedirs(class_output, exist_ok=True)
        for img in img_list:
            src = os.path.join(full_class_path, img)
            dst = os.path.join(class_output, img)
            shutil.copyfile(src, dst)

print("✅ Finished preparing EuroSAT_splits with train/val folders.")


✅ EuroSAT_RGB.zip already exists.
✅ Dataset already extracted.
✅ Finished preparing EuroSAT_splits with train/val folders.


In [11]:
import os
import requests

target_dir = os.path.expanduser("~/Documents/data/resisc45")
os.makedirs(target_dir, exist_ok=True)

splits = ["train", "val", "test"]
for split in splits:
    url = f"https://storage.googleapis.com/remote_sensing_representations/resisc45-{split}.txt"
    out_path = os.path.join(target_dir, f"resisc45-{split}.txt")
    if not os.path.exists(out_path):
        print(f"Downloading {split} split...")
        r = requests.get(url)
        with open(out_path, "w") as f:
            f.write(r.text)
    else:
        print(f"{split} split already exists.")


Downloading train split...
Downloading val split...
Downloading test split...


In [None]:
# 1. Load CIFAR10
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

trainset = CIFAR10(root='./data', train=True, download=True, transform=transform)
testset = CIFAR10(root='./data', train=False, download=True, transform=transform)

In [None]:
# 2. Wrap into HF dataset for Trainer compatibility
def convert_to_hf(ds):
    return Dataset.from_dict({
        'pixel_values': [img.numpy() for img, _ in ds],
        'labels': [label for _, label in ds],
    })

train_dataset = convert_to_hf(trainset)
eval_dataset = convert_to_hf(testset)

In [None]:
# 3. Load CLIP (ViT-B-32) model & processor
model_name = "openai/clip-vit-base-patch32"
model = CLIPModel.from_pretrained(model_name)
processor = CLIPProcessor.from_pretrained(model_name)

In [None]:
# 4. Add RandLoRA
peft_config = RandLoRAConfig(
    r=6,
    target_modules=["q_proj", "v_proj"],  # Modify as needed for CLIP
    task_type=TaskType.VISION,
    rank_pattern="normal",  # optional
)

model = get_peft_model(model, peft_config)

In [None]:
# 5. Data preprocessing
def preprocess(batch):
    inputs = processor(images=[torch.tensor(x) for x in batch["pixel_values"]], return_tensors="pt", padding=True)
    inputs["labels"] = batch["labels"]
    return inputs

train_dataset = train_dataset.map(preprocess, batched=True)
eval_dataset = eval_dataset.map(preprocess, batched=True)

In [None]:

# Assume batch_size is 32 to compute gradient accumulation steps (128 // 32 = 4)
batch_size = 32
num_grad_accumulation = floor(128 / batch_size)

training_args = TrainingArguments(
    output_dir="./randlora_cifar10",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=5,                           # or set dynamically from args.epochs
    learning_rate=1e-3,                           # match your default classifier LR (if not using lp_clip)
    weight_decay=0.01,                            # match args.wd (adjust if different)
    gradient_accumulation_steps=num_grad_accumulation,
    lr_scheduler_type="cosine",                   # match your cosine_lr() scheduler
    logging_steps=10,
    report_to="none",
    remove_unused_columns=False,
    fp16=False,                                   # use bf16 in your script, so keep this False unless you prefer fp16
    bf16=True if torch.cuda.is_available() else False,
)


In [None]:
# 7. Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
)

In [None]:
# 8. Train
trainer.train()