In [None]:
import io
from PIL import Image
from datasets import load_dataset
import open_clip

In [None]:
def load_image_from_byte_array(byte_array):
    """Open an encoded image held in memory.

    Args:
        byte_array: Raw bytes of an encoded image file.

    Returns:
        The object returned by ``Image.open`` for an in-memory buffer
        wrapping ``byte_array``.
    """
    buffer = io.BytesIO(byte_array)
    return Image.open(buffer)
# Load the Open-i "train" split, keep only records that have both a frontal
# image and an impression, and only then carve out the held-out set.
# Filtering before the split keeps the test share at 10% of the usable rows,
# and the fixed seed makes train/test membership identical across kernel restarts.
raw_dataset = load_dataset("ykumards/open-i")["train"]
usable_dataset = raw_dataset.filter(
    lambda example: example['img_frontal'] is not None
    and example['impression'] is not None
)
dataset = usable_dataset.train_test_split(test_size=0.1, seed=42)

In [None]:
# Single hub identifier shared by the model weights and the tokenizer lookup,
# so the two can never drift apart.
MODEL_ID = 'hf-hub:luhuitong/CLIP-ViT-L-14-448px-MedICaT-ROCO'

# The call returns three values; the second is discarded and the third is
# kept as `preprocess` for the image pipeline further down.
model, _, preprocess = open_clip.create_model_and_transforms(MODEL_ID)
tokenizer = open_clip.get_tokenizer(MODEL_ID)

In [None]:
# One line per named parameter (name, shape, trainable flag), followed by the
# total element count over all parameters, trainable or not.
named_params = list(model.named_parameters())
for name, param in named_params:
    shape_text = str(tuple(param.shape))
    print(f"{name:60} {shape_text:25} trainable={param.requires_grad}")
total_params = sum(p.numel() for _name, p in named_params)
print(f"\nTotal parameters: {total_params:,}")

In [None]:
# One line per submodule: qualified name, class name, and the number of
# elements in its parameters that currently have requires_grad=True
# (frozen parameters are not counted).
for name, module in model.named_modules():
    trainable_sizes = [p.numel() for p in module.parameters() if p.requires_grad]
    num_params = sum(trainable_sizes)
    module_type = type(module).__name__
    print(f"{name:60} {module_type:30} trainable_params={num_params}")

In [None]:
# Optional: show a compact torchinfo summary when the package is installed.
try:
    from torchinfo import summary
except ImportError:
    print("Install torchinfo with: pip install torchinfo for a compact summary view.")
else:
    # No example input is passed, so only columns that can be derived from the
    # module tree itself are requested (size columns need a forward pass).
    # The result is printed explicitly: inside a notebook the summary is only
    # shown automatically when it is the cell's final expression, which it is
    # not here. verbose=0 avoids a duplicate printout outside notebooks.
    print(summary(model, depth=4, col_names=["num_params", "trainable"], verbose=0))

In [None]:
from peft import LoraConfig, TaskType

# Attention projections to adapt, in the order they are listed to PEFT.
ATTN_PROJECTIONS = ("q_proj", "k_proj", "v_proj", "out_proj")

# range(23, 24) yields only block 23, i.e. a single vision transformer block.
# NOTE(review): these names only match if the vision blocks expose separate
# q_proj / k_proj / v_proj Linear layers - verify against the module listing
# printed earlier; with a fused attention layer most entries would go unmatched.
lora_targets = [
    f"visual.transformer.resblocks.{i}.attn.{proj}"
    for proj in ATTN_PROJECTIONS
    for i in range(23, 24)
]

# NOTE(review): task_type selects a PEFT wrapper whose forward signature may
# not match this model's forward(image, text) - confirm before training.
peft_config = LoraConfig(
    r=16,                      # rank
    lora_alpha=32,             # scaling
    target_modules=lora_targets,
    lora_dropout=0.05,
    bias="none",
    task_type="FEATURE_EXTRACTION",
)

In [None]:
from peft import get_peft_model

# Wrap the model with the LoRA adapters described by `peft_config`, then
# report how many parameters remain trainable.
# Note: `model` is rebound here, so re-running this cell wraps the already
# wrapped model again; re-run the model-loading cell first.
model = get_peft_model(model=model, peft_config=peft_config)
model.print_trainable_parameters()

In [None]:
from transformers import TrainingArguments

# All run hyperparameters in one mapping so they can be scanned and tweaked
# in one place before being handed to TrainingArguments.
# NOTE(review): output_dir is an absolute, machine-specific path.
training_kwargs = dict(
    output_dir="/home/darklord/Projects/Federated_Learning/models",
    learning_rate=1e-4,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    num_train_epochs=2,
    weight_decay=0.01,
    # Evaluate and checkpoint on the same schedule, which
    # load_best_model_at_end relies on.
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
)
training_args = TrainingArguments(**training_kwargs)

In [None]:
def preprocess_batch(batch):
    """Add a ``pixel_values`` column built from the ``img_frontal`` bytes.

    Each entry of ``batch['img_frontal']`` is opened with
    ``load_image_from_byte_array`` and passed through ``preprocess``; the
    results are stored, in order, under ``batch['pixel_values']``.

    Args:
        batch: Mapping of column name to a list of values for one batch.

    Returns:
        The same ``batch`` object with the extra ``pixel_values`` column.
    """
    batch['pixel_values'] = [
        preprocess(load_image_from_byte_array(raw_bytes))
        for raw_bytes in batch['img_frontal']
    ]
    return batch


In [None]:
# Run the image preprocessing over every split, two examples per call.
dataset = dataset.map(function=preprocess_batch, batched=True, batch_size=2)

In [None]:
def tokenize_batch(batch):
    """Add ``input_ids`` and ``attention_mask`` columns for the impressions.

    The tokenizer's return type is handled in two ways:

    * a mapping with ``input_ids`` / ``attention_mask`` entries is copied
      through unchanged;
    * a bare 2-D collection of token ids (what open_clip tokenizers return)
      is stored as ``input_ids`` and the mask is derived from it.

    Args:
        batch: Mapping of column name to a list of values; must contain
            ``impression``.

    Returns:
        The same ``batch`` object with the two extra columns.
    """
    tokens = tokenizer(batch['impression'])

    if hasattr(tokens, 'keys'):
        # Mapping-style output: both fields are already provided.
        batch['input_ids'] = tokens['input_ids']
        batch['attention_mask'] = tokens['attention_mask']
        return batch

    # Bare token ids: indexing them with a string key would fail, so convert
    # to plain nested lists and build the mask here.
    ids = tokens.tolist() if hasattr(tokens, 'tolist') else [list(row) for row in tokens]
    batch['input_ids'] = ids
    # assumes 0 is the padding id - TODO confirm for the tokenizer in use
    batch['attention_mask'] = [[int(tok != 0) for tok in row] for row in ids]
    return batch


# Tokenize the impressions of every split, eight examples per call.
dataset = dataset.map(function=tokenize_batch, batched=True, batch_size=8)


In [None]:
# Sanity check: tokenize the impression text of the first training example.
# Only the text field is passed; handing over the whole example dict would
# make the tokenizer iterate over its keys (the column names) instead.
token = tokenizer(dataset["train"][0]["impression"])
print(token)

In [None]:
import torch
import torch.nn.functional as F
from transformers import Trainer


def data_collator(features):
    """Stack per-example image and token columns into batched tensors.

    Reads the ``pixel_values`` and ``input_ids`` entries of every example and
    returns them under the keys ``image`` and ``text``; all other columns are
    ignored.
    """
    return {
        "image": torch.stack(
            [torch.as_tensor(f["pixel_values"], dtype=torch.float32) for f in features]
        ),
        "text": torch.stack(
            [torch.as_tensor(f["input_ids"], dtype=torch.long) for f in features]
        ),
    }


class ContrastiveTrainer(Trainer):
    """Trainer that scores a batch with a symmetric image-text cross-entropy.

    The i-th image and i-th text of a batch are treated as the matching pair;
    every other combination in the batch is a negative.
    """

    def compute_loss(self, model, inputs, return_outputs=False, **kwargs):
        # NOTE(review): assumes the (possibly wrapped) model exposes
        # encode_image / encode_text / logit_scale as open_clip models do - confirm.
        clip = getattr(model, "module", model)  # unwrap DataParallel/DDP if present
        image_features = clip.encode_image(inputs["image"], normalize=True)
        text_features = clip.encode_text(inputs["text"], normalize=True)
        logits_per_image = clip.logit_scale.exp() * image_features @ text_features.t()
        targets = torch.arange(logits_per_image.size(0), device=logits_per_image.device)
        loss = (
            F.cross_entropy(logits_per_image, targets)
            + F.cross_entropy(logits_per_image.t(), targets)
        ) / 2
        if return_outputs:
            return loss, {"logits_per_image": logits_per_image}
        return loss

    def prediction_step(self, model, inputs, prediction_loss_only, ignore_keys=None):
        # Evaluation only needs the loss; there are no labels or logits to gather.
        inputs = self._prepare_inputs(inputs)
        with torch.no_grad():
            loss = self.compute_loss(model, inputs)
        return loss.detach(), None, None


# The collator reads dataset columns whose names do not appear in the model's
# forward signature, so the Trainer must not strip them.
training_args.remove_unused_columns = False

# `processing_class` is intentionally not passed: the tokenizer here comes from
# open_clip and may not implement the save_pretrained hook the Trainer calls
# when checkpointing. No `compute_metrics` is passed either - none is defined
# in this notebook, and the evaluation loss is enough for best-model selection.
trainer = ContrastiveTrainer(
    model=model,
    args=training_args,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    data_collator=data_collator,
)

trainer.train()