In [2]:
!pip install ftfy regex tqdm
!pip install git+https://github.com/openai/CLIP.git

import torch
import clip
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from PIL import Image
import os
import numpy as np
from sklearn.model_selection import train_test_split
from tqdm import tqdm

# 1. Setup device
# Use the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# 2. Load CLIP model
# `clip.load` hands back the network together with its image preprocessing
# transform; the pair is unpacked into `model` and `preprocess`.
model, preprocess = clip.load(
    "ViT-B/32",
    device=device,
)

# 3. Custom Dataset Class
class RiceLeafDataset(Dataset):
    """Image-classification dataset stored as one sub-directory per class.

    Every non-hidden sub-directory of ``root_dir`` becomes a class, and class
    indices follow the sorted directory names. Non-hidden files whose name ends
    in ``.png``, ``.jpg`` or ``.jpeg`` (case-insensitive) are decoded once at
    construction time; files that fail to decode are reported and skipped.

    Args:
        root_dir: Directory that contains one sub-directory per class.
        transform: Callable applied to each RGB image in ``__getitem__``.
            When falsy, the module-level ``preprocess`` is used instead.
    """

    # Accepted file-name suffixes, compared against the lower-cased name.
    IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')

    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform or preprocess

        # Sorted so the class -> index mapping is the same on every run.
        self.classes = [
            cls for cls in sorted(os.listdir(root_dir))
            if os.path.isdir(os.path.join(root_dir, cls)) and not cls.startswith('.')
        ]
        self.class_to_idx = {cls: i for i, cls in enumerate(self.classes)}
        # List of (image path, class index) pairs for every readable image.
        self.images = self._load_images()

    def _load_images(self):
        """Return ``(path, class_index)`` pairs for every decodable image."""
        images = []
        for cls in self.classes:
            cls_path = os.path.join(self.root_dir, cls)
            # os.listdir returns entries in arbitrary order; sorting keeps the
            # sample indices (and any seeded split built on them) stable.
            for img_name in sorted(os.listdir(cls_path)):
                if img_name.startswith('.'):
                    continue
                if not img_name.lower().endswith(self.IMAGE_EXTENSIONS):
                    continue
                img_path = os.path.join(cls_path, img_name)
                try:
                    # Fully decode once so broken files are dropped up front;
                    # the context manager releases the file handle.
                    with Image.open(img_path) as img:
                        img.convert("RGB")
                except Exception:
                    # Best-effort skip of unreadable files, but do not swallow
                    # KeyboardInterrupt/SystemExit as a bare `except:` would.
                    print(f"Skipping corrupted image: {img_path}")
                else:
                    images.append((img_path, self.class_to_idx[cls]))
        return images

    def __len__(self):
        """Return the number of readable images found at construction time."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx``.

        The image is opened from disk, converted to RGB and passed through
        ``self.transform`` when one is set; ``label`` is the class index.
        """
        img_path, label = self.images[idx]
        # convert() yields an independent image, so the file can be closed
        # as soon as the conversion is done.
        with Image.open(img_path) as img:
            image = img.convert("RGB")
        if self.transform:
            image = self.transform(image)
        return image, label

# 4. Prepare Data
# Build the full dataset once and report how class names map to label indices.
dataset = RiceLeafDataset(
    '/Users/tharindua/Downloads/Rice_Leaf_AUG',
    transform=preprocess,
)
print("Class to index mapping:", dataset.class_to_idx)

# Split sample indices 80/20 with a fixed seed, then wrap each index list in a
# Subset so both splits read from the same underlying dataset object.
sample_indices = range(len(dataset))
train_idx, val_idx = train_test_split(
    sample_indices,
    test_size=0.2,
    random_state=42,
)
train_dataset, val_dataset = (
    torch.utils.data.Subset(dataset, split_indices)
    for split_indices in (train_idx, val_idx)
)

# Only the training loader shuffles; validation keeps the default order.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32)

# 5. Fine-Tune Model
class FineTunedCLIP(torch.nn.Module):
    """CLIP image encoder followed by a linear classification head.

    Args:
        clip_model: Object exposing ``encode_image(images)``; stored as
            ``self.clip``.
        num_classes: Number of output logits.
        feature_dim: Width of the vectors returned by ``encode_image``. When
            ``None`` it is read from ``clip_model.visual.output_dim`` if that
            attribute exists, otherwise 512 (the ViT-B/32 width) is used.
        freeze_backbone: When true (the default) the encoder always runs
            without gradient tracking, so only the classifier can be trained.
            Set it to ``False`` (at construction or later via the attribute)
            to let gradients reach the backbone.
    """

    def __init__(self, clip_model, num_classes, feature_dim=None, freeze_backbone=True):
        super().__init__()
        self.clip = clip_model
        self.freeze_backbone = freeze_backbone

        if feature_dim is None:
            # Ask the backbone for its embedding width instead of assuming
            # ViT-B/32; keep 512 as the fallback for backward compatibility.
            visual = getattr(clip_model, "visual", None)
            feature_dim = getattr(visual, "output_dim", 512)
        self.classifier = torch.nn.Linear(feature_dim, num_classes)

    def forward(self, images):
        """Return class logits for a batch of preprocessed images."""
        # Track gradients through the encoder only when the caller has them
        # enabled AND the backbone is not frozen, so an outer torch.no_grad()
        # (e.g. during validation) is never overridden.
        track_grad = torch.is_grad_enabled() and not self.freeze_backbone
        with torch.set_grad_enabled(track_grad):
            image_features = self.clip.encode_image(images)
        # Cast to float32 so the features match the classifier's parameters.
        return self.classifier(image_features.float())

# 6. Initialize model and freeze CLIP
# One output logit per class directory discovered by the dataset.
num_classes = len(dataset.classes)
ft_model = FineTunedCLIP(model, num_classes)
ft_model = ft_model.to(device)

# Mark every backbone parameter as non-trainable; only the classifier head
# is handed to the optimizer below.
ft_model.clip.requires_grad_(False)

# 7. Loss & Optimizer
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(
    ft_model.classifier.parameters(),
    lr=1e-4,
    weight_decay=0.01,
)

# 8. Train and Validate Functions
def train_epoch(model, loader, optimizer, criterion, device):
    """Run one optimisation pass over ``loader``.

    Args:
        model: Module mapping an image batch to class logits.
        loader: Iterable yielding ``(images, labels)`` tensor batches.
        optimizer: Optimizer stepped once per batch.
        criterion: Loss called as ``criterion(outputs, labels)``; assumed to
            return the mean loss over the batch (the default reduction).
        device: Device the batches are moved to before the forward pass.

    Returns:
        ``(mean_loss, accuracy)`` averaged over the samples actually seen, or
        ``(0.0, 0.0)`` when the loader yields no samples.
    """
    model.train()
    total_loss, correct, seen = 0.0, 0, 0

    for images, labels in tqdm(loader):
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Weight each batch by its size so a smaller final batch does not
        # skew the epoch average.
        batch_size = labels.size(0)
        total_loss += loss.item() * batch_size
        correct += (outputs.argmax(1) == labels).sum().item()
        seen += batch_size

    # Divide by samples seen rather than len(loader.dataset): the two differ
    # with drop_last or a custom sampler, and this also avoids a
    # ZeroDivisionError on an empty loader.
    if seen == 0:
        return 0.0, 0.0
    return total_loss / seen, correct / seen

def validate(model, loader, criterion, device):
    """Evaluate ``model`` on ``loader`` without updating any weights.

    Args:
        model: Module mapping an image batch to class logits.
        loader: Iterable yielding ``(images, labels)`` tensor batches.
        criterion: Loss called as ``criterion(outputs, labels)``; assumed to
            return the mean loss over the batch (the default reduction).
        device: Device the batches are moved to before the forward pass.

    Returns:
        ``(mean_loss, accuracy)`` averaged over the samples actually seen, or
        ``(0.0, 0.0)`` when the loader yields no samples.
    """
    model.eval()
    total_loss, correct, seen = 0.0, 0, 0

    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Weight each batch by its size so a smaller final batch does not
            # skew the average.
            batch_size = labels.size(0)
            total_loss += loss.item() * batch_size
            correct += (outputs.argmax(1) == labels).sum().item()
            seen += batch_size

    # Divide by samples seen rather than len(loader.dataset): the two differ
    # with drop_last or a custom sampler, and this also avoids a
    # ZeroDivisionError on an empty loader.
    if seen == 0:
        return 0.0, 0.0
    return total_loss / seen, correct / seen

# 9. Training Loop
# Train for 20 epochs, report metrics after each one, and checkpoint the
# weights whenever validation accuracy beats the best value seen so far.
best_val_acc = 0
for epoch in range(20):
    train_loss, train_acc = train_epoch(ft_model, train_loader, optimizer, criterion, device)
    val_loss, val_acc = validate(ft_model, val_loader, criterion, device)

    for report_line in (
        f"Epoch {epoch+1}:",
        f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f}",
        f"Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.4f}",
    ):
        print(report_line)

    # Nothing to save unless this epoch strictly improved on the best so far.
    if not val_acc > best_val_acc:
        continue

    best_val_acc = val_acc
    torch.save(ft_model.state_dict(), "best_clip_rice_model.pth")
    print("✅ New best model saved!")

# 10. Load and Evaluate
# Restore the best checkpoint and re-run validation; only accuracy is reported.
best_state = torch.load("best_clip_rice_model.pth")
ft_model.load_state_dict(best_state)
final_acc = validate(ft_model, val_loader, criterion, device)[1]
print(f"📊 Final Validation Accuracy: {final_acc:.4f}")

# 11. Single Image Prediction Function
def predict_image(image_path, model, preprocess, class_mapping):
    """Classify one image file and return its class name.

    Args:
        image_path: Path of the image to classify.
        model: Module mapping a preprocessed image batch to class logits.
        preprocess: Callable turning an RGB image into a tensor for ``model``.
        class_mapping: Mapping from predicted class index to class name.

    Returns:
        ``class_mapping[index]`` for the highest-scoring class.
    """
    model.eval()

    # Send the input to wherever the model's weights live rather than relying
    # on the module-level `device`; fall back to that global only for a model
    # without parameters.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else device

    # The context manager closes the file once the RGB copy has been made.
    with Image.open(image_path) as img:
        image = preprocess(img.convert("RGB")).unsqueeze(0).to(target_device)

    with torch.no_grad():
        output = model(image)
        pred = output.argmax(1).item()
    return class_mapping[pred]

# Example usage:
# Invert the name -> index table so a predicted index maps back to a class name.
class_mapping = dict(zip(dataset.class_to_idx.values(), dataset.class_to_idx.keys()))
image_path = "/Users/tharindua/Downloads/sample_leaf.jpg"  # Replace with test image
predicted_class = predict_image(
    image_path,
    ft_model,
    preprocess,
    class_mapping,
)
print(f"🧠 Predicted class: {predicted_class}")



[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Collecting git+https://github.com/openai/CLIP.git
  Cloning https://github.com/openai/CLIP.git to /private/var/folders/f0/vdhww2m16hvdbbnjjqfjwqhm0000gp/T/pip-req-build-spq63nyh
  Running command git clone --filter=blob:none --quiet https://github.com/openai/CLIP.git /private/var/folders/f0/vdhww2m16hvdbbnjjqfjwqhm0000gp/T/pip-req-build-spq63nyh
  Resolved https://github.com/openai/CLIP.git to commit dcba3cb2e2827b402d2701e7e1c7d9fed8a20ef1
  Installing build dependencies ... done
  Getting requirements to build wheel ... done
  Preparing metadata (pyproject.toml) ... done

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m25.1.1[

ModuleNotFoundError: No module named 'sklearn'