In [19]:
from google.colab import drive
drive.mount('/content/drive')
!unzip /content/drive/MyDrive/beyond-visible-spectrum-ai-for-agriculture-2026p2.zip

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Archive:  /content/drive/MyDrive/beyond-visible-spectrum-ai-for-agriculture-2026p2.zip
replace ICPR02/kaggle/Aphid/0041231a3f6f4fa9b07a04234cef4627/B1.tif? [y]es, [n]o, [A]ll, [N]one, [r]ename: 

In [20]:
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
import tifffile as tiff
from tqdm import tqdm
import os

In [21]:
import os
import glob
import tifffile as tiff
import numpy as np
labeled_samples = glob.glob('/content/ICPR02/kaggle/**/B2.tif', recursive=True)
unlabeled_samples = glob.glob('/content/archive/share/train/**/B2.tif', recursive=True)
def analyze_sample(file_path, title="Sample"):
    folder = os.path.dirname(file_path)
    print(f"--- {title} Analysis ---")
    print(f"Folder: {folder}")
    for b_name in ['B2', 'B12']:
        b_path = os.path.join(folder, f"{b_name}.tif")
        if os.path.exists(b_path):
            data = tiff.imread(b_path)
            print(f"  {b_name}: Shape={data.shape}, Dtype={data.dtype}, Range=[{data.min()}, {data.max()}]")
        else:
            print(f"  {b_name}: NOT FOUND in this folder.")
if labeled_samples:
    analyze_sample(labeled_samples[0], "Labeled")
else:
    print("No labeled B2.tif found. Check path: /content/ICPR02/kaggle")
print("\n")
if unlabeled_samples:
    analyze_sample(unlabeled_samples[0], "Unlabeled")
else:
    print("No unlabeled B2.tif found. Check path: /content/archive/share/train")

--- Labeled Analysis ---
Folder: /content/ICPR02/kaggle/Rust/e3d938b17b2f491384c2fcaa52a40c53
  B2: Shape=(264, 264), Dtype=uint16, Range=[1, 2910]
  B12: Shape=(132, 132), Dtype=uint16, Range=[331, 4412]


--- Unlabeled Analysis ---
Folder: /content/archive/share/train/wheat/000225/20200420T100019_20200420T100021_T33UYU
  B2: Shape=(264, 264), Dtype=uint16, Range=[116, 7164]
  B12: Shape=(132, 132), Dtype=uint16, Range=[85, 7030]


In [22]:
import os
import glob
import torch
import torch.multiprocessing as mp
import numpy as np
from torch.utils.data import Dataset, DataLoader
import tifffile as tiff
from torchvision import transforms
import torch.nn.functional as F
mp.set_sharing_strategy('file_system')
class SentinelSSLDataset(Dataset):
    def __init__(self, root_dirs, target_size=(224, 224)):
        self.target_size = target_size
        self.folder_paths = []
        self.bands = ['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8', 'B8A', 'B9', 'B11', 'B12']
        for root in root_dirs:
            search_pattern = os.path.join(root, "**/B2.tif")
            found_files = glob.glob(search_pattern, recursive=True)
            self.folder_paths.extend([os.path.dirname(f) for f in found_files])
        print(f"Dataset initialized with {len(self.folder_paths)} total samples.")
    def __len__(self):
        return len(self.folder_paths)
    def __getitem__(self, idx):
        folder = self.folder_paths[idx]
        band_list = []
        for b in self.bands:
            b_path = os.path.join(folder, f"{b}.tif")
            if not os.path.exists(b_path):
                alt_b = f"B0{b[1:]}" if len(b) == 2 else b
                b_path = os.path.join(folder, f"{alt_b}.tif")
            img = tiff.imread(b_path).astype(np.float32) / 10000.0
            img_tensor = torch.from_numpy(img).unsqueeze(0).unsqueeze(0)
            img_resized = F.interpolate(img_tensor, size=self.target_size, mode='bilinear', align_corners=False)
            band_list.append(img_resized.squeeze())
        full_stack = torch.stack(band_list, dim=0)
        return full_stack
roots = ['/content/ICPR02/kaggle', '/content/archive/share/train']
ssl_dataset = SentinelSSLDataset(roots)
ssl_loader = DataLoader(ssl_dataset, batch_size=16, shuffle=True, num_workers=2)
for batch in ssl_loader:
    print(f"Batch shape: {batch.shape}")
    break

Dataset initialized with 3468 total samples.
Batch shape: torch.Size([16, 12, 224, 224])


In [23]:
class LabeledSentinelDataset(Dataset):
    def __init__(self, root_dir, target_size=(224, 224)):
        self.target_size = target_size
        self.bands = ['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8', 'B8A', 'B9', 'B11', 'B12']

        self.folder_paths = []
        self.labels = []

        self.class_to_idx = {'blast': 0, 'brown': 1, 'healthy': 2, 'rust': 3}

        search_pattern = os.path.join(root_dir, "**/B2.tif")
        found_files = glob.glob(search_pattern, recursive=True)

        for f in found_files:
            folder = os.path.dirname(f)
            class_name = folder.split('/')[-2].lower()
            if class_name in self.class_to_idx:
                self.folder_paths.append(folder)
                self.labels.append(self.class_to_idx[class_name])

    def __len__(self):
        return len(self.folder_paths)

    def __getitem__(self, idx):
        folder = self.folder_paths[idx]
        label = self.labels[idx]
        band_list = []

        for b in self.bands:
            b_path = os.path.join(folder, f"{b}.tif")
            if not os.path.exists(b_path):
                alt_b = f"B0{b[1:]}" if len(b) == 2 else b
                b_path = os.path.join(folder, f"{alt_b}.tif")

            img = tiff.imread(b_path).astype(np.float32) / 10000.0
            img_tensor = torch.from_numpy(img).unsqueeze(0).unsqueeze(0)
            img_resized = F.interpolate(img_tensor, size=self.target_size, mode='bilinear', align_corners=False)
            band_list.append(img_resized.squeeze())

        return torch.stack(band_list, dim=0), label

In [24]:
root_path = '/content/ICPR02/kaggle'
labeled_dataset = LabeledSentinelDataset(root_dir=root_path)
labeled_loader = DataLoader(
    labeled_dataset,
    batch_size=16,
    shuffle=True,
    num_workers=2
)

print(f"Successfully created loader with {len(labeled_dataset)} samples.")

Successfully created loader with 115 samples.


In [25]:
from torch.utils.data import random_split

train_size = int(0.8 * len(labeled_dataset))
val_size = len(labeled_dataset) - train_size
train_ds, val_ds = random_split(labeled_dataset, [train_size, val_size])

train_loader = DataLoader(train_ds, batch_size=16, shuffle=True, num_workers=2)
val_loader = DataLoader(val_ds, batch_size=16, shuffle=False, num_workers=2)

In [26]:
import torch
import torch.nn as nn
from torchvision import models

class BaselineCNN(nn.Module):
    def __init__(self, num_classes=4, input_channels=12):
        super(BaselineCNN, self).__init__()
        self.network = models.resnet18(weights=None)

        self.network.conv1 = nn.Conv2d(
            input_channels, 64, kernel_size=7, stride=2, padding=3, bias=False
        )

        self.network.fc = nn.Linear(self.network.fc.in_features, num_classes)

    def forward(self, x):
        return self.network(x)

In [28]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = BaselineCNN(num_classes=4).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

for epoch in range(5):
    model.train()
    train_loss = 0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

    model.eval()
    val_correct = 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, pred = torch.max(outputs, 1)
            val_correct += (pred == labels).sum().item()

    val_acc = 100 * val_correct / val_size
    print(f"Epoch {epoch+1}: Train Loss: {train_loss/len(train_loader):.4f} | Val Acc: {val_acc:.2f}%")

Epoch 1: Train Loss: 1.1626 | Val Acc: 60.87%
Epoch 2: Train Loss: 0.8039 | Val Acc: 60.87%
Epoch 3: Train Loss: 0.4016 | Val Acc: 39.13%
Epoch 4: Train Loss: 0.3360 | Val Acc: 69.57%
Epoch 5: Train Loss: 0.1738 | Val Acc: 73.91%
