In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
#for dirname, _, filenames in os.walk('/kaggle/input'):
    #for filename in filenames:
        #print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

First, enlarge the training set with offline augmentations (180° rotation, vertical flip, horizontal flip):

In [2]:
import os
from pathlib import Path
import pandas as pd
from PIL import Image
from tqdm.auto import tqdm

# Config
TRAIN_CSV = Path("/kaggle/input/csiro-biomass/train.csv")
INPUT_ROOT = Path("/kaggle/input/csiro-biomass")
OUT_IMG_DIR = Path("/kaggle/working/train_aug")
OUT_CSV = Path("/kaggle/working/train_augmented.csv")
CREATE_ROT180 = True
CREATE_FLIP_X = True
CREATE_FLIP_Y = True


def make_augmented_filename(orig_filename: str, suffix: str) -> str:
    """Return the bare file name of ``orig_filename`` with ``suffix`` inserted before the extension.

    Any directory component of ``orig_filename`` is dropped, e.g.
    ``("train/ID1.jpg", "_rot180") -> "ID1_rot180.jpg"``.
    """
    p = Path(orig_filename)
    return f"{p.stem}{suffix}{p.suffix}"


# Cache guard: the augmentation is skipped when both the CSV and a non-empty
# image directory are already present from an earlier run.
if OUT_CSV.exists() and OUT_IMG_DIR.exists() and any(OUT_IMG_DIR.iterdir()):
    print("Augmented CSV and images already exist. Skipping creation.")
else:
    print("Creating augmentations")
    OUT_IMG_DIR.mkdir(parents=True, exist_ok=True)

    df_orig = pd.read_csv(TRAIN_CSV)
    unique_paths = df_orig['image_path'].unique()

    # (suffix, transform) pairs for the enabled augmentations, in a fixed order.
    augmentations = []
    if CREATE_ROT180:
        augmentations.append(("_rot180", lambda im: im.rotate(180)))
    if CREATE_FLIP_X:
        augmentations.append(("_flipx", lambda im: im.transpose(Image.FLIP_TOP_BOTTOM)))
    if CREATE_FLIP_Y:
        augmentations.append(("_flipy", lambda im: im.transpose(Image.FLIP_LEFT_RIGHT)))

    # Maps each original relative image path to the file names written for it,
    # in the same order as `augmentations`.
    created_images = {}
    for img_rel in tqdm(unique_paths, desc="Creating augmented images"):
        # Paths in the CSV are tried relative to the dataset root first, then as given.
        src_path = INPUT_ROOT / img_rel
        if not src_path.exists():
            src_path = Path(img_rel)
            if not src_path.exists():
                raise FileNotFoundError(f"Could not find source image: {img_rel}")

        # Decode inside a context manager so the source file handle is released
        # deterministically; `convert` returns an independent in-memory image.
        with Image.open(src_path) as src_img:
            img = src_img.convert("RGB")

        created_images[img_rel] = []
        for suffix, transform in augmentations:
            out_name = make_augmented_filename(img_rel, suffix)
            transform(img).save(OUT_IMG_DIR / out_name, quality=95)
            created_images[img_rel].append(out_name)

    aug_rows = []
    for _, row in tqdm(df_orig.iterrows(), total=len(df_orig), desc="Building augmented CSV rows"):
        img_rel = row['image_path']
        # sample_id has the form "<image_id>__<target_name>".
        base_image_id, target_name = str(row['sample_id']).split("__", 1)
        for (suffix, _transform), out_name in zip(augmentations, created_images[img_rel]):
            new_row = row.copy()
            # The suffix already starts with a single "_"; adding another one would put
            # a second "__" separator into sample_id and break splitting on "__".
            new_row['sample_id'] = f"{base_image_id}{suffix}__{target_name}"
            # relative path in new CSV points to OUT_IMG_DIR basename ('train_aug/<name>')
            new_row['image_path'] = str(Path(OUT_IMG_DIR.name) / out_name)
            aug_rows.append(new_row)

    df_aug = pd.DataFrame(aug_rows)
    final_df = pd.concat([df_orig, df_aug], ignore_index=True)
    final_df.to_csv(OUT_CSV, index=False)
    print("Saved augmented CSV to:", OUT_CSV)
    print("Saved augmented images to:", OUT_IMG_DIR)
    print("Original rows:", len(df_orig), "Augmented rows added:", len(df_aug), "Final rows:", len(final_df))


Creating augmentations


Creating augmented images:   0%|          | 0/357 [00:00<?, ?it/s]

Building augmented CSV rows:   0%|          | 0/1785 [00:00<?, ?it/s]

Saved augmented CSV to: /kaggle/working/train_augmented.csv
Saved augmented images to: /kaggle/working/train_aug
Original rows: 1785 Augmented rows added: 5355 Final rows: 7140


In [3]:
import matplotlib.pyplot as plt
import os
from PIL import Image
from tqdm.auto import tqdm
from sklearn.metrics import r2_score
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms
import pathlib
from pathlib import Path
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.model_selection import train_test_split
import random
import copy
from tqdm.auto import tqdm
from sklearn.model_selection import GroupKFold


print("Done")

Done


In [None]:
# Seed every RNG used by the notebook (Python, NumPy, PyTorch CPU and CUDA).
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print("Device:", DEVICE)
# NOTE(review): cuDNN benchmark mode auto-tunes convolution kernels and may trade exact
# run-to-run reproducibility for speed -- confirm that is acceptable alongside the seeding above.
torch.backends.cudnn.benchmark = True

USE_AUGMENTED = True   # True if you have run the previous cell
AUG_CSV_PATH = Path("/kaggle/working/train_augmented.csv")
AUG_IMG_DIR  = Path("/kaggle/working/train_aug")

# NOTE: TRAIN_CSV was a Path in the augmentation cell; it is rebound to a plain string here.
TRAIN_CSV = "/kaggle/input/csiro-biomass/train.csv"
TEST_CSV = "/kaggle/input/csiro-biomass/test.csv"
TRAIN_IMG_ROOT = "/kaggle/input/csiro-biomass/train"
TEST_IMG_ROOT  = "/kaggle/input/csiro-biomass/test"

# hyperparameters
IMG_SIZE = 384
BATCH_SIZE = 32
LR = 3e-4
EPOCHS = 30 # maximum number of epochs per fold
N_FOLDS = 5 # number of cross-validation folds
TAB_DIM = 8 # width of the tabular feature vector fed to the model

# Column order of the target/mask arrays and of the model outputs.
TARGET_NAMES = ["Dry_Clover_g","Dry_Dead_g","Dry_Green_g","Dry_Total_g","GDM_g"]
# Per-target weights looked up by weighted_r2.
target_weights = {"Dry_Green_g":0.1,"Dry_Dead_g":0.1,"Dry_Clover_g":0.1,"GDM_g":0.2,"Dry_Total_g":0.5}


Device: cuda


Load the data and pivot it from long format (one row per image/target pair) to wide format (one row per image file).

In [5]:
# Read the long-format table (one row per image/target pair). USE_AUGMENTED selects
# between the augmented CSV written by the augmentation cell and the original CSV.
df_raw = pd.read_csv(AUG_CSV_PATH if USE_AUGMENTED else TRAIN_CSV)

# Fallback directory handed to the datasets. Taken from the configured paths rather
# than from the current working directory, so the cell works from any cwd.
IMG_ROOT = AUG_IMG_DIR if USE_AUGMENTED else Path(TRAIN_IMG_ROOT)


def fix_path(p):
    """Turn a CSV-relative image path into an absolute one.

    Paths under ``train_aug/`` (the augmented images) resolve into ``AUG_IMG_DIR``;
    every other path resolves into ``TRAIN_IMG_ROOT``. Only the file name of ``p``
    is kept in both cases.
    """
    p = str(p)
    if p.startswith(f"{AUG_IMG_DIR.name}/"):
        return str(AUG_IMG_DIR / Path(p).name)
    return str(Path(TRAIN_IMG_ROOT) / Path(p).name)


df_raw["image_path"] = df_raw["image_path"].apply(fix_path)
df_raw["image_file_stem"] = df_raw["image_path"].apply(lambda p: Path(p).stem)
# Augmented files are named "<original stem>_<aug>", so the text before the first
# underscore identifies the source image (used later as the CV group key).
df_raw["orig_image_id"] = df_raw["image_file_stem"].str.split("_").str[0]

meta_cols = ["image_path","Sampling_Date","State","Species","Pre_GSHH_NDVI","Height_Ave_cm"]
# One column per target name, one row per image file.
wide_targets = df_raw.pivot_table(index="image_file_stem", values="target", columns="target_name", aggfunc='first').reset_index()
meta = df_raw.groupby("image_file_stem").first()[meta_cols].reset_index()
df = meta.merge(wide_targets, on="image_file_stem", how="left")

orig_map = df_raw.groupby("image_file_stem")["orig_image_id"].first().reset_index()
df = df.merge(orig_map, on="image_file_stem", how="left")

df = df.reset_index(drop=True)
df["global_idx"] = np.arange(len(df))

# The wide frame must hold exactly one row per image file.
assert df["image_file_stem"].is_unique, "expected one row per image file after pivoting"

print("Per-file samples (including augmentations):", len(df))

Per-file samples (including augmentations): 1428


In [6]:
def add_date_features(df, date_col="Sampling_Date"):
    """Return a copy of ``df`` with cyclical day-of-year columns added.

    ``date_col`` is parsed as a date; values that cannot be parsed are treated as
    day 1 of the year. The day of year is mapped onto a circle with a period of
    365.25 days and stored as ``date_sin`` / ``date_cos``. The input frame is not
    modified.
    """
    parsed = pd.to_datetime(df[date_col], errors="coerce")
    day_of_year = parsed.dt.dayofyear.fillna(1).astype(int)
    angle = 2*np.pi*day_of_year/365.25
    return df.assign(date_sin=np.sin(angle), date_cos=np.cos(angle))

# Add the date_sin/date_cos columns to the per-image frame (df is rebound to the returned copy).
df = add_date_features(df)

In [7]:
n = len(df)
m = len(TARGET_NAMES)

# Build the (n, m) target matrix and its observation mask in one vectorized pass.
# `reindex` yields an all-NaN column for any target name missing from df, and the
# result is positional, so it does not depend on df carrying a 0..n-1 index.
target_frame = df.reindex(columns=TARGET_NAMES)

# mask[i, j] is 1.0 where target j was observed for row i, else 0.0.
mask = target_frame.notna().to_numpy().astype(np.float32)
# Unobserved targets are stored as 0.0; the mask is what marks them as missing.
targets = target_frame.fillna(0.0).to_numpy(dtype=np.float32)

assert targets.shape == (n, m) and mask.shape == (n, m)

In [8]:
# Load the test table and reduce it to one row per image file.
test_raw = pd.read_csv(TEST_CSV)
test_raw["image_path"] = test_raw["image_path"].astype(str)
test_raw["image_file_stem"] = test_raw["image_path"].map(lambda p: Path(p).stem)

# Keep the first row of each image and only the two columns used for inference.
test_images = (
    test_raw.groupby("image_file_stem")
    .first()
    .reset_index()
    .loc[:, ["image_file_stem", "image_path"]]
)
print("Test images count:", len(test_images))

Test images count: 1


In [9]:
# Channel statistics used to normalize the image tensors.
_NORM_MEAN = [0.485,0.456,0.406]
_NORM_STD = [0.229,0.224,0.225]


def _resize_step():
    """Resize step shared by both pipelines (square IMG_SIZE x IMG_SIZE output)."""
    return transforms.Resize((IMG_SIZE,IMG_SIZE))


def _tensor_steps():
    """Final steps shared by both pipelines: PIL image -> normalized tensor."""
    return [transforms.ToTensor(), transforms.Normalize(mean=_NORM_MEAN, std=_NORM_STD)]


# Training pipeline: resize, light random augmentation, then tensor conversion.
train_transform = transforms.Compose(
    [
        _resize_step(),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(10),
        transforms.ColorJitter(0.1,0.1,0.1,0.05),
    ]
    + _tensor_steps()
)

# Validation/test pipeline: deterministic resize and tensor conversion only.
val_transform = transforms.Compose([_resize_step()] + _tensor_steps())

In [None]:
class FusionDataset(Dataset):
    """Dataset yielding an image, its tabular vector and (optionally) targets.

    Parameters:
        df_subset: frame with an ``image_path`` column; ``global_idx`` is read when
            ``return_index`` is true and ``image_file_stem`` when no targets are given.
        tab_arr: per-row tabular features, stored as a float32 tensor.
        target_arr / mask_arr: optional per-row targets and observation mask,
            stored as float32 tensors.
        transform: optional callable applied to the loaded RGB image.
        img_root: optional directory searched by file name when ``image_path``
            does not exist as given.
        return_index: when true, the row's ``global_idx`` is returned as the
            last item of each sample.
    """

    def __init__(self, df_subset, tab_arr, target_arr=None, mask_arr=None, transform=None, img_root=None, return_index=False):
        self.df = df_subset.reset_index(drop=True)
        self.tab = torch.tensor(tab_arr, dtype=torch.float32)
        self.targets = None if target_arr is None else torch.tensor(target_arr, dtype=torch.float32)
        self.mask = None if mask_arr is None else torch.tensor(mask_arr, dtype=torch.float32)
        self.transform = transform
        self.img_root = None if img_root is None else Path(img_root)
        self.return_index = return_index

    def __len__(self):
        return len(self.df)

    def _resolve(self, p):
        """Return ``p`` if it exists, else ``img_root/<file name>`` if that exists, else ``p``."""
        direct = Path(p)
        if not direct.exists() and self.img_root is not None:
            fallback = self.img_root / direct.name
            if fallback.exists():
                return fallback
        return direct

    def __getitem__(self, idx):
        record = self.df.iloc[idx]
        image = Image.open(str(self._resolve(record["image_path"]))).convert("RGB")
        if self.transform:
            image = self.transform(image)
        features = self.tab[idx]

        # Without targets, the last item identifies the sample instead.
        if self.targets is None:
            identifier = int(record['global_idx']) if self.return_index else record["image_file_stem"]
            return image, features, identifier

        sample = (image, features, self.targets[idx], self.mask[idx])
        if self.return_index:
            return sample + (int(record['global_idx']),)
        return sample

class TestImageDataset(Dataset):
    """Inference dataset yielding ``(image, tabular vector, image_file_stem)``.

    Parameters:
        df: frame with ``image_path`` and ``image_file_stem`` columns.
        tab_arr: per-row tabular features, stored as a float32 tensor.
        transform: optional callable applied to the loaded RGB image.
        img_root: optional directory; when ``image_path`` does not exist as given,
            the file name is looked up under this directory instead.
    """

    def __init__(self, df, tab_arr, transform=None, img_root=None):
        self.df = df.reset_index(drop=True)
        self.tab = torch.tensor(tab_arr, dtype=torch.float32)
        self.transform = transform
        self.img_root = None if img_root is None else Path(img_root)

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        record = self.df.iloc[idx]
        location = Path(record["image_path"])
        # Fall back to img_root/<file name> when the path from the CSV is not usable directly.
        if self.img_root is not None and not location.exists():
            location = self.img_root / location.name
        image = Image.open(str(location)).convert("RGB")
        if self.transform:
            image = self.transform(image)
        return image, self.tab[idx], record["image_file_stem"]

class FusionMultiOutputModel(nn.Module):
    """Image + tabular fusion network with one regression head per target.

    A ConvNeXt-Large backbone produces a 1536-dimensional image embedding, a small
    MLP embeds the tabular vector into 64 dimensions, and the concatenation is
    passed through a shared block followed by ``n_targets`` independent heads.

    Parameters:
        tab_dim: width of the tabular input vector.
        n_targets: number of regression outputs (one head each).
        freeze_backbone: when true, backbone parameters are excluded from training.
        pretrained: when true, the backbone starts from torchvision's default
            ConvNeXt-Large weights (these are downloaded unless already cached);
            when false (the default) it is randomly initialized.

    ``forward(img, tab)`` returns a ``(batch, n_targets)`` tensor.

    NOTE: the tabular MLP contains ``BatchNorm1d``, which needs more than one
    sample per batch while the model is in training mode.
    """

    def __init__(self, tab_dim, n_targets=len(TARGET_NAMES), freeze_backbone=False, pretrained=False):
        super().__init__()

        # --- Image backbone: ConvNeXt-Large ---
        from torchvision.models import ConvNeXt_Large_Weights, convnext_large

        backbone_weights = ConvNeXt_Large_Weights.DEFAULT if pretrained else None
        self.backbone = convnext_large(weights=backbone_weights)

        # Drop the classification head so the backbone returns its pooled features.
        # The flatten step lived inside that head, so forward() flattens explicitly.
        self.backbone.classifier = nn.Identity()

        # Channel count of ConvNeXt-Large's final stage.
        image_feat_dim = 1536

        if freeze_backbone:
            for p in self.backbone.parameters():
                p.requires_grad = False

        # --- Tabular MLP: Linear → ReLU → BatchNorm → Linear → ReLU ---
        hidden1 = max(32, tab_dim * 2)
        self.tab_mlp = nn.Sequential(
            nn.Linear(tab_dim, hidden1),
            nn.ReLU(),
            nn.BatchNorm1d(hidden1),
            nn.Linear(hidden1, 64),
            nn.ReLU(),
        )

        # --- Fusion layer (image + tabular) ---
        fusion_dim = image_feat_dim + 64

        # Shared block: LayerNorm → Linear → GELU → Dropout
        self.shared = nn.Sequential(
            nn.LayerNorm(fusion_dim),
            nn.Linear(fusion_dim, 512),
            nn.GELU(),
            nn.Dropout(0.2)
        )

        # --- Multi-head regression ---
        # Each head: LayerNorm → Linear → GELU → Dropout → Linear(→ 1)
        self.heads = nn.ModuleList([
            nn.Sequential(
                nn.LayerNorm(512),
                nn.Linear(512, 256),
                nn.GELU(),
                nn.Dropout(0.1),
                nn.Linear(256, 1)
            )
            for _ in range(n_targets)
        ])

    def forward(self, img, tab):
        # With the head removed the backbone output is still (B, C, 1, 1) after
        # global pooling; flatten it to (B, C) so it can be concatenated with the
        # (B, 64) tabular embedding.
        img_feat = torch.flatten(self.backbone(img), start_dim=1)
        tab_feat = self.tab_mlp(tab)

        x = torch.cat([img_feat, tab_feat], dim=1)
        x = self.shared(x)

        # One scalar per head, concatenated into (B, n_targets).
        out = torch.cat([h(x) for h in self.heads], dim=1)
        return out



In [None]:
def build_tabular_from_df_local(given_df, enc=None, scaler=None):
    """Return an all-zero float32 tabular matrix of shape ``(len(given_df), TAB_DIM)``.

    ``enc`` and ``scaler`` are accepted but not used: every row gets the same
    zero vector regardless of the contents of ``given_df``.
    """
    return np.zeros((len(given_df), TAB_DIM), dtype=np.float32)

def masked_mse(preds, targets, mask):
    """Mean squared error over the entries selected by ``mask``.

    Parameters:
        preds, targets: tensors of the same shape.
        mask: tensor of the same shape; entries are multiplied into the error,
            so 1 keeps an element and 0 drops it.

    Returns:
        Scalar tensor: the sum of squared masked errors divided by ``mask.sum()``.
        When the mask sums to zero the result is 0, and it stays attached to the
        autograd graph so ``backward()`` on it still works (yielding zero gradients).
    """
    sq_err = ((preds - targets) * mask) ** 2
    denom = mask.sum()
    # Swap a zero denominator for 1: the numerator is zero in that case, so the
    # loss is 0 without dividing by zero and without returning a detached constant.
    safe_denom = torch.where(denom == 0, torch.ones_like(denom), denom)
    return sq_err.sum() / safe_denom

def weighted_r2(y_true, y_pred, names):
    """Weighted R² over a flat list of observations.

    Parameters:
        y_true, y_pred: 1-D arrays of true and predicted values.
        names: target name of each observation; its weight is looked up in
            ``target_weights``.

    Returns:
        ``1 - SS_res / SS_tot`` where both sums are weighted and ``SS_tot`` is taken
        around the weighted mean of ``y_true``; ``0.0`` when ``SS_tot`` is zero.
    """
    weights = np.array([target_weights[name] for name in names], dtype=float)
    weighted_mean = np.sum(weights * y_true) / np.sum(weights)
    residual_ss = np.sum(weights * (y_true - y_pred)**2)
    total_ss = np.sum(weights * (y_true - weighted_mean)**2)
    return 0.0 if total_ss == 0 else 1.0 - residual_ss / total_ss

Grouped K-fold cross-validation and training

In [12]:
# Running sum of per-fold test predictions: one row per test image, one column per target.
test_preds_accum = np.zeros((len(test_images), len(TARGET_NAMES)), dtype=np.float32)
# Number of predictions accumulated for each test image.
test_counts = np.zeros(len(test_images), dtype=np.float32)
# image_file_stem -> row position in the two arrays above.
test_index_map = {
    stem: position
    for position, stem in enumerate(test_images["image_file_stem"])
}

In [13]:
# Augmented copies share their source image's id, so grouping on it keeps every
# variant of one photo on the same side of the train/validation split.
groups = df["orig_image_id"].values
gkf = GroupKFold(n_splits=N_FOLDS)

# The test set and its all-zero tabular input are the same for every fold,
# so the dataset and loader are built once instead of once per fold.
tabular_test = np.zeros((len(test_images), TAB_DIM), dtype=np.float32)
test_ds = TestImageDataset(test_images, tabular_test, transform=val_transform, img_root=TEST_IMG_ROOT)
test_loader = DataLoader(test_ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=2, pin_memory=(DEVICE=="cuda"))

# Target names as an array so per-observation names can be selected with a boolean mask.
target_name_arr = np.array(TARGET_NAMES)

fold_no = 0
for train_idx, val_idx in gkf.split(df, df, groups=groups):
    fold_no += 1
    print(f"Fold {fold_no}/{N_FOLDS}")
    df_train = df.iloc[train_idx].reset_index(drop=True)
    df_val = df.iloc[val_idx].reset_index(drop=True)
    y_train_raw = targets[train_idx]; mask_train = mask[train_idx]
    y_val_raw = targets[val_idx]; mask_val = mask[val_idx]

    # Fit enc/scaler on train only
    enc = OneHotEncoder(sparse_output=False, handle_unknown="ignore")
    enc.fit(df_train[["State","Species"]].fillna("missing"))
    scaler = StandardScaler()
    scaler.fit(df_train[["Pre_GSHH_NDVI","Height_Ave_cm"]].fillna(0).values)

    X_tab_train = build_tabular_from_df_local(df_train, enc, scaler)
    X_tab_val = build_tabular_from_df_local(df_val, enc, scaler)

    # compute target means/std on train only (observed entries only)
    t_means = np.zeros(m, dtype=np.float32); t_stds = np.ones(m, dtype=np.float32)
    for j in range(m):
        vals = y_train_raw[mask_train[:,j]==1, j]
        if len(vals) > 0:
            t_means[j] = vals.mean()
            t_stds[j]  = vals.std() if vals.std() > 0 else 1.0

    # normalize the global targets using train stats
    targets_norm = (targets - t_means.reshape(1,-1)) / t_stds.reshape(1,-1)
    y_tr = targets_norm[train_idx]; y_v = targets_norm[val_idx]

    # dataloaders
    train_ds = FusionDataset(df_train, X_tab_train, y_tr, mask_train, transform=train_transform, img_root=IMG_ROOT, return_index=False)
    val_ds = FusionDataset(df_val, X_tab_val, y_v, mask_val, transform=val_transform, img_root=IMG_ROOT, return_index=False)
    train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True, num_workers=2, pin_memory=(DEVICE=="cuda"))
    val_loader   = DataLoader(val_ds,   batch_size=BATCH_SIZE, shuffle=False, num_workers=2, pin_memory=(DEVICE=="cuda"))

    # model, optimizer, scheduler
    tab_dim = X_tab_train.shape[1]
    model = FusionMultiOutputModel(tab_dim, n_targets=m, pretrained=True).to(DEVICE)
    optimizer = torch.optim.AdamW(model.parameters(), lr=LR, weight_decay=1e-5)
    # `verbose` is deprecated on PyTorch LR schedulers, so it is no longer passed.
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=3)

    best_val_r2 = -9e9
    patience_counter = 0
    PATIENCE = 3
    best_state = None

    # training epochs
    for epoch in range(1, EPOCHS+1):
        model.train()
        running_loss_sum = 0.0
        running_obs = 0.0
        train_iter = tqdm(train_loader, desc=f"Fold{fold_no} Ep{epoch} train", leave=False)
        for batch in train_iter:
            imgs, tabs, ys, masks_batch = batch
            imgs = imgs.to(DEVICE)
            tabs = tabs.to(DEVICE)
            ys = ys.to(DEVICE)
            masks_batch = masks_batch.to(DEVICE)

            preds = model(imgs, tabs)
            loss = masked_mse(preds, ys, masks_batch)

            optimizer.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 5.0)
            optimizer.step()

            # Weight each batch loss by its number of observed targets so the
            # epoch loss is a per-observation average.
            n_obs = float(masks_batch.sum().item())
            running_loss_sum += loss.item() * n_obs
            running_obs += n_obs
            train_iter.set_postfix({"batch_loss": f"{loss.item():.4f}"})

        train_loss = running_loss_sum / max(1e-8, running_obs)

        # validation
        model.eval()
        val_loss_sum = 0.0
        val_obs = 0.0
        all_true = []
        all_pred = []
        all_names = []

        val_iter = tqdm(val_loader, desc=f"Fold{fold_no} Ep{epoch} val  ", leave=False)
        with torch.no_grad():
            for batch in val_iter:
                imgs, tabs, ys, masks_batch = batch
                imgs = imgs.to(DEVICE)
                tabs = tabs.to(DEVICE)
                ys = ys.to(DEVICE)
                masks_batch = masks_batch.to(DEVICE)

                preds_norm = model(imgs, tabs)
                loss = masked_mse(preds_norm, ys, masks_batch)

                n_obs = float(masks_batch.sum().item())
                val_loss_sum += loss.item() * n_obs
                val_obs += n_obs

                # Undo the target normalization so R2 is computed in original units.
                preds_raw = preds_norm.cpu().numpy() * t_stds.reshape(1,-1) + t_means.reshape(1,-1)
                ys_raw = ys.cpu().numpy() * t_stds.reshape(1,-1) + t_means.reshape(1,-1)
                observed = masks_batch.cpu().numpy() == 1

                # Boolean indexing walks the batch row by row, target by target,
                # so the three lists stay aligned element for element.
                all_true.extend(ys_raw[observed].tolist())
                all_pred.extend(preds_raw[observed].tolist())
                all_names.extend(np.broadcast_to(target_name_arr, observed.shape)[observed].tolist())

                val_iter.set_postfix({"batch_loss": f"{loss.item():.4f}"})

        val_loss = val_loss_sum / max(1e-8, val_obs)
        val_r2 = weighted_r2(np.array(all_true), np.array(all_pred), all_names) if len(all_true) else 0.0
        print(f"Fold{fold_no} Epoch {epoch} train_loss={train_loss:.6f} val_loss={val_loss:.6f} val_weightedR2={val_r2:.6f}")

        # The LR schedule follows validation loss, while checkpointing and early
        # stopping follow the weighted R2.
        scheduler.step(val_loss)
        if val_r2 > best_val_r2:
            best_val_r2 = val_r2
            best_state = {k: v.cpu().clone() for k,v in model.state_dict().items()}
            patience_counter = 0
            print("  New best val weighted R2:", best_val_r2)
        else:
            patience_counter += 1
            if patience_counter >= PATIENCE:
                print(f"Early stopping triggered at epoch {epoch}. Best val R2={best_val_r2:.6f}")
                break

    if best_state is not None:
        model.load_state_dict(best_state)
        print(f"Fold {fold_no}: restored best model state (val weighted R2={best_val_r2:.6f})")

    # Test inference for this fold (accumulate). Switch to eval mode explicitly so
    # dropout/batch-norm behave deterministically regardless of how training ended.
    model.eval()
    with torch.no_grad():
        for imgs, tabs, stems in tqdm(test_loader, desc=f"Fold{fold_no} Test infer"):
            imgs = imgs.to(DEVICE); tabs = tabs.to(DEVICE)
            preds_norm = model(imgs, tabs).cpu().numpy()
            preds_raw = preds_norm * t_stds.reshape(1,-1) + t_means.reshape(1,-1)
            # Predictions are clipped at zero before being accumulated.
            preds_raw = np.clip(preds_raw, 0.0, None)
            for stem, vec in zip(stems, preds_raw):
                ti = test_index_map[stem]
                test_preds_accum[ti] += vec
                test_counts[ti] += 1

# Average test predictions across folds
test_counts = np.maximum(test_counts, 1)
test_preds_avg = test_preds_accum / test_counts.reshape(-1,1)

Fold 1/5


Downloading: "https://download.pytorch.org/models/efficientnet_b0_rwightman-7f5810bc.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b0_rwightman-7f5810bc.pth


Fold1 Ep1 train:   0%|          | 0/36 [00:00<?, ?it/s]

Fold1 Ep1 val  :   0%|          | 0/9 [00:00<?, ?it/s]

Fold1 Epoch 1 train_loss=1.023888 val_loss=1.025035 val_weightedR2=0.214395
  New best val weighted R2: 0.21439499116608995


Fold1 Ep2 train:   0%|          | 0/36 [00:00<?, ?it/s]

Fold1 Ep2 val  :   0%|          | 0/9 [00:00<?, ?it/s]

Fold1 Epoch 2 train_loss=0.974706 val_loss=0.944565 val_weightedR2=0.288503
  New best val weighted R2: 0.2885027206845012


Fold1 Ep3 train:   0%|          | 0/36 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7c3b385b4680>
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1618, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1601, in _shutdown_workers
    if w.is_alive():
       ^^^^^^^^^^^^
  File "/usr/lib/python3.11/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7c3b385b4680>
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1618, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 16

Fold1 Ep3 val  :   0%|          | 0/9 [00:00<?, ?it/s]

Fold1 Epoch 3 train_loss=0.945181 val_loss=1.386487 val_weightedR2=-0.002510


Fold1 Ep4 train:   0%|          | 0/36 [00:00<?, ?it/s]

Fold1 Ep4 val  :   0%|          | 0/9 [00:00<?, ?it/s]

Fold1 Epoch 4 train_loss=0.927253 val_loss=1.020526 val_weightedR2=0.268576


Fold1 Ep5 train:   0%|          | 0/36 [00:00<?, ?it/s]

Fold1 Ep5 val  :   0%|          | 0/9 [00:00<?, ?it/s]

Fold1 Epoch 5 train_loss=0.886528 val_loss=0.980767 val_weightedR2=0.167192
Early stopping triggered at epoch 5. Best val R2=0.288503
Fold 1: restored best model state (val weighted R2=0.288503)


Fold1 Test infer:   0%|          | 0/1 [00:00<?, ?it/s]

Fold 2/5


Exception ignored in: <function _ConnectionBase.__del__ at 0x7c3bfe18a520>
Traceback (most recent call last):
  File "/usr/lib/python3.11/multiprocessing/connection.py", line 133, in __del__
    self._close()
  File "/usr/lib/python3.11/multiprocessing/connection.py", line 377, in _close
    _close(self._handle)
OSError: [Errno 9] Bad file descriptor
Downloading: "https://download.pytorch.org/models/efficientnet_b0_rwightman-7f5810bc.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b0_rwightman-7f5810bc.pth


Fold2 Ep1 train:   0%|          | 0/36 [00:00<?, ?it/s]

Fold2 Ep1 val  :   0%|          | 0/9 [00:00<?, ?it/s]

Fold2 Epoch 1 train_loss=0.985592 val_loss=0.862655 val_weightedR2=0.258092
  New best val weighted R2: 0.25809186901632686


Fold2 Ep2 train:   0%|          | 0/36 [00:00<?, ?it/s]

Fold2 Ep2 val  :   0%|          | 0/9 [00:00<?, ?it/s]

Fold2 Epoch 2 train_loss=0.950157 val_loss=0.929086 val_weightedR2=0.162085


Fold2 Ep3 train:   0%|          | 0/36 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7c3b385b4680>
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1618, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1601, in _shutdown_workers
    if w.is_alive():
       ^^^^^^^^^^^^
  File "/usr/lib/python3.11/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7c3b385b4680>
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1618, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 16

Fold2 Ep3 val  :   0%|          | 0/9 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7c3b385b4680>
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1618, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1601, in _shutdown_workers
    if w.is_alive():
       ^^^^^^^^^^^^
  File "/usr/lib/python3.11/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7c3b385b4680>
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1618, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 16

Fold2 Epoch 3 train_loss=0.905080 val_loss=0.859065 val_weightedR2=0.256404


Fold2 Ep4 train:   0%|          | 0/36 [00:00<?, ?it/s]

Fold2 Ep4 val  :   0%|          | 0/9 [00:00<?, ?it/s]

Fold2 Epoch 4 train_loss=0.849161 val_loss=1.020490 val_weightedR2=0.181033
Early stopping triggered at epoch 4. Best val R2=0.258092
Fold 2: restored best model state (val weighted R2=0.258092)


Fold2 Test infer:   0%|          | 0/1 [00:00<?, ?it/s]

Fold 3/5


Exception in thread QueueFeederThread:
Traceback (most recent call last):
  File "/usr/lib/python3.11/multiprocessing/queues.py", line 239, in _feed
    reader_close()
  File "/usr/lib/python3.11/multiprocessing/connection.py", line 178, in close
    self._close()
  File "/usr/lib/python3.11/multiprocessing/connection.py", line 377, in _close
    _close(self._handle)
OSError: [Errno 9] Bad file descriptor

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/usr/lib/python3.11/threading.py", line 1045, in _bootstrap_inner
    self.run()
  File "/usr/lib/python3.11/threading.py", line 982, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/lib/python3.11/multiprocessing/queues.py", line 271, in _feed
    queue_sem.release()
ValueError: semaphore or lock released too many times
Downloading: "https://download.pytorch.org/models/efficientnet_b0_rwightman-7f5810bc.pth" to /root/.cache/torch/hub/checkpoints/efficientn

Fold3 Ep1 train:   0%|          | 0/36 [00:00<?, ?it/s]

Fold3 Ep1 val  :   0%|          | 0/9 [00:00<?, ?it/s]

Fold3 Epoch 1 train_loss=1.018731 val_loss=1.056792 val_weightedR2=0.228152
  New best val weighted R2: 0.22815213728791928


Fold3 Ep2 train:   0%|          | 0/36 [00:00<?, ?it/s]

Fold3 Ep2 val  :   0%|          | 0/9 [00:00<?, ?it/s]

Fold3 Epoch 2 train_loss=0.976059 val_loss=1.120992 val_weightedR2=0.113667


Fold3 Ep3 train:   0%|          | 0/36 [00:00<?, ?it/s]

Fold3 Ep3 val  :   0%|          | 0/9 [00:00<?, ?it/s]

Fold3 Epoch 3 train_loss=0.949414 val_loss=1.025527 val_weightedR2=0.166437


Fold3 Ep4 train:   0%|          | 0/36 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7c3b385b4680>
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1618, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1601, in _shutdown_workers
    if w.is_alive():
       ^^^^^^^^^^^^Exception ignored in: 
<function _MultiProcessingDataLoaderIter.__del__ at 0x7c3b385b4680>  File "/usr/lib/python3.11/multiprocessing/process.py", line 160, in is_alive
    
assert self._parent_pid == os.getpid(), 'can only test a child process'Traceback (most recent call last):

   File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1618, in __del__
         self._shutdown_workers()  
   File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1601, in _shutdown_workers
      if w.is_alive():^^
 ^ ^ ^ ^^ ^ ^ ^^^^^^^^^^^^^^^^

Fold3 Ep4 val  :   0%|          | 0/9 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7c3b385b4680>
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1618, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1601, in _shutdown_workers
    if w.is_alive():
       ^^^^^^^^^^^^
  File "/usr/lib/python3.11/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7c3b385b4680>
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1618, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 16

Fold3 Epoch 4 train_loss=0.905569 val_loss=1.099110 val_weightedR2=0.292503
  New best val weighted R2: 0.2925025050144


Fold3 Ep5 train:   0%|          | 0/36 [00:00<?, ?it/s]

Fold3 Ep5 val  :   0%|          | 0/9 [00:00<?, ?it/s]

Fold3 Epoch 5 train_loss=0.895661 val_loss=1.009096 val_weightedR2=0.316982
  New best val weighted R2: 0.3169817875947626


Fold3 Ep6 train:   0%|          | 0/36 [00:00<?, ?it/s]

Fold3 Ep6 val  :   0%|          | 0/9 [00:00<?, ?it/s]

Fold3 Epoch 6 train_loss=0.884859 val_loss=1.287519 val_weightedR2=0.136360


Fold3 Ep7 train:   0%|          | 0/36 [00:00<?, ?it/s]

Fold3 Ep7 val  :   0%|          | 0/9 [00:00<?, ?it/s]

Fold3 Epoch 7 train_loss=0.835638 val_loss=1.026331 val_weightedR2=0.285172


Fold3 Ep8 train:   0%|          | 0/36 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7c3b385b4680>
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1618, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1601, in _shutdown_workers
    if w.is_alive():
       ^^^^^^^^^^^^
  File "/usr/lib/python3.11/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7c3b385b4680>
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1618, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 16

Fold3 Ep8 val  :   0%|          | 0/9 [00:00<?, ?it/s]

Fold3 Epoch 8 train_loss=0.784980 val_loss=1.204410 val_weightedR2=0.338165
  New best val weighted R2: 0.33816509134978123


Fold3 Ep9 train:   0%|          | 0/36 [00:00<?, ?it/s]

Fold3 Ep9 val  :   0%|          | 0/9 [00:00<?, ?it/s]

Fold3 Epoch 9 train_loss=0.789419 val_loss=1.274890 val_weightedR2=0.310135


Fold3 Ep10 train:   0%|          | 0/36 [00:00<?, ?it/s]

Fold3 Ep10 val  :   0%|          | 0/9 [00:00<?, ?it/s]

Fold3 Epoch 10 train_loss=0.722594 val_loss=0.805267 val_weightedR2=0.382319
  New best val weighted R2: 0.3823193506816166


Fold3 Ep11 train:   0%|          | 0/36 [00:00<?, ?it/s]

Fold3 Ep11 val  :   0%|          | 0/9 [00:00<?, ?it/s]

Fold3 Epoch 11 train_loss=0.686147 val_loss=0.835457 val_weightedR2=0.366681


Fold3 Ep12 train:   0%|          | 0/36 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7c3b385b4680>
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1618, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1601, in _shutdown_workers
    if w.is_alive():
       ^^^^^^^^^^^^
  File "/usr/lib/python3.11/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7c3b385b4680>
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1618, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 16

Fold3 Ep12 val  :   0%|          | 0/9 [00:00<?, ?it/s]

Fold3 Epoch 12 train_loss=0.673358 val_loss=0.824196 val_weightedR2=0.418603
  New best val weighted R2: 0.4186034759632904


Fold3 Ep13 train:   0%|          | 0/36 [00:00<?, ?it/s]

Fold3 Ep13 val  :   0%|          | 0/9 [00:00<?, ?it/s]

Fold3 Epoch 13 train_loss=0.659196 val_loss=0.735876 val_weightedR2=0.451142
  New best val weighted R2: 0.45114237757877673


Fold3 Ep14 train:   0%|          | 0/36 [00:00<?, ?it/s]

Fold3 Ep14 val  :   0%|          | 0/9 [00:00<?, ?it/s]

Fold3 Epoch 14 train_loss=0.647968 val_loss=0.916104 val_weightedR2=0.315408


Fold3 Ep15 train:   0%|          | 0/36 [00:00<?, ?it/s]

Fold3 Ep15 val  :   0%|          | 0/9 [00:00<?, ?it/s]

Fold3 Epoch 15 train_loss=0.651512 val_loss=1.134554 val_weightedR2=0.408581


Fold3 Ep16 train:   0%|          | 0/36 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7c3b385b4680>
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1618, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1601, in _shutdown_workers
    if w.is_alive():
       ^^^^^^^^^^^^
  File "/usr/lib/python3.11/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7c3b385b4680>
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1618, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 16

Fold3 Ep16 val  :   0%|          | 0/9 [00:00<?, ?it/s]

Fold3 Epoch 16 train_loss=0.644725 val_loss=0.662871 val_weightedR2=0.515582
  New best val weighted R2: 0.5155816774865564


Fold3 Ep17 train:   0%|          | 0/36 [00:00<?, ?it/s]

Fold3 Ep17 val  :   0%|          | 0/9 [00:00<?, ?it/s]

Fold3 Epoch 17 train_loss=0.586922 val_loss=0.739998 val_weightedR2=0.491526


Fold3 Ep18 train:   0%|          | 0/36 [00:00<?, ?it/s]

Fold3 Ep18 val  :   0%|          | 0/9 [00:00<?, ?it/s]

Fold3 Epoch 18 train_loss=0.609702 val_loss=0.688395 val_weightedR2=0.539442
  New best val weighted R2: 0.5394422122517268


Fold3 Ep19 train:   0%|          | 0/36 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7c3b385b4680>
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1618, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1601, in _shutdown_workers
    if w.is_alive():
       ^^^^^^^^^^^^
  File "/usr/lib/python3.11/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7c3b385b4680>
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1618, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 16

Fold3 Ep19 val  :   0%|          | 0/9 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7c3b385b4680>
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1618, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1601, in _shutdown_workers
    if w.is_alive():
       ^^^^^^^^^^^^
  File "/usr/lib/python3.11/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7c3b385b4680>
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1618, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 16

Fold3 Epoch 19 train_loss=0.594049 val_loss=0.710019 val_weightedR2=0.570376
  New best val weighted R2: 0.5703761778708976


Fold3 Ep20 train:   0%|          | 0/36 [00:00<?, ?it/s]

Fold3 Ep20 val  :   0%|          | 0/9 [00:00<?, ?it/s]

Fold3 Epoch 20 train_loss=0.585435 val_loss=0.593466 val_weightedR2=0.537549


Fold3 Ep21 train:   0%|          | 0/36 [00:00<?, ?it/s]

Fold3 Ep21 val  :   0%|          | 0/9 [00:00<?, ?it/s]

Fold3 Epoch 21 train_loss=0.579177 val_loss=0.577141 val_weightedR2=0.565039


Fold3 Ep22 train:   0%|          | 0/36 [00:00<?, ?it/s]

Fold3 Ep22 val  :   0%|          | 0/9 [00:00<?, ?it/s]

Fold3 Epoch 22 train_loss=0.570168 val_loss=0.669004 val_weightedR2=0.542126
Early stopping triggered at epoch 22. Best val R2=0.570376
Fold 3: restored best model state (val weighted R2=0.570376)


Fold3 Test infer:   0%|          | 0/1 [00:00<?, ?it/s]

Fold 4/5


Downloading: "https://download.pytorch.org/models/efficientnet_b0_rwightman-7f5810bc.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b0_rwightman-7f5810bc.pth


Fold4 Ep1 train:   0%|          | 0/36 [00:00<?, ?it/s]

Fold4 Ep1 val  :   0%|          | 0/9 [00:00<?, ?it/s]

Fold4 Epoch 1 train_loss=1.015136 val_loss=1.007265 val_weightedR2=0.235854
  New best val weighted R2: 0.23585397116129403


Fold4 Ep2 train:   0%|          | 0/36 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7c3b385b4680>
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1618, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1601, in _shutdown_workers
    if w.is_alive():
       ^^^^^^^^^^^^
  File "/usr/lib/python3.11/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
: AssertionErrorcan only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7c3b385b4680>
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1618, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 16

Fold4 Ep2 val  :   0%|          | 0/9 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7c3b385b4680>
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1618, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1601, in _shutdown_workers
    if w.is_alive():
       ^^^^^^^^^^^^
  File "/usr/lib/python3.11/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7c3b385b4680>
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1618, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 16

Fold4 Epoch 2 train_loss=0.979093 val_loss=0.896617 val_weightedR2=0.321592
  New best val weighted R2: 0.3215917020771455


Fold4 Ep3 train:   0%|          | 0/36 [00:00<?, ?it/s]

Fold4 Ep3 val  :   0%|          | 0/9 [00:00<?, ?it/s]

Fold4 Epoch 3 train_loss=0.965189 val_loss=0.830362 val_weightedR2=0.452236
  New best val weighted R2: 0.45223561705932636


Fold4 Ep4 train:   0%|          | 0/36 [00:00<?, ?it/s]

Fold4 Ep4 val  :   0%|          | 0/9 [00:00<?, ?it/s]

Fold4 Epoch 4 train_loss=0.886148 val_loss=1.092974 val_weightedR2=0.325231


Fold4 Ep5 train:   0%|          | 0/36 [00:00<?, ?it/s]

Fold4 Ep5 val  :   0%|          | 0/9 [00:00<?, ?it/s]

Fold4 Epoch 5 train_loss=0.883847 val_loss=0.740181 val_weightedR2=0.437524


Fold4 Ep6 train:   0%|          | 0/36 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7c3b385b4680>
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1618, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1601, in _shutdown_workers
    if w.is_alive():
       ^^^^^^^^^^^^
  File "/usr/lib/python3.11/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7c3b385b4680>
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1618, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 16

Fold4 Ep6 val  :   0%|          | 0/9 [00:00<?, ?it/s]

Fold4 Epoch 6 train_loss=0.852884 val_loss=0.984293 val_weightedR2=0.449546
Early stopping triggered at epoch 6. Best val R2=0.452236
Fold 4: restored best model state (val weighted R2=0.452236)


Fold4 Test infer:   0%|          | 0/1 [00:00<?, ?it/s]

Fold 5/5


Downloading: "https://download.pytorch.org/models/efficientnet_b0_rwightman-7f5810bc.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b0_rwightman-7f5810bc.pth
Exception ignored in: <function _ConnectionBase.__del__ at 0x7c3bfe18a520>
Traceback (most recent call last):
  File "/usr/lib/python3.11/multiprocessing/connection.py", line 133, in __del__
    self._close()
  File "/usr/lib/python3.11/multiprocessing/connection.py", line 377, in _close
    _close(self._handle)
OSError: [Errno 9] Bad file descriptor


Fold5 Ep1 train:   0%|          | 0/36 [00:00<?, ?it/s]

Fold5 Ep1 val  :   0%|          | 0/9 [00:00<?, ?it/s]

Fold5 Epoch 1 train_loss=0.994269 val_loss=1.241229 val_weightedR2=0.220089
  New best val weighted R2: 0.22008925899626164


Fold5 Ep2 train:   0%|          | 0/36 [00:00<?, ?it/s]

Fold5 Ep2 val  :   0%|          | 0/9 [00:00<?, ?it/s]

Fold5 Epoch 2 train_loss=0.941794 val_loss=1.190242 val_weightedR2=0.269727
  New best val weighted R2: 0.2697268738698039


Fold5 Ep3 train:   0%|          | 0/36 [00:00<?, ?it/s]

Fold5 Ep3 val  :   0%|          | 0/9 [00:00<?, ?it/s]

Fold5 Epoch 3 train_loss=0.900700 val_loss=1.132849 val_weightedR2=0.340359
  New best val weighted R2: 0.34035926362497726


Fold5 Ep4 train:   0%|          | 0/36 [00:00<?, ?it/s]

Fold5 Ep4 val  :   0%|          | 0/9 [00:00<?, ?it/s]

Fold5 Epoch 4 train_loss=0.882317 val_loss=1.130721 val_weightedR2=0.331816


Fold5 Ep5 train:   0%|          | 0/36 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7c3b385b4680>
Traceback (most recent call last):
Exception ignored in:   File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1618, in __del__
<function _MultiProcessingDataLoaderIter.__del__ at 0x7c3b385b4680>    
self._shutdown_workers()Traceback (most recent call last):

  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1618, in __del__
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1601, in _shutdown_workers
        if w.is_alive():self._shutdown_workers()

   File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1601, in _shutdown_workers
       if w.is_alive(): 
    ^^  ^ ^ ^^ ^^^^^^^^^^^^^^^^
^^  File "/usr/lib/python3.11/multiprocessing/process.py", line 160, in is_alive

      File "/usr/lib/python3.11/multiprocessing/process.py", line 160, in is_alive
assert self._par

Fold5 Ep5 val  :   0%|          | 0/9 [00:00<?, ?it/s]

Fold5 Epoch 5 train_loss=0.868024 val_loss=1.128291 val_weightedR2=0.340946
  New best val weighted R2: 0.3409456179282718


Fold5 Ep6 train:   0%|          | 0/36 [00:00<?, ?it/s]

Fold5 Ep6 val  :   0%|          | 0/9 [00:00<?, ?it/s]

Fold5 Epoch 6 train_loss=0.841518 val_loss=1.132989 val_weightedR2=0.319805


Fold5 Ep7 train:   0%|          | 0/36 [00:00<?, ?it/s]

Fold5 Ep7 val  :   0%|          | 0/9 [00:00<?, ?it/s]

Fold5 Epoch 7 train_loss=0.827049 val_loss=1.028094 val_weightedR2=0.397351
  New best val weighted R2: 0.39735082471373484


Fold5 Ep8 train:   0%|          | 0/36 [00:00<?, ?it/s]

Fold5 Ep8 val  :   0%|          | 0/9 [00:00<?, ?it/s]

Fold5 Epoch 8 train_loss=0.816766 val_loss=1.030658 val_weightedR2=0.403765
  New best val weighted R2: 0.40376536064676594


Fold5 Ep9 train:   0%|          | 0/36 [00:00<?, ?it/s]

Fold5 Ep9 val  :   0%|          | 0/9 [00:00<?, ?it/s]

Fold5 Epoch 9 train_loss=0.805273 val_loss=0.973881 val_weightedR2=0.419818
  New best val weighted R2: 0.41981802368702625


Fold5 Ep10 train:   0%|          | 0/36 [00:00<?, ?it/s]

Fold5 Ep10 val  :   0%|          | 0/9 [00:00<?, ?it/s]

Fold5 Epoch 10 train_loss=0.748167 val_loss=0.982325 val_weightedR2=0.448888
  New best val weighted R2: 0.4488875097561529


Fold5 Ep11 train:   0%|          | 0/36 [00:00<?, ?it/s]

Fold5 Ep11 val  :   0%|          | 0/9 [00:00<?, ?it/s]

Fold5 Epoch 11 train_loss=0.699872 val_loss=0.992154 val_weightedR2=0.388803


Fold5 Ep12 train:   0%|          | 0/36 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7c3b385b4680>
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1618, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1601, in _shutdown_workers
    if w.is_alive():
       ^^^^^^^^^^^^
  File "/usr/lib/python3.11/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
           ^^^^^^^^^^^^Exception ignored in: ^^<function _MultiProcessingDataLoaderIter.__del__ at 0x7c3b385b4680>^
^Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1618, in __del__
^^    ^self._shutdown_workers()
^  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1601, in _shutdown_workers
^    ^if w.is_alive():^
^  ^  ^ ^

Fold5 Ep12 val  :   0%|          | 0/9 [00:00<?, ?it/s]

Fold5 Epoch 12 train_loss=0.673589 val_loss=0.940620 val_weightedR2=0.417771


Fold5 Ep13 train:   0%|          | 0/36 [00:00<?, ?it/s]

Fold5 Ep13 val  :   0%|          | 0/9 [00:00<?, ?it/s]

Fold5 Epoch 13 train_loss=0.676683 val_loss=0.854388 val_weightedR2=0.476462
  New best val weighted R2: 0.476462363728811


Fold5 Ep14 train:   0%|          | 0/36 [00:00<?, ?it/s]

Fold5 Ep14 val  :   0%|          | 0/9 [00:00<?, ?it/s]

Fold5 Epoch 14 train_loss=0.664783 val_loss=0.818425 val_weightedR2=0.491289
  New best val weighted R2: 0.4912890255691871


Fold5 Ep15 train:   0%|          | 0/36 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7c3b385b4680>
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1618, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1601, in _shutdown_workers
    if w.is_alive():
       ^^^^^^^^^^^^
  File "/usr/lib/python3.11/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7c3b385b4680>
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1618, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 16

Fold5 Ep15 val  :   0%|          | 0/9 [00:00<?, ?it/s]

Fold5 Epoch 15 train_loss=0.643028 val_loss=0.895650 val_weightedR2=0.495565
  New best val weighted R2: 0.4955648643191819


Fold5 Ep16 train:   0%|          | 0/36 [00:00<?, ?it/s]

Fold5 Ep16 val  :   0%|          | 0/9 [00:00<?, ?it/s]

Fold5 Epoch 16 train_loss=0.623557 val_loss=0.915567 val_weightedR2=0.371574


Fold5 Ep17 train:   0%|          | 0/36 [00:00<?, ?it/s]

Fold5 Ep17 val  :   0%|          | 0/9 [00:00<?, ?it/s]

Fold5 Epoch 17 train_loss=0.611919 val_loss=0.711610 val_weightedR2=0.591335
  New best val weighted R2: 0.5913351841884185


Fold5 Ep18 train:   0%|          | 0/36 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7c3b385b4680>
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1618, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1601, in _shutdown_workers
    if w.is_alive():
       ^^^^^^^^^^^^
  File "/usr/lib/python3.11/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7c3b385b4680>
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1618, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 16

Fold5 Ep18 val  :   0%|          | 0/9 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7c3b385b4680>
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1618, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1601, in _shutdown_workers
    if w.is_alive():
       ^^^^^^^^^^^^
  File "/usr/lib/python3.11/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7c3b385b4680>
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 1618, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/dataloader.py", line 16

Fold5 Epoch 18 train_loss=0.599180 val_loss=0.929541 val_weightedR2=0.488444


Fold5 Ep19 train:   0%|          | 0/36 [00:00<?, ?it/s]

Fold5 Ep19 val  :   0%|          | 0/9 [00:00<?, ?it/s]

Fold5 Epoch 19 train_loss=0.600268 val_loss=0.750303 val_weightedR2=0.569493


Fold5 Ep20 train:   0%|          | 0/36 [00:00<?, ?it/s]

Fold5 Ep20 val  :   0%|          | 0/9 [00:00<?, ?it/s]

Fold5 Epoch 20 train_loss=0.578301 val_loss=0.924167 val_weightedR2=0.383932
Early stopping triggered at epoch 20. Best val R2=0.591335
Fold 5: restored best model state (val weighted R2=0.591335)


Fold5 Test infer:   0%|          | 0/1 [00:00<?, ?it/s]

Submission

In [14]:
# Build the long-format submission: one record per (test image, target name)
# pair, with sample_id formatted as "<image_file_stem>__<target_name>".
# reset_index() gives a fresh 0-based index, so the iterrows() label `i` is a
# row position; it is used to select the matching entry of test_preds_avg
# (presumably ordered like test_images -- confirm against the inference cell).
rows = [
    {"sample_id": f"{stem}__{target_name}", "target": float(pred)}
    for i, image_row in test_images.reset_index().iterrows()
    # Single-element loop binds the stem once per image, before predictions are read.
    for stem in [image_row["image_file_stem"]]
    for target_name, pred in zip(TARGET_NAMES, test_preds_avg[i])
]
submission = pd.DataFrame(rows)
submission.to_csv("./submission.csv", index=False)
print("Wrote submission.csv with", len(submission), "rows")


Wrote submission.csv with 5 rows


AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA