If best_fusion_multi.pth is available, no need to train the NN again, just run cells 1-2 and 12-14 to get the results.

1. Install and imports

In [2]:
# If your environment already has these, you can skip installs.
#!pip install torch torchvision pandas scikit-learn tqdm --quiet

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms
import pandas as pd
import numpy as np
from PIL import Image
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.model_selection import train_test_split, KFold
from datetime import datetime
from tqdm import tqdm
import pathlib
import os


2. Configuration (paths, target list, hyperparams)

In [None]:
# --- Paths ---
KAGGLE_INPUT_DIR = "" # "/kaggle/input/csiro-biomass/"
KAGGLE_OUTPUT_DIR = "" #"/kaggle/working/"
TRAIN_CSV = KAGGLE_INPUT_DIR+"train.csv"
TEST_CSV = KAGGLE_INPUT_DIR+"test.csv"
TRAIN_IMG_ROOT = pathlib.Path(KAGGLE_INPUT_DIR+"train")  # not prepended if CSV has full/relative paths; we'll handle both
TEST_IMG_ROOT = pathlib.Path(KAGGLE_INPUT_DIR+"test")
SAMPLE_SUBMISSION = KAGGLE_OUTPUT_DIR+"sample_submission.csv"  # optional template; if missing we'll build from test.csv

# --- Targets (expected target_name values) ---
TARGET_NAMES = [
    "Dry_Clover_g",
    "Dry_Dead_g",
    "Dry_Green_g",
    "Dry_Total_g",
    "GDM_g"
]

# --- Training hyperparameters (tweak as needed) ---
IMG_SIZE = 224
BATCH_SIZE = 32
LR = 1e-4
EPOCHS = 1
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
WORKERS = 1
PIN_MEMORY = False
N_SPLITS = 5
KFOLD_RANDOM_STATE = 42
EARLY_STOP_PATIENCE = 5
print("Device:", DEVICE)


Device: cpu


3. Load & pivot train.csv into one row per sample (with tabular features + multi-target vector)

In [4]:
train_raw = pd.read_csv(TRAIN_CSV)
train_raw.head()

Unnamed: 0,sample_id,image_path,Sampling_Date,State,Species,Pre_GSHH_NDVI,Height_Ave_cm,target_name,target
0,ID1011485656__Dry_Clover_g,train/ID1011485656.jpg,2015/9/4,Tas,Ryegrass_Clover,0.62,4.6667,Dry_Clover_g,0.0
1,ID1011485656__Dry_Dead_g,train/ID1011485656.jpg,2015/9/4,Tas,Ryegrass_Clover,0.62,4.6667,Dry_Dead_g,31.9984
2,ID1011485656__Dry_Green_g,train/ID1011485656.jpg,2015/9/4,Tas,Ryegrass_Clover,0.62,4.6667,Dry_Green_g,16.2751
3,ID1011485656__Dry_Total_g,train/ID1011485656.jpg,2015/9/4,Tas,Ryegrass_Clover,0.62,4.6667,Dry_Total_g,48.2735
4,ID1011485656__GDM_g,train/ID1011485656.jpg,2015/9/4,Tas,Ryegrass_Clover,0.62,4.6667,GDM_g,16.275


In [5]:
# Ensure image_path is string
train_raw["image_path"] = train_raw["image_path"].astype(str)

# We'll build a per-sample table:
# - take the first image_path found for each sample_id (CSV may duplicate)
# - gather tabular features from the first row for that sample (Sampling_Date, State, Species, Pre_GSHH_NDVI, Height_Ave_cm)
# - pivot target values into columns (one column per target_name)

# pivot targets
targets_pivot = train_raw.pivot_table(
    index="sample_id",
    columns="target_name",
    values="target",
    aggfunc="first"
)

# Get one row per sample for other columns (image path and metadata) by taking first occurrence
meta_cols = ["image_path", "Sampling_Date", "State", "Species", "Pre_GSHH_NDVI", "Height_Ave_cm"]
meta = train_raw.groupby("sample_id").first()[meta_cols]

# join
train_samples = meta.join(targets_pivot)
train_samples = train_samples.reset_index()
print("Number of unique samples:", len(train_samples))
train_samples.head()


Number of unique samples: 1785


Unnamed: 0,sample_id,image_path,Sampling_Date,State,Species,Pre_GSHH_NDVI,Height_Ave_cm,Dry_Clover_g,Dry_Dead_g,Dry_Green_g,Dry_Total_g,GDM_g
0,ID1011485656__Dry_Clover_g,train/ID1011485656.jpg,2015/9/4,Tas,Ryegrass_Clover,0.62,4.6667,0.0,,,,
1,ID1011485656__Dry_Dead_g,train/ID1011485656.jpg,2015/9/4,Tas,Ryegrass_Clover,0.62,4.6667,,31.9984,,,
2,ID1011485656__Dry_Green_g,train/ID1011485656.jpg,2015/9/4,Tas,Ryegrass_Clover,0.62,4.6667,,,16.2751,,
3,ID1011485656__Dry_Total_g,train/ID1011485656.jpg,2015/9/4,Tas,Ryegrass_Clover,0.62,4.6667,,,,48.2735,
4,ID1011485656__GDM_g,train/ID1011485656.jpg,2015/9/4,Tas,Ryegrass_Clover,0.62,4.6667,,,,,16.275


4. Preprocess tabular features (date cyclic, one-hot, scaling)

In [6]:
# Date cyclic encoding (day of year -> sin/cos)
def add_date_features(df, date_col="Sampling_Date"):
    # parse dates safely:
    dates = pd.to_datetime(df[date_col], errors="coerce")
    dayofyear = dates.dt.dayofyear.fillna(1).astype(int)  # default to 1 if missing
    df["date_sin"] = np.sin(2 * np.pi * dayofyear / 365.25)
    df["date_cos"] = np.cos(2 * np.pi * dayofyear / 365.25)
    return df

train_samples = add_date_features(train_samples)

# Fit OneHotEncoder on State and Species (train only)
cat_cols = ["State", "Species"]
enc = OneHotEncoder(sparse_output=False, handle_unknown="ignore")
enc.fit(train_samples[cat_cols].fillna(""))

# Numeric features to scale
num_cols = ["Pre_GSHH_NDVI", "Height_Ave_cm"]
scaler = StandardScaler()
scaler.fit(train_samples[num_cols].fillna(0).values)

# Build tabular matrix for train samples (order matters)
def build_tabular_matrix(df, enc, scaler):
    # date sin/cos
    date_part = df[["date_sin", "date_cos"]].values.astype(float)
    # numeric scaled
    num_part = df[num_cols].fillna(0).values.astype(float)
    num_part_scaled = scaler.transform(num_part)
    # categorical one-hot (enc handles unknowns if present)
    cat_part = enc.transform(df[cat_cols].fillna(""))
    # final concat
    return np.hstack([date_part, num_part_scaled, cat_part]).astype(np.float32)

tabular_all = build_tabular_matrix(train_samples, enc, scaler)
print("Tabular shape:", tabular_all.shape)


Tabular shape: (1785, 23)


5. Build multi-target arrays and mask (for missing targets)

In [7]:
# Create target matrix with ordering given by TARGET_NAMES
def build_targets_and_mask(df, target_names):
    n = len(df)
    m = len(target_names)
    targets = np.zeros((n, m), dtype=np.float32)
    mask = np.zeros((n, m), dtype=np.float32)  # 1 if target exists, 0 if missing
    for i, row in df.iterrows():
        for j, tname in enumerate(target_names):
            val = row.get(tname, np.nan)
            if pd.notna(val):
                targets[i, j] = float(val)
                mask[i, j] = 1.0
    return targets, mask

targets_all, mask_all = build_targets_and_mask(train_samples, TARGET_NAMES)
print("Targets shape:", targets_all.shape, "Mask shape:", mask_all.shape)
# quick check counts
print("Observed target counts per output:", mask_all.sum(axis=0))


Targets shape: (1785, 5) Mask shape: (1785, 5)
Observed target counts per output: [357. 357. 357. 357. 357.]


6. Train / Validation split (split by sample)

In [8]:
# split samples (we'll split by index/sample not per-row)
train_idx, val_idx = train_test_split(np.arange(len(train_samples)), test_size=0.2, random_state=42)

# prepare arrays
X_tab_train = tabular_all[train_idx]
X_tab_val = tabular_all[val_idx]
y_train = targets_all[train_idx]
y_val = targets_all[val_idx]
mask_train = mask_all[train_idx]
mask_val = mask_all[val_idx]
df_train = train_samples.iloc[train_idx].reset_index(drop=True)
df_val = train_samples.iloc[val_idx].reset_index(drop=True)

print("Train samples:", len(df_train), "Val samples:", len(df_val))


Train samples: 1428 Val samples: 357


7. Dataset classes (train/val & test) — robust path handling

In [9]:
class MultiTargetPastureDataset(Dataset):
    def __init__(self, df, tabular_array, targets_array=None, mask_array=None, transform=None, img_root=None):
        """
        df: DataFrame with sample rows (must include image_path)
        tabular_array: numpy array aligned with df rows
        targets_array: numpy array (N, M) or None for test
        mask_array: numpy array (N, M) same shape as targets (1 where present)
        transform: image transform
        img_root: pathlib.Path root folder to prepend IF image_path is relative missing folder
        """
        self.df = df.reset_index(drop=True)
        self.tab = torch.tensor(tabular_array, dtype=torch.float32)
        self.targets = torch.tensor(targets_array, dtype=torch.float32) if targets_array is not None else None
        self.mask = torch.tensor(mask_array, dtype=torch.float32) if mask_array is not None else None
        self.transform = transform
        self.img_root = pathlib.Path(img_root) if img_root is not None else None

    def __len__(self):
        return len(self.df)

    def _resolve_path(self, raw_path):
        # raw_path may already include train/ or be just filename.
        p = pathlib.Path(raw_path)
        if p.exists():
            return p
        # try with img_root prepended if provided
        if self.img_root is not None:
            p2 = self.img_root / p
            if p2.exists():
                return p2
        # try filename only in img_root
        if self.img_root is not None and "/" in str(raw_path):
            # try last part
            last = pathlib.Path(raw_path).name
            p3 = self.img_root / last
            if p3.exists():
                return p3
        # fallback: return original path (will raise later)
        return p

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img_path = self._resolve_path(row["image_path"])
        if not img_path.exists():
            raise FileNotFoundError(f"Image not found: {img_path}")
        img = Image.open(str(img_path)).convert("RGB")
        if self.transform:
            img = self.transform(img)
        tab = self.tab[idx]
        if self.targets is not None:
            y = self.targets[idx]
            m = self.mask[idx]
            return img, tab, y, m
        else:
            # test
            return img, tab, row["sample_id"]

In [10]:
class TestPastureDataset(Dataset):
    def __init__(self, df, tabular_array, transform=None, img_root=None):
        self.df = df.reset_index(drop=True)
        self.tab = torch.tensor(tabular_array, dtype=torch.float32)
        self.transform = transform
        self.img_root = pathlib.Path(img_root) if img_root is not None else None

    def __len__(self):
        return len(self.df)

    def _resolve_path(self, raw_path):
        p = pathlib.Path(raw_path)
        if p.exists():
            return p
        if self.img_root is not None:
            p2 = self.img_root / p
            if p2.exists():
                return p2
            last = pathlib.Path(raw_path).name
            p3 = self.img_root / last
            if p3.exists():
                return p3
        return p

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img_path = self._resolve_path(row["image_path"])
        if not img_path.exists():
            raise FileNotFoundError(f"Test image not found: {img_path}")
        img = Image.open(str(img_path)).convert("RGB")
        if self.transform:
            img = self.transform(img)
        tab = self.tab[idx]
        return img, tab, row["sample_id"]


8. Transforms & DataLoaders

In [11]:
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(IMG_SIZE, scale=(0.9, 1.0)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),

    transforms.ColorJitter(
        brightness=0.1,
        contrast=0.1,
        saturation=0.05,
        hue=0.02
    ),

    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

val_transform = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225])
])

train_dataset = MultiTargetPastureDataset(df_train, X_tab_train, y_train, mask_train, transform=train_transform, img_root=TRAIN_IMG_ROOT)
val_dataset   = MultiTargetPastureDataset(df_val,   X_tab_val,   y_val,   mask_val,   transform=val_transform,   img_root=TRAIN_IMG_ROOT)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=WORKERS, pin_memory=PIN_MEMORY)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=WORKERS, pin_memory=PIN_MEMORY)

print("Train batches:", len(train_loader), "Val batches:", len(val_loader))


Train batches: 45 Val batches: 12


9. Model: EfficientNet backbone + tabular MLP + fusion head (outputs 5 values)

In [12]:
class FusionMultiOutputModel(nn.Module):
    def __init__(self, tabular_dim, num_outputs=len(TARGET_NAMES), backbone_name="efficientnet_b0", fusion_hidden=128):
        super().__init__()
        if backbone_name == "efficientnet_b0":
            # torchvision EfficientNetB0
            self.backbone = models.efficientnet_b0(weights=None)
            # remove classifier
            self.backbone.classifier = nn.Identity()
            image_feat_dim = 1280
        elif backbone_name == "resnet18":
            self.backbone = models.resnet18(weights=None)  # train from scratch
            self.backbone.fc = nn.Identity()  # remove classifier
            image_feat_dim = 512

        # tabular MLP
        self.tab_mlp = nn.Sequential(
            nn.Linear(tabular_dim, max(32, tabular_dim*2)),
            nn.ReLU(),
            nn.BatchNorm1d(max(32, tabular_dim*2)),
            nn.Linear(max(32, tabular_dim*2), 64),
            nn.ReLU()
        )

        # fusion head
        self.fusion = nn.Sequential(
            nn.Linear(image_feat_dim + 64, fusion_hidden),
            nn.ReLU(),
            nn.BatchNorm1d(fusion_hidden),
            nn.Dropout(0.4), # changed to a stronger dropout
            nn.Linear(fusion_hidden, fusion_hidden//2),
            nn.ReLU(),
            nn.Linear(fusion_hidden//2, num_outputs)  # multi-output
        )

    def forward(self, img, tab):
        img_feat = self.backbone(img)
        tab_feat = self.tab_mlp(tab)
        x = torch.cat([img_feat, tab_feat], dim=1)
        out = self.fusion(x)
        return out

# instantiate
tab_dim = X_tab_train.shape[1]
model = FusionMultiOutputModel(tabular_dim=tab_dim).to(DEVICE)


10. Loss that respects mask (MAE only where mask==1) + optimizer

In [13]:
def masked_mae_loss(preds, targets, mask):
    """
    preds: (B, M)
    targets: (B, M)
    mask: (B, M)  -> 1 where target exists, 0 where missing
    returns average MAE over observed entries
    """
    diff = torch.abs(preds - targets) * mask
    # sum diffs and divide by number of observed elements (avoid zero divide)
    denom = mask.sum()
    if denom.item() == 0:
        return torch.tensor(0.0, device=preds.device)
    return diff.sum() / denom

optimizer = torch.optim.AdamW(model.parameters(), lr=LR, weight_decay=1e-5)


11. Training & validation loop (saves best model)

In [None]:
best_fold_models = []  # store best model per fold
kf = KFold(n_splits=N_SPLITS, shuffle=True, random_state=KFOLD_RANDOM_STATE)
all_fold_val_maes = []

                "best_fold_models = []  # will store best model (single split)",
                "all_fold_val_maes = []",
                "",
                "# Use a single random train/val split instead of KFold",
                "idxs = np.arange(len(train_samples))",
                "train_idx, val_idx = train_test_split(idxs, test_size=1.0/N_SPLITS, random_state=KFOLD_RANDOM_STATE)",
                "print(f\"Single split: train={len(train_idx)} val={len(val_idx)}\")",
                "",
                "# prepare arrays (same naming as fold version to minimize downstream changes)",
                "X_tab_train_fold = tabular_all[train_idx]",
                "X_tab_val_fold = tabular_all[val_idx]",
                "y_train_fold = targets_all[train_idx]",
                "y_val_fold = targets_all[val_idx]",
                "mask_train_fold = mask_all[train_idx]",
                "mask_val_fold = mask_all[val_idx]",
                "",
                "df_train_fold = train_samples.iloc[train_idx].reset_index(drop=True)",
                "df_val_fold = train_samples.iloc[val_idx].reset_index(drop=True)",
                "",
                "# datasets and loaders",
                "train_dataset = MultiTargetPastureDataset(df_train_fold, X_tab_train_fold, y_train_fold, mask_train_fold,",
                "                                          transform=train_transform, img_root=TRAIN_IMG_ROOT)",
                "val_dataset   = MultiTargetPastureDataset(df_val_fold, X_tab_val_fold, y_val_fold, mask_val_fold,",
                "                                          transform=val_transform, img_root=TRAIN_IMG_ROOT)",
                "",
                "train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=WORKERS, pin_memory=PIN_MEMORY)",
                "val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=WORKERS, pin_memory=PIN_MEMORY)",
                "",
                "# instantiate model",
                "model_fold = FusionMultiOutputModel(tabular_dim=tab_dim).to(DEVICE)",
                "optimizer = torch.optim.AdamW(model_fold.parameters(), lr=LR, weight_decay=1e-5)",
                "",
                "best_val = float(\"inf\")",
                "best_state_dict = None",
                "",
                "for epoch in range(1, EPOCHS+1):",
                "    # --- train ---",
                "    model_fold.train()",
                "    train_loss = 0.0",
                "    seen = 0",
                "    for imgs, tabs, ys, masks in tqdm(train_loader, desc=f\"Epoch {epoch} train\"):",
                "        imgs = imgs.to(DEVICE)",
                "        tabs = tabs.to(DEVICE)",
                "        ys = ys.to(DEVICE)",
                "        masks = masks.to(DEVICE)",
                "",
                "        preds = model_fold(imgs, tabs)",
                "        loss = masked_mae_loss(preds, ys, masks)",
                "",
                "        optimizer.zero_grad()",
                "        loss.backward()",
                "        optimizer.step()",
                "",
                "        batch_n = imgs.size(0)",
                "        train_loss += loss.item() * batch_n",
                "        seen += batch_n",
                "",
                "    train_loss_epoch = train_loss / max(seen,1)",
                "",
                "    # --- validate ---",
                "    model_fold.eval()",
                "    val_loss = 0.0",
                "    seen_val = 0",
                "    with torch.no_grad():",
                "        for imgs, tabs, ys, masks in tqdm(val_loader, desc=f\"Epoch {epoch} val\"):",
                "            imgs = imgs.to(DEVICE)",
                "            tabs = tabs.to(DEVICE)",
                "            ys = ys.to(DEVICE)",
                "            masks = masks.to(DEVICE)",
                "",
                "            preds = model_fold(imgs, tabs)",
                "            loss = masked_mae_loss(preds, ys, masks)",
                "            batch_n = imgs.size(0)",
                "            val_loss += loss.item() * batch_n",
                "            seen_val += batch_n",
                "",
                "    val_loss_epoch = val_loss / max(seen_val,1)",
                "    print(f\"Epoch {epoch}: Train MAE={train_loss_epoch:.4f}  Val MAE={val_loss_epoch:.4f}\")",
                "",
                "    if val_loss_epoch < best_val:",
                "        best_val = val_loss_epoch",
                "        best_state_dict = model_fold.state_dict()",
                "        epochs_no_improve = 0",
                "        print(f\"New best model! Val MAE={best_val:.4f}\")",
                "    else:",
                "        epochs_no_improve += 1",
                "        if epochs_no_improve >= EARLY_STOP_PATIENCE:",
                "            print(f\"Early stopping at epoch {epoch} (no improvement in {EARLY_STOP_PATIENCE} epochs)\")",
                "            break  ",
                "",
                "all_fold_val_maes.append(best_val)",
                "best_fold_models.append(best_state_dict)",
                "",
                "print(\"\nValidation MAE:\", all_fold_val_maes)",
                "print(\"Mean MAE:\", np.mean(all_fold_val_maes))"
    
    all_fold_val_maes.append(best_val)
    best_fold_models.append(best_state_dict)

print("\nAll fold validation MAEs:", all_fold_val_maes)
print("Mean MAE across folds:", np.mean(all_fold_val_maes))


=== Fold 1/5 ===


Fold 1 Epoch 1 train:   0%|          | 0/45 [00:00<?, ?it/s]

12. Prepare test samples (build per-sample table) and tabular zeros fallback

In [None]:
test_raw = pd.read_csv(TEST_CSV)
test_raw["image_path"] = test_raw["image_path"].astype(str)

# Test has one row per sample_id x target_name (like the sample submission)
# We need unique sample rows (one image per sample_id)
test_samples = test_raw.groupby("sample_id").first().reset_index()[["sample_id", "image_path"]]

# Build tabular for test: test CSV lacks metadata, so we create zeros for tabular inputs
# But we must match the encoder's feature length: date(2) + num(2) + cat(len)
date_placeholders = np.zeros((len(test_samples), 2), dtype=np.float32)
num_placeholders = np.zeros((len(test_samples), len(num_cols)), dtype=np.float32)
# scaled num => zeros (same transform)
num_scaled_test = (num_placeholders - scaler.mean_) / scaler.scale_
# categorical => zeros (no categories known)
cat_dim = sum(len(cats) for cats in enc.categories_)
cat_zeros = np.zeros((len(test_samples), cat_dim), dtype=np.float32)

tabular_test = np.hstack([date_placeholders, num_scaled_test, cat_zeros]).astype(np.float32)
print("Test tabular shape:", tabular_test.shape)
test_samples.head()


Test tabular shape: (10, 23)


Unnamed: 0,sample_id,image_path
0,ID1001187975__Dry_Clover_g,test/ID1001187975.jpg
1,ID1001187975__Dry_Dead_g,test/ID1001187975.jpg
2,ID1001187975__Dry_Green_g,test/ID1001187975.jpg
3,ID1001187975__Dry_Total_g,test/ID1001187975.jpg
4,ID1001187975__GDM_g,test/ID1001187975.jpg


13. Test dataloader & inference (produce predictions per sample)

In [None]:
test_dataset = TestPastureDataset(test_samples, tabular_test, transform=val_transform, img_root=TEST_IMG_ROOT)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=WORKERS, pin_memory=PIN_MEMORY)

# Initialize predictions storage: sample_id -> list of predicted vectors
pred_map_folds = {sid: [] for sid in test_samples["sample_id"].values}

with torch.no_grad():
    for fold_idx, best_state_dict in enumerate(best_fold_models, 1):
        print(f"Running inference with fold {fold_idx} best model")
        model.load_state_dict(best_state_dict)
        model.eval()
        
        for imgs, tabs, sample_ids in tqdm(test_loader, desc=f"Fold {fold_idx} inference"):
            imgs = imgs.to(DEVICE)
            tabs = tabs.to(DEVICE)
            preds = model(imgs, tabs)  # (B, M)
            preds_np = preds.cpu().numpy()  # (B, M)
            
            for sid, pred_vals in zip(sample_ids, preds_np):
                pred_map_folds[sid].append(pred_vals)

# Average predictions across folds
pred_map_avg = {sid: np.mean(pred_list, axis=0) for sid, pred_list in pred_map_folds.items()}


Loaded best model from memory


Test infer: 100%|██████████| 1/1 [00:00<00:00,  1.11it/s]


10

In [None]:
submission_df = pd.DataFrame({
    "sample_id": list(pred_map_avg.keys()),
    "target": list(pred_map_avg.values())
})

submission_df["target"] = submission_df["target"].clip(lower=0).round(2)
submission_df.to_csv(KAGGLE_OUTPUT_DIR+"submission.csv", index=False)

print("Saved submission.csv with", len(submission_df), "rows")
submission_df


OSError: Cannot save file into a non-existent directory: '\kaggle\working'