#PREDICCION DE BIOMASA
## EQUIPO:
FLORES ALVAREZ CHRISTIAN ALEXIS

LOPEZ ROQUE ORLANDO

In [168]:
import os

root = "/content/data/images"

for path, dirs, files in os.walk(root):
    print(path, "->", len(files), "files")



/content/data/images -> 0 files
/content/data/images/test -> 358 files
/content/data/images/__MACOSX -> 1 files
/content/data/images/__MACOSX/test -> 358 files
/content/data/images/.ipynb_checkpoints -> 0 files


In [169]:
import os
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image

import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models


In [170]:
train_df = pd.read_csv("/content/data/train.csv")
test_df = pd.read_csv("/content/data/test.csv")

train_df.head()


Unnamed: 0,sample_id,image_path,Sampling_Date,State,Species,Pre_GSHH_NDVI,Height_Ave_cm,target_name,target
0,ID1011485656__Dry_Clover_g,train/ID1011485656.jpg,2015/9/4,Tas,Ryegrass_Clover,0.62,4.6667,Dry_Clover_g,0.0
1,ID1011485656__Dry_Dead_g,train/ID1011485656.jpg,2015/9/4,Tas,Ryegrass_Clover,0.62,4.6667,Dry_Dead_g,31.9984
2,ID1011485656__Dry_Green_g,train/ID1011485656.jpg,2015/9/4,Tas,Ryegrass_Clover,0.62,4.6667,Dry_Green_g,16.2751
3,ID1011485656__Dry_Total_g,train/ID1011485656.jpg,2015/9/4,Tas,Ryegrass_Clover,0.62,4.6667,Dry_Total_g,48.2735
4,ID1011485656__GDM_g,train/ID1011485656.jpg,2015/9/4,Tas,Ryegrass_Clover,0.62,4.6667,GDM_g,16.275


In [171]:
train_df["image_path"] = "/content/data/images/test/" + train_df["image_path"].str.replace("train/", "")
test_df["image_path"] = "/content/data/images/test/" + test_df["image_path"].str.replace("test/", "")

train_df.head(), test_df.head()


(                    sample_id                                  image_path  \
 0  ID1011485656__Dry_Clover_g  /content/data/images/test/ID1011485656.jpg   
 1    ID1011485656__Dry_Dead_g  /content/data/images/test/ID1011485656.jpg   
 2   ID1011485656__Dry_Green_g  /content/data/images/test/ID1011485656.jpg   
 3   ID1011485656__Dry_Total_g  /content/data/images/test/ID1011485656.jpg   
 4         ID1011485656__GDM_g  /content/data/images/test/ID1011485656.jpg   
 
   Sampling_Date State          Species  Pre_GSHH_NDVI  Height_Ave_cm  \
 0      2015/9/4   Tas  Ryegrass_Clover           0.62         4.6667   
 1      2015/9/4   Tas  Ryegrass_Clover           0.62         4.6667   
 2      2015/9/4   Tas  Ryegrass_Clover           0.62         4.6667   
 3      2015/9/4   Tas  Ryegrass_Clover           0.62         4.6667   
 4      2015/9/4   Tas  Ryegrass_Clover           0.62         4.6667   
 
     target_name   target  
 0  Dry_Clover_g   0.0000  
 1    Dry_Dead_g  31.9984  
 2   D

In [172]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225]),
])



In [173]:
class BiomassDataset(Dataset):
    def __init__(self, df, img_dir, transform=None, train=True):
        self.df = df
        self.img_dir = img_dir
        self.transform = transform
        self.train = train

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]

        img_path = row["image_path"]
        img = Image.open(img_path).convert("RGB")

        if self.transform:
            img = self.transform(img)

        tabular_feats = torch.tensor(
            row[["Pre_GSHH_NDVI", "Height_Ave_cm"]].values.astype(np.float32),
            dtype=torch.float32
        )

        if self.train:
            y = torch.tensor(row["target"], dtype=torch.float32)
            return img, tabular_feats, y

        return img, tabular_feats


In [174]:
# Asegurar que las columnas tabulares son numéricas
for col in ["Pre_GSHH_NDVI", "Height_Ave_cm"]:
    train_df[col] = pd.to_numeric(train_df[col], errors="coerce")
    train_df[col] = train_df[col].fillna(train_df[col].mean())

# Train / val split
train_data, val_data = train_test_split(train_df, test_size=0.2, random_state=42)

img_dir = "/content/data/images/test"

train_dataset = BiomassDataset(train_data, img_dir, transform=transform, train=True)
val_dataset = BiomassDataset(val_data, img_dir, transform=transform, train=True)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)


In [175]:
device = "cuda" if torch.cuda.is_available() else "cpu"

model = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
model.fc = nn.Linear(model.fc.in_features, 1)
model = model.to(device)

criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)


In [176]:
epochs = 5

for epoch in range(epochs):
    model.train()
    train_loss = 0

    for imgs, tabular_feats, targets in train_loader:
        imgs, tabular_feats, targets = imgs.to(device), tabular_feats.to(device), targets.to(device)

        optimizer.zero_grad()

        outputs = model(imgs).squeeze()

        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

    # Validación
    model.eval()
    val_loss = 0
    with torch.no_grad():
        for imgs, tabular_feats, targets in val_loader:
            imgs, tabular_feats, targets = imgs.to(device), tabular_feats.to(device), targets.to(device)

            outputs = model(imgs).squeeze()
            val_loss += criterion(outputs, targets).item()

    print(f"Epoch {epoch+1}/{epochs} | Train Loss: {train_loss/len(train_loader):.4f} | Val Loss: {val_loss/len(val_loader):.4f}")


Epoch 1/5 | Train Loss: 1147.1401 | Val Loss: 1203.9348
Epoch 2/5 | Train Loss: 937.7248 | Val Loss: 972.5845
Epoch 3/5 | Train Loss: 833.0358 | Val Loss: 781.9461
Epoch 4/5 | Train Loss: 751.5292 | Val Loss: 710.8830
Epoch 5/5 | Train Loss: 706.6785 | Val Loss: 745.8051


In [177]:
class TestDataset(Dataset):
    def __init__(self, df, transform=None):
        self.df = df
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        image = Image.open(row["image_path"]).convert("RGB")
        if self.transform:
            image = self.transform(image)
        return image

test_dataset = TestDataset(test_df, transform)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)


In [178]:
model.eval()
predictions = []

with torch.no_grad():
    for imgs in test_loader:
        imgs = imgs.to(device)
        outputs = model(imgs).squeeze()
        predictions.extend(outputs.cpu().numpy())


In [179]:
submission = pd.DataFrame({
    "ID": test_df["sample_id"],
    "biomass": predictions
})

submission.to_csv("submission.csv", index=False)
submission.head()


Unnamed: 0,ID,biomass
0,ID1001187975__Dry_Clover_g,12.991301
1,ID1001187975__Dry_Dead_g,12.991301
2,ID1001187975__Dry_Green_g,12.991301
3,ID1001187975__Dry_Total_g,12.991301
4,ID1001187975__GDM_g,12.991301
