# Height_Ave_cm PyTorch Lightning CNN Regressor

In [None]:
# Install the package that provides the `lightning.pytorch` modules imported below.
!pip install lightning

In [None]:
import os
import random
import numpy as np
import pandas as pd
from tqdm import tqdm
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import random_split
from torch.utils.data import DataLoader, Dataset, Subset
from torch.utils.data import random_split, SubsetRandomSampler
from torchvision import datasets, transforms, models 
from torchvision.datasets import ImageFolder
from torchvision.transforms import ToTensor
from torchvision.utils import make_grid

from lightning.pytorch import LightningDataModule
from lightning.pytorch import LightningModule
from lightning.pytorch import Trainer
import lightning.pytorch as L
print(L.__version__)

import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from PIL import Image

In [None]:
# Preprocessing pipeline applied to every image:
#   1. scale so the shorter side is 224 px,
#   2. cut out the central 224x224 region,
#   3. convert the PIL image to a tensor,
#   4. normalise each channel with the given mean / standard deviation.
transform = transforms.Compose(
    [
        transforms.Resize(size=224),
        transforms.CenterCrop(size=224),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

In [None]:
# Load the training table and build per-image lookups keyed by bare file name.
train = pd.read_csv('/kaggle/input/csiro-biomass/train.csv')

# Keep only the text after the last '/' of each image path.
filenames = train['image_path'].apply(
    lambda image_path: image_path.rpartition('/')[2]
)

height_values = train['Height_Ave_cm']
ndvi_values = train['Pre_GSHH_NDVI']

# When a file name occurs in several rows, the value from the last row wins.
height_mapping = {name: height for name, height in zip(filenames, height_values)}
ndvi_mapping = {name: ndvi for name, ndvi in zip(filenames, ndvi_values)}

In [None]:
# Dump the whole file-name -> Height_Ave_cm lookup for a visual check.
print(height_mapping)

In [None]:
dir0 = '/kaggle/input/csiro-biomass/train'

# Collect (image path, height label) pairs for every labelled file under dir0.
# Files that have no entry in height_mapping (or whose entry is NaN) are
# skipped: a None label cannot be collated into a batch and a NaN label would
# turn the MSE loss into NaN.
path_label = []
skipped_files = 0
# The per-directory list is named image_names so it does not overwrite the
# `filenames` Series built from train.csv.
for dirname, _, image_names in os.walk(dir0):
    for filename in image_names:
        height = height_mapping.get(filename)
        if height is None or pd.isna(height):
            skipped_files += 1
            continue
        path = os.path.join(dirname, filename)
        path_label.append((path, height))

if skipped_files:
    print(f"Skipped {skipped_files} file(s) without a usable Height_Ave_cm label")

# Uncomment to work on a random subset of 1000 samples.
#path_label = random.sample(path_label,1000)
print(path_label[0:3])

In [None]:
class CustomDataset(torch.utils.data.Dataset):
    """Map-style dataset over a sequence of ``(image path, label)`` pairs.

    Args:
        path_label: Indexable collection of ``(path, label)`` tuples.
        transform: Optional callable applied to the loaded RGB image before
            it is returned.
    """

    def __init__(self, path_label, transform=None):
        self.path_label = path_label
        self.transform = transform

    def __len__(self):
        """Return the number of ``(path, label)`` pairs."""
        return len(self.path_label)

    def __getitem__(self, idx):
        """Load the image at position ``idx`` and return ``(image, label)``.

        The image is opened from disk, converted to RGB and, if a transform
        was supplied, passed through it. The label is returned unchanged.
        """
        path, label = self.path_label[idx]

        # convert() reads the pixel data and returns a new image object, so
        # the underlying file can be closed as soon as the `with` block ends
        # instead of staying open until garbage collection.
        with Image.open(path) as source:
            img = source.convert('RGB')

        if self.transform is not None:
            img = self.transform(img)

        return img, label

In [None]:
class DataModule(LightningDataModule):
    """Lightning data module serving a fixed 80/20 train/test split.

    Samples come from ``path_label`` (wrapped in ``CustomDataset``) when it is
    given, otherwise from an ``ImageFolder`` rooted at ``root_dir``. The first
    80% of the samples, in dataset order, form the training subset and the
    remainder the test subset.

    Args:
        path_label: Optional collection of ``(path, label)`` pairs.
        root_dir: Optional image-folder root, used when ``path_label`` is None.
        batch_size: Batch size used by every dataloader.
    """

    def __init__(self, path_label=None, root_dir=None, batch_size=32):
        super().__init__()
        self.path_label, self.root_dir = path_label, root_dir
        self.batch_size = batch_size

        # Preprocessing: shorter side to 224 px, central 224x224 crop,
        # tensor conversion, per-channel normalisation.
        pipeline = [
            transforms.Resize(224),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ]
        self.transform = transforms.Compose(pipeline)

        # Filled in by setup().
        self.train_dataset = self.test_dataset = None

    def setup(self, stage=None):
        """Build the full dataset and cut it into train/test subsets.

        Raises:
            ValueError: If neither ``path_label`` nor ``root_dir`` was given.
        """
        if self.path_label is None and self.root_dir is None:
            raise ValueError("Either path_label or root_dir must be provided")

        # path_label takes precedence over root_dir when both are supplied.
        if self.path_label is not None:
            full_set = CustomDataset(self.path_label, self.transform)
        else:
            full_set = datasets.ImageFolder(root=self.root_dir, transform=self.transform)

        total = len(full_set)
        boundary = int(0.8 * total)

        self.train_dataset = Subset(full_set, range(boundary))
        self.test_dataset = Subset(full_set, range(boundary, total))

    def train_dataloader(self):
        """Return a shuffling loader over the training subset."""
        return DataLoader(
            self.train_dataset,
            batch_size=self.batch_size,
            shuffle=True,
        )

    def test_dataloader(self):
        """Return a loader over the test subset."""
        return DataLoader(self.test_dataset, batch_size=self.batch_size)

    def predict_dataloader(self):
        """Return a loader over the test subset for prediction."""
        return DataLoader(self.test_dataset, batch_size=self.batch_size)

    def __len__(self):
        """Return the size of the first available subset, or 0 before setup()."""
        for subset in (self.train_dataset, self.test_dataset):
            if subset is not None:
                return len(subset)
        return 0


## ConvolutionalRegressor

In [None]:
class ConvolutionalRegressor(LightningModule):
    """Small CNN that regresses one scalar per input image.

    The network is two 3x3 convolutions, each followed by ReLU and 2x2 max
    pooling, then four fully connected layers ending in a single output.
    ``fc1`` expects ``16 * 54 * 54`` features, which is what the convolutional
    stack produces for 3x224x224 inputs.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 3, 1)
        self.conv2 = nn.Conv2d(6, 16, 3, 1)
        self.fc1 = nn.Linear(16 * 54 * 54, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 20)
        self.fc4 = nn.Linear(20, 1)  # Regression: output 1 value

    def forward(self, X):
        """Return one prediction per sample, shape ``[batch_size]``."""
        X = F.max_pool2d(F.relu(self.conv1(X)), 2, 2)
        X = F.max_pool2d(F.relu(self.conv2(X)), 2, 2)
        # Flatten everything except the batch axis. Unlike
        # view(-1, 16 * 54 * 54), this can never regroup features across
        # samples: an unexpected input size fails loudly in fc1 instead.
        X = torch.flatten(X, start_dim=1)
        X = F.relu(self.fc1(X))
        X = F.relu(self.fc2(X))
        X = F.relu(self.fc3(X))
        X = self.fc4(X)  # Output a continuous value
        return X.squeeze(1)  # Output shape: [batch_size]

    def configure_optimizers(self):
        """Return an Adam optimizer over all parameters (lr=0.001)."""
        return torch.optim.Adam(self.parameters(), lr=0.001)

    def _mse_step(self, batch, log_name):
        """Compute the MSE loss for ``batch`` and log it under ``log_name``."""
        X, y = batch
        # Labels are cast to float32 to match the dtype of the predictions.
        loss = F.mse_loss(self(X), y.float())
        self.log(log_name, loss)
        return loss

    def training_step(self, batch, batch_idx):
        """Log ``train_loss`` and return it for backpropagation."""
        return self._mse_step(batch, "train_loss")

    def validation_step(self, batch, batch_idx):
        """Log ``val_loss`` for a validation batch."""
        self._mse_step(batch, "val_loss")

    def test_step(self, batch, batch_idx):
        """Log ``test_loss`` for a test batch."""
        self._mse_step(batch, "test_loss")


In [None]:
# Wrap the collected (path, label) pairs in a data module and build its
# train/test subsets up front.
if __name__ == '__main__':
    datamodule = DataModule(path_label=path_label)
    datamodule.setup()

In [None]:
    # Create a fresh regressor and train it on the data module for at most
    # 1000 epochs.
    model = ConvolutionalRegressor()
    trainer = L.Trainer(max_epochs=1000)
    trainer.fit(model, datamodule)

In [None]:
    # Rebuild the subsets, then run the test loop over the held-out test
    # subset; test_step logs the result as "test_loss".
    datamodule.setup(stage='test')
    test_loader = datamodule.test_dataloader()
    trainer.test(dataloaders=test_loader)

In [None]:
# Pull a single batch from the training loader and stop.
for images, labels in datamodule.train_dataloader():
    break

# Tile the batch into one image, 16 pictures per row.
im = make_grid(images, nrow=16)

# First figure: the grid exactly as it is fed to the network (still normalised).
plt.figure(figsize=(12, 12))
plt.imshow(im.permute(1, 2, 0).numpy())  # channels-first -> channels-last

# Undo the normalisation: Normalize(-mean/std, 1/std) maps x to x * std + mean.
inv_normalize = transforms.Normalize(
    mean=[-0.485 / 0.229, -0.456 / 0.224, -0.406 / 0.225],
    std=[1 / 0.229, 1 / 0.224, 1 / 0.225],
)
im = inv_normalize(im)

# Second figure: the same grid with the normalisation undone.
plt.figure(figsize=(12, 12))
plt.imshow(im.permute(1, 2, 0).numpy())

In [None]:
device = torch.device("cpu")   #"cuda:0"

# Put the model on the same device the batches are moved to, so switching
# `device` above does not cause a device mismatch in the forward pass.
model.to(device)
model.eval()

# Gather ground-truth labels and model predictions over the test subset.
y_true = []
y_pred = []
with torch.no_grad():
    for test_images, test_labels in datamodule.test_dataloader():
        pred = model(test_images.to(device))
        # tolist() yields the same Python floats as calling .item() per element.
        y_true.extend(test_labels.tolist())
        y_pred.extend(pred.tolist())

In [None]:
# Smallest and largest prediction collected from the test subset.
min(y_pred),max(y_pred)

In [None]:
import matplotlib.pyplot as plt

# Scatter the predictions against the ground-truth labels collected from the
# test subset. The title names the actual regression target, Height_Ave_cm.
plt.figure(figsize=(6, 6))
plt.scatter(y_true, y_pred, alpha=0.5, color='blue')
plt.xlabel("True Values")
plt.ylabel("Predicted Values")
plt.title("Scatter Plot of True vs Predicted (Height_Ave_cm)")
plt.grid(True)
plt.show()