## Modality branch

In [19]:
import torch
import torch.nn as nn
from torchvision import models, transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import pandas as pd
import os

class ModalityBranch(nn.Module):
    def __init__(self, input_channels=3, name='modality'):
        super(ModalityBranch, self).__init__()
        self.name = name
        # Use the updated method to load pretrained weights
        self.resnet = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)
        if input_channels != 3:
            self.resnet.conv1 = nn.Conv2d(input_channels, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.resnet = nn.Sequential(*list(self.resnet.children())[:-1])
        self.fc = nn.Linear(2048, 1024)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.resnet(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        x = self.relu(x)
        return x

class MultiModalModel(nn.Module):
    def __init__(self, rgb_branch, depth_branch, thermal_branch):
        super(MultiModalModel, self).__init__()
        self.rgb_branch = rgb_branch
        self.depth_branch = depth_branch
        self.thermal_branch = thermal_branch
        self.fusion = lambda x: torch.max(torch.stack(x), dim=0)[0] # Max pooling
        # self.fusion = lambda x: torch.cat(x, dim=1)  # Concatenation instead of max pooling

        self.regressor = nn.Linear(1024, 1)

    def forward(self, rgb, depth, thermal):
        rgb_features = self.rgb_branch(rgb)
        depth_features = self.depth_branch(depth)
        thermal_features = self.thermal_branch(thermal)
        fused_features = self.fusion([rgb_features, depth_features, thermal_features])
        output = self.regressor(fused_features)
        return output

class LeafCountingDataset(Dataset):
    def __init__(self, csv_file, rgb_dir, depth_dir, thermal_dir, transform=None):
        self.data = pd.read_csv(csv_file, dtype={0: str})

        self.rgb_dir = rgb_dir
        self.depth_dir = depth_dir
        self.thermal_dir = thermal_dir
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        try:
            img_name = self.data.iloc[idx, 0].strip()
            leaf_count = self.data.iloc[idx, 1]

            rgb_path = os.path.join(self.rgb_dir, img_name + '.png')
            depth_path = os.path.join(self.depth_dir, img_name + '_coldepth.png')
            thermal_path = os.path.join(self.thermal_dir, img_name + '_coldepth.png' )

            rgb_image = Image.open(rgb_path).convert('RGB')
            depth_image = Image.open(depth_path).convert('L')
            thermal_image = Image.open(thermal_path).convert('L')
            
            # depth_image = depth_image.convert('RGB')
            # thermal_image = thermal_image.convert('RGB')

            if self.transform:
                rgb_image = self.transform(rgb_image)  # Apply transformations for 3-channel
             # For depth and thermal, normalize them separately since they are single-channel images
                depth_image = transforms.functional.to_tensor(depth_image)  # Convert to tensor (single-channel)
                depth_image = transforms.functional.normalize(depth_image, mean=[0.5], std=[0.5])  # Normalize
                thermal_image = transforms.functional.to_tensor(thermal_image)  # Convert to tensor (single-channel)
                thermal_image = transforms.functional.normalize(thermal_image, mean=[0.5], std=[0.5])  # Normalize

            return rgb_image, depth_image, thermal_image, torch.tensor(leaf_count, dtype=torch.float)
        except Exception as e:
            print(f"Error loading image {img_name}: {str(e)}")
            return None

# Data transformations
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Create dataset and dataloader
dataset = LeafCountingDataset(
    csv_file= r"C:\Users\praph\Downloads\Bottlegourd_week3.csv.txt",
    rgb_dir= r"C:\Users\praph\Downloads\week3_cropped_bottlegourd_RGB",
    depth_dir= r"C:\Users\praph\Downloads\week3_cropped_coldepth",
    thermal_dir= r"C:\Users\praph\Downloads\week3_cropped_coldepth",
    transform=transform
)

def collate_fn(batch):
    batch = list(filter(lambda x: x is not None, batch))
    return torch.utils.data.dataloader.default_collate(batch)

dataloader = DataLoader(dataset, batch_size=32, shuffle=True, num_workers=0, collate_fn=collate_fn)

# Create model
rgb_branch = ModalityBranch(input_channels=3, name='rgb')
depth_branch = ModalityBranch(input_channels=1, name='depth')
thermal_branch = ModalityBranch(input_channels=1, name='thermal')
model = MultiModalModel(rgb_branch, depth_branch, thermal_branch)

# Loss and optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

# Training loop
num_epochs = 20
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

for epoch in range(num_epochs):
    model.train()
    for rgb, depth, thermal, leaf_count in dataloader:
        rgb, depth, thermal, leaf_count = rgb.to(device), depth.to(device), thermal.to(device), leaf_count.to(device)

        optimizer.zero_grad()
        outputs = model(rgb, depth, thermal)
        loss = criterion(outputs.squeeze(), leaf_count)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

print('Training finished!')

Epoch [1/20], Loss: 40.3606
Epoch [2/20], Loss: 10.4985
Epoch [3/20], Loss: 4.4075
Epoch [4/20], Loss: 3.5835
Epoch [5/20], Loss: 3.0951
Epoch [6/20], Loss: 0.3706
Epoch [7/20], Loss: 7.7322
Epoch [8/20], Loss: 1.3942
Epoch [9/20], Loss: 6.2009
Epoch [10/20], Loss: 1.1194
Epoch [11/20], Loss: 1.0015
Epoch [12/20], Loss: 0.9413
Epoch [13/20], Loss: 0.5829
Epoch [14/20], Loss: 1.5417
Epoch [15/20], Loss: 0.3573
Epoch [16/20], Loss: 1.5965
Epoch [17/20], Loss: 2.8729
Epoch [18/20], Loss: 2.6022
Epoch [19/20], Loss: 1.0276
Epoch [20/20], Loss: 2.9165
Training finished!


## Modality branch only RGB and depth

In [3]:
import torch
import torch.nn as nn
from torchvision import models, transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import pandas as pd
import os
import math

class ModalityBranch(nn.Module):
    def __init__(self, input_channels=3, name='modality'):
        super(ModalityBranch, self).__init__()
        self.name = name
        # Use the updated method to load pretrained weights
        self.resnet = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)
        if input_channels != 3:
            self.resnet.conv1 = nn.Conv2d(input_channels, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.resnet = nn.Sequential(*list(self.resnet.children())[:-1])
        self.fc = nn.Linear(2048, 1024)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.resnet(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        x = self.relu(x)
        return x

class MultiModalModel(nn.Module):
    def __init__(self, rgb_branch, depth_branch):
        super(MultiModalModel, self).__init__()
        self.rgb_branch = rgb_branch
        self.depth_branch = depth_branch
        self.fusion = lambda x: torch.max(torch.stack(x), dim=0)[0]  # Max pooling

        self.regressor = nn.Linear(1024, 1)

    def forward(self, rgb, depth):
        rgb_features = self.rgb_branch(rgb)
        depth_features = self.depth_branch(depth)
        fused_features = self.fusion([rgb_features, depth_features])
        output = self.regressor(fused_features)
        return output

class LeafCountingDataset(Dataset):
    def __init__(self, csv_file, rgb_dir, depth_dir, transform=None):
        self.data = pd.read_csv(csv_file, dtype={0: str})
        self.rgb_dir = rgb_dir
        self.depth_dir = depth_dir
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        try:
            img_name = self.data.iloc[idx, 0].strip()
            leaf_count = self.data.iloc[idx, 1]

            rgb_path = os.path.join(self.rgb_dir, img_name + '.png')
            depth_path = os.path.join(self.depth_dir, img_name + '_coldepth.png')

            rgb_image = Image.open(rgb_path).convert('RGB')
            depth_image = Image.open(depth_path).convert('L')

            if self.transform:
                rgb_image = self.transform(rgb_image)  # Apply transformations for RGB (3-channel)
                depth_image = transforms.functional.to_tensor(depth_image)  # Convert to tensor (single-channel)
                depth_image = transforms.functional.normalize(depth_image, mean=[0.5], std=[0.5])  # Normalize

            return rgb_image, depth_image, torch.tensor(leaf_count, dtype=torch.float)
        except Exception as e:
            print(f"Error loading image {img_name}: {str(e)}")
            return None

# Data transformations
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Create dataset and dataloader
dataset = LeafCountingDataset(
    csv_file= r"C:\Users\praph\Downloads\Bottlegourd_week3.csv.txt",
    rgb_dir= r"C:\Users\praph\Downloads\week3_cropped_bottlegourd_RGB",
    depth_dir= r"C:\Users\praph\Downloads\week3_cropped_coldepth",
    transform=transform
)

def collate_fn(batch):
    batch = list(filter(lambda x: x is not None, batch))
    return torch.utils.data.dataloader.default_collate(batch)

dataloader = DataLoader(dataset, batch_size=32, shuffle=True, num_workers=0, collate_fn=collate_fn)

# Create model
rgb_branch = ModalityBranch(input_channels=3, name='rgb')
depth_branch = ModalityBranch(input_channels=1, name='depth')
model = MultiModalModel(rgb_branch, depth_branch)

# Loss function
criterion = nn.MSELoss()

# Function to calculate MSE and RMSE
def calculate_metrics(true, predicted):
    mse = criterion(predicted, true).item()
    rmse = math.sqrt(mse)
    return mse, rmse

# Training function
def train_model(model, dataloader, learning_rate, num_epochs=20):
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    
    for epoch in range(num_epochs):
        model.train()
        epoch_loss = 0.0
        mse_epoch = 0.0
        rmse_epoch = 0.0
        count = 0
        
        for rgb, depth, leaf_count in dataloader:
            rgb, depth, leaf_count = rgb.to(device), depth.to(device), leaf_count.to(device)

            optimizer.zero_grad()
            outputs = model(rgb, depth)
            loss = criterion(outputs.squeeze(), leaf_count)
            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()
            mse, rmse = calculate_metrics(leaf_count, outputs.squeeze())
            mse_epoch += mse
            rmse_epoch += rmse
            count += 1

        avg_loss = epoch_loss / count
        avg_mse = mse_epoch / count
        avg_rmse = rmse_epoch / count
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}, MSE: {avg_mse:.4f}, RMSE: {avg_rmse:.4f}')

# Training for different learning rates
learning_rates = [0.0001, 0.001, 0.01, 0.1]
num_epochs = 20

for lr in learning_rates:
    print(f"\nTraining with learning rate: {lr}")
    model = MultiModalModel(rgb_branch, depth_branch)  # Reinitialize the model for each run
    train_model(model, dataloader, learning_rate=lr, num_epochs=num_epochs)

print('Training finished for all learning rates!')



Training with learning rate: 0.0001
Epoch [1/20], Loss: 57.9029, MSE: 57.9029, RMSE: 7.6086
Epoch [2/20], Loss: 25.1433, MSE: 25.1433, RMSE: 4.9902
Epoch [3/20], Loss: 11.5045, MSE: 11.5045, RMSE: 3.3912
Epoch [4/20], Loss: 2.0233, MSE: 2.0233, RMSE: 1.4015
Epoch [5/20], Loss: 2.1490, MSE: 2.1490, RMSE: 1.3077
Epoch [6/20], Loss: 1.9933, MSE: 1.9933, RMSE: 1.4118
Epoch [7/20], Loss: 2.2762, MSE: 2.2762, RMSE: 1.4501
Epoch [8/20], Loss: 3.8416, MSE: 3.8416, RMSE: 1.9553
Epoch [9/20], Loss: 3.6058, MSE: 3.6058, RMSE: 1.8504
Epoch [10/20], Loss: 0.8533, MSE: 0.8533, RMSE: 0.8911
Epoch [11/20], Loss: 1.1749, MSE: 1.1749, RMSE: 1.0680
Epoch [12/20], Loss: 0.7717, MSE: 0.7717, RMSE: 0.8731
Epoch [13/20], Loss: 1.0772, MSE: 1.0772, RMSE: 1.0096
Epoch [14/20], Loss: 0.8428, MSE: 0.8428, RMSE: 0.9170
Epoch [15/20], Loss: 0.6264, MSE: 0.6264, RMSE: 0.7834
Epoch [16/20], Loss: 1.0613, MSE: 1.0613, RMSE: 0.9897
Epoch [17/20], Loss: 0.5875, MSE: 0.5875, RMSE: 0.7471
Epoch [18/20], Loss: 0.2179, MS

In [2]:
import torch
import torch.nn as nn
from torchvision import models, transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import pandas as pd
import os

# RGB Branch Only
class RGBBranch(nn.Module):
    def __init__(self, input_channels=3, name='rgb'):
        super(RGBBranch, self).__init__()
        self.name = name
        # Load pretrained resnet50 model
        self.resnet = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)
        if input_channels != 3:
            self.resnet.conv1 = nn.Conv2d(input_channels, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.resnet = nn.Sequential(*list(self.resnet.children())[:-1])
        self.fc = nn.Linear(2048, 1024)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.resnet(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        x = self.relu(x)
        return x

# Modified model to use only RGB branch
class LeafCountingModel(nn.Module):
    def __init__(self, rgb_branch):
        super(LeafCountingModel, self).__init__()
        self.rgb_branch = rgb_branch
        self.regressor = nn.Linear(1024, 1)

    def forward(self, rgb):
        rgb_features = self.rgb_branch(rgb)
        output = self.regressor(rgb_features)
        return output

# Dataset class (only RGB images now)
class LeafCountingDataset(Dataset):
    def __init__(self, csv_file, rgb_dir, transform=None):
        self.data = pd.read_csv(csv_file, dtype={0: str})
        self.rgb_dir = rgb_dir
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        try:
            img_name = self.data.iloc[idx, 0].strip()  # Image name
            leaf_count = self.data.iloc[idx, 1]  # Leaf count

            rgb_path = os.path.join(self.rgb_dir, img_name + '.png')
            rgb_image = Image.open(rgb_path).convert('RGB')

            if self.transform:
                rgb_image = self.transform(rgb_image)  # Apply transformations for RGB

            return rgb_image, torch.tensor(leaf_count, dtype=torch.float)
        except Exception as e:
            print(f"Error loading image {img_name}: {str(e)}")
            return None

# Data transformations
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Create dataset and dataloader
dataset = LeafCountingDataset(
    csv_file= r"C:\Users\praph\Downloads\Bottlegourd_week3.csv.txt",
    rgb_dir= r"C:\Users\praph\Downloads\week3_cropped_bottlegourd_RGB",
    transform=transform
)

def collate_fn(batch):
    batch = list(filter(lambda x: x is not None, batch))
    return torch.utils.data.dataloader.default_collate(batch)

dataloader = DataLoader(dataset, batch_size=32, shuffle=True, num_workers=0, collate_fn=collate_fn)

# Create model (RGB branch only)
rgb_branch = RGBBranch(input_channels=3, name='rgb')
model = LeafCountingModel(rgb_branch)

# Loss and optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

# Training loop
num_epochs = 20
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

for epoch in range(num_epochs):
    model.train()
    for rgb, leaf_count in dataloader:
        rgb, leaf_count = rgb.to(device), leaf_count.to(device)

        optimizer.zero_grad()
        outputs = model(rgb)
        loss = criterion(outputs.squeeze(), leaf_count)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

print('Training finished!')


Epoch [1/20], Loss: 54.3683
Epoch [2/20], Loss: 23.1539
Epoch [3/20], Loss: 20.1985
Epoch [4/20], Loss: 10.7420
Epoch [5/20], Loss: 1.2113
Epoch [6/20], Loss: 0.4133
Epoch [7/20], Loss: 0.2341
Epoch [8/20], Loss: 1.3959
Epoch [9/20], Loss: 9.6384
Epoch [10/20], Loss: 5.6913
Epoch [11/20], Loss: 1.7998
Epoch [12/20], Loss: 1.6266
Epoch [13/20], Loss: 0.3446
Epoch [14/20], Loss: 0.4447
Epoch [15/20], Loss: 1.0181
Epoch [16/20], Loss: 1.5610
Epoch [17/20], Loss: 2.0285
Epoch [18/20], Loss: 0.8952
Epoch [19/20], Loss: 1.0157
Epoch [20/20], Loss: 2.7328
Training finished!
