In [42]:
# create a class for dataset loading
import os
import numpy as np
from torch.utils.data import Dataset
import random
import cv2
import pandas as pd

In [43]:
# images_dir = "../dataset/crop_224/"
# labels_dir = "../dataset/AppleV_intrinsic.csv"

# class AppleDataset(Dataset):
#     def __init__(self, images_dir, labels_dir):
#         self.labels = pd.read_csv(labels_dir)
#         self.images = os.listdir(images_dir)

#         # load the images and labels in a list
#         self.img_data = []
#         self.img_labels = []

#         for image in self.images:
#             img_path = os.path.join(images_dir, image)
#             img = cv2.imread(img_path)
#             img = np.transpose(img, [2, 0, 1])
#             img = np.float32(img) / 255.0
#             if img.shape != (3, 224, 224):
#                 print(image)
#                 continue
#             self.img_data.append(img)

#             image = image.split("_")[0]+'.jpg'
#             t = self.labels[self.labels['filenames'] == image][['TSS_brix', 'Firmness', 'Titerabile acid percentage']].values
#             t = t.astype(np.float32).flatten()

#             t = n
#             self.img_labels.append(t)

#     def __len__(self):
#         return len(self.images)

#     def arguement(self, img, rotTimes, vFlip, hFlip):
#         # Random rotation
#         for j in range(rotTimes):
#             img = np.rot90(img.copy(), axes=(1, 2))
#         # Random vertical Flip
#         for j in range(vFlip):
#             img = img[:, :, ::-1].copy()
#         # Random horizontal Flip
#         for j in range(hFlip):
#             img = img[:, ::-1, :].copy()
#         return img

#     def __getitem__(self, idx):
#         image = self.img_data[idx]
#         label = self.img_labels[idx]

#         rotTimes = random.randint(0, 3)
#         vFlip = random.randint(0, 1)
#         hFlip = random.randint(0, 1)
#         image = self.arguement(image, rotTimes, vFlip, hFlip)
        
#         image = image.copy()


#         return image, label

In [44]:
images_dir = "../dataset/crop_224/"
labels_dir = "../dataset/AppleV_intrinsic.csv"

In [45]:
class AppleDataset(Dataset): 
    def __init__(self, images_dir, labels_dir):
        self.labels = pd.read_csv(labels_dir)
        self.images = os.listdir(images_dir)

        # load the images and labels in a list
        self.img_data = []
        self.img_labels = []

        for image in self.images:
            img_path = os.path.join(images_dir, image)
            img = cv2.imread(img_path)
            img = np.transpose(img, [2, 0, 1])
            img = np.float32(img) / 255.0   # convert image to float32 [0,1]
            
            if img.shape != (3, 224, 224):
                print("Skipping:", image)
                continue
            self.img_data.append(img)

            # match labels
            image = image.split("_")[0] + '.jpg'
            t = self.labels[self.labels['filenames'] == image][
                ['TSS_brix', 'Firmness', 'Titerabile acid percentage']
            ].values

            # Convert label to float32 and flatten
            t = t.astype(np.float32).flatten()
            self.img_labels.append(t)

    def __len__(self):
        return len(self.img_data)   # safer than self.images

    def arguement(self, img, rotTimes, vFlip, hFlip):
        # Random rotation
        for j in range(rotTimes):
            img = np.rot90(img.copy(), axes=(1, 2))
        # Random vertical Flip
        for j in range(vFlip):
            img = img[:, :, ::-1].copy()
        # Random horizontal Flip
        for j in range(hFlip):
            img = img[:, ::-1, :].copy()
        return img

    def __getitem__(self, idx):
        image = self.img_data[idx]
        label = self.img_labels[idx]

        rotTimes = random.randint(0, 3)
        vFlip = random.randint(0, 1)
        hFlip = random.randint(0, 1)
        image = self.arguement(image, rotTimes, vFlip, hFlip)
        
        return image.copy(), label


In [46]:
train_dataset = AppleDataset(images_dir, labels_dir)

In [16]:
import torch
import torch.nn as nn
from torchvision import models

class VGG19(nn.Module):
    def __init__(self, input_size=(3, 224, 224), output_size=3, finetune=False):
        super(VGG19, self).__init__()
        
        # Load pre-trained VGG19 model
        model = models.vgg19(weights='IMAGENET1K_V1')

        # Freeze convolutional layers
        for param in model.features.parameters():
            param.requires_grad = not finetune

        # Modify the classifier: to reduce the number of params 
        model.classifier[0] = nn.Linear(25088, 2048)  # Output layer with the desired output size
        model.classifier[3] = nn.Linear(2048, 2048)
        model.classifier[6] = nn.Linear(2048, output_size)
        self.model = model

    def forward(self, x):
        return self.model(x)

In [None]:
import torch
import torch.nn as nn
import torchvision.models as models

class EfficientNet(nn.Module):
    def __init__(self, input_size=(3, 224, 224), output_size=3, finetune=False, variant="b0"):
        super(EfficientNet, self).__init__()

        # Load pretrained EfficientNet
        if variant == "b0":
            model = models.efficientnet_b0(weights="IMAGENET1K_V1")
        elif variant == "b1":
            model = models.efficientnet_b1(weights="IMAGENET1K_V1")
        elif variant == "b2":
            model = models.efficientnet_b2(weights="IMAGENET1K_V1")
        else:
            raise ValueError(f"EfficientNet variant {variant} not supported here.")

        # Freeze backbone if not finetuning
        for param in model.features.parameters():
            param.requires_grad = finetune

        # Replace classifier (EfficientNet has classifier[1] as final layer)
        in_features = model.classifier[1].in_features
        model.classifier = nn.Sequential(
            nn.Linear(in_features, 512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, output_size)
        )

        self.model = model

    def forward(self, x):
        return self.model(x)


In [36]:
import torch
import torch.nn as nn
import torchvision.models as models

class ViT(nn.Module):
    def __init__(self, input_size=(3, 224, 224), output_size=3, finetune=False, variant="b_16"):
        super(ViT, self).__init__()

        # Load pretrained Vision Transformer
        if variant == "b_16":
            model = models.vit_b_16(weights="IMAGENET1K_V1")
        elif variant == "b_32":
            model = models.vit_b_32(weights="IMAGENET1K_V1")
        else:
            raise ValueError(f"ViT variant {variant} not supported here.")

        # Freeze backbone if not finetuning
        for param in model.parameters():
            param.requires_grad = finetune

        # Replace the head (classifier)
        in_features = model.heads.head.in_features
        model.heads.head = nn.Sequential(
            nn.Linear(in_features, 512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, output_size)
        )

        self.model = model

    def forward(self, x):
        return self.model(x)


In [48]:
import torch
import torch.nn as nn
from torchvision.models import swin_t, Swin_T_Weights

class SwinT(nn.Module):
    def __init__(self, input_size=(3, 224, 224), output_size=3, finetune=False):
        super(SwinT, self).__init__()
        
        # Load pre-trained Swin Transformer Tiny model
        model = swin_t(weights=Swin_T_Weights.IMAGENET1K_V1)
        
        # Freeze layers if finetune=False
        for param in model.parameters():
            param.requires_grad = finetune  # True if finetuning

        # Replace classifier head for regression
        in_features = model.head.in_features
        model.head = nn.Linear(in_features, output_size)

        self.model = model

    def forward(self, x):
        return self.model(x)


In [49]:
import torch
import pandas as pd
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Function to calculate metrics
def calculate_metrics(y_true, y_pred):
    y_true = y_true.detach().cpu().numpy()
    y_pred = y_pred.detach().cpu().numpy()
    mse = mean_squared_error(y_true, y_pred)
    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    return mse, mae, r2

def write_to_csv(model_name, num_epochs, train_losses, val_losses, train_metrics, val_metrics, total_params, trainable_params, non_trainable_params):
    # Save results to CSV
    results = {
        "Epoch": list(range(1, num_epochs + 1)),
        "Train Loss": train_losses,
        "Train MSE": train_metrics["MSE"],
        "Train MAE": train_metrics["MAE"],
        "Train R2": train_metrics["R2"],
    }
    
    # Add parameter counts as additional rows (repeated across all rows for clarity)
    results["Total Params"] = [total_params] * num_epochs
    results["Trainable Params"] = [trainable_params] * num_epochs
    results["Non-trainable Params"] = [non_trainable_params] * num_epochs

    results["Val Loss"] = val_losses * num_epochs
    results["Val MSE"] = val_metrics["MSE"] * num_epochs
    results["Val MAE"] = val_metrics["MAE"] * num_epochs
    results["Val R2"] = val_metrics["R2"] * num_epochs

    df = pd.DataFrame(results)
    df.to_csv(f"results/{model_name}.csv", index=False)
    print("Results saved to 'training_results_with_params.csv'")

def calculate_model_parameters(model):
    # Count model parameters
    total_params = sum(p.numel() for p in model.parameters())
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    non_trainable_params = total_params - trainable_params

    print(f"Total parameters: {total_params}")
    print(f"Trainable parameters: {trainable_params}")
    print(f"Non-trainable parameters: {non_trainable_params}")
    return total_params, trainable_params, non_trainable_params


def train_model(model_name, model, train_loader, val_loader, criterion, optimizer, num_epochs=10, device='cuda'):
    model = model.to(device)
    total_params, trainable_params, non_trainable_params = calculate_model_parameters(model)

    # Early stopping parameters
    patience = 5
    best_train_loss = float('inf')
    epochs_no_improve = 0

    # Lists to store metrics
    train_losses = []
    val_losses = []
    train_metrics = {"MSE": [], "MAE": [], "R2": []}
    val_metrics = {"MSE": [], "MAE": [], "R2": []}

    for epoch in range(num_epochs):
        # Training phase
        model.train()
        running_loss = 0.0
        y_true_train, y_pred_train = [], []

        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)

            # Forward pass
            outputs = model(images)

            # Calculate loss
            loss = criterion(outputs, labels)

            # Backward pass and optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Accumulate loss and predictions
            running_loss += loss.item()
            y_true_train.append(labels)
            y_pred_train.append(outputs)
            break

        # Normalize training loss
        train_loss = running_loss / len(train_loader)
        train_losses.append(train_loss)

        # Calculate training metrics
        y_true_train = torch.cat(y_true_train, dim=0)
        y_pred_train = torch.cat(y_pred_train, dim=0)
        mse, mae, r2 = calculate_metrics(y_true_train, y_pred_train)
        train_metrics["MSE"].append(mse)
        train_metrics["MAE"].append(mae)
        train_metrics["R2"].append(r2)

        print(f"Epoch [{epoch+1}/{num_epochs}], Training Loss: {train_loss:.4f}, MSE: {mse:.4f}, MAE: {mae:.4f}, R2: {r2:.4f}")

        # Early stopping logic
        if train_loss < best_train_loss:
            best_train_loss = train_loss
            epochs_no_improve = 0
            # Optionally save the best model
            torch.save(model.state_dict(), "best_model.pth")
        else:
            epochs_no_improve += 1

        if epochs_no_improve == patience:
            print(f"Early stopping triggered. No improvement for {patience} epochs.")

    # Validation phase
    model.eval()
    val_loss = 0.0
    y_true_val, y_pred_val = [], []

    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)

            # Forward pass
            outputs = model(images)

            # Calculate loss
            loss = criterion(outputs, labels)

            # Accumulate validation loss and predictions
            val_loss += loss.item()
            y_true_val.append(labels)
            y_pred_val.append(outputs)
            break

    # Normalize validation loss
    val_loss /= len(val_loader)
    val_losses.append(val_loss)

    # Calculate validation metrics
    y_true_val = torch.cat(y_true_val, dim=0)
    y_pred_val = torch.cat(y_pred_val, dim=0)
    mse, mae, r2 = calculate_metrics(y_true_val, y_pred_val)
    val_metrics["MSE"].append(mse)
    val_metrics["MAE"].append(mae)
    val_metrics["R2"].append(r2)

    print(f"Epoch [{epoch+1}/{num_epochs}], Validation Loss: {val_loss:.4f}, MSE: {mse:.4f}, MAE: {mae:.4f}, R2: {r2:.4f}")

    ## Save the model's state dictionary
    # torch.save(model.state_dict(), f"saved_models/{model_name}.pth")
    # print(f'Model saved at saved_models/{model_name}.pth')

    # write_to_csv(model_name, num_epochs, train_losses, val_losses, train_metrics, val_metrics, total_params, trainable_params, non_trainable_params)

In [50]:
import torch
import torch.nn as nn
from torchvision import transforms
from torch.utils.data import DataLoader
import torch.optim as optim
from torch.utils.data import random_split
import pandas as pd

In [51]:
# train_dataset = AppleDataset(images_dir, labels_dir)
val_size = int(0.2 * len(train_dataset))
train_size = len(train_dataset) - val_size
train_dataset, val_dataset = random_split(train_dataset, [train_size, val_size])
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False, num_workers=2)

In [31]:
# Create ViT model object
model = ViT(input_size=(3, 224, 224), output_size=3, finetune=False)

# Loss function and optimizer
criterion = nn.MSELoss()  # since you're doing regression
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Device
# device = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Train
num_epochs = 100
train_model(model, model, train_loader, val_loader, criterion, optimizer, num_epochs=num_epochs, device=device)


Downloading: "https://download.pytorch.org/models/vit_b_16-c867db91.pth" to /root/.cache/torch/hub/checkpoints/vit_b_16-c867db91.pth


  0%|          | 0.00/330M [00:00<?, ?B/s]

Total parameters: 86193923
Trainable parameters: 395267
Non-trainable parameters: 85798656
Epoch [1/100], Training Loss: 0.0002, MSE: 0.2434, MAE: 0.4274, R2: -7.3954
Epoch [2/100], Training Loss: 0.0001, MSE: 0.1753, MAE: 0.3248, R2: -3.4889
Epoch [3/100], Training Loss: 0.0002, MSE: 0.2331, MAE: 0.4100, R2: -7.9920
Epoch [4/100], Training Loss: 0.0001, MSE: 0.0742, MAE: 0.2002, R2: -2.7736
Epoch [5/100], Training Loss: 0.0001, MSE: 0.0877, MAE: 0.2389, R2: -2.0282
Epoch [6/100], Training Loss: 0.0001, MSE: 0.0978, MAE: 0.2468, R2: -2.8920
Epoch [7/100], Training Loss: 0.0001, MSE: 0.1366, MAE: 0.2986, R2: -6.7691
Epoch [8/100], Training Loss: 0.0001, MSE: 0.1258, MAE: 0.2929, R2: -2.6513
Epoch [9/100], Training Loss: 0.0001, MSE: 0.0665, MAE: 0.2199, R2: -1.5265
Epoch [10/100], Training Loss: 0.0000, MSE: 0.0618, MAE: 0.2095, R2: -0.9019
Epoch [11/100], Training Loss: 0.0000, MSE: 0.0559, MAE: 0.1909, R2: -0.6507
Epoch [12/100], Training Loss: 0.0000, MSE: 0.0525, MAE: 0.1966, R2: -0

In [21]:
model = VGG19(input_size=(3, 224, 224), output_size=3, finetune=False)
# Loss function and optimizer
criterion = nn.MSELoss()  # For regression (use BCEWithLogitsLoss for multi-label classification)
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Assuming you have a Da taLoader ready as `data_loader`
# device = 'cuda:1' if torch.cuda.is_available() else 'cpu'
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

num_epochs = 100
train_model(model, model, train_loader, val_loader, criterion, optimizer, num_epochs=num_epochs, device=device)

Total parameters: 75609155
Trainable parameters: 75609155
Non-trainable parameters: 0
Epoch [1/100], Training Loss: 0.0003, MSE: 0.3351, MAE: 0.5283, R2: -8.9776
Epoch [2/100], Training Loss: 0.4189, MSE: 537.3878, MAE: 22.1737, R2: -37414.7030
Epoch [3/100], Training Loss: 0.0001, MSE: 0.0730, MAE: 0.2142, R2: -1.2208
Epoch [4/100], Training Loss: 0.0005, MSE: 0.6754, MAE: 0.6250, R2: -17.3083
Epoch [5/100], Training Loss: 0.0003, MSE: 0.4095, MAE: 0.5763, R2: -15.2062
Epoch [6/100], Training Loss: 0.0001, MSE: 0.1345, MAE: 0.3063, R2: -2.9594
Epoch [7/100], Training Loss: 0.0379, MSE: 48.6379, MAE: 6.2733, R2: -1937.3174
Epoch [8/100], Training Loss: 0.0099, MSE: 12.7643, MAE: 3.1725, R2: -376.1092
Early stopping triggered. No improvement for 5 epochs.
Epoch [9/100], Training Loss: 0.0002, MSE: 0.2720, MAE: 0.4903, R2: -12.8923
Epoch [10/100], Training Loss: 0.0001, MSE: 0.0653, MAE: 0.2010, R2: -0.8495
Epoch [11/100], Training Loss: 0.0001, MSE: 0.1126, MAE: 0.2744, R2: -3.6720
Epoc

In [53]:
# Create model
model = SwinT(input_size=(3, 224, 224), output_size=3, finetune=False)

# Loss and optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Device
# device = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
num_epochs = 100
train_model(model, model, train_loader, val_loader, criterion, optimizer, num_epochs=num_epochs, device=device)


Total parameters: 27521661
Trainable parameters: 2307
Non-trainable parameters: 27519354
Epoch [1/100], Training Loss: 0.0003, MSE: 0.3593, MAE: 0.4992, R2: -6.5769
Epoch [2/100], Training Loss: 0.0002, MSE: 0.2428, MAE: 0.3940, R2: -6.8238
Epoch [3/100], Training Loss: 0.0002, MSE: 0.1961, MAE: 0.3778, R2: -5.7998
Epoch [4/100], Training Loss: 0.0001, MSE: 0.1440, MAE: 0.2930, R2: -3.1157
Epoch [5/100], Training Loss: 0.0001, MSE: 0.0657, MAE: 0.2092, R2: -2.6806
Epoch [6/100], Training Loss: 0.0001, MSE: 0.1007, MAE: 0.2352, R2: -3.1264
Epoch [7/100], Training Loss: 0.0000, MSE: 0.0556, MAE: 0.1957, R2: -1.7371
Epoch [8/100], Training Loss: 0.0001, MSE: 0.0855, MAE: 0.2210, R2: -4.3238
Epoch [9/100], Training Loss: 0.0000, MSE: 0.0617, MAE: 0.2126, R2: -1.5577
Epoch [10/100], Training Loss: 0.0001, MSE: 0.1508, MAE: 0.2802, R2: -2.5503
Epoch [11/100], Training Loss: 0.0001, MSE: 0.1208, MAE: 0.2630, R2: -5.1414
Epoch [12/100], Training Loss: 0.0001, MSE: 0.1344, MAE: 0.2942, R2: -6.8