In [24]:
import json
from pathlib import Path

import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.utils.data import random_split
from torchvision import datasets, transforms

In [10]:
class TomatoData(Dataset):
    """Tabular dataset backed by a CSV file.

    The file is read with ``pandas.read_csv``. Every column except the last
    becomes a ``float32`` feature tensor and the last column becomes an
    ``int64`` label tensor, so the features must be numeric and the labels
    must be integer-valued for the conversion to succeed.

    Attributes:
        data: The DataFrame exactly as returned by ``pd.read_csv``.
        features: ``float32`` tensor built from all columns but the last.
        labels: ``int64`` tensor built from the last column.
    """

    def __init__(self, data_path):
        """Load ``data_path`` and materialise the feature/label tensors.

        Args:
            data_path: Anything ``pd.read_csv`` accepts as its first argument.
        """
        frame = pd.read_csv(data_path)
        feature_values = frame.iloc[:, :-1].values
        label_values = frame.iloc[:, -1].values

        self.data = frame
        self.features = torch.tensor(feature_values, dtype=torch.float32)
        self.labels = torch.tensor(label_values, dtype=torch.long)

    def __len__(self):
        """Return the number of rows (one label per row)."""
        return len(self.labels)

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        sample = self.features[index]
        target = self.labels[index]
        return sample, target

In [17]:
# Per-image preprocessing. The steps run in the listed order every time a
# sample is read from the dataset, so the two random steps produce a fresh
# augmentation on each access.
transform = transforms.Compose([
    # Bring every image to the same 224x224 spatial size.
    transforms.Resize(size=(224, 224)),
    # Convert to a tensor; the steps below therefore operate on tensors.
    transforms.ToTensor(),
    # Random augmentation: horizontal flip (library-default probability) ...
    transforms.RandomHorizontalFlip(),
    # ... and a random rotation with degrees=30.
    transforms.RandomRotation(degrees=30),
    # Per-channel standardisation. NOTE(review): these look like the usual
    # ImageNet channel statistics — confirm they match the intended backbone.
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])

# Image dataset rooted at ../data; `transform` is applied to every sample.
tomato_dataset = datasets.ImageFolder("../data", transform=transform)

In [20]:
# Fixed seed for the partition: without it random_split draws from the global
# RNG, so every kernel restart yields a different train/validation split (and
# different index files when they are saved afterwards).
SPLIT_SEED = 42

# 80% of the samples go to training. The validation size is computed as the
# remainder so the two sizes always sum to len(tomato_dataset), even when
# int() truncates.
train_size = int(0.8 * len(tomato_dataset))
val_size = len(tomato_dataset) - train_size

# NOTE(review): both subsets are views onto tomato_dataset and therefore share
# its transform, including the random flip/rotation — validation samples are
# augmented too. Confirm whether that is intended.
train_set, val_set = random_split(
    tomato_dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

In [23]:
# Training batches: 32 samples each, reshuffled every epoch.
train_dataloader = DataLoader(dataset=train_set, batch_size=32, shuffle=True)

# Validation batches: same batch size, fixed order.
val_dataloader = DataLoader(dataset=val_set, batch_size=32, shuffle=False)

In [25]:
# Positions (into the full dataset) of the samples assigned to each subset.
train_indices = train_set.indices
val_indices = val_set.indices

In [27]:
# Persist the split so it can be reloaded outside this notebook.
# The output directory is created first: open(..., "w") does not create
# missing parent directories and would raise FileNotFoundError on a fresh
# checkout where ../preprocessed does not exist yet.
output_dir = Path("../preprocessed")
output_dir.mkdir(parents=True, exist_ok=True)

for file_name, indices in (
    ("train_indices.json", train_indices),
    ("val_indices.json", val_indices),
):
    # Explicit encoding keeps the files identical across platforms.
    with open(output_dir / file_name, "w", encoding="utf-8") as f:
        json.dump(indices, f)