In [1]:
import pickle
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from sklearn.model_selection import train_test_split
import cv2
import os

In [2]:
def _load_class_images(folder, label):
    """Read every decodable image in ``folder`` and pair each with ``label``.

    Args:
        folder: directory path (with trailing slash) containing image files.
        label: integer class id assigned to every image from this folder.

    Returns:
        A tuple ``(images, labels)`` of equal-length lists. Images are
        H x W x 3 arrays in RGB channel order.
    """
    images = []
    # sorted() makes the load order independent of the filesystem's listing order.
    for filename in sorted(os.listdir(folder)):
        path = os.path.join(folder, filename)
        img = cv2.imread(path)
        if img is None:
            # cv2.imread reports failure by returning None instead of raising;
            # skip such files here so they do not blow up later in the dataset.
            print(f"skipping unreadable file: {path}")
            continue
        # OpenCV decodes to BGR; the downstream ToPILImage/Normalize steps use
        # RGB-ordered ImageNet statistics, so convert once at load time.
        images.append(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    return images, [label] * len(images)


# Label convention: 0 for images under healthycows/, 1 for images under lumpycows/.
data = []
labels = []
for class_folder, class_label in (
    ("resized_data/healthycows/", 0),
    ("resized_data/lumpycows/", 1),
):
    class_images, class_labels = _load_class_images(class_folder, class_label)
    data.extend(class_images)
    labels.extend(class_labels)

In [3]:
# Fixed seed so that Restart & Run All reproduces exactly the same split.
SEED = 42

# 80 % train / 20 % held out; stratify keeps the class ratio equal in both parts.
X_train, X_test_val, y_train, y_test_val = train_test_split(
    data, labels, test_size=.2, shuffle=True, random_state=SEED, stratify=labels
)
# The held-out part is halved into test and validation (10 % of the data each).
X_test, X_val, y_test, y_val = train_test_split(
    X_test_val, y_test_val, test_size=.5, shuffle=True, random_state=SEED, stratify=y_test_val
)

In [4]:
class LumpyDataset(Dataset):
    """Dataset that returns several transformed copies of each image.

    Each item is a pair ``(images, labels)`` where ``images`` stacks
    ``num_augment`` outputs of ``random_transform`` for one source image and
    ``labels`` repeats that image's label once per copy.

    Note: the random augmentations in the transform pipeline are currently
    commented out, so all copies of one image are identical.
    """

    def __init__(self, images, labels, num_augment: int = 3) -> None:
        """Store the samples and build the transform pipeline.

        Args:
            images: indexable collection of H x W x C image arrays. They are
                passed to ``ToPILImage`` as-is, so the caller is responsible
                for the channel order.
            labels: indexable collection of numeric class labels, aligned
                with ``images``.
            num_augment: number of transformed copies returned per item.

        Raises:
            ValueError: if ``num_augment`` is smaller than 1.
        """
        if num_augment < 1:
            raise ValueError("num_augment must be at least 1")
        self.X = images
        self.y = labels
        self.num_augment = num_augment
        # function for images transformations
        # TODO: try AutoAugment(AutoAugmentPolicy.IMAGENET)
        self.random_transform = transforms.Compose([
            transforms.ToPILImage(),
            # transforms.RandomHorizontalFlip(),
            # transforms.ColorJitter(
            #     brightness=0.3, contrast=0.3),
            transforms.ToTensor(),
            # ImageNet per-channel mean / std
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])

    def __len__(self):
        """Return the number of source images (not the number of copies)."""
        return len(self.X)

    def __getitem__(self, index):
        """Return ``num_augment`` transformed copies of one image with labels.

        Returns:
            A tuple ``(images, labels)``: ``images`` is the stack of the
            transformed copies (leading dimension ``num_augment``) and
            ``labels`` is a float tensor of shape ``(num_augment, 1)``.
        """
        # The transform is applied independently for every copy so that random
        # augmentations, once enabled, produce different results each time.
        augmented_batch = [
            self.random_transform(self.X[index]) for _ in range(self.num_augment)
        ]
        # Single scalar class label stored as a float tensor of shape (1,);
        # this is not a one-hot encoding.
        label = torch.tensor([self.y[index]], dtype=torch.float32)
        # One label per copy, kept in sync with num_augment.
        new_labels = [label] * self.num_augment

        return torch.stack(augmented_batch), torch.stack(new_labels)

In [5]:
# Smoke test: wrap the validation split in a LumpyDataset, iterate it through
# a DataLoader and print the shape of every batch of images and labels.
koten_set = LumpyDataset(images=X_val, labels=y_val)
koten_loader = DataLoader(
    dataset=koten_set,
    batch_size=16,
    shuffle=True,
    drop_last=True,
)
for batch in koten_loader:
    X, y = batch
    print("koten", X.shape, y.shape)

koten torch.Size([16, 3, 3, 225, 225]) torch.Size([16, 3, 1])
koten torch.Size([16, 3, 3, 225, 225]) torch.Size([16, 3, 1])
koten torch.Size([16, 3, 3, 225, 225]) torch.Size([16, 3, 1])
koten torch.Size([16, 3, 3, 225, 225]) torch.Size([16, 3, 1])
koten torch.Size([16, 3, 3, 225, 225]) torch.Size([16, 3, 1])


In [6]:
# Wrap each split (a list of images plus its list of labels) in a LumpyDataset.
train_set, valid_set, test_set = (
    LumpyDataset(split_images, split_labels)
    for split_images, split_labels in (
        (X_train, y_train),
        (X_val, y_val),
        (X_test, y_test),
    )
)

In [7]:
# creating data loaders from datasets
BATCH_SIZE = 8

# Training: reshuffle every epoch and drop the incomplete final batch so that
# every training batch has exactly BATCH_SIZE items.
train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True, pin_memory=True, drop_last=True)

# Evaluation: keep every sample (drop_last=False) and a fixed order
# (shuffle=False) so that metrics cover the whole split and are repeatable.
# The final batch may therefore contain fewer than BATCH_SIZE items.
valid_loader = DataLoader(valid_set, batch_size=BATCH_SIZE, shuffle=False, pin_memory=True, drop_last=False)
test_loader = DataLoader(test_set, batch_size=BATCH_SIZE, shuffle=False, pin_memory=True, drop_last=False)

In [8]:
# Sanity check: fetch one batch from the test loader and display the shapes
# of its image tensor and its label tensor.
first_batch = next(iter(test_loader))
X, y = first_batch
(X.shape, y.shape)

(torch.Size([8, 3, 3, 225, 225]), torch.Size([8, 3, 1]))

In [9]:
# saving all data loaders
# NOTE: a pickled DataLoader embeds its dataset (all images held in memory),
# and pickle stores classes by reference, so whatever unpickles these files
# must be able to resolve the LumpyDataset class. Only unpickle files from a
# trusted source.
os.makedirs("variables", exist_ok=True)  # open(..., "wb") does not create directories
for loader_path, loader in (
    ("variables/train_loader.pickle", train_loader),
    ("variables/valid_loader.pickle", valid_loader),
    ("variables/test_loader.pickle", test_loader),
):
    with open(loader_path, "wb") as f:
        pickle.dump(loader, f)