<a href="https://colab.research.google.com/github/Pavinithan1998/DataScience_Practice/blob/main/DSPractical_01_02_2023.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import torch.utils.data as data
from PIL import Image

# to convert the class names to numbers
def getLblInt(s):
    """Map a class-name string to its integer label.

    The classes are tried in a fixed order and the first one whose name
    occurs in ``s`` wins. Only the capitalised and the all-lowercase
    spelling of each class name are recognised.

    Args:
        s: Text that contains a class name.

    Returns:
        0 for Homogeneous, 1 for Speckled, 2 for Nucleolar,
        3 for Centromere, 4 for NuMem, 5 for Golgi.

    Raises:
        ValueError: If none of the known names occurs in ``s``; ``s`` is
            printed before the error is raised.
    """
    # The position in this tuple is the integer label.
    known_classes = (
        ('Homogeneous', 'homogeneous'),
        ('Speckled', 'speckled'),
        ('Nucleolar', 'nucleolar'),
        ('Centromere', 'centromere'),
        ('NuMem', 'numem'),
        ('Golgi', 'golgi'),
    )
    for class_id, spellings in enumerate(known_classes):
        if any(name in s for name in spellings):
            return class_id

    print(s)
    raise ValueError("Unknown Label")
        
# to read the annotation file and collect the image paths and their labels
def getData(fn, srcDir):
    """Read an annotation file into parallel lists of image paths and labels.

    Every non-blank line of ``fn`` is split on commas. The first field is
    the image name without extension and the second field holds the class
    name, which is converted to an integer with ``getLblInt``.

    Args:
        fn: Path of the comma-separated annotation file.
        srcDir: Directory that is joined in front of every image name.

    Returns:
        A tuple ``(imgFnArr, lblarr)``: the ``.png`` image paths and the
        integer labels, in file order.

    Raises:
        IndexError: If a non-blank line contains no comma.
        ValueError: Propagated from ``getLblInt`` for an unknown class name.
    """
    lblarr = []
    imgFnArr = []
    # The context manager closes the file even when a line fails to parse.
    with open(fn, 'r') as file:
        for line in file:
            # Tolerate blank lines, e.g. a trailing newline at end of file.
            if not line.strip():
                continue
            fields = line.split(',')
            lblarr.append(getLblInt(fields[1]))
            imgFnArr.append(os.path.join(srcDir, fields[0] + '.png'))
    return imgFnArr, lblarr


# to print the number of images in each class
def calStats(lbl_arr):
    """Print the total number of labels and the count of each distinct label.

    Args:
        lbl_arr: Sequence of labels, one entry per image.
    """
    print('Total images : ', len(lbl_arr))
    for current in set(lbl_arr):
        n_matching = sum(1 for x in lbl_arr if x == current)
        print('Number of images in class ', current, ': ', n_matching)
    print('\n-----------------\n')


# Read an image file and return it converted to RGB mode
def pil_loader(path):
    """Open the image at ``path`` and return it as an RGB PIL image.

    Args:
        path: Location of the image file on disk.

    Returns:
        The result of ``Image.convert('RGB')`` on the opened image.
    """
    with open(path, 'rb') as handle:
        # convert() runs while the file is still open.
        return Image.open(handle).convert('RGB')


class DatasetLoader(data.Dataset):
    """Dataset of cell images listed in a ``train.txt`` / ``test.txt`` file.

    The annotation file is read once at construction time through
    ``getData``; images are opened on access in ``__getitem__``.

    Args:
        train: If true, read ``train.txt``; otherwise read ``test.txt``.
        transform: Optional callable applied to each loaded image.
        img_dir: Directory that holds both the annotation files and the
            images. Defaults to the Google Drive path used previously, so
            existing callers are unaffected.
    """

    def __init__(self, train=True, transform=None,
                 img_dir='/content/drive/MyDrive/SmallDataset_CellImages'):
        annot_fn = os.path.join(img_dir, 'train.txt' if train else 'test.txt')
        self.fnArr, self.lbls = getData(annot_fn, img_dir)
        # Distinct labels that occur in this split.
        self.classLbls = set(self.lbls)

        self.transform = transform
        self.train = train
        print('\nTrain--->' if self.train else '\nTest--->')
        calStats(self.lbls)

    def __getitem__(self, index):
        """Return ``(image, label)`` for the sample at ``index``.

        The image is loaded with ``pil_loader`` and passed through
        ``self.transform`` when one was supplied.
        """
        img_fn = self.fnArr[index]
        target = self.lbls[index]
        imagedata = pil_loader(img_fn)
        if self.transform is not None:
            imagedata = self.transform(imagedata)
        return imagedata, target

    def getLbls(self):
        """Return the set of distinct labels found in this split."""
        return self.classLbls

    def __len__(self):
        """Return the number of samples listed in the annotation file."""
        return len(self.fnArr)
    


if __name__ == '__main__':
    # Smoke test: constructing each split prints its per-class image counts.
    for is_train in (True, False):
        DatasetLoader(is_train)
  

In [None]:
!python /content/sample_data/DatasetLoader.py


In [None]:
!python /content/sample_data/util.py

In [None]:
!python /content/sample_data/Net.py

In [None]:
import torchvision.transforms as transforms
import torch
# import DatasetLoader

def get_loaders(bs):
    """Build the train and test data loaders for the cell-image dataset.

    Training images go through Resize(100), random horizontal and vertical
    flips, a random rotation drawn from [0, 180] degrees and RandomCrop(80).
    Test images go through Resize(100) and CenterCrop(80). Both pipelines
    end with ToTensor and the same per-channel normalisation.

    Args:
        bs: Batch size used by both loaders.

    Returns:
        A tuple ``(trainloader, testloader, n_train, n_test, classLbls)``
        where ``n_train`` / ``n_test`` are the dataset lengths and
        ``classLbls`` is the label set reported by the training dataset.
    """
    resize_to = 100
    crop_to = 80
    # The same mean and std are applied to each of the three channels.
    channel_norm = transforms.Normalize(mean=[0.3111] * 3, std=[0.2845] * 3)

    train_steps = [
        transforms.Resize(resize_to),
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.RandomRotation([0, 180]),
        transforms.RandomCrop(crop_to),
        transforms.ToTensor(),
        channel_norm,
    ]
    test_steps = [
        transforms.Resize(resize_to),
        transforms.CenterCrop(crop_to),
        transforms.ToTensor(),
        channel_norm,
    ]

    print('\nPreparing data----->')
    train_ds = DatasetLoader(train=True, transform=transforms.Compose(train_steps))
    test_ds = DatasetLoader(train=False, transform=transforms.Compose(test_steps))

    # Only the shuffle flag differs between the two loaders.
    shared_opts = dict(batch_size=bs, num_workers=1)
    train_dl = torch.utils.data.DataLoader(train_ds, shuffle=True, **shared_opts)
    test_dl = torch.utils.data.DataLoader(test_ds, shuffle=False, **shared_opts)

    return train_dl, test_dl, len(train_ds), len(test_ds), train_ds.getLbls()
    

if __name__ == '__main__':
    # Smoke test: build both loaders with a batch size of 8.
    get_loaders(bs=8)

        

In [None]:
import torchvision.transforms as transforms
import torch
from DatasetLoader import *

def get_loaders(bs):
    """Build the train and test data loaders for the cell-image dataset.

    Training images go through Resize(100), random horizontal and vertical
    flips, a random rotation drawn from [0, 180] degrees and RandomCrop(80).
    Test images go through Resize(100) and CenterCrop(80). Both pipelines
    end with ToTensor and the same per-channel normalisation.

    Args:
        bs: Batch size used by both loaders.

    Returns:
        A tuple ``(trainloader, testloader)``.
    """
    # The same mean and std are applied to each of the three channels.
    normalize = transforms.Normalize(mean=[0.3111, 0.3111, 0.3111],
                                     std=[0.2845, 0.2845, 0.2845])
    imsize = 100
    imsize2 = 80

    transform_train = transforms.Compose([
        transforms.Resize(imsize),
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.RandomRotation([0, 180]),
        transforms.RandomCrop(imsize2),
        transforms.ToTensor(),
        normalize,
    ])

    transform_test = transforms.Compose([
        transforms.Resize(imsize),
        transforms.CenterCrop(imsize2),
        transforms.ToTensor(),
        normalize,
    ])

    # Dataset
    print('\nPreparing data----->')
    trainset = DatasetLoader(train=True, transform=transform_train)
    testset = DatasetLoader(train=False, transform=transform_test)

    # Only the training loader shuffles.
    trainloader = torch.utils.data.DataLoader(trainset,
                                              batch_size=bs,
                                              num_workers=1,
                                              shuffle=True)
    testloader = torch.utils.data.DataLoader(testset,
                                             batch_size=bs,
                                             num_workers=1,
                                             shuffle=False)
    return trainloader, testloader
    

if __name__ == '__main__':
    # Smoke test: build both loaders with a batch size of 8.
    get_loaders(bs=8)

        