In [1]:
import torch
import torchvision
from torch.utils.data import Dataset, DataLoader
import numpy as np
import math



## 1 Dataset

In [2]:
class WineDataset(Dataset):
    """Wine samples read from a CSV file and held in memory as torch tensors.

    The file must have one header row followed by numeric rows in which
    column 0 is the target and every remaining column is a feature.
    """

    def __init__(self, csv_path: str = r"C:\Users\jites\Downloads\wine.csv") -> None:
        """Load the whole CSV at ``csv_path`` into memory.

        Args:
            csv_path: Location of the comma-separated file. The default is the
                path that used to be hard-coded here, so ``WineDataset()``
                behaves exactly as before; pass another path to make the
                notebook runnable on a different machine.
        """
        xy = np.loadtxt(fname=csv_path,       ## file to read
                        delimiter=',',        ## values are comma-separated
                        dtype=np.float32,     ## parse every value as float32
                        skiprows=1,           ## skip the header row
                        ndmin=2)              ## keep the result 2-D even when the file holds a single data row
        self.x = torch.from_numpy(xy[:, 1:])        ## features: every row, every column except the first
        self.y = torch.from_numpy(xy[:, [0]])       ## targets: every row, first column only (kept 2-D, one value per row)
        self.n_samples = xy.shape[0]                ## number of data rows

    def __getitem__(self, index):
        """Return the ``(features, target)`` pair stored at ``index``."""
        return self.x[index], self.y[index]

    def __len__(self):
        """Return the number of samples loaded from the CSV."""
        return self.n_samples


In [3]:
## Build the dataset (the constructor reads the CSV) and inspect its first record.
wn_dataset = WineDataset()
first_data = wn_dataset[0]         ## indexing the dataset invokes its __getitem__ method
print(first_data)
features, labels = first_data   ## __getitem__ returns an (x, y) pair, so it unpacks into two names
print(features, labels)

(tensor([1.4230e+01, 1.7100e+00, 2.4300e+00, 1.5600e+01, 1.2700e+02, 2.8000e+00,
        3.0600e+00, 2.8000e-01, 2.2900e+00, 5.6400e+00, 1.0400e+00, 3.9200e+00,
        1.0650e+03]), tensor([1.]))
tensor([1.4230e+01, 1.7100e+00, 2.4300e+00, 1.5600e+01, 1.2700e+02, 2.8000e+00,
        3.0600e+00, 2.8000e-01, 2.2900e+00, 5.6400e+00, 1.0400e+00, 3.9200e+00,
        1.0650e+03]) tensor([1.])


## 2 DataLoader

In [4]:
dataloader = DataLoader(dataset= wn_dataset,        ## the Dataset instance to draw samples from
                        batch_size=4,               ## number of samples collated into each batch
                        shuffle=True,               ## reshuffle the order of the samples (rows, not columns) each epoch
                        num_workers=0)              ## 0 loads data in the main process; larger values use worker subprocesses

dataiter = iter(dataloader)             ## create an iterator over the batches
data = next(dataiter)                   ## fetch one batch
## (the builtin next() is used here rather than calling dataiter.next() directly)
features, labels = data                 ## a batch unpacks into stacked features and their matching labels
print(features, labels)

tensor([[1.2080e+01, 2.0800e+00, 1.7000e+00, 1.7500e+01, 9.7000e+01, 2.2300e+00,
         2.1700e+00, 2.6000e-01, 1.4000e+00, 3.3000e+00, 1.2700e+00, 2.9600e+00,
         7.1000e+02],
        [1.2360e+01, 3.8300e+00, 2.3800e+00, 2.1000e+01, 8.8000e+01, 2.3000e+00,
         9.2000e-01, 5.0000e-01, 1.0400e+00, 7.6500e+00, 5.6000e-01, 1.5800e+00,
         5.2000e+02],
        [1.2290e+01, 2.8300e+00, 2.2200e+00, 1.8000e+01, 8.8000e+01, 2.4500e+00,
         2.2500e+00, 2.5000e-01, 1.9900e+00, 2.1500e+00, 1.1500e+00, 3.3000e+00,
         2.9000e+02],
        [1.2250e+01, 1.7300e+00, 2.1200e+00, 1.9000e+01, 8.0000e+01, 1.6500e+00,
         2.0300e+00, 3.7000e-01, 1.6300e+00, 3.4000e+00, 1.0000e+00, 3.1700e+00,
         5.1000e+02]]) tensor([[2.],
        [3.],
        [2.],
        [2.]])


## 3 Transform

In [5]:
class WineDataset2(Dataset):
    """Wine samples kept as NumPy arrays, with an optional per-sample transform.

    The file must have one header row followed by numeric rows in which
    column 0 is the target and every remaining column is a feature.
    """

    def __init__(self, transform = None, csv_path: str = r"C:\Users\jites\Downloads\wine.csv"):
        """Load the whole CSV at ``csv_path`` into memory.

        Args:
            transform: Optional callable applied to each ``(features, target)``
                pair in ``__getitem__``; its return value is what the dataset
                yields. ``None`` returns the raw NumPy pair.
            csv_path: Location of the comma-separated file. The default is the
                path that used to be hard-coded here, so existing calls are
                unaffected.
        """
        ## ndmin=2 keeps the array 2-D even when the file holds a single data row,
        ## which the column slicing below relies on.
        xy = np.loadtxt(fname=csv_path, dtype=np.float32, delimiter=',', skiprows=1, ndmin=2)
        self.x = xy[:, 1:]              ## features: every column except the first
        self.y = xy[:, [0]]             ## targets: first column only, kept 2-D
        self.transform = transform
        self.n_samples = xy.shape[0]    ## number of data rows

    def __getitem__(self, index):
        """Return the sample at ``index``, passed through ``transform`` if one was given."""
        sample = self.x[index], self.y[index]

        ## Compare against None rather than testing truthiness, so a transform
        ## object that happens to evaluate as falsy is still applied.
        if self.transform is not None:
            sample = self.transform(sample)

        return sample

    def __len__(self):
        """Return the number of samples loaded from the CSV."""
        return self.n_samples



class ToTensor:
    """Callable transform turning a NumPy ``(features, target)`` pair into torch tensors."""

    def __call__(self, sample):
        """Convert both arrays of ``sample`` with ``torch.from_numpy``.

        Args:
            sample: A two-element sequence of NumPy arrays.

        Returns:
            A ``(features, target)`` tuple of tensors.
        """
        features_np, target_np = sample
        features_t = torch.from_numpy(features_np)
        target_t = torch.from_numpy(target_np)
        return features_t, target_t

In [6]:
wn_dataset2 = WineDataset2(transform=ToTensor())        ## ToTensor converts each NumPy (x, y) sample into torch tensors
first_data = wn_dataset2[0]         ## __getitem__ applies the transform before returning the sample
features, labels = first_data
print(features, labels)

tensor([1.4230e+01, 1.7100e+00, 2.4300e+00, 1.5600e+01, 1.2700e+02, 2.8000e+00,
        3.0600e+00, 2.8000e-01, 2.2900e+00, 5.6400e+00, 1.0400e+00, 3.9200e+00,
        1.0650e+03]) tensor([1.])


## 4 MulTransform

In [7]:
class MulTransform:
    """Callable transform that scales the features of a sample by a fixed factor."""

    def __init__(self, factor):
        """Remember the multiplier applied to every sample's features."""
        self.factor = factor

    def __call__(self, sample):
        """Return ``(features * factor, target)`` for a ``(features, target)`` pair.

        Only the features are scaled; the target is passed through as-is.
        """
        features, target = sample
        scaled = features * self.factor
        return scaled, target
    

## Compose holds a list of transforms and applies them in order: the sample
## first goes through ToTensor(), and that result is then passed to
## MulTransform(2), and so on through to the last transform in the list.
compose = torchvision.transforms.Compose([ToTensor(), MulTransform(2)])
wn_dataset3 = WineDataset2(transform=compose)
first_data = wn_dataset3[0]
features, labels = first_data
print(features, labels)

tensor([2.8460e+01, 3.4200e+00, 4.8600e+00, 3.1200e+01, 2.5400e+02, 5.6000e+00,
        6.1200e+00, 5.6000e-01, 4.5800e+00, 1.1280e+01, 2.0800e+00, 7.8400e+00,
        2.1300e+03]) tensor([1.])
