In [62]:
import numpy as np
import torch
import torchvision
import math
import io
from torch.utils.data import Dataset, DataLoader

In [63]:
# Data loading in PyTorch can be separated into 2 parts:
#   1. The data must be wrapped in a subclass of Dataset in which the methods __getitem__ and __len__ are overridden.
#   2. A DataLoader then wraps that Dataset and serves its samples in batches.

# The two methods to override are:
# the __len__ function, which returns the size of the dataset, and
# the __getitem__ function, which returns a sample from the dataset given an index.

# __len__ so that len(dataset) returns the size of the dataset.
# __getitem__ to support indexing, such that dataset[i] can be used to get the i-th sample.


class WineDataset(Dataset):
  """In-memory dataset built from a comma-separated file.

  The file is read with one header row skipped. Column 0 is exposed as the
  target ``y`` (kept 2-D, shape ``(n_samples, 1)``) and every remaining
  column as the feature tensor ``x`` (shape ``(n_samples, n_features)``).
  Both tensors are float32.
  """

  def __init__(self, path=None):
    """Load the CSV and split it into feature and target tensors.

    Args:
      path: Optional file name or file-like object accepted by
        ``np.loadtxt``. When omitted (the default, matching the original
        behaviour) the file is requested through the Google Colab upload
        widget.

    Raises:
      KeyError: if the Colab upload does not contain ``'wine.csv'`` and the
        file to use cannot be determined unambiguously.
    """
    if path is None:
      # Imported lazily: google.colab only exists inside a Colab runtime, and
      # it is not needed at all when an explicit path is supplied.
      from google.colab import files
      uploaded = files.upload()
      if 'wine.csv' in uploaded:
        raw = uploaded['wine.csv']
      elif len(uploaded) == 1:
        # A single file under another name: use it rather than failing.
        raw = next(iter(uploaded.values()))
      else:
        raise KeyError('wine.csv')
      path = io.BytesIO(raw)

    # ndmin=2 keeps a one-row file two-dimensional so the column slicing
    # below works for any number of samples.
    xy = np.loadtxt(path, delimiter=",", skiprows=1, dtype=np.float32, ndmin=2)
    self.x = torch.from_numpy(xy[:, 1:])
    self.y = torch.from_numpy(xy[:, [0]])
    self.n_samples = xy.shape[0]

  def __getitem__(self, index):
    """Return the ``(features, target)`` pair stored at ``index``."""
    return (self.x[index], self.y[index])

  def __len__(self):
    """Return the number of samples (rows) loaded from the file."""
    return self.n_samples

# Build the dataset object. Indexing it (dataset1[i]) goes through __getitem__,
# and len(dataset1) goes through __len__.
dataset1 = WineDataset()

# Attributes set by __init__: dict_keys(['x', 'y', 'n_samples'])
print(vars(dataset1).keys())

Saving wine.csv to wine (10).csv
dict_keys(['x', 'y', 'n_samples'])


In [64]:
# Inspect a single sample: indexing with [0] calls __getitem__ for the first row.
first_data = dataset1[0]

# Shown one per line: type of the sample, its length, the sample itself,
# and the total number of samples in the dataset.
print(type(first_data), len(first_data), first_data, len(dataset1), sep="\n")

<class 'tuple'>
2
(tensor([1.4230e+01, 1.7100e+00, 2.4300e+00, 1.5600e+01, 1.2700e+02, 2.8000e+00,
        3.0600e+00, 2.8000e-01, 2.2900e+00, 5.6400e+00, 1.0400e+00, 3.9200e+00,
        1.0650e+03]), tensor([1.]))
178


In [65]:
# DataLoader groups the samples held in dataset1 into batches; the batch size
# requested here is 4, drawn in shuffled order by 2 worker processes.
dataloader = DataLoader(
    dataset=dataset1,
    batch_size=4,
    shuffle=True,
    num_workers=2,
)
print(vars(dataloader).keys())

dict_keys(['dataset', 'num_workers', 'prefetch_factor', 'pin_memory', 'timeout', 'worker_init_fn', '_DataLoader__multiprocessing_context', '_dataset_kind', 'batch_size', 'drop_last', 'sampler', 'batch_sampler', 'generator', 'collate_fn', 'persistent_workers', '_DataLoader__initialized', '_IterableDataset_len_called', '_iterator'])


In [66]:
# Look at the first batch produced by the loader.
dataiter = iter(dataloader)

# Use the builtin next(): the iterator's .next() method is a Python 2-style
# alias that newer PyTorch releases no longer provide.
data = next(dataiter)
features, label = data
print("\n\nfeatures : {}".format(features))
print("\n\nlabel : {}".format(label))
print("\n\n len(features : {}".format(len(features)))



features : tensor([[1.3450e+01, 3.7000e+00, 2.6000e+00, 2.3000e+01, 1.1100e+02, 1.7000e+00,
         9.2000e-01, 4.3000e-01, 1.4600e+00, 1.0680e+01, 8.5000e-01, 1.5600e+00,
         6.9500e+02],
        [1.3480e+01, 1.8100e+00, 2.4100e+00, 2.0500e+01, 1.0000e+02, 2.7000e+00,
         2.9800e+00, 2.6000e-01, 1.8600e+00, 5.1000e+00, 1.0400e+00, 3.4700e+00,
         9.2000e+02],
        [1.2530e+01, 5.5100e+00, 2.6400e+00, 2.5000e+01, 9.6000e+01, 1.7900e+00,
         6.0000e-01, 6.3000e-01, 1.1000e+00, 5.0000e+00, 8.2000e-01, 1.6900e+00,
         5.1500e+02],
        [1.1620e+01, 1.9900e+00, 2.2800e+00, 1.8000e+01, 9.8000e+01, 3.0200e+00,
         2.2600e+00, 1.7000e-01, 1.3500e+00, 3.2500e+00, 1.1600e+00, 2.9600e+00,
         3.4500e+02]])


label : tensor([[3.],
        [1.],
        [3.],
        [2.]])


 len(features : 4


In [67]:
print("no of samples in dataset ",len(dataset1))

# Ask the loader for its batch count instead of repeating the batch size here;
# the two then cannot drift apart if the loader is reconfigured.
no_of_batch = len(dataloader)
data_iter = iter(dataloader)

for batch_no in range(1, no_of_batch + 1):
  # Builtin next() instead of .next(), which newer PyTorch releases no longer
  # provide on DataLoader iterators.
  features, labels = next(data_iter)
  if batch_no % 4 == 0:  # print only every 4th batch to keep the output short
    print("\n\nbatch no : {}".format(batch_no))
    print("\n\nfeature set : \n {}".format(features))
    print("\n\n labels set : \n {}".format(labels))


no of samples in dataset  178


batch no : 4


feature set : 
 tensor([[1.2290e+01, 2.8300e+00, 2.2200e+00, 1.8000e+01, 8.8000e+01, 2.4500e+00,
         2.2500e+00, 2.5000e-01, 1.9900e+00, 2.1500e+00, 1.1500e+00, 3.3000e+00,
         2.9000e+02],
        [1.3580e+01, 2.5800e+00, 2.6900e+00, 2.4500e+01, 1.0500e+02, 1.5500e+00,
         8.4000e-01, 3.9000e-01, 1.5400e+00, 8.6600e+00, 7.4000e-01, 1.8000e+00,
         7.5000e+02],
        [1.4160e+01, 2.5100e+00, 2.4800e+00, 2.0000e+01, 9.1000e+01, 1.6800e+00,
         7.0000e-01, 4.4000e-01, 1.2400e+00, 9.7000e+00, 6.2000e-01, 1.7100e+00,
         6.6000e+02],
        [1.3840e+01, 4.1200e+00, 2.3800e+00, 1.9500e+01, 8.9000e+01, 1.8000e+00,
         8.3000e-01, 4.8000e-01, 1.5600e+00, 9.0100e+00, 5.7000e-01, 1.6400e+00,
         4.8000e+02]])


 labels set : 
 tensor([[2.],
        [3.],
        [3.],
        [3.]])


batch no : 8


feature set : 
 tensor([[1.3050e+01, 2.0500e+00, 3.2200e+00, 2.5000e+01, 1.2400e+02, 2.6300e+00,
         2.

In [68]:
# Create a loader with a batch size of 20.
dataset2 = WineDataset()
print("total number of samples are ", dataset2.n_samples)  # 178
dataloader = DataLoader(dataset=dataset2, batch_size=20, shuffle=True, num_workers=2)
# Each batch drawn from this loader holds up to 20 samples.

# Fetch the first batch.
dataiter = iter(dataloader)
# Builtin next() instead of .next(), which newer PyTorch releases no longer
# provide on DataLoader iterators.
data = next(dataiter)

features, target = data

print(len(features))   # 20 (the batch size)
print(type(features))  # <class 'torch.Tensor'>


Saving wine.csv to wine (11).csv
total number of samples are  178
20
<class 'torch.Tensor'>


In [69]:
# Walk through the entire dataset once, in batches of 20.
epochs = 1  # NOTE(review): defined but not used by the loop below
total_samples = len(dataset2)
batch_size = 20
n_iterations = math.ceil(total_samples / batch_size)

# The loader takes the same batch_size constant used for n_iterations, so the
# reported iteration count always matches what the loader yields.
dataloader1 = DataLoader(dataset=dataset2, batch_size=batch_size, shuffle=True, num_workers=2)
dataiter2 = iter(dataloader1)  # iter() creates an object that yields one batch at a time

print("no of iterations ",n_iterations,"\n")
print(dataset2.__dict__.keys())  # dict_keys(['x', 'y', 'n_samples'])
print("\n")

# Iterate the batches directly rather than calling .next() a precomputed
# number of times; .next() is gone from DataLoader iterators in newer PyTorch.
for ind, (features, target) in enumerate(dataiter2):
  print("batch no ",ind)
  # torch.Size([20, 13]) for full batches; the last batch holds the remainder
  print("shape(features)",features.shape)
       
        


no of iterations  9 

dict_keys(['x', 'y', 'n_samples'])


batch no  0
shape(features) torch.Size([20, 13])
batch no  1
shape(features) torch.Size([20, 13])
batch no  2
shape(features) torch.Size([20, 13])
batch no  3
shape(features) torch.Size([20, 13])
batch no  4
shape(features) torch.Size([20, 13])
batch no  5
shape(features) torch.Size([20, 13])
batch no  6
shape(features) torch.Size([20, 13])
batch no  7
shape(features) torch.Size([20, 13])
batch no  8
shape(features) torch.Size([18, 13])
