<a href="https://colab.research.google.com/github/MohebZandi/Comupter_Vision/blob/main/Data_Numpy_vs_Pytorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Import libraries
import numpy as np
import torch
from torch.utils.data import DataLoader,TensorDataset

In [2]:
# Generate a synthetic NumPy data matrix: one row per observation,
# one column per feature, drawn from a standard normal distribution.

nObservations = 100
nFeatures = 20

data = np.random.standard_normal(size=(nObservations, nFeatures))

In [3]:
# Build a PyTorch tensor from the NumPy array
dataT = torch.tensor(data)

# Report the type, shape and dtype of both containers.
# Note the API difference: numpy exposes the shape as the attribute .shape,
# while the torch tensor is queried here through the method .size().
print('Numpy data:', type(data), data.shape, data.dtype, ' ', sep='\n')
print('Tensor data:', type(dataT), dataT.size(), dataT.dtype, ' ', sep='\n')

Numpy data:
<class 'numpy.ndarray'>
(100, 20)
float64
 
Tensor data:
<class 'torch.Tensor'>
torch.Size([100, 20])
torch.float64
 


In [4]:
# Tensors sometimes have to be cast to a different data type

# 32-bit floating point
dataT2 = torch.tensor(data).to(torch.float32)
print(dataT2.dtype)

# "long" is torch's name for 64-bit integers
dataT3 = torch.tensor(data).to(torch.int64)
print(dataT3.dtype)


torch.float32
torch.int64


In [7]:
# Wrap the tensor in a PyTorch TensorDataset

# The NumPy array cannot be passed directly; the torch tensor is used instead:
# dataset = TensorDataset(data)     # not a tensor!
dataset = TensorDataset(dataT)

# dataset.tensors holds the tensors the dataset was built from. Only the data
# tensor was passed in here (no labels yet), so element 0 is the data itself.
dataset.tensors[0]

tensor([[-0.1410,  0.6943,  0.5690,  ...,  0.7256, -0.1225, -0.2654],
        [ 1.7044,  0.2279,  0.2249,  ..., -2.1575,  0.6620,  0.2855],
        [-0.5568,  0.6708,  1.2340,  ...,  1.7823,  0.3084,  1.0239],
        ...,
        [ 1.5667,  0.5222, -0.1578,  ..., -0.1839, -0.6661,  0.5595],
        [-0.9942,  1.9583,  0.1899,  ...,  0.0719,  1.1933,  1.5907],
        [-0.9264, -0.2730,  0.7271,  ..., -0.2822,  2.3999,  0.2298]],
       dtype=torch.float64)

In [11]:
# Let's try again with labels: rounding nObservations evenly spaced values
# between .01 and 4 up to the next integer yields the class labels 1., 2., 3., 4.
labels = torch.ceil(torch.linspace(.01,4,nObservations))

# At this point labels is still a flat 1-D tensor; display it
labels



tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.,
        2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 3., 3., 3., 3.,
        3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3.,
        3., 3., 3., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4.,
        4., 4., 4., 4., 4., 4., 4., 4., 4., 4.])


In [None]:
# Turn the labels into an actual matrix (a column vector with one row per
# observation) and print the result
labels = torch.reshape(labels, (len(labels), 1))
print(labels)

In [13]:

# Build a second dataset, this time pairing the data with the labels,
# and print the size of each tensor it holds
dataset = TensorDataset(dataT, labels)
print(*(t.size() for t in dataset.tensors), sep='\n')

# For comparison: the shape of a NumPy array of nObservations random integers
print(np.random.randint(5, size=nObservations).shape)

torch.Size([100, 20])
torch.Size([100, 1])
(100,)


In [14]:
# Indexing the dataset returns a single sample as a tuple: (data row, label)
dataset[0]    # it contains the data and label in one row

(tensor([-0.1410,  0.6943,  0.5690,  0.7939,  0.5175,  1.4316,  0.9518, -1.1957,
         -0.2280,  0.4413, -0.3202, -1.4459, -0.1835,  0.0931,  0.7619, -0.8573,
          1.3076,  0.7256, -0.1225, -0.2654], dtype=torch.float64),
 tensor([1.]))