<a href="https://colab.research.google.com/github/AymanNasser/Random-DNN-Notebooks/blob/master/Real_world_data_representation_with_tensors_using_Pytorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import torch

## Working with images

In [2]:
import imageio
import os
import torchvision
from skimage.transform import resize
from PIL import Image

In [None]:
img = imageio.imread('../tmp/Horse.png')
img.shape

(576, 1024, 3)

In [None]:
img_t = torch.from_numpy(img)
img_t = img_t.permute(2,0,1) # permuting the tensor ==> channels * height * width
img_t.shape

torch.Size([3, 576, 1024])

In [None]:
data_dir = '../tmp'
filenames = [name for name in os.listdir(data_dir) if (os.path.splitext(name)[-1] == '.png') or 
                                                      (os.path.splitext(name)[-1] == '.jpg') or 
                                                      (os.path.splitext(name)[-1] == '.jpeg')]

batch_size = len(filenames)
batch = torch.zeros(batch_size, 3, 256, 256, dtype=torch.float32)

for ite, filename in enumerate(filenames):
  img = imageio.imread(os.path.join(data_dir,filename))
  img = resize(img, (256,256)) # Info. lost due to resizing
  img_t = torch.from_numpy(img).permute(2,0,1)
  batch[ite] = img_t
batch /= 255. # Normalizing 
batch


tensor([[[[0.0018, 0.0019, 0.0014,  ..., 0.0020, 0.0019, 0.0019],
          [0.0016, 0.0017, 0.0013,  ..., 0.0019, 0.0019, 0.0018],
          [0.0016, 0.0016, 0.0012,  ..., 0.0020, 0.0019, 0.0018],
          ...,
          [0.0026, 0.0022, 0.0020,  ..., 0.0023, 0.0019, 0.0028],
          [0.0026, 0.0023, 0.0022,  ..., 0.0025, 0.0024, 0.0028],
          [0.0025, 0.0023, 0.0023,  ..., 0.0021, 0.0023, 0.0024]],

         [[0.0020, 0.0021, 0.0017,  ..., 0.0025, 0.0024, 0.0024],
          [0.0018, 0.0019, 0.0016,  ..., 0.0025, 0.0024, 0.0024],
          [0.0017, 0.0017, 0.0015,  ..., 0.0025, 0.0024, 0.0024],
          ...,
          [0.0029, 0.0027, 0.0025,  ..., 0.0029, 0.0026, 0.0033],
          [0.0029, 0.0027, 0.0027,  ..., 0.0031, 0.0031, 0.0032],
          [0.0030, 0.0028, 0.0027,  ..., 0.0027, 0.0030, 0.0029]],

         [[0.0023, 0.0023, 0.0015,  ..., 0.0007, 0.0007, 0.0007],
          [0.0021, 0.0021, 0.0014,  ..., 0.0007, 0.0007, 0.0007],
          [0.0021, 0.0020, 0.0014,  ..., 0

## Working with tabular data

In [3]:
import pandas as pd

In [4]:
data_path = '../tmp/winequality-white.csv'
df = pd.read_csv(data_path, delimiter = ';')
df.head(), df.describe()

(   fixed acidity  volatile acidity  citric acid  ...  sulphates  alcohol  quality
 0            7.0              0.27         0.36  ...       0.45      8.8        6
 1            6.3              0.30         0.34  ...       0.49      9.5        6
 2            8.1              0.28         0.40  ...       0.44     10.1        6
 3            7.2              0.23         0.32  ...       0.40      9.9        6
 4            7.2              0.23         0.32  ...       0.40      9.9        6
 
 [5 rows x 12 columns],
        fixed acidity  volatile acidity  ...      alcohol      quality
 count    4898.000000       4898.000000  ...  4898.000000  4898.000000
 mean        6.854788          0.278241  ...    10.514267     5.877909
 std         0.843868          0.100795  ...     1.230621     0.885639
 min         3.800000          0.080000  ...     8.000000     3.000000
 25%         6.300000          0.210000  ...     9.500000     5.000000
 50%         6.800000          0.260000  ...    10

In [30]:
cols_names = df.columns
cols_names

Index(['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar',
       'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density',
       'pH', 'sulphates', 'alcohol', 'quality'],
      dtype='object')

In [8]:
wine_np = np.loadtxt(data_path, dtype=np.float32, delimiter=';', skiprows=1)
wine_np, wine_np.shape

(array([[ 7.  ,  0.27,  0.36, ...,  0.45,  8.8 ,  6.  ],
        [ 6.3 ,  0.3 ,  0.34, ...,  0.49,  9.5 ,  6.  ],
        [ 8.1 ,  0.28,  0.4 , ...,  0.44, 10.1 ,  6.  ],
        ...,
        [ 6.5 ,  0.24,  0.19, ...,  0.46,  9.4 ,  6.  ],
        [ 5.5 ,  0.29,  0.3 , ...,  0.38, 12.8 ,  7.  ],
        [ 6.  ,  0.21,  0.38, ...,  0.32, 11.8 ,  6.  ]], dtype=float32),
 (4898, 12))

In [9]:
tensor_df_ = torch.from_numpy(wine_np)
tensor_df_[:-1], tensor_df_[-1], tensor_df_.shape

(tensor([[ 7.0000,  0.2700,  0.3600,  ...,  0.4500,  8.8000,  6.0000],
         [ 6.3000,  0.3000,  0.3400,  ...,  0.4900,  9.5000,  6.0000],
         [ 8.1000,  0.2800,  0.4000,  ...,  0.4400, 10.1000,  6.0000],
         ...,
         [ 6.6000,  0.3200,  0.3600,  ...,  0.4600,  9.6000,  5.0000],
         [ 6.5000,  0.2400,  0.1900,  ...,  0.4600,  9.4000,  6.0000],
         [ 5.5000,  0.2900,  0.3000,  ...,  0.3800, 12.8000,  7.0000]]),
 tensor([6.0000e+00, 2.1000e-01, 3.8000e-01, 8.0000e-01, 2.0000e-02, 2.2000e+01,
         9.8000e+01, 9.8941e-01, 3.2600e+00, 3.2000e-01, 1.1800e+01, 6.0000e+00]),
 torch.Size([4898, 12]))

In [11]:
X = tensor_df_[:, :-1] # data
y = tensor_df_[:, -1].to(dtype=torch.int32) # labels
X.shape, y.shape

(torch.Size([4898, 11]), torch.Size([4898]))

In [12]:
y_unsqz = y.unsqueeze(1)
y_unsqz.shape

torch.Size([4898, 1])

#### PyTorch Tensor API to manipulate our data in tensor form

In [14]:
 torch.mean(X, dim=0), torch.std(X, dim=0)

(tensor([6.8548e+00, 2.7824e-01, 3.3419e-01, 6.3914e+00, 4.5772e-02, 3.5308e+01,
         1.3836e+02, 9.9403e-01, 3.1883e+00, 4.8985e-01, 1.0514e+01]),
 tensor([8.4387e-01, 1.0079e-01, 1.2102e-01, 5.0721e+00, 2.1848e-02, 1.7007e+01,
         4.2498e+01, 2.9909e-03, 1.5100e-01, 1.1413e-01, 1.2306e+00]))

In [24]:
bad_indicies = y <= 3
bad_indicies.shape

torch.Size([4898])

In [28]:
pretty_data, bad_data = X[ ~ (bad_indicies) ], X[ bad_indicies ]
pretty_data.shape, bad_data.shape

(torch.Size([4878, 11]), torch.Size([20, 11]))

## Working with time series

In [32]:
bike_df = pd.read_csv('../tmp/hour.csv')
bike_df.head(), bike_df.describe(), bike_df.columns

(   instant      dteday  season  yr  ...  windspeed  casual  registered  cnt
 0        1  2011-01-01       1   0  ...        0.0       3          13   16
 1        2  2011-01-01       1   0  ...        0.0       8          32   40
 2        3  2011-01-01       1   0  ...        0.0       5          27   32
 3        4  2011-01-01       1   0  ...        0.0       3          10   13
 4        5  2011-01-01       1   0  ...        0.0       0           1    1
 
 [5 rows x 17 columns],
           instant        season  ...    registered           cnt
 count  17379.0000  17379.000000  ...  17379.000000  17379.000000
 mean    8690.0000      2.501640  ...    153.786869    189.463088
 std     5017.0295      1.106918  ...    151.357286    181.387599
 min        1.0000      1.000000  ...      0.000000      1.000000
 25%     4345.5000      2.000000  ...     34.000000     40.000000
 50%     8690.0000      3.000000  ...    115.000000    142.000000
 75%    13034.5000      3.000000  ...    220.00000