In [1]:
import imageio
import torch

## for one image

In [2]:
# Read a single image from disk into a NumPy-like array.
image_path = "../data/dog.jpg"
img_arr = imageio.imread(image_path)
print(type(img_arr))  # array type handed back by imageio
print(img_arr.shape)  # (height, width, channels)

<class 'imageio.core.util.Array'>
(1003, 1999, 3)


In [3]:
# torch expects images laid out as (channels, height, width), while the array
# loaded above is (height, width, channels): move the last axis to the front.
img = torch.from_numpy(img_arr)
print(img.shape)
channels_first = (2, 0, 1)  # new axis order, written in terms of the old axes
out = img.permute(*channels_first)
print(out.shape)

torch.Size([1003, 1999, 3])
torch.Size([3, 1003, 1999])


## for multiple images

In [4]:
# Preallocate the whole batch first and fill it in afterwards; this is more
# efficient than building the tensor up one image at a time.
batch_size = 3
batch_shape = (batch_size, 3, 256, 256)  # (images, channels, height, width)
batch = torch.zeros(batch_shape, dtype=torch.uint8)

In [5]:
import os

# Fill the preallocated `batch` with the images stored in one directory.
data_dir = "../data/p1ch4/image-cats"
image_extensions = (".png", ".jpg", ".jpeg")

# os.listdir returns entries in arbitrary order and may include non-image files
# (e.g. hidden OS metadata). Keep only image files and sort them so the batch
# contents are the same on every run.
filenames = sorted(
    name for name in os.listdir(data_dir)
    if name.lower().endswith(image_extensions)
)

# Never write past the preallocated batch, even if the folder holds more images
# than the batch has slots.
for i, name in enumerate(filenames[:batch.shape[0]]):
    img_arr = imageio.imread(os.path.join(data_dir, name))
    img = torch.from_numpy(img_arr)
    # Channels first, then keep only the first three channels
    # (discards the alpha channel if present).
    out = img.permute(2, 0, 1)[:3]
    batch[i] = out


In [6]:
# Display the shape of the filled batch tensor.
batch.size()

torch.Size([3, 3, 256, 256])

In [7]:
# Normalize pixel values to the range [0, 1] (another common choice is [-1, 1]),
# because models train better on normalized inputs.
# Guard on the dtype so that re-running this cell does not divide by 255 twice.
if batch.dtype == torch.uint8:
    batch = batch.float() / 255.0
# Alternative: compute the mean and standard deviation of each channel and scale
# so that the output has zero mean and unit standard deviation per channel.

## loading a ct scan

In [8]:
# Read the directory of DICOM files as a single volume array.
dir_path = "../data/p1ch4/volumetric-dicom/2-LUNG 3.0  B70f-04083"
vol_arr = imageio.volread(dir_path, format='DICOM')

Reading DICOM (examining files): 1/99 files (1.0%94/99 files (94.9%99/99 files (100.0%)
  Found 1 correct series.
Reading DICOM (loading data): 42/99  (42.487/99  (87.999/99  (100.0%)


In [9]:
# Dimensions of the loaded volume; the leading 99 matches the 99 files read
# above (presumably one slice per file — confirm).
vol_arr.shape

(99, 512, 512)

In [10]:
# Wrap the NumPy volume as a tensor and convert it to 32-bit floats.
vol = torch.from_numpy(vol_arr).to(torch.float32)
vol.shape

torch.Size([99, 512, 512])

In [11]:
# Add a leading axis of size 1 in front of the three volume axes.
# Guard on the rank so that re-running this cell does not keep stacking axes.
if vol.dim() == 3:
    vol = torch.unsqueeze(vol, 0)
vol.shape


torch.Size([1, 99, 512, 512])

## timeseries data
Time series data has a time dimension, which is useful if we want to exploit causal relationships across a time period.

In [12]:
import numpy as np

In [28]:
def _day_from_date(date_field):
    """Convert characters 8-9 of a date field to a float.

    Presumably the field looks like YYYY-MM-DD, which makes this the day of
    the month — confirm against the CSV.
    """
    return float(date_field[8:10])


bikes_path = "../data/p1ch4/bike-sharing-dataset/hour-fixed.csv"
bikes_numpy = np.loadtxt(
    bikes_path,
    dtype=np.float32,
    delimiter=",",
    skiprows=1,  # skip the first line (presumably the header row)
    converters={1: _day_from_date},  # column 1 cannot be parsed as a float directly
)
bikes = torch.from_numpy(bikes_numpy)

In [29]:
# Display the shape and strides of the full table. A stride of (17, 1) means
# that moving one row forward skips 17 stored elements, i.e. one full row.
bikes.shape, bikes.stride()
# 17520 hours, 17 axes/columns

(torch.Size([17520, 17]), (17, 1))

In [32]:
# Regroup the hourly rows into chunks of 24: (days, hours, columns).
# The -1 lets PyTorch infer the number of days from the total row count.
hours_per_day = 24
n_columns = bikes.size(1)
daily_bikes = bikes.view(-1, hours_per_day, n_columns)
daily_bikes.shape

torch.Size([730, 24, 17])