# Images

In [1]:
import os

import imageio
import torch

In [2]:
# Read the sample photograph from disk and inspect its shape and container type.
dog_image_path = './p1ch4/image-dog/bobby.jpg'
img_arr = imageio.imread(dog_image_path)
(img_arr.shape, type(img_arr))

((720, 1280, 3), imageio.core.util.Array)

In [3]:
# The array loaded above is H × W × C, while PyTorch modules dealing with
# image data require C × H × W: move the channel axis (2) to the front.
img = torch.from_numpy(img_arr)
channel_axis, height_axis, width_axis = 2, 0, 1
out = img.permute(channel_axis, height_axis, width_axis)
out.shape

torch.Size([3, 720, 1280])

In [4]:
# Preallocate an all-zero uint8 batch laid out as N × C × H × W
# (3 images, 3 channels, 256 × 256 pixels each).
batch_size = 3
batch_shape = (batch_size, 3, 256, 256)
batch = torch.zeros(batch_shape, dtype=torch.uint8)

In [5]:
import io

In [6]:
# Fill the preallocated `batch` with every PNG found in the cat image folder.
data_dir = './p1ch4/image-cats/'
# os.listdir returns entries in arbitrary order, so sort the names to make
# the image-to-batch-slot assignment reproducible between runs.
filenames = sorted(
    name for name in os.listdir(data_dir)
    if os.path.splitext(name)[-1] == '.png'
)

for i, filename in enumerate(filenames):
    img_arr = imageio.imread(os.path.join(data_dir, filename))
    img_t = torch.from_numpy(img_arr)
    img_t = img_t.permute(2, 0, 1)  # H × W × C -> C × H × W
    img_t = img_t[:3]  # keep only the first three channels (drops any extra one, e.g. alpha)
    batch[i] = img_t

## Normalizing the data

In [7]:
# Option 1 (left disabled): cast to float and divide the pixel values by 255
# batch = batch.float()
# batch /= 255.

In [8]:
# Option 2: standardize every channel to zero mean and unit standard deviation.
batch = batch.float()
n_channels = batch.shape[1]
for c in range(n_channels):
    channel = batch[:, c]
    # Both statistics must come from the channel being normalized; taking the
    # mean from channel 0 for every channel would leave channels 1.. off-centre.
    mean = torch.mean(channel)
    std = torch.std(channel)
    batch[:, c] = (channel - mean) / std

In [9]:
# Read the DICOM files stored in this directory as a single volume
# (the log printed below reports 99 files forming one series).
dir_path = "./p1ch4/volumetric-dicom/2-LUNG 3.0  B70f-04083"
vol_arr = imageio.volread(dir_path, 'DICOM')
vol_arr.shape

Reading DICOM (examining files): 1/99 files (1.0%99/99 files (100.0%)
  Found 1 correct series.
Reading DICOM (loading data): 30/99  (30.370/99  (70.799/99  (100.0%)


(99, 512, 512)

In [10]:
# The volume has no channel axis, so make room for the channel dimension
# by inserting a size-1 axis in front with unsqueeze.
vol = torch.from_numpy(vol_arr).float().unsqueeze(0)
vol.shape

torch.Size([1, 99, 512, 512])

# Tabular data

In [11]:
import csv
wine_path = "./p1ch4/tabular-wine/winequality-white.csv"
# Read only the header row. The context manager closes the file handle
# deterministically; newline='' is what the csv module asks for on open().
with open(wine_path, newline='') as wine_file:
    col_list = next(csv.reader(wine_file, delimiter=';'))
col_list

['fixed acidity',
 'volatile acidity',
 'citric acid',
 'residual sugar',
 'chlorides',
 'free sulfur dioxide',
 'total sulfur dioxide',
 'density',
 'pH',
 'sulphates',
 'alcohol',
 'quality']

In [12]:
import numpy as np
# Parse the ';'-separated table as float32, skipping the header row.
wineq_numpy = np.loadtxt(
    wine_path,
    delimiter=";",
    skiprows=1,
    dtype=np.float32,
)
wineq_numpy

array([[ 7.  ,  0.27,  0.36, ...,  0.45,  8.8 ,  6.  ],
       [ 6.3 ,  0.3 ,  0.34, ...,  0.49,  9.5 ,  6.  ],
       [ 8.1 ,  0.28,  0.4 , ...,  0.44, 10.1 ,  6.  ],
       ...,
       [ 6.5 ,  0.24,  0.19, ...,  0.46,  9.4 ,  6.  ],
       [ 5.5 ,  0.29,  0.3 , ...,  0.38, 12.8 ,  7.  ],
       [ 6.  ,  0.21,  0.38, ...,  0.32, 11.8 ,  6.  ]], dtype=float32)

In [13]:
# Wrap the NumPy table as a tensor; the output below shows the float32 dtype is kept.
wineq = torch.from_numpy(wineq_numpy)
wineq.shape, wineq.dtype

(torch.Size([4898, 12]), torch.float32)

In [14]:
# Split the table: every column but the last is an input feature, the last
# column ('quality' in col_list) becomes the integer target.
n_features = wineq.shape[1] - 1
data = wineq[:, :n_features]
target = wineq[:, n_features].long()
data.shape, target.shape

(torch.Size([4898, 11]), torch.Size([4898]))

In [15]:
# Column-wise statistics: reduce over the sample axis (dim 0).
data_mean = torch.mean(data, dim=0)
data_var = torch.var(data, dim=0)

In [16]:
# Standardize every column: subtract its mean, divide by its standard deviation.
data_std = data_var.sqrt()
data_normalized = (data - data_mean) / data_std

# Time series

In [17]:
# Load the hourly bike-sharing table as float32, skipping the header row.
bikes_numpy = np.loadtxt(
    "./p1ch4/bike-sharing-dataset/hour-fixed.csv",
    dtype=np.float32,
    delimiter=",",
    skiprows=1,
    # Column 1 goes through a converter that keeps only characters 8-9 of the
    # field (presumably the day of month of a YYYY-MM-DD date — TODO confirm).
    converters={1:lambda x:float(x[8:10])}
)
bikes = torch.from_numpy(bikes_numpy)

In [18]:
# Inspect the 2-D layout; the output below shows 17 columns stored back to back per row.
bikes.shape, bikes.stride()

(torch.Size([17520, 17]), (17, 1))

In [19]:
# Regroup the hourly rows into days without copying:
# N × L (hours) × C (channels)
hours_per_day = 24
n_columns = bikes.shape[1]
daily_bikes = bikes.view(-1, hours_per_day, n_columns)
daily_bikes.shape, daily_bikes.stride()

(torch.Size([730, 24, 17]), (408, 17, 1))

In [20]:
# Swap dims 1 and 2 in place; the output below shows the shape and strides swapped.
# NOTE(review): because the swap is in place, re-running this cell swaps the axes back.
daily_bikes.transpose_(1,2)
# N × C × L
daily_bikes.shape, daily_bikes.stride()

(torch.Size([730, 17, 24]), (408, 1, 17))

In [21]:
# First 24 rows of the hourly table, cast to integers.
first_day = bikes[:24].long()
# NOTE(review): despite its name, this buffer is a one-hot target with 4 slots
# per row for column 9 of the bike data (see the scatter_ cell), not wine data.
wine_onehot = torch.zeros(first_day.shape[0], 4)
first_day[:,9]

tensor([1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 2, 2, 2, 2])

In [22]:
# One-hot encode column 9 of the first day: its values start at 1, so shift
# them by one to get 0-based column indices, then write 1.0 at those positions.
first_day_index = first_day[:, 9].unsqueeze(1).long() - 1
wine_onehot.scatter_(1, first_day_index, 1.0)

tensor([[1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [0., 1., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [0., 1., 0., 0.],
        [0., 1., 0., 0.],
        [0., 1., 0., 0.],
        [0., 1., 0., 0.],
        [0., 1., 0., 0.],
        [0., 0., 1., 0.],
        [0., 0., 1., 0.],
        [0., 1., 0., 0.],
        [0., 1., 0., 0.],
        [0., 1., 0., 0.],
        [0., 1., 0., 0.]])

In [23]:
# Append the one-hot columns to the first day's rows and show the first row.
first_day_with_onehot = torch.cat((bikes[:24], wine_onehot), dim=1)
first_day_with_onehot[:1]

tensor([[ 1.0000,  1.0000,  1.0000,  0.0000,  1.0000,  0.0000,  0.0000,  6.0000,
          0.0000,  1.0000,  0.2400,  0.2879,  0.8100,  0.0000,  3.0000, 13.0000,
         16.0000,  1.0000,  0.0000,  0.0000,  0.0000]])

In [24]:
# Allocate the one-hot buffer with the same first and last dimensions as
# daily_bikes and 4 slots along dim 1.
n_days = daily_bikes.shape[0]
n_hours = daily_bikes.shape[2]
daily_weather_onehot = torch.zeros(n_days, 4, n_hours)
daily_weather_onehot.shape

torch.Size([730, 4, 24])

In [25]:
# Scatter ones along dim 1 using channel 9 of daily_bikes, shifted by one to
# turn the stored values into 0-based indices.
daily_weather_index = daily_bikes[:, 9, :].long().unsqueeze(1) - 1
daily_weather_onehot.scatter_(1, daily_weather_index, 1.0)
daily_weather_onehot.shape

torch.Size([730, 4, 24])

In [26]:
# Append the 4 one-hot channels to daily_bikes along dim 1.
# NOTE(review): daily_bikes is rebound to the result, so re-running this cell
# would concatenate the one-hot channels a second time.
daily_bikes = torch.cat((daily_bikes, daily_weather_onehot), dim=1)

In [27]:
# Min-max rescale channel 10 (referred to as `temp` here) using its global
# minimum and maximum.
temp = daily_bikes[:, 10, :]
temp_min = temp.min()
temp_max = temp.max()
daily_bikes[:, 10, :] = (temp - temp_min) / (temp_max - temp_min)

In [28]:
# OR: standardize channel 10 by subtracting its mean and dividing by its
# standard deviation. As written this runs after the min-max cell, i.e. on the
# already rescaled values.
temp = daily_bikes[:, 10, :]
temp_mean = torch.mean(temp)
temp_std = torch.std(temp)
daily_bikes[:, 10, :] = (temp - temp_mean) / temp_std

# Text

In [29]:
# Read the whole book into one string, decoding it as UTF-8.
book_path = './p1ch4/jane-austen/1342-0.txt'
with open(book_path, encoding='utf8') as book_file:
    text = book_file.read()

### One hot encoding per character

In [30]:
# Split the book into lines on the newline character.
lines = text.split('\n')
# pick an arbitrary line
line = lines[200]
line

'“Impossible, Mr. Bennet, impossible, when I am not acquainted with him'

In [31]:
# One row per character of the line, one column per code below 128.
n_codes = 128
letter_t = torch.zeros(len(line), n_codes)
letter_t.shape

torch.Size([70, 128])

In [33]:
# Set a single 1 per row at the character's code; characters whose code is
# 128 or above (e.g. the typographic quotes) are all mapped to index 0.
for i, letter in enumerate(line.lower().strip()):
    code_point = ord(letter)
    letter_index = code_point if code_point < 128 else 0
    letter_t[i][letter_index] = 1

### One hot encoding per word

In [35]:
def clean_words(input_str):
    """Lower-case the text, split it on whitespace and trim punctuation.

    Returns one entry per whitespace-separated token, with leading and
    trailing punctuation characters removed. A token made up solely of
    punctuation is kept as an empty string.
    """
    punctuation = '.,;:"!?”“_-'
    normalized = input_str.lower().replace('\n',' ')
    word_list = []
    for token in normalized.split():
        word_list.append(token.strip(punctuation))
    return word_list
# Run the cleaner on the sample line and show the raw text next to its tokens.
words_in_line = clean_words(line)
line, words_in_line

('“Impossible, Mr. Bennet, impossible, when I am not acquainted with him',
 ['impossible',
  'mr',
  'bennet',
  'impossible',
  'when',
  'i',
  'am',
  'not',
  'acquainted',
  'with',
  'him'])

In [38]:
# Build the vocabulary: unique cleaned words of the whole book in sorted
# order, each mapped to its position in that order.
vocabulary = set(clean_words(text))
word_list = sorted(vocabulary)
word2index_dict = dict(zip(word_list, range(len(word_list))))
len(word2index_dict), word2index_dict['impossible']

(7261, 3394)

In [40]:
# One-hot encode each word of the sample line against the vocabulary and
# print its position, vocabulary index and text.
n_words = len(words_in_line)
vocab_size = len(word2index_dict)
word_t = torch.zeros(n_words, vocab_size)
for i, word in enumerate(words_in_line):
    word_index = word2index_dict[word]
    word_t[i, word_index] = 1
    print('{:2} {:4} {}'.format(i, word_index, word))
print(word_t.shape)

 0 3394 impossible
 1 4305 mr
 2  813 bennet
 3 3394 impossible
 4 7078 when
 5 3315 i
 6  415 am
 7 4436 not
 8  239 acquainted
 9 7148 with
10 3215 him
torch.Size([11, 7261])


### Text embeddings

In [None]:
#

# Exercises

In [48]:
# Load the three exercise images, then wrap each array as a tensor.
green_np, bleu_np, red_np = (
    imageio.imread(image_path)
    for image_path in ('./green.png', './bleu.png', './red.png')
)
green_t, bleu_t, red_t = (
    torch.from_numpy(image_np) for image_np in (green_np, bleu_np, red_np)
)

green_t.shape, bleu_t.shape, red_t.shape

(torch.Size([75, 92, 4]), torch.Size([93, 107, 4]), torch.Size([62, 209, 4]))

In [51]:
# Cast each image to float, then take the mean over all of its values.
green_t, bleu_t, red_t = green_t.float(), bleu_t.float(), red_t.float()
green_mean, bleu_mean, red_mean = (
    image_t.mean() for image_t in (green_t, bleu_t, red_t)
)
green_mean, bleu_mean, red_mean

(tensor(153.7581), tensor(192.5881), tensor(147.0815))

In [59]:
# Print the mean of each of the 4 channels for every test image, with a blank
# separator between images.
named_images = (("green", green_t), ("bleu", bleu_t), ("red", red_t))
for position, (label, image_t) in enumerate(named_images):
    if position > 0:
        print("\n")
    for c in range(4):
        print("mean: image {} channel:".format(label), c, torch.mean(image_t[:,:,c]))
# Conclusion (from the means printed below): the brightest channel is 0 for the
# red image, 1 for the green image and 2 for the bleu image, so the layout is
# 0=red / 1=green / 2=bleu; channel 3 is 255 everywhere (presumably alpha).

mean: image green channel: 0 tensor(96.7245)
mean: image green channel: 1 tensor(173.1349)
mean: image green channel: 2 tensor(90.1729)
mean: image green channel: 3 tensor(255.)


mean: image bleu channel: 0 tensor(84.5910)
mean: image bleu channel: 1 tensor(183.1513)
mean: image bleu channel: 2 tensor(247.6102)
mean: image bleu channel: 3 tensor(255.)


mean: image red channel: 0 tensor(245.8974)
mean: image red channel: 1 tensor(34.1783)
mean: image red channel: 2 tensor(53.2501)
mean: image red channel: 3 tensor(255.)
