In [1]:
import torch
import imageio.v2 as imageio
from PIL import Image

In [2]:
# Resize every sample image to 256x256 so that all of them have the same
# spatial size. The resized image is written back over the original file.
target_size = (256, 256)
image_paths = [
    '.\\cat & dog\\cat_1.jpg',
    '.\\cat & dog\\cat_2.jpg',
    '.\\cat & dog\\my_puppy.jpg',
]

for image_path in image_paths:
    # The context manager releases the file handle before the file is
    # overwritten below.
    with Image.open(image_path) as src_img:
        if src_img.size == target_size:
            # Already resized (e.g. the cell was re-run): skip the save so the
            # JPEG is not re-encoded, and degraded, on every execution.
            continue
        resized_img = src_img.resize(target_size)
    resized_img.save(image_path)

In [3]:
# Read one image file into an array and display the array's shape.
puppy_path = '.\\cat & dog\\my_puppy.jpg'
img_arr = imageio.imread(puppy_path)
img_arr.shape

(256, 256, 3)

In [4]:
# Wrap the image array in a tensor, then move its last axis to the front
# (axis order 2, 0, 1).
img = torch.from_numpy(img_arr)
out = torch.permute(img, (2, 0, 1))
out.shape

torch.Size([3, 256, 256])

In [5]:
# Pre-allocate a zero-filled uint8 tensor with one 3 x 256 x 256 slot per image.
batch_size = 3
batch = torch.zeros((batch_size, 3, 256, 256), dtype=torch.uint8)

(batch.shape, batch.dtype)

(torch.Size([3, 3, 256, 256]), torch.uint8)

In [6]:
import os

data_dir = ".\\cat & dog\\"
# os.listdir() returns entries in arbitrary order; sorting makes the mapping
# from file name to batch slot reproducible between runs and machines.
filenames = sorted(
    name for name in os.listdir(data_dir)
    if os.path.splitext(name)[-1] == '.jpg'
)

# Fail early with a clear message instead of an IndexError part-way through
# the loop, which would leave `batch` partially filled.
if len(filenames) > batch.shape[0]:
    raise ValueError(
        'Found {} .jpg files in {!r} but the batch only has room for {}'.format(
            len(filenames), data_dir, batch.shape[0]))

for i, filename in enumerate(filenames):
    img_arr = imageio.imread(os.path.join(data_dir, filename))
    img_t = torch.from_numpy(img_arr)
    # Move the last axis of the loaded array to the front.
    img_t = img_t.permute(2, 0, 1)
    # Keep at most the first three entries along the new leading axis
    # (e.g. this would discard a fourth channel if an image had one).
    img_t = img_t[:3]
    batch[i] = img_t

In [7]:
# Convert the batch to 32-bit floats, print the converted values, and only
# then divide them by 255 in place.
batch = batch.to(dtype=torch.float32)
print(batch)
batch /= 255.0

tensor([[[[100., 107., 112.,  ..., 192., 195., 194.],
          [105., 109., 112.,  ..., 192., 193., 191.],
          [130., 128., 125.,  ..., 186., 189., 187.],
          ...,
          [126., 117., 112.,  ..., 165., 164., 166.],
          [196., 187., 168.,  ..., 184., 185., 189.],
          [194., 194., 189.,  ..., 174., 176., 179.]],

         [[ 99., 106., 111.,  ..., 200., 203., 202.],
          [104., 108., 111.,  ..., 200., 201., 199.],
          [129., 127., 124.,  ..., 192., 195., 193.],
          ...,
          [136., 125., 120.,  ..., 160., 159., 161.],
          [208., 196., 178.,  ..., 179., 180., 184.],
          [206., 206., 198.,  ..., 169., 171., 174.]],

         [[ 97., 104., 109.,  ..., 213., 216., 215.],
          [102., 106., 109.,  ..., 213., 214., 212.],
          [127., 125., 122.,  ..., 204., 207., 205.],
          ...,
          [146., 136., 131.,  ..., 157., 156., 158.],
          [222., 211., 190.,  ..., 176., 177., 181.],
          [222., 222., 213.,  ...

In [8]:
# Show the batch dimensions followed by its current values.
print(batch.size())
print(batch)

torch.Size([3, 3, 256, 256])
tensor([[[[0.3922, 0.4196, 0.4392,  ..., 0.7529, 0.7647, 0.7608],
          [0.4118, 0.4275, 0.4392,  ..., 0.7529, 0.7569, 0.7490],
          [0.5098, 0.5020, 0.4902,  ..., 0.7294, 0.7412, 0.7333],
          ...,
          [0.4941, 0.4588, 0.4392,  ..., 0.6471, 0.6431, 0.6510],
          [0.7686, 0.7333, 0.6588,  ..., 0.7216, 0.7255, 0.7412],
          [0.7608, 0.7608, 0.7412,  ..., 0.6824, 0.6902, 0.7020]],

         [[0.3882, 0.4157, 0.4353,  ..., 0.7843, 0.7961, 0.7922],
          [0.4078, 0.4235, 0.4353,  ..., 0.7843, 0.7882, 0.7804],
          [0.5059, 0.4980, 0.4863,  ..., 0.7529, 0.7647, 0.7569],
          ...,
          [0.5333, 0.4902, 0.4706,  ..., 0.6275, 0.6235, 0.6314],
          [0.8157, 0.7686, 0.6980,  ..., 0.7020, 0.7059, 0.7216],
          [0.8078, 0.8078, 0.7765,  ..., 0.6627, 0.6706, 0.6824]],

         [[0.3804, 0.4078, 0.4275,  ..., 0.8353, 0.8471, 0.8431],
          [0.4000, 0.4157, 0.4275,  ..., 0.8353, 0.8392, 0.8314],
          [0.

In [9]:
# Standardise the batch one channel (dimension 1) at a time: subtract the
# channel's mean and divide by its standard deviation, both computed over
# every image in the batch.
n_channels = batch.size(1)
print(n_channels)
for c in range(n_channels):
    channel = batch[:, c]
    mean = channel.mean()
    std = channel.std()
    batch[:, c] = (channel - mean) / std

3


## CT Image (format == DICOM)

In [10]:
# `imageio` is already bound to `imageio.v2` by the notebook's first cell.
# Re-importing the top-level package here would rebind that name, so
# re-running an earlier cell afterwards would use a different module than on
# its first execution. The existing alias is reused instead.
dir_path = ".\\LUNG_img"
# Read every DICOM file found in the directory into a single array.
vol_arr = imageio.volread(dir_path, 'DICOM')
vol_arr.shape

Reading DICOM (examining files): 1/99 files (1.0%22/99 files (22.2%43/99 files (43.4%64/99 files (64.6%85/99 files (85.9%99/99 files (100.0%)
  Found 1 correct series.
Reading DICOM (loading data): 47/99  (47.597/99  (98.099/99  (100.0%)


(99, 512, 512)

In [11]:
# Convert the volume to a float32 tensor and prepend a dimension of size 1.
vol = torch.from_numpy(vol_arr).to(torch.float32).unsqueeze(0)

vol.shape

torch.Size([1, 99, 512, 512])

## Wine Quality Data (format == CSV)

In [12]:
import csv
import numpy as np

In [13]:
# Load the semicolon-separated wine table as float32, skipping its first row.
wine_path = ".\\wine_quality\\wine_quality.csv"
wine_numpy = np.loadtxt(
    wine_path,
    dtype=np.float32,
    delimiter=';',
    skiprows=1,
)

wine_numpy

array([[ 7.  ,  0.27,  0.36, ...,  0.45,  8.8 ,  6.  ],
       [ 6.3 ,  0.3 ,  0.34, ...,  0.49,  9.5 ,  6.  ],
       [ 8.1 ,  0.28,  0.4 , ...,  0.44, 10.1 ,  6.  ],
       ...,
       [ 6.5 ,  0.24,  0.19, ...,  0.46,  9.4 ,  6.  ],
       [ 5.5 ,  0.29,  0.3 , ...,  0.38, 12.8 ,  7.  ],
       [ 6.  ,  0.21,  0.38, ...,  0.32, 11.8 ,  6.  ]], dtype=float32)

In [14]:
# Read only the first row of the CSV to get the column names. The file is
# opened in a `with` block so the handle is closed once the row has been read;
# `newline=''` is the mode the csv module documents for files passed to
# csv.reader.
with open(wine_path, newline='') as wine_file:
    col_list = next(csv.reader(wine_file, delimiter=";"))

wine_numpy.shape, col_list

((4898, 12),
 ['fixed acidity',
  'volatile acidity',
  'citric acid',
  'residual sugar',
  'chlorides',
  'free sulfur dioxide',
  'total sulfur dioxide',
  'density',
  'pH',
  'sulphates',
  'alcohol',
  'quality'])

In [15]:
# Wrap the NumPy table in a tensor and inspect its shape and dtype.
wine_q = torch.from_numpy(wine_numpy)
(wine_q.size(), wine_q.dtype)

(torch.Size([4898, 12]), torch.float32)

In [16]:
# Every column except the last one becomes the input data.
n_feature_cols = wine_q.shape[1] - 1
data = wine_q[:, :n_feature_cols]
(data, data.shape)

(tensor([[ 7.0000,  0.2700,  0.3600,  ...,  3.0000,  0.4500,  8.8000],
         [ 6.3000,  0.3000,  0.3400,  ...,  3.3000,  0.4900,  9.5000],
         [ 8.1000,  0.2800,  0.4000,  ...,  3.2600,  0.4400, 10.1000],
         ...,
         [ 6.5000,  0.2400,  0.1900,  ...,  2.9900,  0.4600,  9.4000],
         [ 5.5000,  0.2900,  0.3000,  ...,  3.3400,  0.3800, 12.8000],
         [ 6.0000,  0.2100,  0.3800,  ...,  3.2600,  0.3200, 11.8000]]),
 torch.Size([4898, 11]))

In [17]:
# The last column is taken as the target.
target = wine_q[..., -1]
(target, target.shape)

(tensor([6., 6., 6.,  ..., 6., 7., 6.]), torch.Size([4898]))

## Target value preprocessing -> Case 1) Convert to integer

In [18]:
# Take the last column again, this time converted to 64-bit integers.
target = wine_q[:, -1].to(torch.int64)
(target, target.shape)

(tensor([6, 6, 6,  ..., 6, 7, 6]), torch.Size([4898]))

In [19]:
# Add a trailing dimension of size 1 so the targets form a column.
target_unsqueezed = torch.unsqueeze(target, 1)
(target_unsqueezed, target_unsqueezed.shape)

(tensor([[6],
         [6],
         [6],
         ...,
         [6],
         [7],
         [6]]),
 torch.Size([4898, 1]))

## Target value preprocessing -> Case 2) One-hot encoding

In [20]:
# One row of 10 zeros per sample; scatter_ then writes 1.0 into the column
# whose index equals that sample's target value.
n_samples = target.shape[0]
target_onehot = torch.zeros((n_samples, 10))
target_onehot.scatter_(1, target[:, None], 1.0), target_onehot.shape

(tensor([[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 1., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]]),
 torch.Size([4898, 10]))

## Data Normalization

In [21]:
# Mean of each column, taken over the rows (dim 0).
data_mean = data.mean(dim=0)
(data_mean, data_mean.shape)

(tensor([6.8548e+00, 2.7824e-01, 3.3419e-01, 6.3914e+00, 4.5772e-02, 3.5308e+01,
         1.3836e+02, 9.9403e-01, 3.1883e+00, 4.8985e-01, 1.0514e+01]),
 torch.Size([11]))

In [22]:
# Variance of each column, taken over the rows (dim 0).
data_var = data.var(dim=0)
(data_var, data_var.shape)

(tensor([7.1211e-01, 1.0160e-02, 1.4646e-02, 2.5726e+01, 4.7733e-04, 2.8924e+02,
         1.8061e+03, 8.9455e-06, 2.2801e-02, 1.3025e-02, 1.5144e+00]),
 torch.Size([11]))

In [23]:
# Standardise each column: subtract its mean, then divide by the square root
# of its variance.
data_std = data_var.sqrt()
data_normalized = (data - data_mean) / data_std
(data_normalized, data_normalized.shape)

(tensor([[ 1.7208e-01, -8.1761e-02,  2.1326e-01,  ..., -1.2468e+00,
          -3.4915e-01, -1.3930e+00],
         [-6.5743e-01,  2.1587e-01,  4.7996e-02,  ...,  7.3995e-01,
           1.3422e-03, -8.2419e-01],
         [ 1.4756e+00,  1.7450e-02,  5.4378e-01,  ...,  4.7505e-01,
          -4.3677e-01, -3.3663e-01],
         ...,
         [-4.2043e-01, -3.7940e-01, -1.1915e+00,  ..., -1.3130e+00,
          -2.6153e-01, -9.0545e-01],
         [-1.6054e+00,  1.1666e-01, -2.8253e-01,  ...,  1.0049e+00,
          -9.6251e-01,  1.8574e+00],
         [-1.0129e+00, -6.7703e-01,  3.7852e-01,  ...,  4.7505e-01,
          -1.4882e+00,  1.0448e+00]]),
 torch.Size([4898, 11]))

## Designating a threshold

In [24]:
# Boolean mask selecting the rows whose target is 3 or lower.
bad_indexes = torch.le(target, 3)
(bad_indexes.shape, bad_indexes.dtype, bad_indexes.sum())

(torch.Size([4898]), torch.bool, tensor(20))

In [25]:
# Keep only the rows selected by the boolean mask.
bad_data = data[bad_indexes, :]
bad_data.shape

torch.Size([20, 11])

In [26]:
# Split the rows into three groups by target value:
# bad (<= 3), mid (4..6) and good (>= 7).
is_bad = target <= 3
is_good = target >= 7
is_mid = ~(is_bad | is_good)

bad_data = data[is_bad]
mid_data = data[is_mid]
good_data = data[is_good]

# Per-column mean of each group.
bad_mean = bad_data.mean(dim=0)
mid_mean = mid_data.mean(dim=0)
good_mean = good_data.mean(dim=0)

# One line per column: index, column name, then the bad / mid / good means.
# zip() stops at the shortest of its inputs.
row_template = '{:2} {:20} {:6.2f}, {:6.2f} {:6.2f}'
rows = zip(col_list, bad_mean, mid_mean, good_mean)
for i, (col_name, bad_val, mid_val, good_val) in enumerate(rows):
    print(row_template.format(i, col_name, bad_val, mid_val, good_val))

 0 fixed acidity          7.60,   6.89   6.73
 1 volatile acidity       0.33,   0.28   0.27
 2 citric acid            0.34,   0.34   0.33
 3 residual sugar         6.39,   6.71   5.26
 4 chlorides              0.05,   0.05   0.04
 5 free sulfur dioxide   53.33,  35.42  34.55
 6 total sulfur dioxide 170.60, 141.83 125.25
 7 density                0.99,   0.99   0.99
 8 pH                     3.19,   3.18   3.22
 9 sulphates              0.47,   0.49   0.50
10 alcohol               10.34,  10.26  11.42


In [27]:
# Flag the rows whose value in column 6 lies below a fixed threshold.
total_sulfur_threshold = 141.83
total_sulfur_data = data[:, 6]
predicted_indexes = total_sulfur_data < total_sulfur_threshold

(predicted_indexes.shape, predicted_indexes.dtype, predicted_indexes.sum())

(torch.Size([4898]), torch.bool, tensor(2727))

In [28]:
# Boolean mask selecting the rows whose target is greater than 5.
actual_indexs = torch.gt(target, 5)
(actual_indexs.shape, actual_indexs.dtype, actual_indexs.sum())

(torch.Size([4898]), torch.bool, tensor(3258))

In [29]:
# Count the rows flagged by both masks, then express that count as a fraction
# of the predicted rows and as a fraction of the actual rows.
both_indexes = actual_indexs & predicted_indexes
n_matches = both_indexes.sum().item()
n_predicted = predicted_indexes.sum().item()
n_actual = actual_indexs.sum().item()

(n_matches, n_matches / n_predicted, n_matches / n_actual)

(2018, 0.74000733406674, 0.6193984039287906)