<a href="https://colab.research.google.com/github/9-coding/PyTorch/blob/main/04_changing_dtype_shape.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# dtype, shape 변경

In [1]:
import numpy as np
import torch
import tensorflow as tf

In [2]:
# Report the module name and version string of each imported tensor library.
for lib in (np, torch, tf):
    print(lib.__name__, lib.__version__)

numpy 1.25.2
torch 2.2.1+cu121
tensorflow 2.15.0


In [3]:
# Create the same four-element tensor in each framework from one Python tuple.
t = (1, 2, 3, 4)

a_np = np.array(t)         # NumPy ndarray
a_torch = torch.tensor(t)  # PyTorch tensor
a_tf = tf.constant(t)      # TensorFlow tensor

## dtype 변경

**dtype: the data type of each element in a tensor**

dtype을 바꾸면 원본 tensor 인스턴스를 기반으로 원하는 dtype으로 구성된 새로운 tensor 인스턴스가 생성됨 (원본과 데이터를 공유하지 않음).

### numpy

- `ndarray.astype(desired_dtype)`
- `np.uint8(src_array)`

### pytorch

- `torch.Tensor.type(desired_dtype)`
- `torch.Tensor.to(desired_dtype)`

### tensorflow

- `tensorflow.dtypes.cast(src_tensor, desired_dtype)`

In [4]:
# Two ways to change a NumPy array's dtype: calling the scalar type on the
# array, and ndarray.astype().
a = np.ones((3, 3))
b = np.uint8(a)
c = a.astype(np.float32)
print(c)

# Show the object id and dtype of the source array and of both conversions.
for arr in (a, b, c):
    print(id(arr), arr.dtype)

[[1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]]
140536783614672 float64
140536783614768 uint8
140531646361936 float32


In [5]:
# Tensor.to(dtype) hands back a converted tensor; a_torch itself is not
# modified, so its dtype is printed before and after for comparison.
print(a_torch.dtype)
a_torch_f32 = a_torch.to(torch.float32)
print(a_torch_f32)
print(a_torch.dtype)

torch.int64
tensor([1., 2., 3., 4.])
torch.int64


### Torch

In [7]:
# Start from a random tensor and derive two dtype-converted tensors from it.
a_torch = torch.rand((3, 4))
# Original note: .to() is usually used to move data to the GPU or CPU, and is
# combined with a dtype conversion when one is needed. Here it only converts.
b_torch = a_torch.to(dtype=torch.uint8)
c_torch = a_torch.type(torch.float64)

# Values first, then object id and dtype, for all three tensors.
for tensor_ in (a_torch, b_torch, c_torch):
    print(tensor_)
for tensor_ in (a_torch, b_torch, c_torch):
    print(id(tensor_), tensor_.dtype)

# Write into the converted tensors only.
b_torch[0, 1] = 9
c_torch[0, 0] = 1000
print()
# Changing values in b_torch and c_torch does not change a_torch.
for tensor_ in (a_torch, b_torch, c_torch):
    print(tensor_)

tensor([[0.9012, 0.9598, 0.0884, 0.3977],
        [0.6745, 0.2790, 0.0779, 0.5111],
        [0.9498, 0.9450, 0.5527, 0.2526]])
tensor([[0, 0, 0, 0],
        [0, 0, 0, 0],
        [0, 0, 0, 0]], dtype=torch.uint8)
tensor([[0.9012, 0.9598, 0.0884, 0.3977],
        [0.6745, 0.2790, 0.0779, 0.5111],
        [0.9498, 0.9450, 0.5527, 0.2526]], dtype=torch.float64)
140533210271152 torch.float32
140533210270832 torch.uint8
140533210280512 torch.float64

tensor([[0.9012, 0.9598, 0.0884, 0.3977],
        [0.6745, 0.2790, 0.0779, 0.5111],
        [0.9498, 0.9450, 0.5527, 0.2526]])
tensor([[0, 9, 0, 0],
        [0, 0, 0, 0],
        [0, 0, 0, 0]], dtype=torch.uint8)
tensor([[1.0000e+03, 9.5976e-01, 8.8353e-02, 3.9766e-01],
        [6.7447e-01, 2.7896e-01, 7.7905e-02, 5.1112e-01],
        [9.4979e-01, 9.4501e-01, 5.5273e-01, 2.5259e-01]], dtype=torch.float64)


### TensorFlow

In [8]:
# Cast a random tensor to float64 with tf.dtypes.cast and compare the object
# id and dtype of the source and the result.
a_tf = tf.random.uniform(shape=(3, 4))
c_tf = tf.dtypes.cast(a_tf, dtype=tf.float64)
print(c_tf)
for tensor_ in (a_tf, c_tf):
    print(id(tensor_), tensor_.dtype)

tf.Tensor(
[[0.74271929 0.40627432 0.20696175 0.51956022]
 [0.50978422 0.6868403  0.20246887 0.72943413]
 [0.80793452 0.15891683 0.66476834 0.75395751]], shape=(3, 4), dtype=float64)
140537210747568 <dtype: 'float32'>
140531646506688 <dtype: 'float64'>


## Shape 변경

- shape는 tensor의 각 축의 크기를 나타내는 sequence type의 인스턴스임.
- 즉, tensor의 크기와 형태를 나타냄.

### numpy

- `numpy.reshape(src_ndarray, desired_shape)`
- `numpy.ndarray.reshape(desired_shape)`

### pytorch

- `torch.reshape(src_tensor, desired_shape)`
- `torch.Tensor.reshape(desired_shape)`

### tensorflow

- `tensorflow.reshape(src_tensor, desired_shape)`

메모리가 contiguous해야 view를 만들 수 있음.

transpose는 행과 열을 바꿔놓기 때문에, 순서대로 이어지던 메모리 상의 원소 순서가 꼬이게 됨 (non-contiguous). `is_contiguous()`를 사용하면 tensor의 메모리가 연속적인지 확인할 수 있음.

<br>

### numpy

In [9]:
# np.arange(start, stop, step)
a = np.arange(0, 10, 1)
b = a.reshape(2, 5)           # method form
print(a.shape, id(a))
print(b.shape, id(b))
c = np.reshape(a, (5, 2))     # function form
print(c.shape, id(c))

# Write through c, then print all three arrays to compare their contents.
c[0, 0] = 1000
for arr in (a, b, c):
    print(arr)

(10,) 140531646363568
(2, 5) 140531380548496
(5, 2) 140531380543696
[1000    1    2    3    4    5    6    7    8    9]
[[1000    1    2    3    4]
 [   5    6    7    8    9]]
[[1000    1]
 [   2    3]
 [   4    5]
 [   6    7]
 [   8    9]]


### pytorch

In [10]:
# torch.arange(start, end, step)
a_torch = torch.arange(0, 10, 1)
b_torch = a_torch.reshape(2, 5)            # method form
print(a_torch.shape, id(a_torch))
print(b_torch.shape, id(b_torch))
c_torch = torch.reshape(a_torch, (5, 2))   # function form
print(c_torch.shape, id(c_torch))

# Write through c_torch, then print all three tensors to compare their contents.
c_torch[0, 0] = 1000

for tensor_ in (a_torch, b_torch, c_torch):
    print(tensor_)

torch.Size([10]) 140531380809984
torch.Size([2, 5]) 140531380808944
torch.Size([5, 2]) 140531380602656
tensor([1000,    1,    2,    3,    4,    5,    6,    7,    8,    9])
tensor([[1000,    1,    2,    3,    4],
        [   5,    6,    7,    8,    9]])
tensor([[1000,    1],
        [   2,    3],
        [   4,    5],
        [   6,    7],
        [   8,    9]])


### tensorflow

In [11]:
a_tensor = tf.range(0, 10, 1)
# Use the tf.reshape function here; per the original note, the method form
# a_tensor.reshape((2, 5)) does not work.
b_tensor = tf.reshape(a_tensor, (2, 5))
print(a_tensor.shape, id(a_tensor))
print(b_tensor.shape, id(b_tensor))

c_tensor = tf.reshape(a_tensor, (5, 2))
print(c_tensor.shape, id(c_tensor))

# Define which position to change and the value to put there.
indices = tf.constant([[0, 0]])  # row 0, column 0
updates = tf.constant([999])     # replacement value for that position

# Apply the update and rebind c_tensor to the returned tensor.
c_tensor = tf.tensor_scatter_nd_update(c_tensor, indices, updates)

# Print all three tensors to compare their contents after the update.
for tensor_ in (a_tensor, b_tensor, c_tensor):
    print(tensor_)

(10,) 140531646511792
(2, 5) 140531652537936
(5, 2) 140531646508448
tf.Tensor([0 1 2 3 4 5 6 7 8 9], shape=(10,), dtype=int32)
tf.Tensor(
[[0 1 2 3 4]
 [5 6 7 8 9]], shape=(2, 5), dtype=int32)
tf.Tensor(
[[999   1]
 [  2   3]
 [  4   5]
 [  6   7]
 [  8   9]], shape=(5, 2), dtype=int32)
