# Data Manipulation

In [1]:
import torch

In [2]:
torch.cuda.get_device_name(0)

'NVIDIA GeForce GTX 1660 Ti with Max-Q Design'

In [3]:
torch.cuda.get_device_properties(0)

_CudaDeviceProperties(name='NVIDIA GeForce GTX 1660 Ti with Max-Q Design', major=7, minor=5, total_memory=6143MB, multi_processor_count=24)

In [4]:
x = torch.arange(12, dtype = torch.float32)
x

tensor([ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11.])

In [5]:
x.numel()

12

In [6]:
x.shape

torch.Size([12])

In [7]:
x = x.reshape(3, 4)
x

tensor([[ 0.,  1.,  2.,  3.],
        [ 4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11.]])

In [8]:
torch.zeros((2,3,4))

tensor([[[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]],

        [[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]]])

In [9]:
torch.ones((2,3))

tensor([[1., 1., 1.],
        [1., 1., 1.]])

In [10]:
torch.randn((3,4))

tensor([[-0.5425, -1.4454,  0.9697, -1.9024],
        [-0.2378, -0.5520,  0.0963, -1.4861],
        [ 1.2482,  1.1169,  0.3245,  0.9829]])

In [11]:
torch.tensor([[2,1,4,3], [1,2,3,4], [4,3,2,1]])

tensor([[2, 1, 4, 3],
        [1, 2, 3, 4],
        [4, 3, 2, 1]])

In [12]:
x[-1], x[1:3]

(tensor([ 8.,  9., 10., 11.]),
 tensor([[ 4.,  5.,  6.,  7.],
         [ 8.,  9., 10., 11.]]))

In [13]:
x[1,2] = 17
x

tensor([[ 0.,  1.,  2.,  3.],
        [ 4.,  5., 17.,  7.],
        [ 8.,  9., 10., 11.]])

In [14]:
x[:2, :] = 12
x

tensor([[12., 12., 12., 12.],
        [12., 12., 12., 12.],
        [ 8.,  9., 10., 11.]])

In [15]:
torch.exp(x)

tensor([[162754.7969, 162754.7969, 162754.7969, 162754.7969],
        [162754.7969, 162754.7969, 162754.7969, 162754.7969],
        [  2980.9580,   8103.0840,  22026.4648,  59874.1406]])

In [16]:
x = torch.tensor([1.0,2,4,8])
y = torch.tensor([2,2,2,2])
x+y, x-y, x*y,x/y,x**y

(tensor([ 3.,  4.,  6., 10.]),
 tensor([-1.,  0.,  2.,  6.]),
 tensor([ 2.,  4.,  8., 16.]),
 tensor([0.5000, 1.0000, 2.0000, 4.0000]),
 tensor([ 1.,  4., 16., 64.]))

In [17]:
X = torch.arange(12, dtype = torch.float32).reshape((3,4))
Y = torch.tensor([[2.0, 1, 4,3],[1,2,3,4],[4,3,2,1]])
torch.cat((X,Y), dim = 0), torch.cat((X,Y), dim = 1)

(tensor([[ 0.,  1.,  2.,  3.],
         [ 4.,  5.,  6.,  7.],
         [ 8.,  9., 10., 11.],
         [ 2.,  1.,  4.,  3.],
         [ 1.,  2.,  3.,  4.],
         [ 4.,  3.,  2.,  1.]]),
 tensor([[ 0.,  1.,  2.,  3.,  2.,  1.,  4.,  3.],
         [ 4.,  5.,  6.,  7.,  1.,  2.,  3.,  4.],
         [ 8.,  9., 10., 11.,  4.,  3.,  2.,  1.]]))

In [18]:
X == Y

tensor([[False,  True, False,  True],
        [False, False, False, False],
        [False, False, False, False]])

In [19]:
X.sum(), X.sum(axis = 0), X.sum(axis = 1)

(tensor(66.), tensor([12., 15., 18., 21.]), tensor([ 6., 22., 38.]))

In [20]:
a = torch.arange(3).reshape((3,1))
b = torch.arange(2).reshape((1,-1))
a,b, a+b

(tensor([[0],
         [1],
         [2]]),
 tensor([[0, 1]]),
 tensor([[0, 1],
         [1, 2],
         [2, 3]]))

## Saving Memory
Running operations can cause new memory to be allocated to host results. For example, if we write Y = X + Y, we dereference the tensor that Y used to point to and instead point Y at the newly allocated memory. We can demonstrate this issue with Python’s id() function, which gives us the exact address of the referenced object in memory. Note that after we run Y = Y + X, id(Y) points to a different location. That’s because Python first evaluates Y + X, allocating new memory for the result and then points Y to this new location in memory.

In [21]:
before = id(Y)
Y = Y+X
id(Y) == before

False

In [22]:
Z = torch.zeros_like(Y)
print(f'id(Z): {id(Z)}')
Z[:] = X+Y
print(f'id(Z): {id(Z)}')

id(Z): 2878201726592
id(Z): 2878201726592


If X is not used after this we can also do:

In [23]:
print(f'id(X): {id(X)}')
X[:] = X+Y
print(f'id(X): {id(X)}')

id(X): 2878201736192
id(X): 2878201736192


In [24]:
A = X.numpy()
B = torch.from_numpy(A)
type(A), type(B)

(numpy.ndarray, torch.Tensor)

In [25]:
a = torch.tensor([3.5])
a, a.item(), float(a), int(a)

(tensor([3.5000]), 3.5, 3.5, 3)

In [26]:
X==Y, X<Y, X>Y

(tensor([[ True, False, False, False],
         [False, False, False, False],
         [False, False, False, False]]),
 tensor([[False, False, False, False],
         [False, False, False, False],
         [False, False, False, False]]),
 tensor([[False,  True,  True,  True],
         [ True,  True,  True,  True],
         [ True,  True,  True,  True]]))

In [27]:
a = torch.arange(25, dtype = torch.float32).reshape((5,5))
b = torch.arange(5, dtype = torch.float32).reshape((1,5))
a+b

tensor([[ 0.,  2.,  4.,  6.,  8.],
        [ 5.,  7.,  9., 11., 13.],
        [10., 12., 14., 16., 18.],
        [15., 17., 19., 21., 23.],
        [20., 22., 24., 26., 28.]])

## Data Preprocessing

In [28]:
import os

os.makedirs(os.path.join('.', 'data'), exist_ok=True)
data_file = os.path.join('.', 'data', 'house_tiny.csv')
with open(data_file, 'w') as f:
    f.write('''NumRooms,RoofType,Price
    NA,NA,127500
    2,NA,106000
    4,Slate,178100
    NA,NA,140000
    ''')

In [29]:
import pandas as pd

data = pd.read_csv(data_file)
print(data)

  NumRooms RoofType   Price
0       NA      NaN  127500
1        2      NaN  106000
2        4    Slate  178100
3       NA      NaN  140000


In [30]:
inputs, targets = data.iloc[:,:2], data.iloc[:,-1]
inputs = pd.get_dummies(inputs, dummy_na=True)
print(inputs)

   NumRooms_    2  NumRooms_    4  NumRooms_    NA  NumRooms_nan  \
0               0               0                1             0   
1               1               0                0             0   
2               0               1                0             0   
3               0               0                1             0   

   RoofType_Slate  RoofType_nan  
0               0             1  
1               0             1  
2               1             0  
3               0             1  


In [31]:
inputs = inputs.fillna(inputs.mean())
print(inputs)

   NumRooms_    2  NumRooms_    4  NumRooms_    NA  NumRooms_nan  \
0               0               0                1             0   
1               1               0                0             0   
2               0               1                0             0   
3               0               0                1             0   

   RoofType_Slate  RoofType_nan  
0               0             1  
1               0             1  
2               1             0  
3               0             1  


In [32]:
X, y = torch.tensor(inputs.values), torch.tensor(targets.values)
X,y

(tensor([[0, 0, 1, 0, 0, 1],
         [1, 0, 0, 0, 0, 1],
         [0, 1, 0, 0, 1, 0],
         [0, 0, 1, 0, 0, 1]], dtype=torch.uint8),
 tensor([127500, 106000, 178100, 140000]))

In [33]:
data = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/abalone/abalone.data')
data

Unnamed: 0,M,0.455,0.365,0.095,0.514,0.2245,0.101,0.15,15
0,M,0.350,0.265,0.090,0.2255,0.0995,0.0485,0.0700,7
1,F,0.530,0.420,0.135,0.6770,0.2565,0.1415,0.2100,9
2,M,0.440,0.365,0.125,0.5160,0.2155,0.1140,0.1550,10
3,I,0.330,0.255,0.080,0.2050,0.0895,0.0395,0.0550,7
4,I,0.425,0.300,0.095,0.3515,0.1410,0.0775,0.1200,8
...,...,...,...,...,...,...,...,...,...
4171,F,0.565,0.450,0.165,0.8870,0.3700,0.2390,0.2490,11
4172,M,0.590,0.440,0.135,0.9660,0.4390,0.2145,0.2605,10
4173,M,0.600,0.475,0.205,1.1760,0.5255,0.2875,0.3080,9
4174,F,0.625,0.485,0.150,1.0945,0.5310,0.2610,0.2960,10


In [34]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4176 entries, 0 to 4175
Data columns (total 9 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   M       4176 non-null   object 
 1   0.455   4176 non-null   float64
 2   0.365   4176 non-null   float64
 3   0.095   4176 non-null   float64
 4   0.514   4176 non-null   float64
 5   0.2245  4176 non-null   float64
 6   0.101   4176 non-null   float64
 7   0.15    4176 non-null   float64
 8   15      4176 non-null   int64  
dtypes: float64(7), int64(1), object(1)
memory usage: 293.8+ KB


## Linear Algebra

In [35]:
x = torch.tensor(3.0)
y = torch.tensor(2.0)
x+y,x*y,x/y,x**y

(tensor(5.), tensor(6.), tensor(1.5000), tensor(9.))

In [36]:
# Vectors
x = torch.arange(3)
x

tensor([0, 1, 2])

In [37]:
x[2]

tensor(2)

In [38]:
len(x)

3

In [39]:
x.shape

torch.Size([3])

In [40]:
#Matrices
A = torch.arange(6).reshape((3,-1))
A

tensor([[0, 1],
        [2, 3],
        [4, 5]])

In [41]:
A.T

tensor([[0, 2, 4],
        [1, 3, 5]])

In [42]:
A = torch.tensor([[1,2,3],[2,0,4],[3,4,5]])
A==A.T

tensor([[True, True, True],
        [True, True, True],
        [True, True, True]])

In [43]:
torch.arange(24).reshape(2,3,4)

tensor([[[ 0,  1,  2,  3],
         [ 4,  5,  6,  7],
         [ 8,  9, 10, 11]],

        [[12, 13, 14, 15],
         [16, 17, 18, 19],
         [20, 21, 22, 23]]])

In [44]:
A = torch.arange(6, dtype = torch.float32).reshape(2,3)
B = A.clone()
A, A+B

(tensor([[0., 1., 2.],
         [3., 4., 5.]]),
 tensor([[ 0.,  2.,  4.],
         [ 6.,  8., 10.]]))

In [45]:
# Element-wise product
A*B

tensor([[ 0.,  1.,  4.],
        [ 9., 16., 25.]])

In [46]:
# Scalar Multiplication
2*A

tensor([[ 0.,  2.,  4.],
        [ 6.,  8., 10.]])

In [47]:
# Reduction
x = torch.arange(3,dtype=torch.float32)
x.sum()

tensor(3.)

In [48]:
A.shape, A.sum()

(torch.Size([2, 3]), tensor(15.))

In [49]:
A.shape, A.sum(axis = 0).shape

(torch.Size([2, 3]), torch.Size([3]))

In [50]:
A.sum(axis=[0,1])==A.sum()

tensor(True)

In [51]:
A.mean(), A.sum()/A.numel()

(tensor(2.5000), tensor(2.5000))

In [52]:
A.mean(axis = 0), A.sum(axis = 0)/A.shape[0]

(tensor([1.5000, 2.5000, 3.5000]), tensor([1.5000, 2.5000, 3.5000]))

In [53]:
sum_A = A.sum(axis = 1, keepdims = True)
sum_A, sum_A.shape

(tensor([[ 3.],
         [12.]]),
 torch.Size([2, 1]))

In [54]:
A/sum_A

tensor([[0.0000, 0.3333, 0.6667],
        [0.2500, 0.3333, 0.4167]])

In [55]:
A.cumsum(axis = 0)

tensor([[0., 1., 2.],
        [3., 5., 7.]])

In [56]:
y = torch.ones(3, dtype = torch.float32)
x, y, torch.dot(x,y)

(tensor([0., 1., 2.]), tensor([1., 1., 1.]), tensor(3.))

In [57]:
torch.sum(x*y)

tensor(3.)

In [58]:
A.shape, x.shape, torch.mv(A,x), A@x

(torch.Size([2, 3]), torch.Size([3]), tensor([ 5., 14.]), tensor([ 5., 14.]))

In [59]:
B = torch.ones(3,4)
torch.mm(A,B), A@B

(tensor([[ 3.,  3.,  3.,  3.],
         [12., 12., 12., 12.]]),
 tensor([[ 3.,  3.,  3.,  3.],
         [12., 12., 12., 12.]]))

In [60]:
u = torch.tensor([3.0, -4.0])
torch.norm(u)

tensor(5.)

In [61]:
torch.abs(u).sum()

tensor(7.)

In [62]:
torch.norm(torch.ones((4,9)))

tensor(6.)

In [63]:
(A.T).T == A

tensor([[True, True, True],
        [True, True, True]])

In [64]:
A = torch.randn((3,4))
B = torch.randn((3,4))

In [65]:
A.T + B.T == (A+B).T

tensor([[True, True, True],
        [True, True, True],
        [True, True, True],
        [True, True, True]])

In [66]:
A = torch.randn((3,3))
A+A.T

tensor([[ 0.3672,  1.6869,  0.3990],
        [ 1.6869,  1.5050, -1.1247],
        [ 0.3990, -1.1247, -0.5968]])

In [67]:
len(torch.ones((2,3,4)))

2

In [68]:
A/A.sum(axis = 1)

tensor([[ 0.1403,  0.3091, -0.6436],
        [ 0.9634,  0.5464,  0.5854],
        [-0.2298, -0.3544,  0.2744]])

In [69]:
torch.linalg.norm(torch.ones((2,3,4)))

tensor(4.8990)

## Automatic Differentiation

In [70]:
x = torch.arange(4.0)
x

tensor([0., 1., 2., 3.])

In [71]:
x.requires_grad_(True)
x.grad

In [72]:
y = 2 * torch.dot(x, x)
y

tensor(28., grad_fn=<MulBackward0>)

In [73]:
y.backward()
x.grad

tensor([ 0.,  4.,  8., 12.])

In [74]:
x.grad==4*x

tensor([True, True, True, True])

In [75]:
x.grad.zero_()
y = x.sum()
y.backward()
x.grad

tensor([1., 1., 1., 1.])

In [76]:
y

tensor(6., grad_fn=<SumBackward0>)

For non scalars we need to provide a vector to tell PyTorch how to reduce the object to a scalar.

In [77]:
x.grad.zero_()
y = x*x
y.backward(gradient=torch.ones(len(y)))
x.grad

tensor([0., 2., 4., 6.])

## Detaching Computation
Sometimes, we wish to move some calculations outside of the recorded computational graph. For example, say that we use the input to create some auxiliary intermediate terms for which we do not want to compute a gradient. In this case, we need to detach the respective computational influence graph from the final result. The following toy example makes this clearer: suppose we have z = x * y and y = x * x but we want to focus on the direct influence of x on z rather than the influence conveyed via y. In this case, we can create a new variable u that takes the same value as y but whose provenance (how it was created) has been wiped out. Thus u has no ancestors in the graph and gradients to not flow through u to x. For example, taking the gradient of z = x * u will yield the result x, (not 3 * x * x as you might have expected since z = x * x * x).

In [78]:
x.grad.zero_()
y = x*x
u = y.detach()
z = u * x
z.sum().backward()
x.grad == u

tensor([True, True, True, True])

In [79]:
x.grad.zero_()
y.sum().backward()
x.grad == 2*x

tensor([True, True, True, True])

In [80]:
def f(a):
    b = a * 2
    while b.norm() < 1000:
        b = b*2
    if b.sum() > 0:
        c = b
    else:
        c = 100*b
    return c

In [81]:
a = torch.randn(size = (), requires_grad=True)
d = f(a)
d.backward()

In [82]:
a.grad == d/a

tensor(False)

In [84]:
!pip install matplotlib

Collecting matplotlib
  Downloading matplotlib-3.6.2-cp310-cp310-win_amd64.whl (7.2 MB)
     ---------------------------------------- 7.2/7.2 MB 9.6 MB/s eta 0:00:00
Collecting kiwisolver>=1.0.1
  Using cached kiwisolver-1.4.4-cp310-cp310-win_amd64.whl (55 kB)
Collecting cycler>=0.10
  Using cached cycler-0.11.0-py3-none-any.whl (6.4 kB)
Collecting fonttools>=4.22.0
  Downloading fonttools-4.38.0-py3-none-any.whl (965 kB)
     ------------------------------------- 965.4/965.4 kB 10.2 MB/s eta 0:00:00
Collecting contourpy>=1.0.1
  Downloading contourpy-1.0.6-cp310-cp310-win_amd64.whl (163 kB)
     -------------------------------------- 163.6/163.6 kB 9.6 MB/s eta 0:00:00
Installing collected packages: kiwisolver, fonttools, cycler, contourpy, matplotlib
Successfully installed contourpy-1.0.6 cycler-0.11.0 fonttools-4.38.0 kiwisolver-1.4.4 matplotlib-3.6.2


In [85]:
import random
from torch.distributions.multinomial import Multinomial
import matplotlib.pyplot as plt
%matplotlib inline

In [86]:
num_tosses = 100
heads = sum([random.random() > 0.5 for _ in range(num_tosses)])
tails = num_tosses - heads
print(f"heads: {heads};tails:{tails}")

heads: 59;tails:41


In [87]:
fair_probs = torch.tensor([0.5, 0.5])
Multinomial(100, fair_probs).sample()

tensor([46., 54.])

In [88]:
Multinomial(100, fair_probs).sample()/100

tensor([0.4600, 0.5400])

In [89]:
counts = Multinomial(10000, fair_probs).sample()
counts/10000

tensor([0.4975, 0.5025])