In [9]:
import torch
import time 
import numpy as np
device = 'cuda' if torch.cuda.is_available else 'cpu'
device

'cuda'

## Basic functions

In [10]:
randint = torch.randint(-100, 100, (6, ))
print(randint)

#Keeping this in mind for when we need BLOCKS for our BACTHES from our random CORPUS

tensor([  1, -24,  32,   3, -27,  74])


In [12]:
tensor = torch.tensor([[0.1, 1.2], [2.2, 3.1], [4.9, 5.2]])
print(tensor)
# we get a 3x2 matrix

tensor([[0.1000, 1.2000],
        [2.2000, 3.1000],
        [4.9000, 5.2000]])


In [13]:
zeros = torch.zeros(2, 3)
zeros

tensor([[0., 0., 0.],
        [0., 0., 0.]])

In [14]:
arange = torch.arange(5)
arange

tensor([0, 1, 2, 3, 4])

In [15]:
linespace = torch.linspace(3, 10, steps=5)
linespace

#Arithmetic progression

tensor([ 3.0000,  4.7500,  6.5000,  8.2500, 10.0000])

In [16]:
logspace= torch.logspace(start=-10, end = 10, steps = 5)
logspace 

tensor([1.0000e-10, 1.0000e-05, 1.0000e+00, 1.0000e+05, 1.0000e+10])

In [17]:
## Basically an DIAGONAL matrix 
eye = torch.eye(5)
eye 

tensor([[1., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 1.]])

In [18]:
a = torch.empty((2, 3), dtype=torch.int64)
empty = torch.empty_like(a)
empty

tensor([[0, 0, 0],
        [0, 0, 0]])

In [36]:
#PROBABILITIES

prob = torch.tensor([0.2, 0.8])
#40% chance of getting a 0, 60% chance of getting a 1
sample = torch.multinomial(prob, num_samples=10, replacement = True)
print(sample)

tensor([1, 0, 1, 1, 1, 1, 1, 1, 1, 1])


In [37]:
#CONCAT

tensor = torch.tensor([1, 2, 3, 4])
out = torch.cat((tensor, torch.tensor([5])), dim=0)
print(out)

# we will use this when we generating text given context

tensor([1, 2, 3, 4, 5])


So basically it will be zero, the we use the prob distrubution to concat the first char, and then the next ->

In [38]:
out = torch.tril(torch.ones(5, 5))
print(out)
#tril = triangle - lower

tensor([[1., 0., 0., 0., 0.],
        [1., 1., 0., 0., 0.],
        [1., 1., 1., 0., 0.],
        [1., 1., 1., 1., 0.],
        [1., 1., 1., 1., 1.]])


This is imp. When we are trying to predict integers in a seq, we only know what is in the current history.
In the above ex, suppose we have predicted a 1 and the rest we havent, then we predicted anotehr 1 and the rest 3 are left, so here we are talking in history. (predicting while knowing what the answer is, just like in an exam. Basically in history of your knowledge, which answers would you predict)

### Imp function : nn.Linear 
- builds a fully connected dense linear neural network with y = x*wT+b
- W = weight matrix of size [out_features, in_features]
- each row of W corresponds to weights associated with one neurom in each layer
- b = bias vector of size [out_features] => it provides an offset to the linear transformation.

In [60]:
import torch.nn as nn

sample = torch.tensor([10., 10., 10.])
linear = nn.Linear(3, 3, bias = False)
##the parameters are (output dimensions, input dimensions)
print(linear(sample))

tensor([ 9.4093,  2.5464, -3.6148], grad_fn=<SqueezeBackward4>)


In [75]:
# SOFTMAX FUNCITON (basic) = exp(x)/sum(exp(x))

import torch.nn.functional as F

tensor1 = torch.tensor([1.0, 2.0, 3.0])
softmax_output = F.softmax(tensor1, dim=0)

print(softmax_output)

tensor([0.0900, 0.2447, 0.6652])


## Embedding Vector

Also known as embeddings - ***Type of numerical representation***

- Fundamental technique for converting categorical data such as words as discrete items, into continuous vectors
- Categorical data in its raw form cannot be directly used as input for most ML algorithms, especially NNs, hence.
- Unlike hot-encoding, where each category is represented as binary vector, embedding vectors are dense and continuous
- Embeddings assign each category a vector of floating-point numbers.
- 

In [89]:
import torch.nn as nn

vocab_size = 2600
embedding_dim = 100

embedding = nn.Embedding(vocab_size, embedding_dim)

input_indices = torch.LongTensor([1, 5, 3, 2])

embedded_output = embedding(input_indices)

print(embedded_output.shape)


torch.Size([4, 100])


## Dot product and Matrix Multiplication



In [93]:
a = torch.tensor([[1, 2], [3,4], [5, 6]])
b = torch.tensor([[7,8,9], [10, 11, 12]])

print(a @ b)
print(torch.matmul(a, b))

tensor([[ 27,  30,  33],
        [ 61,  68,  75],
        [ 95, 106, 117]])
tensor([[ 27,  30,  33],
        [ 61,  68,  75],
        [ 95, 106, 117]])


## Messing around with the GPU 

In [19]:
# Performance mesure

start_time = time.time()
#matix ops
zeros = torch.zeros(100, 100)
end_time = time.time()

elapsed_time = end_time -start_time
print(f"{elapsed_time : 8f}")

 0.000000


In [28]:
# numpy on CPU and torch on CUDA

torch_rand1 = torch.rand(100, 100, 100, 100).to(device)
torch_rand2 = torch.rand(100, 100, 100, 100).to(device)
np_rand1 = torch.rand(100, 100, 100, 100)
np_rand2 = torch.rand(100, 100, 100, 100)

start_time = time.time()
rand = (torch_rand1 @ torch_rand2)

end_time = time.time()
elapsed_time = end_time - start_time
print(f"{elapsed_time: .8f}", "GPU")

start_time = time.time()
rand = np.multiply(np_rand1, np_rand2)
end_time = time.time()

elapsed_time = end_time-start_time
print(f"{elapsed_time:.8f}", "CPU")


 0.02500033 GPU
0.14006114 CPU
