In [1]:
!nvidia-smi
!python --version

Fri Aug  2 22:25:44 2024       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 555.58.02              Driver Version: 555.58.02      CUDA Version: 12.5     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 4050 ...    Off |   00000000:01:00.0 Off |                  N/A |
| N/A   50C    P0             17W /   80W |      44MiB /   6141MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [2]:
with open("dataset.txt", 'r', encoding="utf-8") as f:
    text = f.read()
print(f"Total no. of characters in the text file : {len(text)}")

characters = set(text)
print("Character Set : ")
print(characters)
print(f"Total distinct characters : {len(characters)}")

Total no. of characters in the text file : 41241
Character Set : 
{'W', 'g', 'U', 'p', ',', '5', 'D', 'v', 'a', '-', 'q', 'J', '*', ' ', 'b', 'E', 'Q', 'c', 'N', 'e', 's', 'L', 'K', '"', 'h', 'I', 'S', 'P', 'r', 'm', 'T', '.', ']', 'G', 'F', '_', 'V', 'B', 'x', 'j', '[', 'A', '\n', 'u', '1', 'k', 'Y', 'z', 'y', ';', 'w', 'n', 'O', '?', 'i', 'C', '9', 't', 'd', ':', 'o', '4', 'R', 'l', "'", 'M', '2', 'H', 'f', '!'}
Total distinct characters : 70


In [3]:
# Designing the tokenizer
# source : https://huggingface.co/docs/transformers/en/main_classes/tokenizer

# Character tokenizer
str_to_int = { char:ind for ind, char in enumerate(characters) }
print("String to Integer set : ")
print(str_to_int)

int_to_str = { ind:char for ind, char in enumerate(characters) }
print("Integer to String set : ")
print(int_to_str)

# Defining the encoder
def encoder(word):
    return [ str_to_int[char] for char in word ]

def decoder(lst):
    return ''.join([ int_to_str[i] for i in lst ])


encoded = encoder("Akilesh")
decoded = decoder(encoded)

print(f"Encoded information : {encoded} ")
print(f"Decoded information : {decoded} ")

String to Integer set : 
{'W': 0, 'g': 1, 'U': 2, 'p': 3, ',': 4, '5': 5, 'D': 6, 'v': 7, 'a': 8, '-': 9, 'q': 10, 'J': 11, '*': 12, ' ': 13, 'b': 14, 'E': 15, 'Q': 16, 'c': 17, 'N': 18, 'e': 19, 's': 20, 'L': 21, 'K': 22, '"': 23, 'h': 24, 'I': 25, 'S': 26, 'P': 27, 'r': 28, 'm': 29, 'T': 30, '.': 31, ']': 32, 'G': 33, 'F': 34, '_': 35, 'V': 36, 'B': 37, 'x': 38, 'j': 39, '[': 40, 'A': 41, '\n': 42, 'u': 43, '1': 44, 'k': 45, 'Y': 46, 'z': 47, 'y': 48, ';': 49, 'w': 50, 'n': 51, 'O': 52, '?': 53, 'i': 54, 'C': 55, '9': 56, 't': 57, 'd': 58, ':': 59, 'o': 60, '4': 61, 'R': 62, 'l': 63, "'": 64, 'M': 65, '2': 66, 'H': 67, 'f': 68, '!': 69}
Integer to String set : 
{0: 'W', 1: 'g', 2: 'U', 3: 'p', 4: ',', 5: '5', 6: 'D', 7: 'v', 8: 'a', 9: '-', 10: 'q', 11: 'J', 12: '*', 13: ' ', 14: 'b', 15: 'E', 16: 'Q', 17: 'c', 18: 'N', 19: 'e', 20: 's', 21: 'L', 22: 'K', 23: '"', 24: 'h', 25: 'I', 26: 'S', 27: 'P', 28: 'r', 29: 'm', 30: 'T', 31: '.', 32: ']', 33: 'G', 34: 'F', 35: '_', 36: 'V', 37: 

In [4]:
import torch
import numpy as np

In [5]:
# Handling larger data with tensors
encoded_text = torch.tensor(encoder(text), dtype=torch.long)
encoded_text

tensor([13, 13, 13,  ..., 48, 31, 42])

In [6]:
# Splitting the training and testing dataset ( 80:20 ratio )
n = int(0.8*len(text))
train_set = encoded_text[:n]
test_set = encoded_text[n:]

print(f"Train set : {train_set}")
print(f"Test set : {test_set}")

Train set : tensor([13, 13, 13,  ...,  8, 28, 58])
Test set : tensor([31, 42, 42,  ..., 48, 31, 42])


In [7]:
# Declaring the constants
BLOCK_SIZE = 18
BATCH_SIZE = 4

In [8]:
# Splitting the encoded data into bigrams
x = train_set[:BLOCK_SIZE]
y = train_set[1:BLOCK_SIZE+1]

for t in range(BLOCK_SIZE):
    context = x[:t+1]
    target = y[t]
    print(f"For the Input context -> {context}, the target value -> {target}")

For the Input context -> tensor([13]), the target value -> 13
For the Input context -> tensor([13, 13]), the target value -> 13
For the Input context -> tensor([13, 13, 13]), the target value -> 13
For the Input context -> tensor([13, 13, 13, 13]), the target value -> 13
For the Input context -> tensor([13, 13, 13, 13, 13]), the target value -> 13
For the Input context -> tensor([13, 13, 13, 13, 13, 13]), the target value -> 13
For the Input context -> tensor([13, 13, 13, 13, 13, 13, 13]), the target value -> 13
For the Input context -> tensor([13, 13, 13, 13, 13, 13, 13, 13]), the target value -> 13
For the Input context -> tensor([13, 13, 13, 13, 13, 13, 13, 13, 13]), the target value -> 13
For the Input context -> tensor([13, 13, 13, 13, 13, 13, 13, 13, 13, 13]), the target value -> 13
For the Input context -> tensor([13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13]), the target value -> 13
For the Input context -> tensor([13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13]), the target valu

In [9]:
# Verifying the train_set
print(train_set[:40])

tensor([13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
        13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 67, 19, 13,  0, 24, 60, 13,
        26, 19, 28,  7])


In [10]:
# Checking the availability of the devices
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cuda'

In [11]:
# Comparing the performance between CPU and GPU
shape = (100, 100, 100)

# Loading the tensor in the GPU
gpu_tensor_1 = torch.rand(shape).to(device)
gpu_tensor_2 = torch.rand(shape).to(device)

# Loading a tensor in the CPU using CPU
cpu_tensor_1 = np.random.rand(100, 100, 100)
cpu_tensor_2 = np.random.rand(100, 100, 100)

import time

In [12]:
%%time

# Evaluating the time for the GPU operation
start_time = time.time()
gpu_result = gpu_tensor_1 @ gpu_tensor_2
end_time = time.time()

gpu_time = end_time - start_time

print(f"Time taken by GPU : {gpu_time}")

Time taken by GPU : 0.04595470428466797
CPU times: user 29.3 ms, sys: 14.4 ms, total: 43.7 ms
Wall time: 46.1 ms


In [13]:
%%time
# Evaluating the time for the CPU opearation
start_time = time.time()
cpu_result = cpu_tensor_1.dot(cpu_tensor_2)
end_time = time.time()

cpu_time = end_time - start_time

print(f"Time taken by CPU : {cpu_time}")

Time taken by CPU : 5.92145299911499
CPU times: user 5.21 s, sys: 702 ms, total: 5.91 s
Wall time: 5.92 s


In [28]:
# Upper triangular matrix
torch.triu(torch.ones((5, 5)))

tensor([[1., 1., 1., 1., 1.],
        [0., 1., 1., 1., 1.],
        [0., 0., 1., 1., 1.],
        [0., 0., 0., 1., 1.],
        [0., 0., 0., 0., 1.]])

In [29]:
# Masked fill function in PyTorch tensor
masked_out = torch.zeros((5, 5)).masked_fill(torch.triu(torch.ones((5, 5))) == 0, float('inf'))
print("Masked tensors : ")
print(masked_out)

Masked tensors : 
tensor([[0., 0., 0., 0., 0.],
        [inf, 0., 0., 0., 0.],
        [inf, inf, 0., 0., 0.],
        [inf, inf, inf, 0., 0.],
        [inf, inf, inf, inf, 0.]])


In [30]:
# Example of torch.nn.Linear module

from torch.nn import Linear

# Initialising the Linear module 
# Perform the operations -> y = xA^T + b
linear_layer = Linear(10, 10, bias=True)
print(f"Linear layer instances : {linear_layer}")

# Checking the initial weight and bias
print("Layer weight : ")
print(linear_layer.weight)
print("Layer bias : ")
print(linear_layer.bias)

# multiply the mat1(1x10) and mat2(10x10)
in_tensor = torch.rand((1, 10))
print("Input tensors : ")
print(in_tensor)

out_tensor = linear_layer(in_tensor)
print("Output tensors : ")
print(out_tensor)

Linear layer instances : Linear(in_features=10, out_features=10, bias=True)
Layer weight : 
Parameter containing:
tensor([[-0.2891,  0.0110, -0.1532, -0.0784,  0.0460, -0.1411, -0.0891, -0.1020,
         -0.1600,  0.1876],
        [ 0.2810, -0.2462, -0.3046,  0.2006,  0.1112,  0.3092,  0.1315,  0.1318,
         -0.2010, -0.1233],
        [-0.1596, -0.3042,  0.2773,  0.0317,  0.3156, -0.0209, -0.0972,  0.0848,
         -0.2068, -0.3118],
        [-0.0726,  0.2966, -0.0307,  0.2418, -0.3014,  0.0796, -0.2393, -0.2154,
         -0.1649, -0.0529],
        [-0.1402, -0.3020, -0.0155, -0.2382,  0.2940, -0.2861,  0.2059, -0.0497,
          0.0451,  0.2967],
        [ 0.0953,  0.2637,  0.2004, -0.2375, -0.1823,  0.2173, -0.1855, -0.0125,
         -0.2493, -0.2728],
        [ 0.1997,  0.2051,  0.2904,  0.1485,  0.1018,  0.2292,  0.0184,  0.0171,
          0.0578,  0.2521],
        [ 0.2066,  0.2605, -0.1517,  0.2145,  0.1337, -0.2377, -0.1766, -0.1730,
         -0.0411,  0.0947],
        [-0.11

In [39]:
import torch.nn.functional as F

input_tensor = torch.arange(5, dtype=torch.float)
print(f"Input tensors : {input_tensor}")
# print(input_tensor.dtype)

# Performing the softmax function
softmax_output = F.softmax(input_tensor, dim=0)
print(f"Output tensor : {softmax_output}")

Input tensors : tensor([0., 1., 2., 3., 4.])
Output tensor : tensor([0.0117, 0.0317, 0.0861, 0.2341, 0.6364])
