In [8]:
!nvidia-smi
!python --version

Fri Aug  2 16:47:30 2024       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 555.58.02              Driver Version: 555.58.02      CUDA Version: 12.5     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 4050 ...    Off |   00000000:01:00.0 Off |                  N/A |
| N/A   42C    P0             16W /   80W |      13MiB /   6141MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [9]:
with open("dataset.txt", 'r', encoding="utf-8") as f:
    text = f.read()
print(f"Total no. of characters in the text file : {len(text)}")

characters = set(text)
print("Character Set : ")
print(characters)
print(f"Total distinct characters : {len(characters)}")

Total no. of characters in the text file : 41241
Character Set : 
{'.', 'c', '1', 'b', '4', 'V', '\n', '9', '?', 'k', '_', '!', 'z', 'q', 'f', 'e', 'C', 'a', 't', 'Y', 'U', 'O', "'", 'w', '5', 'Q', 'B', 'A', 'M', 'K', 'i', 'H', ';', 'P', 'n', 'T', ']', 'F', 'R', ',', 'p', '*', 'v', 'r', 'u', '-', 'L', 'D', 'E', 'J', 'I', '[', 's', 'x', ':', 'S', 'N', 'h', ' ', 'd', 'y', 'l', 'W', 'j', 'G', '2', '"', 'm', 'o', 'g'}
Total distinct characters : 70


In [10]:
# Designing the tokenizer
# source : https://huggingface.co/docs/transformers/en/main_classes/tokenizer

# Character tokenizer
str_to_int = { char:ind for ind, char in enumerate(characters) }
print("String to Integer set : ")
print(str_to_int)

int_to_str = { ind:char for ind, char in enumerate(characters) }
print("Integer to String set : ")
print(int_to_str)

# Defining the encoder
def encoder(word):
    return [ str_to_int[char] for char in word ]

def decoder(lst):
    return ''.join([ int_to_str[i] for i in lst ])


encoded = encoder("Akilesh")
decoded = decoder(encoded)

print(f"Encoded information : {encoded} ")
print(f"Decoded information : {decoded} ")

String to Integer set : 
{'.': 0, 'c': 1, '1': 2, 'b': 3, '4': 4, 'V': 5, '\n': 6, '9': 7, '?': 8, 'k': 9, '_': 10, '!': 11, 'z': 12, 'q': 13, 'f': 14, 'e': 15, 'C': 16, 'a': 17, 't': 18, 'Y': 19, 'U': 20, 'O': 21, "'": 22, 'w': 23, '5': 24, 'Q': 25, 'B': 26, 'A': 27, 'M': 28, 'K': 29, 'i': 30, 'H': 31, ';': 32, 'P': 33, 'n': 34, 'T': 35, ']': 36, 'F': 37, 'R': 38, ',': 39, 'p': 40, '*': 41, 'v': 42, 'r': 43, 'u': 44, '-': 45, 'L': 46, 'D': 47, 'E': 48, 'J': 49, 'I': 50, '[': 51, 's': 52, 'x': 53, ':': 54, 'S': 55, 'N': 56, 'h': 57, ' ': 58, 'd': 59, 'y': 60, 'l': 61, 'W': 62, 'j': 63, 'G': 64, '2': 65, '"': 66, 'm': 67, 'o': 68, 'g': 69}
Integer to String set : 
{0: '.', 1: 'c', 2: '1', 3: 'b', 4: '4', 5: 'V', 6: '\n', 7: '9', 8: '?', 9: 'k', 10: '_', 11: '!', 12: 'z', 13: 'q', 14: 'f', 15: 'e', 16: 'C', 17: 'a', 18: 't', 19: 'Y', 20: 'U', 21: 'O', 22: "'", 23: 'w', 24: '5', 25: 'Q', 26: 'B', 27: 'A', 28: 'M', 29: 'K', 30: 'i', 31: 'H', 32: ';', 33: 'P', 34: 'n', 35: 'T', 36: ']', 37:

In [11]:
import torch
import numpy as np

In [12]:
# Handling larger data with tensors
encoded_text = torch.tensor(encoder(text), dtype=torch.long)
encoded_text

tensor([58, 58, 58,  ..., 60,  0,  6])

In [13]:
# Splitting the training and testing dataset ( 80:20 ratio )
n = int(0.8*len(text))
train_set = encoded_text[:n]
test_set = encoded_text[n:]

print(f"Train set : {train_set}")
print(f"Test set : {test_set}")

Train set : tensor([58, 58, 58,  ..., 17, 43, 59])
Test set : tensor([ 0,  6,  6,  ..., 60,  0,  6])


In [14]:
# Declaring the constants
BLOCK_SIZE = 18
BATCH_SIZE = 4

In [15]:
# Splitting the encoded data into bigrams
x = train_set[:BLOCK_SIZE]
y = train_set[1:BLOCK_SIZE+1]

for t in range(BLOCK_SIZE):
    context = x[:t+1]
    target = y[t]
    print(f"For the Input context -> {context}, the target value -> {target}")

For the Input context -> tensor([58]), the target value -> 58
For the Input context -> tensor([58, 58]), the target value -> 58
For the Input context -> tensor([58, 58, 58]), the target value -> 58
For the Input context -> tensor([58, 58, 58, 58]), the target value -> 58
For the Input context -> tensor([58, 58, 58, 58, 58]), the target value -> 58
For the Input context -> tensor([58, 58, 58, 58, 58, 58]), the target value -> 58
For the Input context -> tensor([58, 58, 58, 58, 58, 58, 58]), the target value -> 58
For the Input context -> tensor([58, 58, 58, 58, 58, 58, 58, 58]), the target value -> 58
For the Input context -> tensor([58, 58, 58, 58, 58, 58, 58, 58, 58]), the target value -> 58
For the Input context -> tensor([58, 58, 58, 58, 58, 58, 58, 58, 58, 58]), the target value -> 58
For the Input context -> tensor([58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58]), the target value -> 58
For the Input context -> tensor([58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58]), the target valu

In [16]:
# Verifying the train_set
print(train_set[:40])

tensor([58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58,
        58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 31, 15, 58, 62, 57, 68, 58,
        55, 15, 43, 42])


In [17]:
# Checking the availability of the devices
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cuda'

In [18]:
# Comparing the performance between CPU and GPU
shape = (100, 100, 100)

# Loading the tensor in the GPU
gpu_tensor_1 = torch.rand(shape).to(device)
gpu_tensor_2 = torch.rand(shape).to(device)

# Loading a tensor in the CPU using CPU
cpu_tensor_1 = np.random.rand(100, 100, 100)
cpu_tensor_2 = np.random.rand(100, 100, 100)

import time

In [19]:
%%time

# Evaluating the time for the GPU operation
start_time = time.time()
gpu_result = gpu_tensor_1 @ gpu_tensor_2
end_time = time.time()

gpu_time = end_time - start_time

print(f"Time taken by GPU : {gpu_time}")

Time taken by GPU : 0.04956936836242676
CPU times: user 14 ms, sys: 32.9 ms, total: 46.9 ms
Wall time: 49.7 ms


In [20]:
%%time
# Evaluating the time for the CPU opearation
start_time = time.time()
cpu_result = cpu_tensor_1.dot(cpu_tensor_2)
end_time = time.time()

cpu_time = end_time - start_time

print(f"Time taken by CPU : {cpu_time}")

Time taken by CPU : 5.0816285610198975
CPU times: user 4.85 s, sys: 220 ms, total: 5.07 s
Wall time: 5.08 s


In [24]:
# Masked fill function in PyTorch tensor
masked_out = torch.zeros((5, 5)).masked_fill(torch.triu(torch.ones((5, 5))) == 0, float('inf'))
print("Masked tensors : ")
print(masked_out)

Masked tensors : 
tensor([[0., 0., 0., 0., 0.],
        [inf, 0., 0., 0., 0.],
        [inf, inf, 0., 0., 0.],
        [inf, inf, inf, 0., 0.],
        [inf, inf, inf, inf, 0.]])


In [47]:
# Example of torch.nn.Linear module

from torch.nn import Linear

# Initialising the Linear module 
# Perform the operations -> y = xA^T + b
linear_layer = Linear(10, 10, bias=True/)
print(f"Linear layer instances : {linear_layer}")

# Checking the initial weight and bias
print("Initial weight : ")
print(linear_layer.weight)
print("Initial bias : ")
print(linear_layer.bias)

# multiply the mat1(1x10) and mat2(10x10)
in_tensor = torch.rand((1, 10))
print("Input tensors : ")
print(in_tensor)

out_tensor = linear_layer(in_tensor)
print("Output tensors : ")
print(out_tensor)

Linear layer instances : Linear(in_features=10, out_features=10, bias=False)
Initial weight : 
Parameter containing:
tensor([[ 0.0190,  0.1485, -0.2902,  0.1924, -0.0275,  0.2078,  0.2534, -0.1120,
         -0.1297, -0.1313],
        [ 0.2927, -0.0912, -0.0653, -0.0087,  0.0990,  0.1928,  0.1312, -0.1907,
          0.2208, -0.0170],
        [ 0.1072,  0.2369, -0.0004, -0.1037, -0.2781, -0.1441,  0.0343, -0.1972,
          0.0558, -0.2881],
        [-0.0588,  0.0541, -0.2708,  0.2119,  0.1381, -0.0037, -0.1540,  0.1196,
         -0.0901, -0.1160],
        [ 0.1128,  0.2377,  0.2327,  0.2894, -0.2680,  0.1812, -0.3088, -0.1544,
         -0.2978,  0.0228],
        [-0.1779,  0.2228, -0.1863,  0.0325,  0.1078, -0.2185, -0.2857,  0.2137,
          0.3151,  0.1123],
        [-0.2007,  0.1843,  0.1605, -0.0117,  0.0172, -0.2916, -0.0191, -0.2422,
         -0.1637, -0.1020],
        [-0.1472,  0.2095,  0.2829, -0.1924,  0.0795, -0.1742,  0.2037, -0.1400,
          0.1254, -0.0831],
        [-0