In [113]:
# !wget https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt

In [None]:
with open('input.txt', 'r', encoding='utf-8') as f:
    text = f.read()
print(len(text))

In [None]:
print(text[:50])

In [None]:
chars = sorted(list(set(text)))
vocab_size = len(chars)
chars = ''.join(chars)
chars

In [117]:
stoi = {ch:i for i,ch in enumerate(chars)}
itos = {i:ch for i,ch in enumerate(chars)}

encode = lambda ch: [stoi[c] for c in ch]
decode = lambda ix: ''.join([itos[i] for i in ix])

In [None]:
print(encode("crafting"))
print(decode([41, 56, 39, 44, 58, 47, 52, 45]))

In [None]:
import torch
data = torch.tensor([stoi[ch] for ch in text], dtype=torch.long)
print(data.shape, data.dtype)
data[:100]


In [120]:
n = int(0.9 * len(data))
train_data = data[:n]
val_data = data[n:]

In [None]:
block_size = 8
train_data[:block_size+1]

In [None]:
x = train_data[:block_size]
y = train_data[1:block_size+1]

for i in range(block_size):
    input = x[:i+1]
    output = y[i]
    print(f'when input is {input} the target is {output}')


In [None]:
torch.manual_seed(1337)
batch_size = 4
block_size = 8

def get_batch(split):
    data = train_data if split == 'train' else val_data
    ix = torch.randint(len(data)-block_size, size=(batch_size,))
    x = torch.stack([data[i:i+block_size] for i in ix])
    y = torch.stack([data[i+1:i+block_size+1] for i in ix])
    return x,y

xb,yb = get_batch('train')
print(xb,'\n',yb)

for b in range(batch_size):
    for c in range(block_size):
        x = xb[b,:c+1]
        y = yb[b,c]
        print(f'when input is {x} the output is {y}')


In [None]:
import torch
import torch.nn as nn
from torch.nn import functional as F
torch.manual_seed(1337)

class BigramLanguageModel(nn.Module):
    def __init__(self, vocab_size):
        super().__init__()
        self.token_embedding_table = nn.Embedding(vocab_size, vocab_size)

    def forward(self, idx, targets=None):
        logits = self.token_embedding_table(idx)

        if targets == None:
            loss = None
        else:
            B, T, C = logits.shape
            logits = logits.view(B*T, C)
            targets = targets.view(B*T)
            loss = F.cross_entropy(logits, targets)

        return logits, loss
    
    def generate(self, idx, max_new_tokens):
        for _ in range(max_new_tokens):
            logits, loss = self(idx)
            logits = logits[:, -1, :] # becomes (B, C)
            probs = F.softmax(logits, dim=-1)
            idx_next = torch.multinomial(probs, num_samples=1)
            idx = torch.cat((idx,idx_next), dim=-1)
        return idx

model = BigramLanguageModel(vocab_size)
logits, loss = model(xb, yb)
# print(logits.shape)
# print(loss)

decode(model.generate(xb[:1,:], max_new_tokens=1000)[0].tolist())


In [125]:
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

In [None]:
batch_size = 32
for steps in range(10000):
    xb, yb = get_batch('train')

    logits, loss = model(xb, yb)
    optimizer.zero_grad(set_to_none=True)
    loss.backward()
    optimizer.step()

print(loss.item())

In [None]:
print(decode(model.generate(xb[:1,:], max_new_tokens=1000)[0].tolist()))

THE MATHEMATICAL TRICK IN SELF-ATTENTION

In [None]:
torch.manual_seed(1337)
B,T,C = 4,8,5
x = torch.randn(B,T,C)
x.shape

In [None]:
xbow = torch.zeros((B,T,C))
avg_matrix = torch.tril(torch.ones(T,T))
avg_matrix /= torch.sum(avg_matrix, dim=-1, keepdim=True)
xbow = avg_matrix @ x

print(avg_matrix) # T, T
print(x[0]) # T, C
xbow[0]


In [269]:
torch.manual_seed(1337)
B,T,C = 4,8,32
x = torch.randn(B,T,C)


head_size = 16
key = nn.Linear(C, head_size, bias=False)
query = nn.Linear(C, head_size, bias=False)
value = nn.Linear(C, head_size, bias=False)
k = key(x) # (B, T, 16)
q = query(x) # (B, T, 16)
v = value(x)

In [None]:
wei = q @ k.transpose(-2, -1) * head_size**-0.5 # (B, T, 16) @ (B, 16, T) -> (B, T, T)
print(wei.var())
tril = torch.tril(torch.ones(T,T))
wei = wei.masked_fill(tril == 0, float('-inf'))

wei = F.softmax(wei, dim=-1)

out = wei @ v # (B, T, T) @ (B, T, C) -> (B, T, C)

out[0].shape

In [None]:
k.var()

In [None]:
q.var()

In [None]:
import torch
torch.randint(0, 20000, (1, 1024)).shape

In [78]:

from typing import List
import copy
class Solution:
    def findSafeWalk(self, grid: List[List[int]], health: int) -> bool:
        loc = [0,0]
        m,n = len(grid),len(grid[0])
        costMap = list(list(list(-1 for i in range(4)) for i in range(n)) for j in range(m))
        # print(costMap)
        # while not loc == [m-1,n-1] or not health == 0:
        # L,R,U,D -> y-1, y+1, x-1, x+1
        for x in range(m):
            for y in range(n):
                try:
                    if y > 0:
                        costMap[x][y][0] = grid[x][y-1]
                except:
                    pass
                try:
                    costMap[x][y][1] = grid[x][y+1]
                except:
                    pass
                try:
                    if x > 0:
                        costMap[x][y][2] = grid[x-1][y]
                except:
                    pass
                try:
                    costMap[x][y][3] = grid[x+1][y]
                except:
                    pass
                print(costMap[x][y])

        x,y = 0,0
        while not [x,y] == [m-1,n-1] or not health == 0:
                for z in range(4):
                    cost = costMap[x][y][z]
                    if cost == -1:
                        pass
                    elif cost == 0:


test = Solution()
test.findSafeWalk(grid = [[0,1,0,0,0],[0,1,0,1,0],[0,0,0,1,0]], health = 1)

[-1, 1, -1, 0]
[0, 0, -1, 1]
[1, 0, -1, 0]
[0, 0, -1, 1]
[0, -1, -1, 0]
[-1, 1, 0, 0]
[0, 0, 1, 0]
[1, 1, 0, 0]
[0, 0, 0, 1]
[1, -1, 0, 0]
[-1, 0, 0, -1]
[0, 0, 1, -1]
[0, 1, 0, -1]
[0, 0, 1, -1]
[1, -1, 0, -1]


In [2]:
import heapq
from typing import List
import copy

class Solution:
    def findSafeWalk(self, grid: List[List[int]], health: int) -> bool:
        m,n = len(grid),len(grid[0])
        for i in range(m):
            print(grid[i])

        edges = {}
        for i in range(m*n):
            edges[i] = []
        # print(edges)

        # No. if the node = (x*n)+y
        for x in range(m):
            for y in range(n):
                try:
                    if x > 0:
                        edges[(x*n)+y].append([((x-1)*n)+y, grid[x-1][y]])
                except:
                    pass
                try:
                    edges[(x*n)+y].append([((x+1)*n)+y, grid[x+1][y]])
                except:
                    pass
                try:
                    if y > 0:
                        edges[(x*n)+y].append([(x*n)+y-1, grid[x][y-1]])
                except:
                    pass
                try:
                    edges[(x*n)+y].append([(x*n)+y+1, grid[x][y+1]])
                except:
                    pass

        # print(edges)

        minHeap = [[0, 0]]
        shortest = {}
        while minHeap:
            n1, w1 = heapq.heappop(minHeap)
            if n1 in shortest :
                if w1 > shortest[n1]:
                    continue
                else:
                    shortest[n1] = w1
            shortest[n1] = w1
            for n2, w2 in edges[n1]:
                if n2 in shortest:
                    if w2 > shortest[n2]:
                        continue
                    else:
                        shortest[n2] = w2
                heapq.heappush(minHeap, [n2, w1 + w2])
            # print(minHeap)
        # print(shortest)
        return shortest


test = Solution()
test.findSafeWalk(grid = [[0,1,0,0,0],[0,1,0,1,0],[0,0,0,1,0]], health = 1)

[0, 1, 0, 0, 0]
[0, 1, 0, 1, 0]
[0, 0, 0, 1, 0]


KeyboardInterrupt: 

In [65]:
from typing import Dict
import heapq
class Soluition:
    def shortestPath(self, n: int, edges: List[List[int]], src: int) -> Dict[int, int]:
        adj = {}
        n = 5
        for i in range(n):
            adj[i] = []

        for s, dst, dist in edges:
            adj[s].append([dst, dist])
        print(adj)
        shortest = {}

        minHeap = [[src, 0]]
        while minHeap:
            n1, w1 = heapq.heappop(minHeap)
            if n1 in shortest:
                continue
            shortest[n1] = w1

            for n2, w2 in adj[n1]:
                if n2 in shortest:
                    continue
                heapq.heappush(minHeap, [n2, w1 + w2])

        return shortest





sol = Soluition()
sol.shortestPath(5, [[0,1,10], [0,2,3], [1,3,2], [2,1,4], [2,3,8], [2,4,2], [3,4,5]], 0)

{0: [[1, 10], [2, 3]], 1: [[3, 2]], 2: [[1, 4], [3, 8], [4, 2]], 3: [[4, 5]], 4: []}


{0: 0, 1: 10, 2: 3, 3: 11, 4: 5}

In [56]:
a = {"w", "x", "y", "z", 0,0}
# a = ("w", "x", "y", "z", 0)
print(type(a))

# a[0]

<class 'set'>
0
z
w
x
y
