In [1]:
with open('Transcript.txt', 'r',encoding = 'utf-8') as f:
    text = f.read()

text[:1000]

'Traditional Retail Supply Chain: The standard supply chain for retailers such as Walmart, Target, and Tesco PLC (Tesco) was driven by the orders retail buyers placed with suppliers, who coordinated the delivery of goods for sale. A significant portion of general merchandise was manufactured in Asia, and in 2016, U.S. retailers imported $479 billion of goods from China.Deciding what to place on shelves was a significant task for a store that could have more than 100,000 different items. Category buyers were responsible for selecting and pricing merchandise. Large retailers had approximately 40 categories, including housewares, toys, and fashion. A buyer normally set the assortment plan from quarter to quarter, accounting for changes in customer demand due to seasonal events such as Christmas, Easter, and back-to-school sale periods. In order to clear out inventory to make room for new product for the next season, retailers used a variety of approaches, including price discounts or mark

In [2]:
chars = sorted(list(set(text)))
vocab_size = len(chars)
print('Total characters:', vocab_size)
print(''.join(chars))

Total characters: 79

 $&(),-.0123456789:;ABCDEFGHIJKLMNOPRSTUVWXYZ[]abcdefghijklmnopqrstuvwxyz–—’“”


In [3]:
#mapping from caracter to index
stoi = {ch:i for i,ch in enumerate(chars)}
#mapping from index to caracter
itos = {i:ch for i,ch in enumerate(chars)}

encode = lambda s : [stoi[c] for c in s]
decode = lambda l : ''.join([itos[i] for i in l])

In [4]:
import torch
data = torch.tensor(encode(text))
print(data.shape,data.dtype)

  from .autonotebook import tqdm as notebook_tqdm


torch.Size([27653]) torch.int64


In [5]:
n = int(0.9 * len(data))
train_data = data[:n]
val_data = data[n:]

In [6]:
torch.manual_seed(1337)
batch_size = 4
block_size = 8

def get_batch(split):
    data = train_data if split=='train' else val_data
    ix = torch.randint(len(data)-block_size,(batch_size,))
    x = torch.stack([data[i:i+block_size] for i in ix])
    y = torch.stack([data[i+1:i+block_size+1] for i in ix])
    return x,y

xb, yb = get_batch('train')

In [15]:
import torch.nn as nn
import torch.nn.functional as F
torch.manual_seed(1337)

class BigramLanguageModel(nn.Module):

    def __init__(self, vocab_size):
        super().__init__()
        self.token_emb = nn.Embedding(vocab_size, vocab_size)

    def forward(self, idx, targets = None):

        logits = self.token_emb(idx)

        if targets is None:
            loss = None
        else:
            B,T,C = logits.shape
            logits = logits.view(B*T,C)
            targets = targets.view(B*T)
            loss = F.cross_entropy(logits, targets)

        return logits, loss

    def generate(self, idx, max_tokens):
        # idx is (B,T) tensor of indices
        for _ in range(max_tokens):
            logits =  self.token_emb(idx)
            #we consider only the last time step
            logits = logits[:,-1,:]
            #apply softmax to get probabilities
            probs = F.softmax(logits, dim=-1)
            #sample from the distribution
            idx_next = torch.multinomial(probs, num_samples = 1) # (B,1)
            #append the new index to the sequence
            idx = torch.cat([idx, idx_next], dim=1) # (B,T+1)

        return idx 
         


model = BigramLanguageModel(vocab_size)
logits, loss = model(xb, yb)
print(loss)
print(logits.shape)

idx = torch.zeros((1,1), dtype=torch.long)
print(decode(model.generate(idx,max_tokens= 100)[0].tolist()))


tensor(5.1383, grad_fn=<NllLossBackward0>)
torch.Size([32, 79])

G1)FK[ralZ8FnO6U:pdPNF.C(uWb4V–Z5ewlo05—LA
KH0xXgNC)Wi9’p0X3h7sAz.1e“pNC4TUaZLxdt.p sb$Y7ka2–GK6gmhN


In [17]:
#PyTorch optimizer
optimizer = torch.optim.Adam(model.parameters(), lr = 1e-3) #1e-3

In [21]:
batch_size = 32

for steps in range(10000):
    #sample a batch of data
    xb, yb = get_batch('train')

    #evaluate the loss
    logits, loss = model(xb, yb) #forward pass
    optimizer.zero_grad(set_to_none= True) #reset the gradients
    loss.backward() #compute the gradients
    optimizer.step() #update the parameters

    print(loss.item())

3.741332530975342
3.7353932857513428
3.6289420127868652
3.6243913173675537
3.6725881099700928
3.6565704345703125
3.637296676635742
3.7081637382507324
3.6499247550964355
3.6445014476776123
3.6421196460723877
3.6427266597747803
3.7271981239318848
3.6486191749572754
3.7248196601867676
3.6626429557800293
3.6649539470672607
3.74411940574646
3.634150505065918
3.66318416595459
3.6510884761810303
3.6742069721221924
3.6972484588623047
3.666884183883667
3.6791651248931885
3.709810495376587
3.600705862045288
3.5894219875335693
3.6189632415771484
3.600315570831299
3.7097573280334473
3.683212995529175
3.6373097896575928
3.6688060760498047
3.7885982990264893
3.7204041481018066
3.6414082050323486
3.6224141120910645
3.6715199947357178
3.613816261291504
3.684192180633545
3.6699588298797607
3.583355665206909
3.6576592922210693
3.550231695175171
3.638981819152832
3.6519761085510254
3.6923491954803467
3.5389838218688965
3.6520779132843018
3.711562395095825
3.582472085952759
3.6478943824768066
3.6821429729

In [23]:
idx = torch.zeros((1,1), dtype=torch.long)
print(decode(model.generate(idx,max_tokens= 300)[0].tolist()))


Amage atin ventrs D lerive t cas intex 14, fosqulls Eazonduceiveredlindrertht fers ld fy e ted [frrononcr ce ast. d d ryppl Amess, tse po by oongrpas. Oun d ha aboripabene litillloriamedinde ack f cazore d Gandarte l Wadeazoullventre lilmes t cacten.AZacag st.
: sondichind icin ses os the s Fintrtic
