<a href="https://colab.research.google.com/github/Rohit909-creator/Rohit909-creator/blob/main/GPT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **GPT**

here i am creating a new GPT architecture from scratch
as of now I am only following the diagram of GPT architecture available at Google

As of now for testing purposes I only trained the model with Toy data

In [None]:
!pip install wikipedia -q
!pip install simpletransformers -q

In [None]:



import torch
import torch.nn as nn
import numpy as np
import math
from torch.autograd import Variable



sentences = ['hello how are you',
             'how are you doing',
             'longtime no see where',
             'no see where were',
             'see where were you']

print(sentences[:-1])

string = ''

for s in sentences:
  string += ' '+ s

vocab = sorted(set(string.split(' ')))
vocab = vocab[1:]
word2idx = {w:i for i,w in enumerate(vocab)}

idx2word = {i:w for i,w in enumerate(vocab)}

print(word2idx)

data = np.zeros((5,len(vocab)))

for i,s in enumerate(sentences):
    words = s.split(' ')
    for j,w in enumerate(words):
        data[i][j] = word2idx[w]


print(f'train_data:\n{data}\n\n')

target_data = np.zeros((data.shape[0],4,data.shape[1]))

for i in range(data.shape[0]):
    for j in range(target_data.shape[1]):
        target_data[i][j][int(data[i][j])] = data[i][j]

print(f'target_data:\n{target_data}\n\n')


class GPT(nn.Module):

  def __init__(self,dim_head,head_dim,out_dim):
    super().__init__()

    self.out_dim = out_dim
    self.embed = nn.Embedding(100,dim_head)
    self.pe = PositionalEncoder(dim_head)
    decoder_layer = TransformerDecoderLayer(dim_head, heads=8)
    self.decoder = TransformerDecoder(dim_head,heads = 8, num_layers=10)
    self.fc1 = nn.Linear(dim_head,out_dim)
    self.fc2 = nn.Linear(25,10)
    #self.out = nn.Linear(out_dim,out_dim)
    self.relu = nn.ReLU()

  def forward(self,X,memory):
    #print('yay')
    bs,_ = X.shape
    embed = self.embed(X)
    #print(embed.shape)
    embed = self.pe(embed)
    hidd_stats,out = self.decoder(embed,memory)
    #print('yay')
    out = self.relu(self.fc1(out))
    out = out.reshape(bs,4,-1)
    out = self.fc2(out)
    #out = out.reshape(-1,128*self.out_dim)
    #out = self.out(out)

    return out

class TransformerDecoderLayer(nn.Module):


    def __init__(self,d_model,heads,ff_size = 2096,dropout = 0.1):
        super().__init__()
        
        self.attention = nn.MultiheadAttention(d_model,heads)
        self.attn_norm = nn.LayerNorm(d_model)
        self.encoder_decoder_attn = nn.MultiheadAttention(d_model,heads)
        self.encoder_decoder_attn_norm = nn.LayerNorm(d_model)
        
        self.ff = nn.Sequential(
            nn.Linear(d_model,ff_size),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(ff_size,d_model)
        )



        self.final_norm = nn.LayerNorm(d_model)
        self.dp = nn.Dropout(dropout)

    def forward(self,src_embeddings,memory,src_mask = None):
        #print('yay')
        #print(f'src_embeddings.shape:{src_embeddings.shape}')
        #print(src_embeddings.shape)
        #print('yay')
        attn_output = self.attention(self.attn_norm(src_embeddings),src_embeddings,src_embeddings)
        #print('yay')
        tensor = self.dp(attn_output[0]) + src_embeddings
        #print('yay')
        attn_ouput = self.encoder_decoder_attn(self.encoder_decoder_attn_norm(src_embeddings),memory,memory)
        #print('yay')
        tensor = self.dp(attn_ouput[0]) + src_embeddings
        #print(tensor)
        #print('yay')
        tensor = self.dp(self.ff(self.final_norm(tensor))) + src_embeddings
        #print('yay')
        return tensor

class TransformerDecoder(nn.Module):
    
    def __init__(self,d_model,heads,num_layers = 10,ff_size = 2096,dropout = 0.1):
        super().__init__()

        self.layers = nn.ModuleList()

        for i in range(num_layers):
            self.layers.append(TransformerDecoderLayer(d_model,heads,ff_size,dropout))

        self.norm = nn.LayerNorm(d_model)

    def forward(self,src_embeddings,memory,src_mask = None):

        hidden_stats = []

        for layer in self.layers:
            #print('weeee')
            src_embeddings = layer(src_embeddings,memory,src_mask)
            hidden_stats.append(src_embeddings)

        return hidden_stats,self.norm(hidden_stats[-1])


class TokenEmbeddings(nn.Module):

  def __init__(self,vocab_size,emb_size):
    super().__init__()

    self.embedding = nn.Embedding(vocab_size,emb_size)
    self.emb_size = emb_size

  def forward(self,token):
    return self.embedding(token.long())*math.sqrt(self.emb_size)


class PositionalEncoder(nn.Module):
    def __init__(self, d_model, max_seq_len = 128):
        super().__init__()
        self.d_model = d_model
        
        # create constant 'pe' matrix with values dependant on 
        # pos and i
        self.pe = torch.zeros(max_seq_len, d_model)
        for pos in range(max_seq_len):
            for i in range(0, d_model, 2):
                self.pe[pos, i] = \
                math.sin(pos / (10000 ** ((2 * i)/d_model)))
                self.pe[pos, i + 1] = \
                math.cos(pos / (10000 ** ((2 * (i + 1))/d_model)))
                
        self.pe = self.pe.unsqueeze(0)
        #self.register_buffer('pe', pe)
 
    
    def forward(self, x):
        # make embeddings relatively larger
        x = x * math.sqrt(self.d_model)
        #add constant to embedding
        seq_len = x.size(1)
        #print(self.pe.shape)
        x = x + Variable(self.pe[:,:seq_len], \
        requires_grad=False)
        return x


class PositionalEmbeddings(nn.Module):
  
  def __init__(self,emb_size:int,dropout,maxlen: int = 128):
    super().__init__()
    den = torch.exp(-torch.arange(0,emb_size,2)*math.log(10000)/emb_size)
    pos = torch.arange(0,maxlen).reshape(maxlen,1)
    pos_embeddings = torch.zeros((maxlen,emb_size))
    pos_embeddings[:,0:2] = torch.sin(pos*den)
    pos_embeddings[:,1:2] = torch.cos(pos*den)
    print(pos_embeddings.shape)
    pos_embeddings = pos_embeddings.unsqueeze(-2)

    self.dropout = nn.Dropout(dropout)
    #self.register_buffer('pos_embeddings',pos_embeddings)
    self.pos_embeddings = pos_embeddings

  def forward(self,token_embedding):
    
    return self.dropout(token_embedding + self.pos_embeddings[:token_embedding.size(0),:])



# tgt = torch.rand((10,32,128))

# model = GPT(128,8,64)

# out = model(tgt,tgt)
# print(out.shape)



class PositionalEncoder(nn.Module):
    def __init__(self, d_model, max_seq_len = 128):
        super().__init__()
        self.d_model = d_model
        
        # create constant 'pe' matrix with values dependant on 
        # pos and i
        self.pe = torch.zeros(max_seq_len, d_model)
        for pos in range(max_seq_len):
            for i in range(0, d_model, 2):
                self.pe[pos, i] = \
                math.sin(pos / (10000 ** ((2 * i)/d_model)))
                self.pe[pos, i + 1] = \
                math.cos(pos / (10000 ** ((2 * (i + 1))/d_model)))
                
        self.pe = self.pe.unsqueeze(0)
        #self.register_buffer('pe', pe)
 
    
    def forward(self, x):
        # make embeddings relatively larger
        x = x * math.sqrt(self.d_model)
        #add constant to embedding
        seq_len = x.size(1)
        #print(self.pe.shape)
        x = x + Variable(self.pe[:,:seq_len], \
        requires_grad=False)
        return x


c = 3


if __name__ == '__main__':
  
  T = torch.tensor(data,dtype = torch.long)
  print(T.shape)
  model = GPT(128,2,10)
  memory = torch.rand(2,10,128)
  out = model(T,memory)
  print(out.shape)

  new_data = data[:].tolist()
  new_data.append([8,7,9,0,0,0,0,0,0,0])
  target_data = torch.tensor(target_data,dtype = torch.float32)
  print(target_data.shape,'\n',target_data)


  #training:
  num_epochs = 10000

  loss = nn.CrossEntropyLoss()

  
  optimizer = torch.optim.Adam(model.parameters(),lr = 0.01)
  memory = torch.zeros((2,10,128))
  for epoch in range(num_epochs):
    out = model(T,memory)
    optimizer.zero_grad()
    l = loss(out,target_data)
    l.backward()
    optimizer.step()
    if epoch%100 == 0:
      print(f"Epoch:{epoch} Loss:{l.item()}")
    if l.item() < c:
      c = l.item()
      torch.save(model.state_dict(), "Model.pt")
    if l.item() < 0.1:
      print("yay got there")
      print(f"Epoch:{epoch} Loss:{l.item()}")
      break

print(model(T,memory))    


KeyboardInterrupt: ignored

In [None]:
c

0.6185405254364014

In [None]:
idx2word = {i:w for i,w in enumerate(vocab)}
print(idx2word)
print(word2idx)

{0: 'are', 1: 'doing', 2: 'hello', 3: 'how', 4: 'longtime', 5: 'no', 6: 'see', 7: 'were', 8: 'where', 9: 'you'}
{'are': 0, 'doing': 1, 'hello': 2, 'how': 3, 'longtime': 4, 'no': 5, 'see': 6, 'were': 7, 'where': 8, 'you': 9}


In [None]:
model.eval()
data = np.zeros((1,len(vocab)))
sentence = "hello how are"
words = sentence.split(' ')
for j,w in enumerate(words):
    data[0][j] = word2idx[w]

test_data = torch.tensor(data, dtype = torch.long)
memory = torch.zeros((2,10,128))

temperature = 0.9


outputs = model(test_data, memory)
for i in range(4):
  output_dist = outputs[0][i].view(-1).div(temperature).exp()
  idx = torch.multinomial(output_dist, 1)[0]
  print(idx2word[idx.item()], end = " ")



hello doing longtime were 

In [None]:
outputs.shape

torch.Size([1, 4, 10])

In [None]:
# As of Now just playing with regression instead of probability prediction Hahahahaa


import torch
import torch.nn as nn
import numpy as np
import math
from torch.autograd import Variable



sentences = ['hello how are you',
             'how are you doing',
             'longtime no see where',
             'no see where were',
             'see where were you']

print(sentences[:-1])

string = ''

for s in sentences:
  string += ' '+ s

vocab = sorted(set(string.split(' ')))
vocab = vocab[1:]
word2idx = {w:i+1 for i,w in enumerate(vocab)}

print(word2idx)

data = np.zeros((5,len(vocab)+1))

for i,s in enumerate(sentences):
    words = s.split(' ')
    for j,w in enumerate(words):
        data[i][j] = word2idx[w]


print(f'train_data:\n{data}\n\n')

target_data = np.zeros((data.shape[0],4,data.shape[1]+1))
print(target_data.shape)
for i in range(data.shape[0]):
    for j in range(target_data.shape[1]):
        target_data[i][j][int(data[i][j])] = int(data[i][j])

print(f'target_data:\n{target_data}\n\n')


class GPT(nn.Module):

  def __init__(self,dim_head,head_dim,out_dim):
    super().__init__()

    self.out_dim = out_dim
    self.embed = nn.Embedding(100,dim_head)
    self.pe = PositionalEncoder(dim_head)
    #decoder_layer = TransformerDecoderLayer(dim_head, heads=8)
    self.decoder = TransformerDecoder(dim_head,heads = 8, num_layers=10)
    decoder_layer = DecoderOnlyTransformer(dim_head,dim_head,dim_head, heads=8)
    self.fc1 = nn.Linear(dim_head,out_dim)
    self.fc2 = nn.Linear(25,11)
    #self.out = nn.Linear(out_dim,out_dim)
    self.relu = nn.ReLU()

  def forward(self,X):
    #print('yay')
    bs,_ = X.shape
    src_mask = X == 0
    embed = self.embed(X)
    #print(embed.shape)
    embed = self.pe(embed)
    hidd_stats = self.decoder(embed)
    #print('yay')
    out = self.relu(self.fc1(hidd_stats[-1]))
    print(out.shape)
    out = out.reshape(bs,4,-1)
    print(out.shape)
    out = self.fc2(out)
    #out = out.reshape(-1,128*self.out_dim)
    #out = self.out(out)

    return out


class DecoderOnlyTransformer(nn.Module):
  
  def __init__(self,emb_size,hidden_size,output_size,heads = 8,dropout = 0.1):
    super().__init__()

    self.dp = nn.Dropout(dropout)
    self.attn = nn.MultiheadAttention(emb_size,heads)
    self.attn_norm = nn.LayerNorm(emb_size)
    self.ff_attn_norm = nn.LayerNorm(emb_size)
    
    self.ff = nn.Sequential(
        nn.Linear(emb_size,hidden_size),
        nn.ReLU(),
        nn.Linear(hidden_size,output_size)
    )

    self.final_norm = nn.LayerNorm(emb_size)

  def forward(self,X,src_mask = None):
    
    if src_mask == None:
      attn_output = self.attn(self.attn_norm(X),X,X)

    else:
      attn_output = self.attn(self.attn_norm(X),X,src_mask)

    tensor = self.dp(attn_output[0]) + X

    out = self.ff(self.final_norm(tensor))

    tensor = self.dp(out) + tensor

    return tensor
    



class TransformerDecoderLayer(nn.Module):


    def __init__(self,d_model,heads,ff_size = 2096,dropout = 0.1):
        super().__init__()
        
        self.attention = nn.MultiheadAttention(d_model,heads)
        self.attn_norm = nn.LayerNorm(d_model)
        self.encoder_decoder_attn = nn.MultiheadAttention(d_model,heads)
        self.encoder_decoder_attn_norm = nn.LayerNorm(d_model)
        
        self.ff = nn.Sequential(
            nn.Linear(d_model,ff_size),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(ff_size,d_model)
        )



        self.final_norm = nn.LayerNorm(d_model)
        self.dp = nn.Dropout(dropout)

    def forward(self,src_embeddings,memory,src_mask = None):
        #print('yay')
        #print(f'src_embeddings.shape:{src_embeddings.shape}')
        #print(src_embeddings.shape)
        #print('yay')
        attn_output = self.attention(self.attn_norm(src_embeddings),src_embeddings,src_embeddings)
        #print('yay')
        tensor = self.dp(attn_output[0]) + src_embeddings
        #print('yay')
        attn_ouput = self.encoder_decoder_attn(self.encoder_decoder_attn_norm(src_embeddings),memory,src_embeddings)
        #print('yay')
        tensor = self.dp(attn_ouput[0]) + src_embeddings
        #print(tensor)
        #print('yay')
        attn_output = self.ff(self.final_norm(tensor))
        tensor = self.dp(attn_output[0]) + src_embeddings
        #print('yay')
        return tensor

class TransformerDecoder(nn.Module):
    
    def __init__(self,d_model,heads,num_layers = 10,ff_size = 2096,dropout = 0.1):
        super().__init__()

        self.layers = nn.ModuleList()

        for i in range(num_layers):
            # self.layers.append(TransformerDecoderLayer(d_model,heads,ff_size,dropout))
            self.layers.append(DecoderOnlyTransformer(d_model,d_model,d_model,heads,dropout))
        self.norm = nn.LayerNorm(d_model)

    def forward(self,src_embeddings,src_mask = None):

        hidden_stats = []

        for layer in self.layers:
            #print('weeee')
            # src_embeddings = layer(src_embeddings,memory,src_mask)
            src_embeddings = layer(src_embeddings)
            hidden_stats.append(src_embeddings)

        return hidden_stats,self.norm(hidden_stats[-1])


class TokenEmbeddings(nn.Module):

  def __init__(self,vocab_size,emb_size):
    super().__init__()

    self.embedding = nn.Embedding(vocab_size,emb_size)
    self.emb_size = emb_size

  def forward(self,token):
    return self.embedding(token.long())*math.sqrt(self.emb_size)


class PositionalEncoder(nn.Module):
    def __init__(self, d_model, max_seq_len = 128):
        super().__init__()
        self.d_model = d_model
        
        # create constant 'pe' matrix with values dependant on 
        # pos and i
        self.pe = torch.zeros(max_seq_len, d_model)
        for pos in range(max_seq_len):
            for i in range(0, d_model, 2):
                self.pe[pos, i] = \
                math.sin(pos / (10000 ** ((2 * i)/d_model)))
                self.pe[pos, i + 1] = \
                math.cos(pos / (10000 ** ((2 * (i + 1))/d_model)))
                
        self.pe = self.pe.unsqueeze(0)
        #self.register_buffer('pe', pe)
 
    
    def forward(self, x):
        # make embeddings relatively larger
        x = x * math.sqrt(self.d_model)
        #add constant to embedding
        seq_len = x.size(1)
        #print(self.pe.shape)
        x = x + Variable(self.pe[:,:seq_len], \
        requires_grad=False)
        return x


class PositionalEmbeddings(nn.Module):
  
  def __init__(self,emb_size:int,dropout,maxlen: int = 128):
    super().__init__()
    den = torch.exp(-torch.arange(0,emb_size,2)*math.log(10000)/emb_size)
    pos = torch.arange(0,maxlen).reshape(maxlen,1)
    pos_embeddings = torch.zeros((maxlen,emb_size))
    pos_embeddings[:,0:2] = torch.sin(pos*den)
    pos_embeddings[:,1:2] = torch.cos(pos*den)
    print(pos_embeddings.shape)
    pos_embeddings = pos_embeddings.unsqueeze(-2)

    self.dropout = nn.Dropout(dropout)
    #self.register_buffer('pos_embeddings',pos_embeddings)
    self.pos_embeddings = pos_embeddings

  def forward(self,token_embedding):
    
    return self.dropout(token_embedding + self.pos_embeddings[:token_embedding.size(0),:])



# tgt = torch.rand((10,32,128))

# model = GPT(128,8,64)

# out = model(tgt,tgt)
# print(out.shape)



class PositionalEncoder(nn.Module):
    def __init__(self, d_model, max_seq_len = 128):
        super().__init__()
        self.d_model = d_model
        
        # create constant 'pe' matrix with values dependant on 
        # pos and i
        self.pe = torch.zeros(max_seq_len, d_model)
        for pos in range(max_seq_len):
            for i in range(0, d_model, 2):
                self.pe[pos, i] = \
                math.sin(pos / (10000 ** ((2 * i)/d_model)))
                self.pe[pos, i + 1] = \
                math.cos(pos / (10000 ** ((2 * (i + 1))/d_model)))
                
        self.pe = self.pe.unsqueeze(0)
        #self.register_buffer('pe', pe)
 
    
    def forward(self, x):
        # make embeddings relatively larger
        x = x * math.sqrt(self.d_model)
        #add constant to embedding
        seq_len = x.size(1)
        #print(self.pe.shape)
        x = x + Variable(self.pe[:,:seq_len], \
        requires_grad=False)
        return x





if __name__ == '__main__':
  
  T = torch.tensor(data,dtype = torch.long)
  print(T.shape)
  model = GPT(128,2,10)
  memory = torch.rand(2,10,128)
  # out = model(T,memory)
  out = model(T)
    
  print(out.shape)

  new_data = data[:].tolist()
  new_data.append([8,7,9,0,0,0,0,0,0,0])
  target_data = torch.tensor(target_data,dtype = torch.float32)
  print(target_data.shape,'\n',target_data)


  #training:
  num_epochs = 2000

  loss = nn.CrossEntropyLoss()

  model.train()
  optimizer = torch.optim.Adam(model.parameters(),lr = 0.0001)
  memory = torch.zeros((5,10,128))
  for epoch in range(num_epochs):
    # out = model(T,memory)
    out = model(T)
    optimizer.zero_grad()
    l = loss(out,target_data)
    l.backward()
    optimizer.step()
    if epoch%100 == 0:
      print(f"Epoch:{epoch} Loss:{l.item()}")
    if l.item() < 0.009:
      print("yay got there")
      print(f"Epoch:{epoch} Loss:{l.item()}")
      break
model.eval()
print(model(T,memory))    


['hello how are you', 'how are you doing', 'longtime no see where', 'no see where were']
{'are': 1, 'doing': 2, 'hello': 3, 'how': 4, 'longtime': 5, 'no': 6, 'see': 7, 'were': 8, 'where': 9, 'you': 10}
train_data:
[[ 3.  4.  1. 10.  0.  0.  0.  0.  0.  0.  0.]
 [ 4.  1. 10.  2.  0.  0.  0.  0.  0.  0.  0.]
 [ 5.  6.  7.  9.  0.  0.  0.  0.  0.  0.  0.]
 [ 6.  7.  9.  8.  0.  0.  0.  0.  0.  0.  0.]
 [ 7.  9.  8. 10.  0.  0.  0.  0.  0.  0.  0.]]


(5, 4, 12)
target_data:
[[[ 0.  0.  0.  3.  0.  0.  0.  0.  0.  0.  0.  0.]
  [ 0.  0.  0.  0.  4.  0.  0.  0.  0.  0.  0.  0.]
  [ 0.  1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
  [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0. 10.  0.]]

 [[ 0.  0.  0.  0.  4.  0.  0.  0.  0.  0.  0.  0.]
  [ 0.  1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
  [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0. 10.  0.]
  [ 0.  0.  2.  0.  0.  0.  0.  0.  0.  0.  0.  0.]]

 [[ 0.  0.  0.  0.  0.  5.  0.  0.  0.  0.  0.  0.]
  [ 0.  0.  0.  0.  0.  0.  6.  0.  0.  0.  0.  0.]


RuntimeError: ignored

In [None]:
t = torch.randn(1,11,10)

torch.Size([5, 4, 10])

In [None]:
print(target_data.shape)#,out.shape)

(5, 4, 10)


In [None]:
memory = torch.zeros((2,10,128))


In [None]:
memory = torch.zeros((2,10,128))

text = 'hello how are you'
data = text.split(' ')
test_data = [0]*10
for i,w in enumerate(data):
  test_data[i] = word2idx[w]


print(test_data)
test_data_a = np.array(test_data).reshape(1,len(test_data))
print(test_data_a)
test_data_t = torch.tensor(test_data_a,dtype = torch.long)
test_data_t.shape


[2, 3, 0, 9, 0, 0, 0, 0, 0, 0]
[[2 3 0 9 0 0 0 0 0 0]]


torch.Size([1, 10])

In [None]:
out = model(test_data_t,memory)

In [None]:
idxtoword = {w:i for i,w in zip(word2idx.keys(),word2idx.values())}

In [None]:
for i in range(out.shape[1]):
  print(idxtoword[torch.argmax(out[0][i]).item()],end=' ')

hello how hello doing 

In [None]:

import torch
import torch.nn as nn
import numpy as np
#import wikipedia
import nltk
import random
import math
from torch.autograd import Variable
import pandas as pd
device = torch.device('cuda')

print(device)

cuda


In [None]:
class GPT(nn.Module):

  def __init__(self,dim_head,head_dim,out_dim):
    super().__init__()

    self.out_dim = out_dim
    decoder_layer = TransformerDecoderLayer(dim_head, heads=8)
    self.decoder = TransformerDecoder(dim_head,heads = 8, num_layers=10)
    self.fc1 = nn.Linear(768,out_dim)
    #self.out = nn.Linear(out_dim,out_dim)
    self.relu = nn.ReLU()

  def forward(self,X,memory):
    #print('yay')
    hidd_stats,out = self.decoder(X,memory)
    #print('yay')
    out = self.relu(self.fc1(out))
    #out = out.reshape(-1,128*self.out_dim)
    #out = self.out(out)

    return out



class DecoderOnlyTransformer(nn.Module):
  
  def __init__(self,emb_size,hidden_size,output_size,heads = 8,dropout = 0.1):
    super().__init__()

    self.dp = nn.Dropout(dropout)
    self.attn = nn.MultiheadAttention(emb_size,heads)
    self.attn_norm = nn.LayerNorm(emb_size)
    self.ff_attn_norm = nn.LayerNorm(emb_size)
    
    self.ff = nn.Sequential(
        nn.Linear(emb_size,hidden_size),
        nn.ReLU(),
        nn.Linear(hidden_size,output_size)
    )

    self.final_norm = nn.LayerNorm(emb_size)

  def forward(self,X,src_mask = None):
    
    if src_mask == None:
      attn_output = self.attn(self.attn_norm(X),X,X)

    else:
      attn_output = self.attn(self.attn_norm(X),X,src_mask)

    tensor = self.dp(attn_output[0]) + X

    out = self.ff(self.final_norm(tensor))

    tensor = self.dp(out) + tensor

    return tensor
    











class TransformerDecoder(nn.Module):
    
    def __init__(self,d_model,heads,num_layers = 10,ff_size = 2096,dropout = 0.1):
        super().__init__()

        self.layers = nn.ModuleList()

        for i in range(num_layers):
            self.layers.append(TransformerDecoderLayer(d_model,heads,ff_size,dropout))

        self.norm = nn.LayerNorm(d_model)

    def forward(self,src_embeddings,memory,src_mask = None):

        hidden_stats = []

        for layer in self.layers:
            src_embeddings = layer(src_embeddings,memory,src_mask)
            hidden_stats.append(src_embeddings)

        return hidden_stats,self.norm(hidden_stats[-1])


class TokenEmbeddings(nn.Module):

  def __init__(self,vocab_size,emb_size):
    super().__init__()

    self.embedding = nn.Embedding(vocab_size,emb_size)
    self.emb_size = emb_size

  def forward(self,token):
    return self.embedding(tokens.long())*math.sqrt(emb_size)


class PositionalEmbeddings(nn.Module):
  
  def __init__(self,emb_size:int,dropout,maxlen: int = 5000):
    super().__init__()
    den = torch.exp(-torch.arange(0,emb_size,2)*math.log(10000)/emb_size)
    pos = torch.arange(0,maxlen).reshape(maxlen,1)
    pos_embeddings = torch.zeros((maxlen,emb_size))
    pos_embeddings[:,0:2] = torch.sin(pos*den)
    pos_embeddings[:,1:2] = torch.cos(pos*den)
    print(pos_embeddings.shape)
    pos_embeddings = pos_embeddings.unsqueeze(-2)

    self.dropout = nn.Dropout(dropout)
    #self.register_buffer('pos_embeddings',pos_embeddings)
    self.pos_embeddings = pos_embeddings

  def forward(self,token_embedding):
    
    return self.dropout(token_embedding + self.pos_embeddings[:token_embedding.size(0),:])

class PositionalEncoder(nn.Module):
    def __init__(self, d_model, max_seq_len = 128):
        super().__init__()
        self.d_model = d_model
        
        # create constant 'pe' matrix with values dependant on 
        # pos and i
        self.pe = torch.zeros(max_seq_len, d_model)
        for pos in range(max_seq_len):
            for i in range(0, d_model, 2):
                self.pe[pos, i] = \
                math.sin(pos / (10000 ** ((2 * i)/d_model)))
                self.pe[pos, i + 1] = \
                math.cos(pos / (10000 ** ((2 * (i + 1))/d_model)))
                
        self.pe = self.pe.unsqueeze(0)
        #self.register_buffer('pe', pe)
 
    
    def forward(self, x):
        # make embeddings relatively larger
        x = x * math.sqrt(self.d_model)
        #add constant to embedding
        seq_len = x.size(1)
        #print(self.pe.shape)
        x = x + Variable(self.pe[:,:seq_len], \
        requires_grad=False)
        return x




In [None]:
model = GPT(768,2048,out_dim = 768)
out_t = torch.tensor(out,dtype = torch.float32)
tgt = torch.rand(5, 32, 768,dtype=torch.float32)
memory = torch.rand(5, 35, 768,dtype = torch.float32)
#print(tgt.shape)
output = model(out_t,memory)
print(output.shape)


NameError: ignored

# New


In [None]:
class GPT(nn.Module):

  def __init__(self,dim_head,head_dim,out_dim):
    super().__init__()

    self.out_dim = out_dim
    self.embed = nn.Embedding(100,dim_head)
    self.pe = PositionalEncoder(dim_head)
    decoder_layer = TransformerDecoderLayer(dim_head, heads=8)
    self.decoder = TransformerDecoder(dim_head,heads = 8, num_layers=10)
    self.fc1 = nn.Linear(dim_head,out_dim)
    self.fc2 = nn.Linear(25,10)
    #self.out = nn.Linear(out_dim,out_dim)
    self.relu = nn.ReLU()

  def forward(self,X,memory):
    #print('yay')
    bs,_ = X.shape
    embed = self.embed(X)
    #print(embed.shape)
    embed = self.pe(embed)
    hidd_stats,out = self.decoder(embed,memory)
    #print('yay')
    out = self.relu(self.fc1(out))
    out = out.reshape(bs,4,-1)
    out = self.fc2(out)
    #out = out.reshape(-1,128*self.out_dim)
    #out = self.out(out)

    return out

class TransformerDecoderLayer(nn.Module):


    def __init__(self,d_model,heads,ff_size = 2096,dropout = 0.1):
        super().__init__()
        
        self.attention = nn.MultiheadAttention(d_model,heads)
        self.attn_norm = nn.LayerNorm(d_model)
        self.encoder_decoder_attn = nn.MultiheadAttention(d_model,heads)
        self.encoder_decoder_attn_norm = nn.LayerNorm(d_model)
        
        self.ff = nn.Sequential(
            nn.Linear(d_model,ff_size),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(ff_size,d_model)
        )



        self.final_norm = nn.LayerNorm(d_model)
        self.dp = nn.Dropout(dropout)

    def forward(self,src_embeddings,memory,src_mask = None):
        #print('yay')
        #print(f'src_embeddings.shape:{src_embeddings.shape}')
        #print(src_embeddings.shape)
        #print('yay')
        attn_output = self.attention(self.attn_norm(src_embeddings),src_embeddings,src_embeddings)
        #print('yay')
        tensor = self.dp(attn_output[0]) + src_embeddings
        #print('yay')
        attn_ouput = self.encoder_decoder_attn(self.encoder_decoder_attn_norm(src_embeddings),memory,memory)
        #print('yay')
        tensor = self.dp(attn_ouput[0]) + src_embeddings
        #print(tensor)
        #print('yay')
        tensor = self.dp(self.ff(self.final_norm(tensor))) + src_embeddings
        #print('yay')
        return tensor

class TransformerDecoder(nn.Module):
    
    def __init__(self,d_model,heads,num_layers = 10,ff_size = 2096,dropout = 0.1):
        super().__init__()

        self.layers = nn.ModuleList()

        for i in range(num_layers):
            self.layers.append(TransformerDecoderLayer(d_model,heads,ff_size,dropout))

        self.norm = nn.LayerNorm(d_model)

    def forward(self,src_embeddings,memory,src_mask = None):

        hidden_stats = []

        for layer in self.layers:
            #print('weeee')
            src_embeddings = layer(src_embeddings,memory,src_mask)
            hidden_stats.append(src_embeddings)

        return hidden_stats,self.norm(hidden_stats[-1])


class TokenEmbeddings(nn.Module):

  def __init__(self,vocab_size,emb_size):
    super().__init__()

    self.embedding = nn.Embedding(vocab_size,emb_size)
    self.emb_size = emb_size

  def forward(self,token):
    return self.embedding(token.long())*math.sqrt(self.emb_size)


class PositionalEncoder(nn.Module):
    def __init__(self, d_model, max_seq_len = 128):
        super().__init__()
        self.d_model = d_model
        
        # create constant 'pe' matrix with values dependant on 
        # pos and i
        self.pe = torch.zeros(max_seq_len, d_model)
        for pos in range(max_seq_len):
            for i in range(0, d_model, 2):
                self.pe[pos, i] = \
                math.sin(pos / (10000 ** ((2 * i)/d_model)))
                self.pe[pos, i + 1] = \
                math.cos(pos / (10000 ** ((2 * (i + 1))/d_model)))
                
        self.pe = self.pe.unsqueeze(0)
        #self.register_buffer('pe', pe)
 
    
    def forward(self, x):
        # make embeddings relatively larger
        x = x * math.sqrt(self.d_model)
        #add constant to embedding
        seq_len = x.size(1)
        #print(self.pe.shape)
        x = x + Variable(self.pe[:,:seq_len], \
        requires_grad=False)
        return x


class PositionalEmbeddings(nn.Module):
  
  def __init__(self,emb_size:int,dropout,maxlen: int = 128):
    super().__init__()
    den = torch.exp(-torch.arange(0,emb_size,2)*math.log(10000)/emb_size)
    pos = torch.arange(0,maxlen).reshape(maxlen,1)
    pos_embeddings = torch.zeros((maxlen,emb_size))
    pos_embeddings[:,0:2] = torch.sin(pos*den)
    pos_embeddings[:,1:2] = torch.cos(pos*den)
    print(pos_embeddings.shape)
    pos_embeddings = pos_embeddings.unsqueeze(-2)

    self.dropout = nn.Dropout(dropout)
    #self.register_buffer('pos_embeddings',pos_embeddings)
    self.pos_embeddings = pos_embeddings

  def forward(self,token_embedding):
    
    return self.dropout(token_embedding + self.pos_embeddings[:token_embedding.size(0),:])



# tgt = torch.rand((10,32,128))

# model = GPT(128,8,64)

# out = model(tgt,tgt)
# print(out.shape)



class PositionalEncoder(nn.Module):
    def __init__(self, d_model, max_seq_len = 128):
        super().__init__()
        self.d_model = d_model
        
        # create constant 'pe' matrix with values dependant on 
        # pos and i
        self.pe = torch.zeros(max_seq_len, d_model)
        for pos in range(max_seq_len):
            for i in range(0, d_model, 2):
                self.pe[pos, i] = \
                math.sin(pos / (10000 ** ((2 * i)/d_model)))
                self.pe[pos, i + 1] = \
                math.cos(pos / (10000 ** ((2 * (i + 1))/d_model)))
                
        self.pe = self.pe.unsqueeze(0)
        #self.register_buffer('pe', pe)
 
    
    def forward(self, x):
        # make embeddings relatively larger
        x = x * math.sqrt(self.d_model)
        #add constant to embedding
        seq_len = x.size(1)
        #print(self.pe.shape)
        x = x + Variable(self.pe[:,:seq_len], \
        requires_grad=False)
        return x




In [None]:
data = pd.read_table('Chat_data.txt')

In [None]:
human_1 = data[0:-1:2].values
human_2 = data[1:-1:2].values

string1 = ''
string2 = ''

for s in range(len(human_2)):
  string1 += ' '+ human_2[s][0]
  string2 += ' ' + human_1[s][0]
string1 = string1.replace('Human 2:',"")
string2 = string2.replace('Human 1:',"")
tokens = nltk.wordpunct_tokenize(string1)
text = nltk.Text(tokens)

words = list(sorted(set([w.lower() for w in text if w.isalpha()])))

tokens = nltk.wordpunct_tokenize(string2)
text = nltk.Text(tokens)

words2 = list(sorted(set([w.lower() for w in text if w.isalpha()])))


print(words)
print(len(words),"\n\n")

print(words2)
print(len(words2))


['a', 'able', 'about', 'absolutely', 'accordingly', 'accounting', 'accumulates', 'accurate', 'acquire', 'act', 'activities', 'activity', 'actor', 'actually', 'acupuncture', 'add', 'admit', 'adrenaline', 'adventure', 'adventures', 'adventurous', 'advice', 'affected', 'afraid', 'africa', 'after', 'afterlife', 'aftermath', 'afternoon', 'again', 'against', 'ago', 'agony', 'agree', 'ah', 'ahh', 'ai', 'aim', 'air', 'aka', 'alien', 'aliens', 'alive', 'all', 'allow', 'almost', 'along', 'alphabets', 'already', 'also', 'although', 'always', 'am', 'amateur', 'amazing', 'amusement', 'an', 'analysis', 'and', 'anger', 'angry', 'anime', 'another', 'answer', 'anxious', 'any', 'anymore', 'anyone', 'anything', 'anyway', 'anyways', 'anywhere', 'apparently', 'appeal', 'apple', 'appropriate', 'arbitrary', 'are', 'area', 'aren', 'around', 'arrangement', 'arrived', 'art', 'artificial', 'as', 'asia', 'asian', 'ask', 'asked', 'asking', 'associate', 'assumed', 'assure', 'at', 'ate', 'attack', 'attend', 'attendi

In [None]:
idx2w1 = {i+1:w for i,w in enumerate(words2)}
print(idx2w1,'\n')
w12idx = {w:i+1 for i,w in enumerate(words2)}
print(w12idx,'\n')
idx2w2 = {i+1:w for i,w in enumerate(words)}
print(idx2w2,'\n')
w22idx = {w:i+1 for i,w in enumerate(words)}
print(w22idx,'\n')


{1: 'a', 2: 'ability', 3: 'able', 4: 'about', 5: 'absolutely', 6: 'abt', 7: 'accidentally', 8: 'accidents', 9: 'accomplishments', 10: 'accounting', 11: 'acrophobia', 12: 'action', 13: 'activities', 14: 'activity', 15: 'actor', 16: 'actually', 17: 'acupuncturist', 18: 'addicted', 19: 'admit', 20: 'adopted', 21: 'advance', 22: 'advances', 23: 'advantages', 24: 'advent', 25: 'adventure', 26: 'adventures', 27: 'advocate', 28: 'affected', 29: 'aficianado', 30: 'afraid', 31: 'african', 32: 'after', 33: 'afternoon', 34: 'afterwards', 35: 'again', 36: 'ago', 37: 'agree', 38: 'ah', 39: 'ahh', 40: 'ahhh', 41: 'ai', 42: 'air', 43: 'airbnb', 44: 'alcohol', 45: 'aligned', 46: 'alive', 47: 'all', 48: 'almost', 49: 'along', 50: 'already', 51: 'alright', 52: 'also', 53: 'alto', 54: 'always', 55: 'am', 56: 'amazing', 57: 'ambitious', 58: 'america', 59: 'among', 60: 'amount', 61: 'an', 62: 'and', 63: 'animal', 64: 'anime', 65: 'answer', 66: 'any', 67: 'anyday', 68: 'anything', 69: 'anyways', 70: 'apart'

In [None]:
 string3 = ''
 for s in range(len(human_2)):
  string3 += ' '+ human_2[s][0]
tokens = nltk.wordpunct_tokenize(string3)
words = nltk.Text(tokens)
words = [w.lower() for w in words if w.isalpha()]
string4 = ''
for w in words:
  string4 += ' ' + w

sentences = string4.split('human')

In [None]:
def ltstring(l):
  string = ''
  for w in l:
    string += ' ' + w
  return string


padded_sentences = []
for s in sentences:
  word = nltk.word_tokenize(s)
  if len(word) > 32:
    w = ltstring(word[0:32])
    w2 = ltstring(word[32:-1])
    padded_sentences.append(w)
    padded_sentences.append(w2)
  else:
    padded_sentences.append(s)

print(padded_sentences[0:10])

[' ', ' what is your favorite holiday ', ' what was the most number of people you have ever met during a holiday ', ' which holiday was that ', ' do you still talk to the people you met ', ' yea me too i feel like god often puts strangers in front of you and gives you an opportunity to connect with them in that moment in deeply meaningful ways do you', ' ever feel like you know things about strangers without them telling', ' i think it s like a sense often seen as cold readings to people but can be remarkably accurate i once sat next to a man in a coffee and i felt', ' a pain in my back i asked the stranger if he had a pain it turns out that he did in the exact spot and said he pulled a muscle while dancing at a party i had never met the man before and never saw him', ' there s this practice called treasure hunting that s kind of a fun game you play in a public place there s a book called the ultimate treasure hunt that talks about']


In [None]:

def maxseqlen(l):
  v = 0
  for s in l:
    w = nltk.word_tokenize(s)
    if v < len(w):
      v = len(w)

  return v

maxseqlen(sentences)





154

In [None]:
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [None]:
sorted_words = list(sorted(set(words)))

print(sorted_words)

data = np.zeros((len(sentences),len(sorted_words)),dtype = np.int16)

for i,s in enumerate(sentences):
    word = nltk.word_tokenize(s)
    try:

      for j,w in enumerate(word):
          data[i][j] = int(w22idx[w])
    except KeyError:
      pass


print(data)
print(w22idx['what'])



['a', 'able', 'about', 'absolutely', 'accordingly', 'accounting', 'accumulates', 'accurate', 'acquire', 'act', 'activities', 'activity', 'actor', 'actually', 'acupuncture', 'add', 'admit', 'adrenaline', 'adventure', 'adventures', 'adventurous', 'advice', 'affected', 'afraid', 'africa', 'after', 'afterlife', 'aftermath', 'afternoon', 'again', 'against', 'ago', 'agony', 'agree', 'ah', 'ahh', 'ai', 'aim', 'air', 'aka', 'alien', 'aliens', 'alive', 'all', 'allow', 'almost', 'along', 'alphabets', 'already', 'also', 'although', 'always', 'am', 'amateur', 'amazing', 'amusement', 'an', 'analysis', 'and', 'anger', 'angry', 'anime', 'another', 'answer', 'anxious', 'any', 'anymore', 'anyone', 'anything', 'anyway', 'anyways', 'anywhere', 'apparently', 'appeal', 'apple', 'appropriate', 'arbitrary', 'are', 'area', 'aren', 'around', 'arrangement', 'arrived', 'art', 'artificial', 'as', 'asia', 'asian', 'ask', 'asked', 'asking', 'associate', 'assumed', 'assure', 'at', 'ate', 'attack', 'attend', 'attendi

In [None]:
t_data = np.zeros((1,32,len(w22idx))

for i in range(data.shape[0]):
  d = list(data[i])
  val = len(sorted_words) - d.count(0)
  if val <= 32:
    for j,idx in enumerate(data[i]):
      l = [0]*len(sorted_words)
      l[idx] = 1 
      t_data[i][j] = l 


In [None]:
arr = np.zeros((1,10,10))
arr = np.append(arr,[[[0]*1]*10],1)
arr = np.append(arr,[[[1]*1]*10],1)
print(arr.shape)

In [None]:
from tensorflow.keras.preprocessing.text import Tokenizer
#using the <LOV> to tokenize the unknown words i.e. the words, which are not in the vocabulary, will be represented as <LOV> in the word_index
tokenizer = Tokenizer(oov_token="<LOV>")
tokenizer.fit_on_texts(sentences)
sequences = tokenizer.texts_to_sequences(sentences)
padded = pad_sequences(sequences,maxlen = 32,padding='post')

print(padded[1])
print(padded.shape)


[ 15  10  31 117 208   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0]
(753, 32)


numpy.ndarray

In [None]:
label_tokenizer = Tokenizer()
label_tokenizer.fit_on_texts(['what','is','your'])
label_word_index = label_tokenizer.word_index
label_seq = label_tokenizer.texts_to_sequences(['what','is','your'])
print(label_seq[:5])
print('— — -')
print(label_word_index)

[[1], [2], [3]]
— — -
{'what': 1, 'is': 2, 'your': 3}


In [None]:
tokenizer.sequences_to_texts([[ 15 , 10 , 31, 117, 208 ,  0 ,  0 ,  0  , 0 ,  0 ,  0 ,  0  , 0  , 0 ,  0 ,  0 ,  0 ,  0
,   0 ,  0 ,  0 ,  0 ,  0 ,  0  , 0  , 0  , 0 ,  0 ,  0 ,  0 ,  0 ,  0]])

['what is your favorite holiday <LOV> <LOV> <LOV> <LOV> <LOV> <LOV> <LOV> <LOV> <LOV> <LOV> <LOV> <LOV> <LOV> <LOV> <LOV> <LOV> <LOV> <LOV> <LOV> <LOV> <LOV> <LOV> <LOV> <LOV> <LOV> <LOV> <LOV>']

# **Data Preprocessing**

In [None]:
text = open('Chat_data.txt','rb').read().decode(encoding ='utf-8')
text

'data\r\nHuman 1: Hi!\r\nHuman 2: What is your favorite holiday?\r\nHuman 1: one where I get to meet lots of different people.\r\nHuman 2: What was the most number of people you have ever met during a holiday?\r\nHuman 1: Hard to keep a count. Maybe 25.\r\nHuman 2: Which holiday was that?\r\nHuman 1: I think it was Australia\r\nHuman 2: Do you still talk to the people you met?\r\nHuman 1: Not really. The interactions are usually short-lived but it\'s fascinating to learn where people are coming from and what matters to them\r\nHuman 2: Yea, me too. I feel like God often puts strangers in front of you, and gives you an opportunity to connect with them in that moment in deeply meaningful ways. Do you ever feel like you know things about strangers without them telling you?\r\nHuman 1: what do you mean?\r\nHuman 2: I think it\'s like a 6th sense, often seen as "cold readings" to people, but can be remarkably accurate. I once sat next to a man in a coffee and I felt a pain in my back. I ask

In [None]:
import re
text=re.sub(r'Human: ','',text)

In [None]:
text

'data\r\nHuman 1: Hi!\r\nHuman 2: What is your favorite holiday?\r\nHuman 1: one where I get to meet lots of different people.\r\nHuman 2: What was the most number of people you have ever met during a holiday?\r\nHuman 1: Hard to keep a count. Maybe 25.\r\nHuman 2: Which holiday was that?\r\nHuman 1: I think it was Australia\r\nHuman 2: Do you still talk to the people you met?\r\nHuman 1: Not really. The interactions are usually short-lived but it\'s fascinating to learn where people are coming from and what matters to them\r\nHuman 2: Yea, me too. I feel like God often puts strangers in front of you, and gives you an opportunity to connect with them in that moment in deeply meaningful ways. Do you ever feel like you know things about strangers without them telling you?\r\nHuman 1: what do you mean?\r\nHuman 2: I think it\'s like a 6th sense, often seen as "cold readings" to people, but can be remarkably accurate. I once sat next to a man in a coffee and I felt a pain in my back. I ask

In [None]:
print("The length of characters in the text of database",len(text))

The length of characters in the text of database 117656


In [None]:
vocab=sorted(set(text))

In [None]:
print(vocab[:40])

['\n', '\r', ' ', '!', '"', '%', '&', "'", '(', ')', '*', '+', ',', '-', '.', '/', '0', '1', '2', '3', '4', '5', '6', '7', '9', ':', ';', '<', '>', '?', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J']


In [None]:
char2idx = {u:i for i,u in enumerate(vocab)}

{'\n': 0, '\r': 1, ' ': 2, '!': 3, '"': 4, '%': 5, '&': 6, "'": 7, '(': 8, ')': 9, '*': 10, '+': 11, ',': 12, '-': 13, '.': 14, '/': 15, '0': 16, '1': 17, '2': 18, '3': 19, '4': 20, '5': 21, '6': 22, '7': 23, '9': 24, ':': 25, ';': 26, '<': 27, '>': 28, '?': 29, 'A': 30, 'B': 31, 'C': 32, 'D': 33, 'E': 34, 'F': 35, 'G': 36, 'H': 37, 'I': 38, 'J': 39, 'K': 40, 'L': 41, 'M': 42, 'N': 43, 'O': 44, 'P': 45, 'Q': 46, 'R': 47, 'S': 48, 'T': 49, 'U': 50, 'V': 51, 'W': 52, 'X': 53, 'Y': 54, 'Z': 55, '\\': 56, '_': 57, 'a': 58, 'b': 59, 'c': 60, 'd': 61, 'e': 62, 'f': 63, 'g': 64, 'h': 65, 'i': 66, 'j': 67, 'k': 68, 'l': 69, 'm': 70, 'n': 71, 'o': 72, 'p': 73, 'q': 74, 'r': 75, 's': 76, 't': 77, 'u': 78, 'v': 79, 'w': 80, 'x': 81, 'y': 82, 'z': 83, '~': 84, '\x90': 85, '\x9d': 86, '¦': 87, '©': 88, '¹': 89, '¼': 90, 'Ã': 91, 'â': 92, 'æ': 93, 'ç': 94, 'ï': 95, 'ð': 96, 'œ': 97, 'š': 98, 'Ÿ': 99, 'ž': 100, '˜': 101, '‚': 102, '†': 103, '•': 104, '‰': 105, '›': 106, '€': 107, '™': 108}


In [None]:
idx2char = np.array(vocab)
print(idx2char)

['\n' '\r' ' ' '!' '"' '%' '&' "'" '(' ')' '*' '+' ',' '-' '.' '/' '0' '1'
 '2' '3' '4' '5' '6' '7' '9' ':' ';' '<' '>' '?' 'A' 'B' 'C' 'D' 'E' 'F'
 'G' 'H' 'I' 'J' 'K' 'L' 'M' 'N' 'O' 'P' 'Q' 'R' 'S' 'T' 'U' 'V' 'W' 'X'
 'Y' 'Z' '\\' '_' 'a' 'b' 'c' 'd' 'e' 'f' 'g' 'h' 'i' 'j' 'k' 'l' 'm' 'n'
 'o' 'p' 'q' 'r' 's' 't' 'u' 'v' 'w' 'x' 'y' 'z' '~' '\x90' '\x9d' '¦' '©'
 '¹' '¼' 'Ã' 'â' 'æ' 'ç' 'ï' 'ð' 'œ' 'š' 'Ÿ' 'ž' '˜' '‚' '†' '•' '‰' '›'
 '€' '™']


In [None]:
#Convert the text to int
text_as_int=np.array([char2idx[c] for c in text])
print(text_as_int)

[61 58 77 ... 66 77 14]


In [None]:
print('{')
for char,_ in zip(char2idx, range(20)):
    print('  {:4s}: {:3d},'.format(repr(char), char2idx[char]))
print('  ...\n}')

{
  '\n':   0,
  '\r':   1,
  ' ' :   2,
  '!' :   3,
  '"' :   4,
  '%' :   5,
  '&' :   6,
  "'" :   7,
  '(' :   8,
  ')' :   9,
  '*' :  10,
  '+' :  11,
  ',' :  12,
  '-' :  13,
  '.' :  14,
  '/' :  15,
  '0' :  16,
  '1' :  17,
  '2' :  18,
  '3' :  19,
  ...
}


In [None]:
# Show how the first 13 characters from the text are mapped to integers
print ('{} ---- characters mapped to int ---- > {}'.format(repr(text[0]), text_as_int[0]))

'd' ---- characters mapped to int ---- > 61
