In [1]:
import torch
import torch.nn as nn
import numpy as np
import re
import pickle

In [2]:
# Load vocab
# Location of the pickled (word2idx, idx2word) pair; change this one constant to relocate the file.
VOCAB_PATH = "C:/Users/BIBHAV KUMAR/Desktop/Chatbot_Project/Revised_Model/vocab.pkl"

# SECURITY: pickle.load can execute arbitrary code embedded in the file.
# Only point VOCAB_PATH at a vocabulary file you produced yourself or fully trust.
with open(VOCAB_PATH, "rb") as f:
    word2idx, idx2word = pickle.load(f)

# Fail fast: every special token below is looked up unconditionally later in the
# notebook, so a vocabulary without one would otherwise only fail at inference time.
_missing_specials = [tok for tok in ("<pad>", "<unk>", "<sos>", "<eos>") if tok not in word2idx]
if _missing_specials:
    raise KeyError(f"vocab is missing required special tokens: {_missing_specials}")

vocab_size = len(word2idx)  # number of entries in the token -> id mapping
MAX_LEN = 20  # default max_len for encode() and generate_response()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
# Tokenize and Encode
def tokenize(text):
    """Split ``text`` into normalized word tokens.

    The text is lower-cased, every character that is not ``a-z``, ``0-9`` or
    whitespace is deleted (not replaced by a space), and the remainder is
    split on runs of whitespace.

    Returns:
        list[str]: the tokens, possibly empty.
    """
    cleaned = re.sub(r"[^a-z0-9\s]", "", text.lower())
    return cleaned.split()

def encode(tokens, word2idx, max_len=MAX_LEN):
    """Map tokens to vocabulary ids, framed by ``<sos>`` and ``<eos>``.

    Args:
        tokens: iterable of token strings.
        word2idx: mapping from token to id; must contain ``<sos>`` and ``<eos>``,
            and ``<unk>`` whenever ``tokens`` is non-empty.
        max_len: length budget for the result, including the two framing ids.

    Returns:
        list: ``[<sos>, ...token ids..., <eos>]``. Tokens absent from
        ``word2idx`` map to the ``<unk>`` id. For ``max_len >= 2`` the result
        has at most ``max_len`` entries; for smaller values it is just the
        two framing ids.
    """
    ids = [word2idx.get(w, word2idx["<unk>"]) for w in tokens]
    # Two slots are reserved for <sos>/<eos>. Clamp at zero: a negative slice
    # bound (max_len < 2) would drop tokens from the END instead of keeping none.
    body_budget = max(max_len - 2, 0)
    return [word2idx["<sos>"]] + ids[:body_budget] + [word2idx["<eos>"]]

def decode(ids, idx2word):
    """Turn a sequence of ids back into a space-separated string.

    Ids missing from ``idx2word`` become ``"<unk>"``. The markers ``<sos>``,
    ``<eos>`` and ``<pad>`` are dropped wherever they occur.

    Returns:
        str: the remaining words joined by single spaces ("" if none remain).
    """
    special_tokens = ("<sos>", "<eos>", "<pad>")
    kept = []
    for token_id in ids:
        word = idx2word.get(token_id, "<unk>")
        if word in special_tokens:
            continue
        kept.append(word)
    return " ".join(kept)

In [4]:
# Positional Encoding
class PositionalEncoding(nn.Module):
    """Adds a fixed sinusoidal position table to an embedding tensor.

    A table of shape ``(1, max_len, d_model)`` is precomputed once: even
    feature columns hold ``sin(position * div_term)`` and odd columns hold
    ``cos(position * div_term)``.

    The table is stored as a *non-persistent* buffer, so it follows the module
    through ``.to(device)`` but is not written to or expected in
    ``state_dict()``. Checkpoints saved without a ``pe`` entry still load.

    Args:
        d_model: size of the last (feature) dimension; may be even or odd.
        max_len: number of positions to precompute.
    """

    def __init__(self, d_model, max_len=5000):
        super().__init__()
        position = torch.arange(0, max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * (-np.log(10000.0) / d_model))
        angles = position * div_term  # (max_len, ceil(d_model / 2))

        pe = torch.zeros(max_len, d_model)
        pe[:, 0::2] = torch.sin(angles)
        # With an odd d_model there is one fewer odd column than even column,
        # so trim the angles to the number of cosine slots actually present.
        pe[:, 1::2] = torch.cos(angles[:, : d_model // 2])
        self.register_buffer("pe", pe.unsqueeze(0), persistent=False)

    def forward(self, x):
        """Return ``x`` plus the first ``x.size(1)`` rows of the position table.

        Dimension 1 of ``x`` is treated as the position axis and the last
        dimension must equal ``d_model``. ``x.size(1)`` is expected to be at
        most ``max_len``.
        """
        # The buffer normally already lives on x's device; .to() is then a no-op
        # and only copies if the input was placed somewhere else.
        return x + self.pe[:, : x.size(1)].to(x.device)

In [5]:
# Transformer Model
class TransformerChatbot(nn.Module):
    """Encoder-decoder transformer mapping a source id sequence to next-token logits.

    Source and target share one embedding table. Embeddings are scaled by
    ``sqrt(d_model)``, passed through ``PositionalEncoding`` and fed to
    ``nn.Transformer`` (``batch_first=True``); a linear layer projects the
    decoder output to vocabulary size.

    Args:
        vocab_size: number of rows in the embedding table and size of the output layer.
        d_model: embedding / model width.
        nhead: number of attention heads.
        num_layers: layer count used for both the encoder and the decoder.
        dim_ff: feed-forward width inside each transformer layer.
        pad_idx: id of the padding token used to build the key-padding masks.
            When ``None`` (the default) the id is read from the notebook-level
            ``word2idx['<pad>']`` on each forward call, as before.
    """

    def __init__(self, vocab_size, d_model=256, nhead=8, num_layers=4, dim_ff=512, pad_idx=None):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, d_model)
        self.pos_encoder = PositionalEncoding(d_model)
        self.transformer = nn.Transformer(
            d_model=d_model, nhead=nhead,
            num_encoder_layers=num_layers,
            num_decoder_layers=num_layers,
            dim_feedforward=dim_ff,
            batch_first=True
        )
        self.fc_out = nn.Linear(d_model, vocab_size)
        self.d_model = d_model
        # Plain attribute (not a parameter/buffer), so state_dict keys are unchanged.
        self.pad_idx = pad_idx

    def _padding_index(self):
        """Return the explicit pad id if one was given, else the global vocabulary's."""
        if self.pad_idx is not None:
            return self.pad_idx
        return word2idx['<pad>']

    def forward(self, src, tgt):
        """Compute logits for every target position.

        Args:
            src: integer id tensor for the source; dimension 1 is the sequence axis.
            tgt: integer id tensor for the target generated so far; dimension 1
                is the sequence axis.

        Returns:
            Tensor whose last dimension is ``vocab_size``: the output of
            ``fc_out`` applied to the transformer's decoder output.
        """
        # A Python float avoids allocating and moving a scalar tensor on every call.
        scale = self.d_model ** 0.5
        src_emb = self.pos_encoder(self.embedding(src) * scale)
        tgt_emb = self.pos_encoder(self.embedding(tgt) * scale)

        # Causal mask: each target position may only attend to itself and earlier ones.
        tgt_mask = self.transformer.generate_square_subsequent_mask(tgt.size(1)).to(tgt.device)

        pad_id = self._padding_index()
        src_key_padding_mask = (src == pad_id)
        tgt_key_padding_mask = (tgt == pad_id)

        out = self.transformer(
            src_emb, tgt_emb,
            tgt_mask=tgt_mask,
            src_key_padding_mask=src_key_padding_mask,
            tgt_key_padding_mask=tgt_key_padding_mask,
            # The decoder's cross-attention must ignore the same padded source slots.
            memory_key_padding_mask=src_key_padding_mask
        )
        return self.fc_out(out)

In [6]:
# Load Model
# Location of the saved state_dict; change this one constant to relocate the checkpoint.
MODEL_PATH = "C:/Users/BIBHAV KUMAR/Desktop/Chatbot_Project/Revised_Model/transformer_chatbot6.pth"

model = TransformerChatbot(vocab_size)
# weights_only=True restricts unpickling to tensors and plain containers, so a
# tampered checkpoint cannot execute arbitrary code while being loaded.
state_dict = torch.load(MODEL_PATH, map_location=device, weights_only=True)
model.load_state_dict(state_dict)
model.to(device)
# eval() switches off dropout for inference; as the cell's last expression it
# also displays the module summary.
model.eval()

TransformerChatbot(
  (embedding): Embedding(17634, 256)
  (pos_encoder): PositionalEncoding()
  (transformer): Transformer(
    (encoder): TransformerEncoder(
      (layers): ModuleList(
        (0-3): 4 x TransformerEncoderLayer(
          (self_attn): MultiheadAttention(
            (out_proj): NonDynamicallyQuantizableLinear(in_features=256, out_features=256, bias=True)
          )
          (linear1): Linear(in_features=256, out_features=512, bias=True)
          (dropout): Dropout(p=0.1, inplace=False)
          (linear2): Linear(in_features=512, out_features=256, bias=True)
          (norm1): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
          (norm2): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
          (dropout1): Dropout(p=0.1, inplace=False)
          (dropout2): Dropout(p=0.1, inplace=False)
        )
      )
      (norm): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
    )
    (decoder): TransformerDecoder(
      (layers): ModuleList(
       

In [7]:
# Inference Function
def generate_response(model, input_text, max_len=MAX_LEN):
    """Generate a reply to ``input_text`` by greedy decoding.

    The input is tokenized and encoded (truncated to ``max_len`` ids), then the
    reply is built one token at a time: at each step the model is run on the
    full source plus everything generated so far, and the highest-scoring next
    token is appended. Decoding stops at ``<eos>`` or after ``max_len`` steps.

    Args:
        model: callable taking ``(src_ids, tgt_ids)`` tensors and returning
            logits indexed as ``[batch, position, vocab]``. It is not switched
            to eval mode here; the caller is expected to have done that.
        input_text: raw user text.
        max_len: cap on both the encoded input length and the number of
            generated tokens.

    Returns:
        str: the decoded reply with special tokens removed.
    """
    # Put inputs on the model's own device rather than the notebook-level
    # global, so a model moved elsewhere cannot end up with mismatched inputs.
    try:
        run_device = next(model.parameters()).device
    except (AttributeError, StopIteration):
        run_device = device

    sos_id = word2idx["<sos>"]
    eos_id = word2idx["<eos>"]

    input_ids = encode(tokenize(input_text), word2idx, max_len)
    input_tensor = torch.tensor(input_ids, dtype=torch.long, device=run_device).unsqueeze(0)

    output_ids = [sos_id]
    # A single no_grad context for the whole decode instead of one per step.
    with torch.no_grad():
        for _ in range(max_len):
            output_tensor = torch.tensor(output_ids, dtype=torch.long, device=run_device).unsqueeze(0)
            logits = model(input_tensor, output_tensor)
            # Only the logits at the last target position predict the next token.
            next_token = logits[0, -1, :].argmax().item()
            output_ids.append(next_token)
            if next_token == eos_id:
                break

    return decode(output_ids, idx2word)

In [None]:
# Chat Loop
# Reads user lines until "exit"/"quit" (case-insensitive), end of input, or an interrupt.
print("🤖 Chatbot is ready! Type 'exit' to quit.")
while True:
    try:
        # Strip so that inputs such as "exit " or " quit" are still recognized.
        user_input = input("You: ").strip()
    except (EOFError, KeyboardInterrupt):
        # stdin closed or the cell was interrupted: leave cleanly, no traceback.
        print("\nChatbot: Goodbye! 👋")
        break
    if not user_input:
        # Nothing typed; prompt again instead of sending an empty query to the model.
        continue
    if user_input.lower() in ("exit", "quit"):
        print("Chatbot: Goodbye! 👋")
        break
    response = generate_response(model, user_input)
    print("Chatbot:", response)

🤖 Chatbot is ready! Type 'exit' to quit.


  output = torch._nested_tensor_from_mask(


Chatbot: hey mate i am doing just fine please lets move ahead with the queries i can help you
Chatbot: please contact our customer service immediately to report your lost card and request a replacement
Chatbot: flu symptoms include fever cough sore throat body aches and fatigue
Chatbot: i really hope it rains today
Chatbot: that is true the weather is constantly changing
Chatbot: i am mike
Chatbot: i am mike
Chatbot: i am mike
