In [1]:
# Loading the data
import json
with open("intents.json", "r") as file:
    data = json.load(file)

patterns = []
responses = []

for intent in data["intents"]:
    for pattern in intent["patterns"]:
        patterns.append(pattern)
        responses.append(intent['responses'][0])

In [2]:
# Initialize the tokenizer and model
from transformers import GPT2Tokenizer, GPT2LMHeadModel, AdamW, get_linear_schedule_with_warmup
import torch
from torch.utils.data import Dataset, DataLoader
tokenizer = GPT2Tokenizer.from_pretrained('gpt2-medium')
model = GPT2LMHeadModel.from_pretrained('gpt2-medium')
tokenizer.pad_token = tokenizer.eos_token

# Define the dataset
class ConversationDataset(Dataset):
    def __init__(self, questions, answers, tokenizer, max_length):
        self.tokenizer = tokenizer
        self.questions = questions
        self.answers = answers
        self.max_length = max_length

    def __len__(self):
        return len(self.questions)

    def __getitem__(self, idx):
        question = self.questions[idx]
        answer = self.answers[idx]

        inputs = self.tokenizer.encode_plus(
            question, 
            answer, 
            add_special_tokens=True, 
            max_length=self.max_length, 
            padding='max_length', 
            truncation='only_first',
            return_tensors='pt',
            return_overflowing_tokens=True
        )

        input_ids = inputs['input_ids'].flatten()
        if len(input_ids) != self.max_length:
           print(f"Unexpected length at index {idx}: {len(input_ids)}")

        return {
            'input_ids': input_ids,
            'attention_mask': inputs['attention_mask'].flatten(),
            'labels': inputs['input_ids'].flatten()
        }

# Training Preparation
max_length = 100
dataset = ConversationDataset(patterns, responses, tokenizer, max_length=max_length)
dataloader = DataLoader(dataset, batch_size=4, shuffle=True)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

optimizer = AdamW(model.parameters(), lr=5e-5, no_deprecation_warning=True)
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=len(dataloader) * 15)  # Assuming 3 epochs

# Training Loop
epochs = 15
for epoch in range(epochs):
    model.train()
    total_loss = 0

    for batch in dataloader:
        optimizer.zero_grad()

        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        total_loss += loss.item()

        loss.backward()
        optimizer.step()
        scheduler.step()

    print(f"Epoch: {epoch + 1}, Loss: {total_loss / len(dataloader)}")

Epoch: 1, Loss: 1.550204546220841
Epoch: 2, Loss: 0.5390577071136043
Epoch: 3, Loss: 0.3518816126931098
Epoch: 4, Loss: 0.23296106943199713
Epoch: 5, Loss: 0.1582493229258445
Epoch: 6, Loss: 0.11154509371807499
Epoch: 7, Loss: 0.0869935367616915
Epoch: 8, Loss: 0.07047792572167612
Epoch: 9, Loss: 0.059371744252500996
Epoch: 10, Loss: 0.05564107805971177
Epoch: 11, Loss: 0.053503402779179234
Epoch: 12, Loss: 0.05051255178066992
Epoch: 13, Loss: 0.04794458486139774
Epoch: 14, Loss: 0.04415519457430609
Epoch: 15, Loss: 0.04088488046921069


In [3]:
# Saving the model after trainin
model_path = './trained_gpt_model'
tokenizer.save_pretrained(model_path)
model.save_pretrained(model_path)

In [4]:
from transformers import GPT2Tokenizer, GPT2LMHeadModel

tokenizer = GPT2Tokenizer.from_pretrained("./trained_gpt_model")
model = GPT2LMHeadModel.from_pretrained("./trained_gpt_model")
model.eval()  # Set the model to evaluation mode
model.to(torch.device('cuda' if torch.cuda.is_available() else 'cpu'))  # Move model to GPU if available

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 1024)
    (wpe): Embedding(1024, 1024)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-23): 24 x GPT2Block(
        (ln_1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D()
          (c_proj): Conv1D()
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=1024, out_features=50257, bias=False)
)

In [5]:
def get_response(user_input, max_length=50):
    # Encode the user's input and add the eos_token
    input_ids = tokenizer.encode(user_input, return_tensors='pt', add_special_tokens=True)
    input_ids = input_ids.to(torch.device('cuda' if torch.cuda.is_available() else 'cpu'))  # Move tensor to GPU if available

    # Generate a response from the model
    with torch.no_grad():
        output = model.generate(input_ids, pad_token_id=tokenizer.eos_token_id, max_length=max_length, num_return_sequences=1)

    # Decode the response
    response = tokenizer.decode(output[:, input_ids.shape[-1]:][0], skip_special_tokens=True)
    
    return response

In [6]:
while True:
    user_input = input("You: ")
    if user_input.lower() in ['quit', 'exit']:
        break
    response = get_response(user_input)
    print("Bot:", response)


Bot: Hello, thanks for asking
Bot: Please enter with order ID
Bot: Yes
Bot: Please enter your email id,we will send a link on your email
