In [63]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pyttsx3 as tts
import re
from torch.utils.data import Dataset, DataLoader

In [57]:
with open("C:/Users/logan/Documents/Coding/Python/kAIra/3.0/Main/dialogues.txt", "r", errors="ignore") as file:
    data = file.read()
    
def parse(text):
    #text = re.sub(r"\n", " ", text)
    text = re.sub(r" â€™ ", "'", text)
    text = re.sub(r'[A-Z]', lambda match: match.group().lower(), text)
    return text

# Function to process the contents of the text file
def process_text_file(file_path):
    # List to store the tuples
    tuples_list = []

    # Open and read the file
    with open(file_path, 'r', errors='ignore') as file:
        lines = file.readlines()

        # Process each line
        for line in lines:
            # Strip any leading/trailing whitespace
            line = line.strip()

            # Split the line by __eou__
            entries = line.split('__eou__')

            # Create tuples from consecutive entries
            for i in range(len(entries) - 1):
                tuple_entry = (entries[i], entries[i + 1])
                tuples_list.append(tuple_entry)

    return tuples_list

file_path = 'C:/Users/logan/Documents/Coding/Python/kAIra/3.0/Main/dialogues.txt'
result = process_text_file(file_path)
print(result[:10])

[('the kitchen stinks . ', " i'll throw out the garbage . "), (" i'll throw out the garbage . ", ''), ('so dick , how about getting some coffee for tonight ? ', " coffee ? i don't honestly like that kind of stuff . "), (" coffee ? i don't honestly like that kind of stuff . ", ' come on , you can at least try a little , besides your cigarette . '), (' come on , you can at least try a little , besides your cigarette . ', " what's wrong with that ? cigarette is the thing i go crazy for . "), (" what's wrong with that ? cigarette is the thing i go crazy for . ", ' not for me , dick . '), (' not for me , dick . ', ''), ('are things still going badly with your houseguest ? ', " getting worse . now he's eating me out of house and home . i've tried talking to him but it all goes in one ear and out the other . he makes himself at home , which is fine . but what really gets me is that yesterday he walked into the living room in the raw and i had company over ! that was the last straw . "), (" ge

In [58]:

# Dummy data for demonstration purposes
data = result

# Tokenization and vocabulary building
word2idx = {}
idx2word = {}
for sentence, response in data:
    for word in sentence.split() + response.split():
        if word not in word2idx:
            idx2word[len(word2idx)] = word
            word2idx[word] = len(word2idx)

def tokenize(sentence):
    return [word2idx[word] for word in sentence.split()]

def detokenize(tokens):
    return ' '.join([idx2word[token] for token in tokens])

# Preparing input-output pairs
input_data = [tokenize(sentence) for sentence, _ in data]
target_data = [tokenize(response) for _, response in data]

# Padding sequences to ensure uniform length
def pad_sequence(seq, max_length):
    return seq + [0] * (max_length - len(seq))

max_length = max(max(len(seq) for seq in input_data), max(len(seq) for seq in target_data))
input_data = [pad_sequence(seq, max_length) for seq in input_data]
target_data = [pad_sequence(seq, max_length) for seq in target_data]

input_data = torch.tensor(input_data, dtype=torch.long)
target_data = torch.tensor(target_data, dtype=torch.long)


In [69]:
class ChatbotModel(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim):
        super(ChatbotModel, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)
    
    def forward(self, x):
        x = self.embedding(x)
        lstm_out, _ = self.lstm(x)
        logits = self.fc(lstm_out)
        return logits

vocab_size = len(word2idx)
embedding_dim = 10
hidden_dim = 50
output_dim = vocab_size
batch_size = 32


model = ChatbotModel(vocab_size, embedding_dim, hidden_dim, output_dim)


In [70]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


class Mydataset(Dataset):
    def __init__(self, data, targets):
        self.data = data
        self.targets = targets
    
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        return self.data[idx], self.targets[idx]

data = torch.randint(0, vocab_size, (1000, 20))
targets = torch.randint(0, vocab_size, (1000, 20))

dataset = Mydataset(data, targets)
data_loader = DataLoader(dataset, batch_size = batch_size, shuffle = True)

epochs = 1
for epoch in range(epochs):
    for input_data, target_data in data_loader:
        optimizer.zero_grad()
        output = model(input_data)
        loss = criterion(output.view(-1, vocab_size), target_data.view(-1))
        loss.backward()
        optimizer.step()
    
    print(f'Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}')
    
    if loss.item() < 0.1:
        break


Epoch [1/10], Loss: 10.0787
Epoch [2/10], Loss: 9.7455
Epoch [3/10], Loss: 9.7611
Epoch [4/10], Loss: 9.5394
Epoch [5/10], Loss: 9.4493
Epoch [6/10], Loss: 9.4212
Epoch [7/10], Loss: 9.3673
Epoch [8/10], Loss: 9.3838
Epoch [9/10], Loss: 9.2180
Epoch [10/10], Loss: 9.0523


In [71]:
def predict(sentence):
    model.eval()
    with torch.no_grad():
        input_seq = torch.tensor([pad_sequence(tokenize(sentence), max_length)], dtype=torch.long)
        output = model(input_seq)
        output_seq = torch.argmax(output, dim=2).numpy().flatten()
        response = detokenize(output_seq)
    return response

def speak(speech):
    tts.speak(speech)


while True:
    user_input = input("You: ")
    if user_input.lower() == "quit":
        break
    response = predict(parse(user_input))
    print(f"Bot: {response}")
    speak(response)

Bot: wakening cleansers trade.why trade.why trade.why trade.why trade.why trade.why trade.why trade.why trade.why trade.why trade.why trade.why trade.why trade.why send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send send s