<a href="https://colab.research.google.com/github/Sabarinathan612/machine_learning_project/blob/main/mini_chatbot_ml.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import tensorflow as tf
import re

# Sample dataset: each tuple is (user message, bot reply).
# These four pairs are the only training data, so the bot can at best
# reproduce these replies for these (or very similar) messages.
conversations = [
    ("Hi", "Hello! How can I help you?"),
    ("What's your name?", "I am a chatbot created by Sabari"),
    ("How are you?", "I'm just a bunch of code, so I don't have feelings, but thanks for asking!"),
    ("Goodbye", "Goodbye! Have a nice day!"),
]

# Preprocessing function
def preprocess_sentence(sentence):
    """Normalise raw text into lowercase, space-separated tokens.

    The substitutions are applied in order:
      1. surround each of ? . ! , ¿ with spaces so it becomes its own token;
      2. collapse runs of spaces / double quotes into a single space;
      3. replace every run of characters other than ASCII letters and the
         punctuation above with a single space (this also drops digits and
         apostrophes, so "what's" becomes "what s").

    Args:
        sentence: The raw input string.

    Returns:
        The cleaned string with no leading or trailing whitespace.
    """
    substitutions = (
        (r"([?.!,¿])", r" \1 "),
        (r'[" "]+', " "),
        (r"[^a-zA-Z?.!,¿]+", " "),
    )

    text = sentence.lower().strip()
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)
    return text.strip()

# Preprocess the dataset: clean both sides of every pair and wrap each reply
# in the literal marker words "start" / "end" that the decoder is trained on.
input_texts, target_texts = [], []
for input_text, target_text in conversations:
    input_texts.append(preprocess_sentence(input_text))
    target_texts.append(f'start {preprocess_sentence(target_text)} end')

# Tokenization: one shared vocabulary for prompts and replies. filters=''
# keeps the punctuation tokens produced by preprocess_sentence.
tokenizer = tf.keras.preprocessing.text.Tokenizer(filters='')
tokenizer.fit_on_texts([*input_texts, *target_texts])

# Convert to integer ids and right-pad with 0 to the longest sequence.
input_sequences = tf.keras.preprocessing.sequence.pad_sequences(
    tokenizer.texts_to_sequences(input_texts), padding='post')
target_sequences = tf.keras.preprocessing.sequence.pad_sequences(
    tokenizer.texts_to_sequences(target_texts), padding='post')

max_length_input = input_sequences.shape[1]
max_length_target = target_sequences.shape[1]
# +1 because id 0 is used for padding and is not in word_index.
vocab_size = 1 + len(tokenizer.word_index)

# Training labels: the target sequence shifted left by one step, so position
# t holds the token that should follow target token t. The array starts out
# zero-filled, so the final column is already the padding id.
target_sequences_shifted = np.zeros(target_sequences.shape)
target_sequences_shifted[:, :-1] = target_sequences[:, 1:]

from tensorflow.keras.layers import Input, LSTM, Embedding, Dense
from tensorflow.keras.models import Model

embedding_dim = 256  # size of each token embedding vector
units = 512          # LSTM hidden/cell state size (shared by encoder and decoder)

# Encoder: embeds the padded prompt and keeps only the LSTM's final hidden and
# cell states, which summarise the prompt for the decoder.
encoder_inputs = Input(shape=(max_length_input,))
encoder_embedding = Embedding(vocab_size, embedding_dim)(encoder_inputs)
encoder_lstm = LSTM(units, return_state=True)
encoder_outputs, state_h, state_c = encoder_lstm(encoder_embedding)
encoder_states = [state_h, state_c]

# Decoder: the time dimension is left unspecified (None) on purpose. The
# inference decoder built later in this file reuses `decoder_inputs` /
# `decoder_embedding` and is stepped with a single token per call, i.e. an
# input of shape (1, 1). With a fixed length of max_length_target that call
# does not match the declared input shape (Keras warns or raises, depending on
# the version). Training still works with the full-length padded targets.
decoder_inputs = Input(shape=(None,))
decoder_embedding = Embedding(vocab_size, embedding_dim)(decoder_inputs)
decoder_lstm = LSTM(units, return_sequences=True, return_state=True)
# The decoder starts from the encoder's final states; its own final states are
# not needed during training.
decoder_outputs, _, _ = decoder_lstm(decoder_embedding, initial_state=encoder_states)
# Per-timestep probability distribution over the vocabulary.
decoder_dense = Dense(vocab_size, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)

# Model: (prompt ids, reply ids) -> next-token distribution at every reply step.
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
# Sparse loss because the labels are integer token ids, not one-hot vectors.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')
model.summary()

# Train the model
batch_size = 64
epochs = 100

# Teacher forcing: the decoder is fed the full target sequence and is trained
# to predict the same sequence shifted one step to the left.
#
# No validation_split here: Keras takes the validation samples from the END of
# the data, so with only four conversation pairs a 0.2 split removed the last
# pair from training entirely and the bot could never learn that reply.
model.fit(
    [input_sequences, target_sequences],
    target_sequences_shifted,
    batch_size=batch_size,
    epochs=epochs,
)

# Encoder model: maps a padded prompt to the encoder LSTM's final [h, c] states.
encoder_model = Model(inputs=encoder_inputs, outputs=encoder_states)

# Decoder model: same trained layers as in training, but the initial LSTM
# states arrive as explicit inputs and the updated states are returned, so the
# caller can carry them from one decoding step to the next.
decoder_state_input_h = Input(shape=(units,))
decoder_state_input_c = Input(shape=(units,))
decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]

decoder_lstm_outputs, state_h, state_c = decoder_lstm(
    decoder_embedding,
    initial_state=decoder_states_inputs,
)
decoder_states = [state_h, state_c]
decoder_outputs = decoder_dense(decoder_lstm_outputs)

decoder_model = Model(
    inputs=[decoder_inputs, *decoder_states_inputs],
    outputs=[decoder_outputs, *decoder_states],
)

# Reverse-lookup token index to convert decoded sequences back to text
reverse_word_index = {index: word for word, index in tokenizer.word_index.items()}

# Decode sequence
def decode_sequence(input_seq):
    """Greedily generate the bot's reply for one encoded prompt.

    The prompt is run through ``encoder_model`` once; the resulting states
    seed ``decoder_model``, which is then stepped one token at a time, each
    step being fed the previously chosen token and the previous states.

    Decoding stops when the model emits the ``end`` marker, when it emits an
    id that has no word (id 0 is the padding id), or after
    ``max_length_target`` steps, whichever comes first.

    Args:
        input_seq: Padded prompt token ids, as produced in ``chat`` (one row).

    Returns:
        The generated words joined by single spaces, without the ``start`` /
        ``end`` marker words. May be an empty string.
    """
    # verbose=0: predict() is called once per generated token, and its
    # progress output would otherwise be printed into the conversation.
    states_value = encoder_model.predict(input_seq, verbose=0)

    # The decoder is primed with the 'start' marker used during training.
    target_seq = np.zeros((1, 1))
    target_seq[0, 0] = tokenizer.word_index['start']

    decoded_words = []
    # Bounded by step count rather than by number of words produced: a step
    # that yields no word must still count, otherwise the loop may never end.
    for _ in range(max_length_target):
        output_tokens, h, c = decoder_model.predict(
            [target_seq, *states_value], verbose=0)

        # Greedy choice: most probable token at the last (only) time step.
        sampled_token_index = int(np.argmax(output_tokens[0, -1, :]))
        sampled_word = reverse_word_index.get(sampled_token_index)

        # None -> id without a word (padding); 'end' -> end-of-reply marker.
        # Neither belongs in the text shown to the user.
        if sampled_word is None or sampled_word == 'end':
            break
        decoded_words.append(sampled_word)

        # Feed the chosen token and the updated states into the next step.
        target_seq = np.zeros((1, 1))
        target_seq[0, 0] = sampled_token_index
        states_value = [h, c]

    return ' '.join(decoded_words)

# Interactive chat
def chat():
    """Run a console chat loop until the user types 'exit' or input ends.

    Each non-blank line is cleaned with ``preprocess_sentence``, converted to
    token ids with the fitted ``tokenizer``, padded to ``max_length_input``
    and passed to ``decode_sequence``; the reply is printed prefixed by
    "Bot: ".
    """
    print("Start chatting with the bot (type 'exit' to stop)!")
    while True:
        try:
            input_text = input("You: ").strip()
        except EOFError:
            # stdin was closed (e.g. piped input ran out): leave the loop
            # cleanly instead of crashing with a traceback.
            print()
            print("Goodbye!")
            break

        # Whitespace was stripped above so that " exit " also ends the chat.
        if input_text.lower() == 'exit':
            print("Goodbye!")
            break

        # Nothing to answer on a blank line; prompt again.
        if not input_text:
            continue

        input_seq = tokenizer.texts_to_sequences([preprocess_sentence(input_text)])
        input_seq = tf.keras.preprocessing.sequence.pad_sequences(input_seq, maxlen=max_length_input, padding='post')
        decoded_sentence = decode_sequence(input_seq)
        print(f"Bot: {decoded_sentence}")

# Start the interactive session only when this code is executed directly
# (script or notebook cell, where __name__ is "__main__"), so that importing
# it from elsewhere does not block waiting for console input.
if __name__ == "__main__":
    chat()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 5)]                  0         []                            
                                                                                                  
 input_2 (InputLayer)        [(None, 22)]                 0         []                            
                                                                                                  
 embedding (Embedding)       (None, 5, 256)               10496     ['input_1[0][0]']             
                                                                                                  
 embedding_1 (Embedding)     (None, 22, 256)              10496     ['input_2[0][0]']             
                                                                                              