In [2]:
import numpy as np
import pandas as pd
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Embedding, Dropout, LayerNormalization, MultiHeadAttention
from tensorflow.keras.models import Model

# Load dataset
data = pd.read_csv('/content/Datasetpostu.csv')

# Combine all user and bot messages
all_text = list(data['message'].values)

# Tokenize the text
tokenizer = Tokenizer()
tokenizer.fit_on_texts(all_text)
vocab_size = len(tokenizer.word_index) + 1

# Convert text to sequences
data['message_seq'] = tokenizer.texts_to_sequences(data['message'])

# Prepare the input and target sequences for the Transformer
# This example assumes that each conversation is a single sequence, split into input and target
max_len = 50  # Adjust as needed

X_input = []
X_target = []

for conv_id in data['conversation_id'].unique():
    conversation = data[data['conversation_id'] == conv_id]
    user_messages = conversation[conversation['sender'] == 'user']['message_seq'].values
    bot_messages = conversation[conversation['sender'] == 'bot']['message_seq'].values

    for i in range(len(user_messages)):
        input_seq = user_messages[i]
        target_seq = bot_messages[i] if i < len(bot_messages) else []

        X_input.append(input_seq)
        X_target.append(target_seq)

# Pad sequences
X_input = pad_sequences(X_input, maxlen=max_len, padding='post')
X_target = pad_sequences(X_target, maxlen=max_len, padding='post')

# Define Transformer Encoder and Decoder functions
def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0):
    x = LayerNormalization(epsilon=1e-6)(inputs)
    x = MultiHeadAttention(key_dim=head_size, num_heads=num_heads, dropout=dropout)(x, x)
    x = Dropout(dropout)(x)
    res = x + inputs

    x = LayerNormalization(epsilon=1e-6)(res)
    x = Dense(ff_dim, activation="relu")(x)
    x = Dropout(dropout)(x)
    x = Dense(inputs.shape[-1])(x)
    return x + res

def transformer_decoder(target, context, head_size, num_heads, ff_dim, dropout=0):
    x = LayerNormalization(epsilon=1e-6)(target)
    x = MultiHeadAttention(key_dim=head_size, num_heads=num_heads, dropout=dropout)(x, x)
    x = Dropout(dropout)(x)
    x = x + target

    x = LayerNormalization(epsilon=1e-6)(x)
    x = MultiHeadAttention(key_dim=head_size, num_heads=num_heads, dropout=dropout)(x, context)
    x = Dropout(dropout)(x)
    x = x + target

    x = LayerNormalization(epsilon=1e-6)(x)
    x = Dense(ff_dim, activation="relu")(x)
    x = Dropout(dropout)(x)
    x = Dense(target.shape[-1])(x)
    return x + target

def build_transformer(vocab_size, max_len, num_layers, head_size, num_heads, ff_dim, dropout=0):
    inputs = Input(shape=(max_len,))
    x = Embedding(vocab_size, head_size)(inputs)
    for _ in range(num_layers):
        x = transformer_encoder(x, head_size, num_heads, ff_dim, dropout)

    targets = Input(shape=(max_len,))
    y = Embedding(vocab_size, head_size)(targets)
    for _ in range(num_layers):
        y = transformer_decoder(y, x, head_size, num_heads, ff_dim, dropout)

    outputs = Dense(vocab_size, activation='softmax')(y)
    model = Model(inputs=[inputs, targets], outputs=outputs)
    return model

# Define model parameters
max_len = 50        # Maximum length of input and output sequences
num_layers = 4      # Number of Transformer layers
head_size = 64      # Dimensionality of the attention heads
num_heads = 4       # Number of attention heads
ff_dim = 128        # Dimensionality of the feed-forward layers
dropout = 0.1       # Dropout rate

# Build the model
model = build_transformer(vocab_size, max_len, num_layers, head_size, num_heads, ff_dim, dropout)
model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])

# Print model summary
model.summary()

# Example dummy data for training
X_input = np.array(X_input)
X_target = np.array(X_target)
y_train = np.array(X_target)

# Train the model
model.fit([X_input, X_target], y_train, epochs=1, batch_size=32)


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 426ms/step - accuracy: 0.3457 - loss: 5.0394


<keras.src.callbacks.history.History at 0x7e09324a65f0>