In [None]:
import numpy as np 
import pandas as pd 
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import matplotlib.pyplot as plt
from tensorflow.keras.callbacks import History

In [None]:
# Load the tab-separated dialogue pairs.
# header=None keeps the first line of the file as data; without it pandas
# consumes that line as column labels and the first question/answer pair
# never reaches the training set.
# NOTE(review): assumes dialogs.txt has no header line — confirm against the file.
df = pd.read_csv("dialogs.txt", sep="\t", header=None, names=["Questions", "Answers"])
# `a` still holds the text of the file's first line, positionally indexed (0, 1),
# which is what it held when that line was being read as the column labels.
a = pd.Series(df.iloc[0].tolist())

In [None]:
# Show the freshly loaded frame as this cell's output to inspect how the file was parsed.
df

In [None]:
# Relabel positions 0 and 1 of `a` with whatever the frame's first two column
# labels currently are.
current_labels = df.columns
a = a.rename({position: current_labels[position] for position in (0, 1)})

# From here on the two columns are addressed by descriptive names.
df.columns = ['Questions', 'Answers']

In [None]:
# Show the frame again, now carrying the 'Questions' / 'Answers' column names assigned in the previous cell.
df

In [None]:
questions, answers = zip(df)

In [None]:
tokenizer = Tokenizer()
tokenizer.fit_on_texts(questions + answers)

In [None]:
question_sequences = tokenizer.texts_to_sequences(questions)
answer_sequences = tokenizer.texts_to_sequences(answers)

In [None]:
max_seq_length = max(max(len(seq) for seq in question_sequences),
                     max(len(seq) for seq in answer_sequences))
question_sequences_padded = pad_sequences(question_sequences, maxlen=max_seq_length, padding='post')
answer_sequences_padded = pad_sequences(answer_sequences, maxlen=max_seq_length, padding='post')

In [None]:
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=100, input_length=max_seq_length),
    tf.keras.layers.SimpleRNN(128),
    tf.keras.layers.Dense(len(tokenizer.word_index) + 1, activation='softmax')
])

In [None]:
# Configure training: Adam optimizer, integer-label cross-entropy loss, and
# accuracy tracked as the reported metric.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)


In [None]:
# An explicit History callback collects the per-epoch metrics that the
# plotting cell reads back through `history.history`.
history = History()
fit_options = dict(epochs=150, batch_size=512, callbacks=[history])

# Padded questions are the inputs, padded answers the targets.
model.fit(question_sequences_padded, answer_sequences_padded, **fit_options)

In [None]:
# Score the model on the same padded arrays that were passed to fit(), so
# these numbers describe fit to the training data, not held-out performance.
evaluation = model.evaluate(question_sequences_padded, answer_sequences_padded)
loss, accuracy = evaluation

print(f"Accuracy: {accuracy}")
print(f"Loss: {loss}")

In [None]:
# Draw one figure per recorded training metric: accuracy first, then loss.
# Each tuple is (key in history.history, figure title, y-axis label).
for metric_key, figure_title, y_label in (
    ('accuracy', 'Model Accuracy', 'Accuracy'),
    ('loss', 'Model Loss', 'Loss'),
):
    plt.plot(history.history[metric_key])
    plt.title(figure_title)
    plt.xlabel('Epochs')
    plt.ylabel(y_label)
    plt.show()