In [None]:
import pandas as pd
import numpy as np
import re

import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Embedding, Dense

# Mount your Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Load the dataset
data = pd.read_csv('/content/drive/MyDrive/Summary/review2.csv')

# Preprocess the data
def preprocess_text(text):
    if isinstance(text, str):  # Check if the input is a string
        text = text.lower()
        text = re.sub(r"[^a-zA-Z0-9]", " ", text)
    else:
        text = str(text)  # Convert non-string values to strings
    return text

data['Text'] = data['Text'].apply(preprocess_text)
data['Summary'] = data['Summary'].apply(preprocess_text)

# ... rest of your code ...

# Tokenization
x_tokenizer = Tokenizer()
x_tokenizer.fit_on_texts(data['Text'])

y_tokenizer = Tokenizer()
y_tokenizer.fit_on_texts(data['Summary'])

# Convert text sequences to integer sequences
x_sequences = x_tokenizer.texts_to_sequences(data['Text'])
y_sequences = y_tokenizer.texts_to_sequences(data['Summary'])


# ... rest of your code ...

# Define hyperparameters
max_len_text = 300  # Adjust this value as needed
max_len_summary = 100
vocab_size_x = len(x_tokenizer.word_index) + 1
vocab_size_y = len(y_tokenizer.word_index) + 1
embedding_dim = 100
latent_dim = 300

# Padding sequences
x_sequences = pad_sequences(x_sequences, maxlen=max_len_text)
y_sequences = pad_sequences(y_sequences, maxlen=max_len_summary, padding='post')
# Add a padding token at the end of each sequence
y_sequences = pad_sequences(y_sequences, maxlen=max_len_summary+1, padding='post')

# ... rest of your code ...

# ... rest of your code ...

# Define the encoder-decoder model
def define_model(vocab_size_x, vocab_size_y, max_len_text, max_len_summary, embedding_dim, latent_dim):
    # Encoder
    encoder_inputs = Input(shape=(max_len_text,))
    enc_emb = Embedding(vocab_size_x, embedding_dim, trainable=True)(encoder_inputs)
    encoder_lstm1 = LSTM(latent_dim, return_sequences=True, return_state=True)
    encoder_output1, state_h1, state_c1 = encoder_lstm1(enc_emb)
    encoder_lstm2 = LSTM(latent_dim, return_sequences=True, return_state=True)
    encoder_output2, state_h2, state_c2 = encoder_lstm2(encoder_output1)
    encoder_states = [state_h2, state_c2]

    # Decoder
    decoder_inputs = Input(shape=(max_len_summary,))
    dec_emb = Embedding(vocab_size_y, embedding_dim, trainable=True)(decoder_inputs)
    decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True)
    decoder_outputs, _, _ = decoder_lstm(dec_emb, initial_state=encoder_states)
    decoder_dense = Dense(vocab_size_y, activation='softmax')
    decoder_outputs = decoder_dense(decoder_outputs)

    model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    return model

# Create the model
model = define_model(vocab_size_x, vocab_size_y, max_len_text, max_len_summary, embedding_dim, latent_dim)
model.summary()

# Train the model
model.fit([x_sequences, y_sequences[:,:-1]], y_sequences.reshape(y_sequences.shape[0], y_sequences.shape[1], 1)[:,1:], epochs=5, batch_size=64)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Epoch 1/5
[1m235/235[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 120ms/step - accuracy: 0.9435 - loss: 1.4199
Epoch 2/5
[1m235/235[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 123ms/step - accuracy: 0.9687 - loss: 0.2296
Epoch 3/5
[1m235/235[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 128ms/step - accuracy: 0.9681 - loss: 0.2292
Epoch 4/5
[1m235/235[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 131ms/step - accuracy: 0.9687 - loss: 0.2214
Epoch 5/5
[1m235/235[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 129ms/step - accuracy: 0.9688 - loss: 0.2171


<keras.src.callbacks.history.History at 0x7b1d189a2b30>

In [None]:
model.save('/content/drive/MyDrive/Summary/mymodel.keras')

In [None]:
import pandas as pd
import numpy as np
import re
import pickle
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Embedding, Dense

# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Define the directory for saving files
save_dir = '/content/drive/MyDrive/Summary/'

# Load the dataset
data = pd.read_csv(save_dir + 'review3.csv')

# Preprocess the data
def preprocess_text(text):
    if isinstance(text, str):  # Check if the input is a string
        text = text.lower()
        text = re.sub(r"[^a-zA-Z0-9]", " ", text)
    else:
        text = str(text)  # Convert non-string values to strings
    return text

data['Text'] = data['Text'].apply(preprocess_text)
data['Summary'] = data['Summary'].apply(preprocess_text)

# Tokenization
x_tokenizer = Tokenizer()
x_tokenizer.fit_on_texts(data['Text'])

y_tokenizer = Tokenizer()
y_tokenizer.fit_on_texts(data['Summary'])

# Convert text sequences to integer sequences
x_sequences = x_tokenizer.texts_to_sequences(data['Text'])
y_sequences = y_tokenizer.texts_to_sequences(data['Summary'])

# Define hyperparameters
max_len_text = 1000  # Adjust this value as needed
max_len_summary = 100
vocab_size_x = len(x_tokenizer.word_index) + 1
vocab_size_y = len(y_tokenizer.word_index) + 1
embedding_dim = 100
latent_dim = 300

# Padding sequences
x_sequences = pad_sequences(x_sequences, maxlen=max_len_text)
# Add a padding token at the end of each sequence
y_sequences = pad_sequences(y_sequences, maxlen=max_len_summary + 1, padding='post')

# Define the encoder-decoder model
def define_model(vocab_size_x, vocab_size_y, max_len_text, max_len_summary, embedding_dim, latent_dim):
    # Encoder
    encoder_inputs = Input(shape=(max_len_text,))
    enc_emb = Embedding(vocab_size_x, embedding_dim, trainable=True)(encoder_inputs)
    encoder_lstm1 = LSTM(latent_dim, return_sequences=True, return_state=True)
    encoder_output1, state_h1, state_c1 = encoder_lstm1(enc_emb)
    encoder_lstm2 = LSTM(latent_dim, return_sequences=True, return_state=True)
    encoder_output2, state_h2, state_c2 = encoder_lstm2(encoder_output1)
    encoder_states = [state_h2, state_c2]

    # Decoder
    decoder_inputs = Input(shape=(max_len_summary,))
    dec_emb = Embedding(vocab_size_y, embedding_dim, trainable=True)(decoder_inputs)
    decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True)
    decoder_outputs, _, _ = decoder_lstm(dec_emb, initial_state=encoder_states)
    decoder_dense = Dense(vocab_size_y, activation='softmax')
    decoder_outputs = decoder_dense(decoder_outputs)

    model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    return model

# Create the model
model = define_model(vocab_size_x, vocab_size_y, max_len_text, max_len_summary, embedding_dim, latent_dim)
model.summary()

# Train the model
model.fit(
    [x_sequences, y_sequences[:, :-1]],
    y_sequences.reshape(y_sequences.shape[0], y_sequences.shape[1], 1)[:, 1:],
    epochs=15,
    batch_size=64
)

# Save the model
model.save(save_dir + 'mymodel2.keras')

# Save the tokenizers
with open(save_dir + 'x_tokenizer.pkl', 'wb') as f:
    pickle.dump(x_tokenizer, f)

with open(save_dir + 'y_tokenizer.pkl', 'wb') as f:
    pickle.dump(y_tokenizer, f)

print("Model and tokenizers saved successfully in:", save_dir)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Epoch 1/15
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m257s[0m 325ms/step - accuracy: 0.9591 - loss: 0.8405
Epoch 2/15
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m273s[0m 339ms/step - accuracy: 0.9688 - loss: 0.2224
Epoch 3/15
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m320s[0m 337ms/step - accuracy: 0.9704 - loss: 0.2030
Epoch 4/15
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m322s[0m 337ms/step - accuracy: 0.9711 - loss: 0.1894
Epoch 5/15
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m321s[0m 336ms/step - accuracy: 0.9723 - loss: 0.1759
Epoch 6/15
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m265s[0m 339ms/step - accuracy: 0.9729 - loss: 0.1661
Epoch 7/15
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m322s[0m 338ms/step - accuracy: 0.9736 - loss: 0.1566
Epoch 8/15
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m322s[0m 339ms/step - accuracy: 0.9739 - loss: 0.1494
Epoch 9/

In [None]:
# Evaluate the model on the training data
loss, accuracy = model.evaluate([x_sequences, y_sequences[:, :-1]], y_sequences.reshape(y_sequences.shape[0], y_sequences.shape[1], 1)[:, 1:])

# Print the evaluation metrics
print("Loss:", loss)
print("Accuracy:", accuracy)

# You can also use other evaluation metrics provided by TensorFlow Keras
from tensorflow.keras.metrics import Precision, Recall, F1Score

# Define the metrics
precision_metric = Precision()
recall_metric = Recall()
f1_metric = F1Score()

# Evaluate the model on the training data for these metrics
precision_value, recall_value, f1_value = model.evaluate(
    [x_sequences, y_sequences[:, :-1]], y_sequences.reshape(y_sequences.shape[0], y_sequences.shape[1], 1)[:, 1:], return_metrics=[precision_metric, recall_metric, f1_metric])

print("Precision:", precision_value)
print("Recall:", recall_value)
print("F1 Score:", f1_value)

[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m129s[0m 82ms/step - accuracy: 0.9802 - loss: 0.0947
Loss: 0.09441691637039185
Accuracy: 0.9802197813987732


ValueError: Arguments not recognized: {'return_metrics': [<Precision name=precision_1>, <Recall name=recall_1>, <F1Score name=f1_score>]}

# New Section

In [3]:
from IPython.display import Javascript
from IPython.display import display
import time

def get_notebook_name():
  display(Javascript('IPython.notebook.kernel.execute("notebook_name = " + "\'"+IPython.notebook.notebook_name+"\'");'))
  # Introduce a small delay to allow the Javascript to execute
  time.sleep(1)
  return get_ipython().user_ns['notebook_name']

notebook_name = get_notebook_name()
print(notebook_name)

<IPython.core.display.Javascript object>

KeyError: 'notebook_name'