In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Load data
train_df = pd.read_csv('train.csv')
test_df = pd.read_csv('test.csv')
val_df = pd.read_csv('validation.csv')

# Separate inputs (articles) and targets (summaries)
X_train, y_train = train_df['article'], train_df['highlights']
X_val, y_val = val_df['article'], val_df['highlights']
X_test, y_test = test_df['article'], test_df['highlights']


In [2]:
tokenizer = Tokenizer()
tokenizer.fit_on_texts(X_train.tolist() + y_train.tolist())

# Convert text to sequences
X_train_seq = tokenizer.texts_to_sequences(X_train)
y_train_seq = tokenizer.texts_to_sequences(y_train)

# Padding sequences
X_train_padded = pad_sequences(X_train_seq, maxlen=400, padding='post')
y_train_padded = pad_sequences(y_train_seq, maxlen=100, padding='post')

vocab_size = len(tokenizer.word_index) + 1


In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Input, LSTM, Embedding, Dense, Attention

# Encoder
encoder_inputs = Input(shape=(400,))
encoder_embedding = Embedding(vocab_size, 256)(encoder_inputs)
encoder_lstm = LSTM(256, return_sequences=True, return_state=True)
encoder_outputs, state_h, state_c = encoder_lstm(encoder_embedding)
encoder_states = [state_h, state_c]

# Decoder
decoder_inputs = Input(shape=(100,))
decoder_embedding = Embedding(vocab_size, 256)(decoder_inputs)
decoder_lstm = LSTM(256, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(decoder_embedding, initial_state=encoder_states)

# Attention mechanism
attention = Attention()
attention_out = attention([decoder_outputs, encoder_outputs])

# Output layer
decoder_dense = Dense(vocab_size, activation='softmax')
decoder_outputs = decoder_dense(attention_out)

# Define the model
model = tf.keras.Model([encoder_inputs, decoder_inputs], decoder_outputs)


In [4]:
# Tokenize and pad the validation data
X_val_seq = tokenizer.texts_to_sequences(X_val)
y_val_seq = tokenizer.texts_to_sequences(y_val)

X_val_padded = pad_sequences(X_val_seq, maxlen=400, padding='post')
y_val_padded = pad_sequences(y_val_seq, maxlen=100, padding='post')


In [5]:
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train the model
model.fit([X_train_padded, y_train_padded], y_train_padded, epochs=10, batch_size=64, validation_data=([X_val_padded, y_val_padded], y_val_padded))


Epoch 1/10




ResourceExhaustedError: Graph execution error:

Detected at node StatefulPartitionedCall/functional_1/dense_1/Softmax defined at (most recent call last):
<stack traces unavailable>
OOM when allocating tensor with shape[64,100,813500] and type float on /job:localhost/replica:0/task:0/device:CPU:0 by allocator mklcpu
	 [[{{node StatefulPartitionedCall/functional_1/dense_1/Softmax}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.
 [Op:__inference_one_step_on_iterator_4357]

In [None]:
# For predictions (inference)
def decode_sequence(input_seq):
    # Encode the input
    states_value = encoder_model.predict(input_seq)
    
    # Generate empty target sequence of length 1.
    target_seq = np.zeros((1, 1))

    # Sampling loop for generating the output sequence
    decoded_sentence = ''
    stop_condition = False
    while not stop_condition:
        output_tokens, h, c = decoder_model.predict([target_seq] + states_value)
        # Get the most probable next token
        sampled_token_index = np.argmax(output_tokens[0, -1, :])
        sampled_word = tokenizer.index_word[sampled_token_index]

        decoded_sentence += ' ' + sampled_word

        if (sampled_word == 'eos' or len(decoded_sentence) > max_summary_len):
            stop_condition = True

        target_seq = np.zeros((1, 1))
        target_seq[0, 0] = sampled_token_index
        states_value = [h, c]

    return decoded_sentence
