## Model 1

In [None]:
import tensorflow as tf
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.layers import (
    Bidirectional,
    Concatenate,
    Dense,
    Embedding,
    Input,
    LSTM,
    TimeDistributed,
)
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from transformers import BertTokenizer, TFBertModel

# Load pre-trained BERT model and tokenizer.
bert_model_name = 'bert-base-uncased'
tokenizer = BertTokenizer.from_pretrained(bert_model_name)
bert_model = TFBertModel.from_pretrained(bert_model_name)

# Architecture constants (previously repeated as bare literals throughout the cell).
NUM_DECODER_SEQUENCES = 21                    # number of independent decoder heads
ENCODER_LSTM_UNITS = 96                       # units per direction of each BiLSTM
DECODER_LSTM_UNITS = ENCODER_LSTM_UNITS * 2   # must match the concatenated fwd+bwd state width
LSTM_DROPOUT = 0.3                            # used for both dropout and recurrent_dropout
L2_WEIGHT = 1e-4                              # kernel regularisation strength

# NOTE: max_sequence_length, y_max_sequence_length, y_vocab_size_input, embedding_dim and
# embedding_matrix_input are not defined in this cell; they must already exist in the
# kernel (defined by an earlier cell) before this cell is run.

# Define the BERT encoder: token ids in, per-token hidden states out.
encoder_inputs = Input(shape=(max_sequence_length,), dtype=tf.int32, name='encoder_inputs')
# NOTE(review): no attention mask is passed, so padded positions are attended to like
# real tokens -- confirm whether a mask input should be added.
bert_output = bert_model(encoder_inputs)
encoder_embedding = bert_output.last_hidden_state

# Two stacked bidirectional LSTMs on top of the BERT embeddings. The second one also
# returns its final forward/backward states, which seed every decoder below.
encoder_lstm1 = Bidirectional(
    LSTM(ENCODER_LSTM_UNITS, return_sequences=True, name='encoder_lstm_1',
         dropout=LSTM_DROPOUT, recurrent_dropout=LSTM_DROPOUT,
         kernel_regularizer=l2(L2_WEIGHT))
)(encoder_embedding)
encoder_lstm2, forward_h2, forward_c2, backward_h2, backward_c2 = Bidirectional(
    LSTM(ENCODER_LSTM_UNITS, return_sequences=True, return_state=True, name='encoder_lstm_2',
         dropout=LSTM_DROPOUT, recurrent_dropout=LSTM_DROPOUT,
         kernel_regularizer=l2(L2_WEIGHT))
)(encoder_lstm1)

# Merge the two directions so the states have DECODER_LSTM_UNITS features.
state_h = Concatenate()([forward_h2, backward_h2])
state_c = Concatenate()([forward_c2, backward_c2])

# Decoder input: NUM_DECODER_SEQUENCES token-id sequences per example, embedded with the
# frozen pre-computed embedding matrix.
decoder_inputs = Input(shape=(NUM_DECODER_SEQUENCES, y_max_sequence_length,), name='decoder_inputs')
decoder_embedding = TimeDistributed(
    Embedding(input_dim=y_vocab_size_input, output_dim=embedding_dim,
              weights=[embedding_matrix_input], trainable=False)
)(decoder_inputs)

# One independent two-layer LSTM decoder plus softmax head per target sequence; each
# LSTM layer is initialised from the encoder's final states.
decoder_outputs = []
for i in range(NUM_DECODER_SEQUENCES):
    decoder_lstm_1 = LSTM(DECODER_LSTM_UNITS, return_sequences=True, return_state=False,
                          name=f'decoder_lstm_1_{i}', dropout=LSTM_DROPOUT,
                          recurrent_dropout=LSTM_DROPOUT, kernel_regularizer=l2(L2_WEIGHT))
    decoder_lstm_2 = LSTM(DECODER_LSTM_UNITS, return_sequences=True, return_state=False,
                          name=f'decoder_lstm_2_{i}', dropout=LSTM_DROPOUT,
                          recurrent_dropout=LSTM_DROPOUT, kernel_regularizer=l2(L2_WEIGHT))
    decoder_lstm_output_1 = decoder_lstm_1(decoder_embedding[:, i, :], initial_state=[state_h, state_c])
    decoder_lstm_output_2 = decoder_lstm_2(decoder_lstm_output_1, initial_state=[state_h, state_c])
    decoder_dense = TimeDistributed(Dense(y_vocab_size_input, activation='softmax'), name=f'decoder_dense_{i}')
    decoder_outputs.append(decoder_dense(decoder_lstm_output_2))

optimizer = Adam(learning_rate=0.004872)
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Training callbacks, both keyed on validation loss (not attached to a fit() call in this cell).
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=1e-5, cooldown=0)

model.summary()


## Model 2

## A

In [None]:
from transformers import BertTokenizer, BertModel, GPT2Tokenizer, GPT2LMHeadModel
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Lambda
from tensorflow.keras.models import Model
import numpy as np

# Fetch the pre-trained checkpoints used in this cell. GPT-2 first, then BERT;
# the two pairs are independent of each other.
gpt2_tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
gpt2_model = GPT2LMHeadModel.from_pretrained('gpt2')

bert_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
bert_model = BertModel.from_pretrained('bert-base-uncased')

def encode_with_bert(texts):
    """Tokenize ``texts`` with the BERT tokenizer and return BERT's last hidden state.

    The tokenizer is asked for PyTorch tensors ('pt') with padding and truncation
    enabled, and everything it returns is forwarded to ``bert_model`` as keyword
    arguments.
    """
    encoded = bert_tokenizer(texts, padding=True, truncation=True, return_tensors='pt')
    return bert_model(**encoded).last_hidden_state

class BertEncoderLayer(tf.keras.layers.Layer):
    """Keras layer mapping a tensor of strings to BERT last-hidden-state features.

    The tokenizer is plain Python and cannot consume a symbolic Keras tensor, so the
    call to ``encode_with_bert`` is deferred to run time through ``tf.py_function``.
    No gradient flows through that bridge, so BERT acts as a frozen feature extractor.
    """

    # Hidden size of the 'bert-base-uncased' checkpoint loaded in this cell.
    BERT_HIDDEN_SIZE = 768

    def __init__(self, **kwargs):
        super(BertEncoderLayer, self).__init__(**kwargs)
        # Kept as a Lambda under the original attribute name.
        self.bert_model = tf.keras.layers.Lambda(self._encode_in_graph)

    @staticmethod
    def _encode_eagerly(texts):
        """Run eagerly inside py_function: decode bytes, encode, return an array/tensor."""
        # tf.string tensors surface as bytes objects; the tokenizer needs ``str``.
        # The batch is flattened so the tokenizer always receives a flat list of texts.
        flat_texts = [
            item.decode('utf-8') if isinstance(item, bytes) else str(item)
            for item in texts.numpy().reshape(-1).tolist()
        ]
        hidden = encode_with_bert(flat_texts)
        # A torch tensor must be detached and converted before TensorFlow can take it.
        if hasattr(hidden, 'detach'):
            hidden = hidden.detach().cpu().numpy()
        return hidden

    def _encode_in_graph(self, texts):
        """Graph-side wrapper: schedule the eager encoder and restore the static shape."""
        hidden = tf.py_function(func=self._encode_eagerly, inp=[texts], Tout=tf.float32)
        # py_function loses shape information; downstream layers need the rank and width.
        hidden.set_shape([None, None, self.BERT_HIDDEN_SIZE])
        return hidden

    def call(self, inputs):
        """Return (batch, tokens, hidden) BERT features for the input strings."""
        return self.bert_model(inputs)

class Gpt2DecoderLayer(tf.keras.layers.Layer):
    """Layer that forwards its input to the module-level ``gpt2_model``.

    NOTE(review): in this cell ``gpt2_model`` is a ``GPT2LMHeadModel``; confirm that it
    can actually be invoked on Keras tensors, or switch to the TensorFlow class.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Keep a handle to the shared pre-trained model on the layer instance.
        self.gpt2_model = gpt2_model

    def call(self, inputs):
        """Run GPT-2 on ``inputs`` and return the first element of its output."""
        model_outputs = self.gpt2_model(inputs)
        return model_outputs[0]

# Define input layers. The encoder takes raw strings; the decoder takes GPT-2 token
# ids, which must be integers (the default float32 dtype is not a valid id type).
encoder_inputs = Input(shape=(None,), dtype=tf.string, name='encoder_inputs')
decoder_inputs = Input(shape=(None,), dtype=tf.int32, name='decoder_inputs')

# Encode with BERT.
# NOTE(review): bert_encoder is not connected to the model outputs, so the decoder is
# not conditioned on the encoder -- confirm the intended architecture.
bert_encoder = BertEncoderLayer()(encoder_inputs)

# Decode with GPT-2 (first output element of the LM-head model: vocabulary logits).
gpt2_decoder = Gpt2DecoderLayer()(decoder_inputs)

# Output layer: the LM head already projects to the vocabulary, so only a softmax is
# needed. A Dense(vocab_size) here would add a vocab_size x vocab_size kernel
# (about 2.5 billion parameters for GPT-2's 50257-token vocabulary).
outputs = tf.keras.layers.Softmax(axis=-1)(gpt2_decoder)

# Create model
model = Model(inputs=[encoder_inputs, decoder_inputs], outputs=outputs)
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

model.summary()


## B

In [None]:
from transformers import BertTokenizer, BertModel, GPT2Tokenizer, GPT2LMHeadModel
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Lambda, TimeDistributed, Reshape
from tensorflow.keras.models import Model

# Load pre-trained models and tokenizers.
# This cell tokenizes to TensorFlow tensors and calls the models from inside Keras
# layers, so the TensorFlow model classes are required; the PyTorch BertModel /
# GPT2LMHeadModel cannot consume TensorFlow tensors.
from transformers import TFBertModel, TFGPT2LMHeadModel

bert_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
bert_model = TFBertModel.from_pretrained('bert-base-uncased')

gpt2_tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
gpt2_model = TFGPT2LMHeadModel.from_pretrained('gpt2')

def encode_with_bert(texts):
    """Tokenize ``texts`` and return BERT's last hidden state for the padded batch.

    Args:
        texts: A string or list of strings accepted by ``bert_tokenizer``.

    Returns:
        ``last_hidden_state`` of ``bert_model`` for the batch.
    """
    inputs = bert_tokenizer(texts, return_tensors='tf', padding=True, truncation=True)
    # padding=True inserts pad tokens; the attention mask must accompany the ids so
    # that those positions are ignored instead of being attended to as real tokens.
    outputs = bert_model(
        input_ids=inputs['input_ids'],
        attention_mask=inputs['attention_mask'],
    )
    return outputs.last_hidden_state

class BertEncoderLayer(tf.keras.layers.Layer):
    """Keras layer mapping a tensor of strings to BERT last-hidden-state features.

    The tokenizer is plain Python and cannot consume a symbolic Keras tensor, so the
    call to ``encode_with_bert`` is deferred to run time through ``tf.py_function``.
    No gradient flows through that bridge, so BERT acts as a frozen feature extractor.
    """

    # Hidden size of the 'bert-base-uncased' checkpoint loaded in this cell.
    BERT_HIDDEN_SIZE = 768

    def __init__(self, **kwargs):
        super(BertEncoderLayer, self).__init__(**kwargs)
        # Kept as a Lambda under the original attribute name.
        self.bert_model = tf.keras.layers.Lambda(self._encode_in_graph)

    @staticmethod
    def _encode_eagerly(texts):
        """Run eagerly inside py_function: decode bytes, encode, return an array/tensor."""
        # tf.string tensors surface as bytes objects; the tokenizer needs ``str``.
        # The batch is flattened so the tokenizer always receives a flat list of texts.
        flat_texts = [
            item.decode('utf-8') if isinstance(item, bytes) else str(item)
            for item in texts.numpy().reshape(-1).tolist()
        ]
        hidden = encode_with_bert(flat_texts)
        # If the encoder handed back a torch tensor, convert it for TensorFlow.
        if hasattr(hidden, 'detach'):
            hidden = hidden.detach().cpu().numpy()
        return hidden

    def _encode_in_graph(self, texts):
        """Graph-side wrapper: schedule the eager encoder and restore the static shape."""
        hidden = tf.py_function(func=self._encode_eagerly, inp=[texts], Tout=tf.float32)
        # py_function loses shape information; downstream layers need the rank and width.
        hidden.set_shape([None, None, self.BERT_HIDDEN_SIZE])
        return hidden

    def call(self, inputs):
        """Return (batch, tokens, hidden) BERT features for the input strings."""
        return self.bert_model(inputs)

class Gpt2DecoderLayer(tf.keras.layers.Layer):
    """Layer that forwards its input to the module-level ``gpt2_model``.

    NOTE(review): this cell feeds the layer slices of a tf.string input without
    tokenizing them first -- confirm whether token ids were intended here.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Keep a handle to the shared pre-trained model on the layer instance.
        self.gpt2_model = gpt2_model

    def call(self, inputs):
        """Run GPT-2 on ``inputs`` and return the first element of its output."""
        model_outputs = self.gpt2_model(inputs)
        return model_outputs[0]

# Define input layers: raw strings for the encoder, 21 string sequences per example
# for the decoder.
encoder_inputs = Input(shape=(None,), dtype=tf.string, name='encoder_inputs')
decoder_inputs = Input(shape=(21, None), dtype=tf.string, name='decoder_inputs')

# Encode with BERT.
# NOTE(review): bert_encoder is not connected to the model outputs, so the decoders are
# not conditioned on the encoder -- confirm the intended architecture.
bert_encoder = BertEncoderLayer()(encoder_inputs)

# Process each of the 21 sequences separately.
decoder_outputs = []
for i in range(21):
    gpt2_decoder = Gpt2DecoderLayer()(decoder_inputs[:, i])
    # The LM head already emits vocabulary-sized logits, so a softmax is all that is
    # needed. A Dense(vocab_size) per head would add a vocab_size x vocab_size kernel
    # (about 2.5 billion parameters each, 21 times over).
    decoder_dense = tf.keras.layers.Softmax(axis=-1)(gpt2_decoder)
    decoder_outputs.append(decoder_dense)

# Stack decoder outputs along a new axis 1 (one slot per decoded sequence).
decoder_outputs = tf.stack(decoder_outputs, axis=1)

# Define the model
model = Model(inputs=[encoder_inputs, decoder_inputs], outputs=decoder_outputs)
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

model.summary()


## C

In [None]:
from transformers import BertTokenizer, TFBertModel, GPT2Tokenizer, TFGPT2LMHeadModel
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, TimeDistributed, Lambda
from tensorflow.keras.models import Model

# Fetch the TensorFlow checkpoints and their tokenizers. GPT-2 first, then BERT;
# the two pairs are independent of each other.
gpt2_tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
gpt2_model = TFGPT2LMHeadModel.from_pretrained('gpt2')

bert_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
bert_model = TFBertModel.from_pretrained('bert-base-uncased')

def bert_tokenize(texts):
    """Tokenize an eager tf.string tensor with the BERT tokenizer.

    Intended to be called through ``tf.py_function``, where ``texts`` is an eager
    tensor exposing ``.numpy()``.

    Args:
        texts: Eager string tensor of any rank; it is flattened to a list of texts.

    Returns:
        Tuple ``(input_ids, attention_mask)`` of padded TensorFlow tensors.
    """
    # tf.string values come back from .numpy() as bytes, which the tokenizer rejects;
    # decode them and flatten so the tokenizer always sees a flat list of str.
    decoded_texts = [
        item.decode('utf-8') if isinstance(item, bytes) else str(item)
        for item in texts.numpy().reshape(-1).tolist()
    ]
    inputs = bert_tokenizer.batch_encode_plus(
        decoded_texts, return_tensors="tf", padding=True, truncation=True
    )
    return inputs['input_ids'], inputs['attention_mask']

def gpt2_tokenize(texts):
    """Tokenize an eager tf.string tensor with the GPT-2 tokenizer.

    Intended to be called through ``tf.py_function``, where ``texts`` is an eager
    tensor exposing ``.numpy()``.

    Args:
        texts: Eager string tensor of any rank; it is flattened to a list of texts.

    Returns:
        Padded ``input_ids`` as a TensorFlow tensor.

    Side effects:
        Assigns the EOS token as ``gpt2_tokenizer.pad_token`` if no pad token is set.
    """
    # GPT-2 ships without a padding token, so padding=True would raise; reusing the
    # EOS token as padding is the usual workaround.
    if gpt2_tokenizer.pad_token is None:
        gpt2_tokenizer.pad_token = gpt2_tokenizer.eos_token

    # tf.string values come back from .numpy() as bytes, which the tokenizer rejects;
    # decode them and flatten so the tokenizer always sees a flat list of str.
    decoded_texts = [
        item.decode('utf-8') if isinstance(item, bytes) else str(item)
        for item in texts.numpy().reshape(-1).tolist()
    ]
    inputs = gpt2_tokenizer.batch_encode_plus(
        decoded_texts, return_tensors="tf", padding=True, truncation=True
    )
    return inputs['input_ids']

class BertEncoderLayer(tf.keras.layers.Layer):
    """Layer that tokenizes strings via ``bert_tokenize`` and runs the module-level BERT."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def call(self, inputs):
        """Return the first output of ``bert_model`` for the tokenized ``inputs``."""
        # Tokenization is Python code, so it is bridged into the graph with py_function.
        token_ids, mask = tf.py_function(
            func=bert_tokenize, inp=[inputs], Tout=[tf.int32, tf.int32]
        )
        # py_function drops static shapes; declare both tensors as rank 2.
        for tensor in (token_ids, mask):
            tensor.set_shape([None, None])
        return bert_model(token_ids, attention_mask=mask)[0]

class Gpt2DecoderLayer(tf.keras.layers.Layer):
    """Layer that tokenizes strings via ``gpt2_tokenize`` and returns GPT-2 logits."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Keep a handle to the shared pre-trained model on the layer instance.
        self.gpt2_model = gpt2_model

    def call(self, inputs):
        """Return ``.logits`` of GPT-2 for the tokenized ``inputs``."""
        # Tokenization is Python code, so it is bridged into the graph with py_function.
        token_ids = tf.py_function(func=gpt2_tokenize, inp=[inputs], Tout=tf.int32)
        # py_function drops the static shape; declare the ids as rank 2.
        token_ids.set_shape([None, None])
        return self.gpt2_model(token_ids).logits

# Define input layers: raw strings for the encoder, 21 string sequences per example
# for the decoder.
encoder_inputs = Input(shape=(None,), dtype=tf.string, name='encoder_inputs')
decoder_inputs = Input(shape=(21, None,), dtype=tf.string, name='decoder_inputs')

# Encode with BERT.
# NOTE(review): bert_encoder is not connected to the model outputs, so the decoders are
# not conditioned on the encoder -- confirm the intended architecture.
bert_encoder = BertEncoderLayer()(encoder_inputs)

# Process each of the 21 sequences separately.
decoder_outputs = []
for i in range(21):
    gpt2_decoder = Gpt2DecoderLayer()(decoder_inputs[:, i])
    # The decoder layer returns vocabulary-sized LM logits, so a softmax is all that
    # is needed. A TimeDistributed Dense(vocab_size) per head would add a
    # vocab_size x vocab_size kernel (about 2.5 billion parameters each, 21 times over).
    decoder_dense = tf.keras.layers.Softmax(axis=-1)(gpt2_decoder)
    decoder_outputs.append(decoder_dense)

# Stack decoder outputs along a new axis 1 (one slot per decoded sequence).
decoder_outputs = Lambda(lambda x: tf.stack(x, axis=1))(decoder_outputs)

# Define the model
model = Model(inputs=[encoder_inputs, decoder_inputs], outputs=decoder_outputs)
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

model.summary()

## D

In [None]:
from transformers import BertTokenizer, TFBertModel, GPT2Tokenizer, TFGPT2LMHeadModel
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Lambda, Layer
from tensorflow.keras.models import Model

# Fetch the TensorFlow checkpoints and their tokenizers. GPT-2 first, then BERT;
# the two pairs are independent of each other.
gpt2_tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
gpt2_model = TFGPT2LMHeadModel.from_pretrained('gpt2')

bert_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
bert_model = TFBertModel.from_pretrained('bert-base-uncased')

def encode_with_bert(texts):
    """Tokenize an eager tf.string tensor and return BERT's last hidden state.

    Intended to be called through ``tf.py_function``, where ``texts`` is an eager
    tensor exposing ``.numpy()``.

    Args:
        texts: Eager string tensor of any rank; it is flattened to a list of texts.

    Returns:
        ``last_hidden_state`` of ``bert_model`` for the padded batch.
    """
    # tf.string values come back from .numpy() as bytes, which the tokenizer rejects;
    # decode them and flatten so the tokenizer always sees a flat list of str.
    decoded_texts = [
        item.decode('utf-8') if isinstance(item, bytes) else str(item)
        for item in texts.numpy().reshape(-1).tolist()
    ]
    inputs = bert_tokenizer.batch_encode_plus(
        decoded_texts, return_tensors="tf", padding=True, truncation=True
    )
    # padding=True inserts pad tokens; pass the mask so they are not attended to.
    outputs = bert_model(inputs['input_ids'], attention_mask=inputs['attention_mask'])
    return outputs.last_hidden_state

def tokenize_gpt2(texts):
    """Tokenize an eager tf.string tensor with the GPT-2 tokenizer.

    Intended to be called through ``tf.py_function``, where ``texts`` is an eager
    tensor exposing ``.numpy()``.

    Args:
        texts: Eager string tensor of any rank; it is flattened to a list of texts.

    Returns:
        Padded ``input_ids`` as a TensorFlow tensor.

    Side effects:
        Assigns the EOS token as ``gpt2_tokenizer.pad_token`` if no pad token is set.
    """
    # GPT-2 ships without a padding token, so padding=True would raise; reusing the
    # EOS token as padding is the usual workaround.
    if gpt2_tokenizer.pad_token is None:
        gpt2_tokenizer.pad_token = gpt2_tokenizer.eos_token

    # tf.string values come back from .numpy() as bytes, which the tokenizer rejects;
    # decode them and flatten so the tokenizer always sees a flat list of str.
    decoded_texts = [
        item.decode('utf-8') if isinstance(item, bytes) else str(item)
        for item in texts.numpy().reshape(-1).tolist()
    ]
    inputs = gpt2_tokenizer.batch_encode_plus(
        decoded_texts, return_tensors="tf", padding=True, truncation=True
    )
    return inputs['input_ids']

class BertEncoderLayer(Layer):
    """Layer that delegates to ``encode_with_bert`` through ``tf.py_function``."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def call(self, inputs):
        """Return the float32 result of ``encode_with_bert`` with a declared static shape."""
        hidden_states = tf.py_function(func=encode_with_bert, inp=[inputs], Tout=tf.float32)
        # py_function drops static shapes; declare rank 3 with a last dimension of 768
        # to match BERT's output.
        hidden_states.set_shape([None, None, 768])
        return hidden_states

class Gpt2DecoderLayer(Layer):
    """Layer that tokenizes strings via ``tokenize_gpt2`` and returns GPT-2 logits."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Keep a handle to the shared pre-trained model on the layer instance.
        self.gpt2_model = gpt2_model

    def call(self, inputs):
        """Return ``.logits`` of GPT-2 for the tokenized ``inputs``."""
        # Tokenization is Python code, so it is bridged into the graph with py_function.
        token_ids = tf.py_function(func=tokenize_gpt2, inp=[inputs], Tout=tf.int32)
        # py_function drops the static shape; declare the ids as rank 2.
        token_ids.set_shape([None, None])
        return self.gpt2_model(token_ids).logits

# Symbolic string inputs: one for the encoder, and one holding 21 sequences per
# example for the decoder.
encoder_inputs = Input(name='encoder_inputs', dtype=tf.string, shape=(None,))
decoder_inputs = Input(name='decoder_inputs', dtype=tf.string, shape=(21, None,))

# Run the encoder input through the BERT layer.
bert_encoder_layer_kwargs = {}
bert_encoder = BertEncoderLayer(**bert_encoder_layer_kwargs)(encoder_inputs)
del bert_encoder_layer_kwargs

def process_decoder_sequence(i, decoder_inputs, gpt2_model):
    """Build the decoding branch for the ``i``-th decoder sequence.

    Args:
        i: Index along axis 1 of ``decoder_inputs`` selecting the sequence to decode.
        decoder_inputs: Symbolic decoder input tensor; ``decoder_inputs[:, i]`` is fed
            to a fresh ``Gpt2DecoderLayer``.
        gpt2_model: Unused here (``Gpt2DecoderLayer`` reads the module-level model);
            kept so existing callers continue to work.

    Returns:
        Softmax-normalised vocabulary probabilities for the selected sequence.
    """
    gpt2_decoder = Gpt2DecoderLayer()(decoder_inputs[:, i])
    # The decoder layer already returns vocabulary-sized LM logits, so a softmax is
    # all that is needed. A Dense(vocab_size) here would add a vocab_size x vocab_size
    # kernel (about 2.5 billion parameters) on every call.
    decoder_dense = tf.keras.layers.Softmax(axis=-1)(gpt2_decoder)
    return decoder_dense

# Build one decoding branch per sequence index and collect the results in order.
decoder_outputs = []
for i in range(21):
    decoder_output = process_decoder_sequence(i, decoder_inputs, gpt2_model)
    decoder_outputs += [decoder_output]

# Combine the per-sequence outputs into a single tensor along a new axis 1.
decoder_outputs = Lambda(lambda branches: tf.stack(branches, axis=1))(decoder_outputs)

# Wrap everything in a Keras model and configure training.
model = Model(outputs=decoder_outputs, inputs=[encoder_inputs, decoder_inputs])
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

model.summary()


## Model 3

In [None]:
from transformers import T5Tokenizer, T5ForConditionalGeneration

# Fetch the 't5-small' checkpoint: model weights first, then the matching tokenizer.
t5_model = T5ForConditionalGeneration.from_pretrained('t5-small')
t5_tokenizer = T5Tokenizer.from_pretrained('t5-small')

def encode_text(texts):
    """Tokenize ``texts`` with the T5 tokenizer.

    Padding and truncation are enabled and PyTorch tensors ('pt') are requested.
    """
    encoded = t5_tokenizer(texts, truncation=True, padding=True, return_tensors='pt')
    return encoded

def generate_text(inputs):
    """Generate text with T5 for one prompt or a batch of prompts.

    Args:
        inputs: A single prompt string, or a list of prompt strings.

    Returns:
        The generated string when ``inputs`` is a single string; otherwise a list with
        one generated string per prompt, in input order. (Previously only the first
        prompt's output was decoded and the rest were silently discarded.)
    """
    single_prompt = isinstance(inputs, str)
    encoded = encode_text(inputs)
    # encode_text pads the batch, so the attention mask must be supplied; without it
    # generation attends to the pad tokens of the shorter prompts.
    outputs = t5_model.generate(
        encoded['input_ids'],
        attention_mask=encoded['attention_mask'],
    )
    decoded = t5_tokenizer.batch_decode(outputs, skip_special_tokens=True)
    return decoded[0] if single_prompt else decoded

# Demo: run the generator on two sample prompts and print what it returns.
input_texts = [
    "Translate English to French: How are you?",
    "Summarize this text.",
]
print(generate_text(inputs=input_texts))
