In [41]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
import pandas as pd

# Load your data
data = pd.read_csv('PG.csv')
train_df, test_df = train_test_split(data, test_size=0.2, random_state=42)

# Text preprocessing
text_transformer = TfidfVectorizer(stop_words='english', ngram_range=(1, 2))
X_train = text_transformer.fit_transform(train_df['hyp'])
X_test = text_transformer.transform(test_df['hyp'])
y_train = train_df['label'].values
y_test = test_df['label'].values

# Convert sparse matrix to dense, if using TF-IDF and dataset is not too large
X_train = X_train.toarray()
X_test = X_test.toarray()

# Split the training data into training and validation sets
val_size = int(0.2 * X_train.shape[0])  # 20% for validation
X_val = X_train[:val_size]
y_val = y_train[:val_size]
X_train_new = X_train[val_size:]
y_train_new = y_train[val_size:]

# Define the model, assuming you're using a model architecture appropriate for your data
model = Sequential([
    Dense(128, activation='relu', input_dim=X_train_new.shape[1]),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid')
])

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001),
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Train the model with manual validation data
history = model.fit(X_train_new, y_train_new, epochs=20, batch_size=10, validation_data=(X_val, y_val))

model.summary()
# Evaluate the model on the test set
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

Epoch 1/20


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - accuracy: 0.5824 - loss: 0.6906 - val_accuracy: 0.6226 - val_loss: 0.6705
Epoch 2/20
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5777 - loss: 0.6605 - val_accuracy: 0.6226 - val_loss: 0.6584
Epoch 3/20
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6316 - loss: 0.6069 - val_accuracy: 0.6226 - val_loss: 0.6400
Epoch 4/20
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7290 - loss: 0.5454 - val_accuracy: 0.6415 - val_loss: 0.6129
Epoch 5/20
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8864 - loss: 0.4008 - val_accuracy: 0.6792 - val_loss: 0.5741
Epoch 6/20
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9992 - loss: 0.2334 - val_accuracy: 0.7358 - val_loss: 0.5390
Epoch 7/20
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6107 - loss: 1.3361 
Test Accuracy: 61.19%


In [10]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Embedding
from tensorflow.keras.optimizers import Adam
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
import pandas as pd
from tensorflow.keras.layers import Input, Embedding, LSTM, Dense
from tensorflow.keras.models import Model

# Load your data
data = pd.read_csv('PG.csv')
train_df, test_df = train_test_split(data, test_size=0.2, random_state=42)

# Text preprocessing
# For LSTM, instead of TF-IDF, we use tokenization and padding
tokenizer = tf.keras.preprocessing.text.Tokenizer()
tokenizer.fit_on_texts(train_df['hyp'])
X_train = tokenizer.texts_to_sequences(train_df['hyp'])
X_test = tokenizer.texts_to_sequences(test_df['hyp'])

# Padding sequences
X_train = tf.keras.preprocessing.sequence.pad_sequences(X_train, maxlen=100)
X_test = tf.keras.preprocessing.sequence.pad_sequences(X_test, maxlen=100)
y_train = train_df['label'].values
y_test = test_df['label'].values

input_text = Input(shape=(100,))  # Define input layer shape
x = Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=100)(input_text)
x = LSTM(64)(x)
output = Dense(1, activation='sigmoid')(x)

model = Model(inputs=input_text, outputs=output)

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001),
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Train the model
history = model.fit(X_train, y_train, epochs=10, batch_size=5, validation_split=0.2)

# Evaluate the model
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")


Epoch 1/10
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 27ms/step - accuracy: 0.6022 - loss: 0.6790 - val_accuracy: 0.5370 - val_loss: 0.6873
Epoch 2/10
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 22ms/step - accuracy: 0.6363 - loss: 0.6393 - val_accuracy: 0.5370 - val_loss: 0.6719
Epoch 3/10
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 21ms/step - accuracy: 0.7171 - loss: 0.5397 - val_accuracy: 0.6296 - val_loss: 0.6228
Epoch 4/10
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 21ms/step - accuracy: 0.9183 - loss: 0.3703 - val_accuracy: 0.6111 - val_loss: 0.5907
Epoch 5/10
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 20ms/step - accuracy: 0.9982 - loss: 0.0906 - val_accuracy: 0.6481 - val_loss: 0.6362
Epoch 6/10
[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 22ms/step - accuracy: 0.9938 - loss: 0.0359 - val_accuracy: 0.6481 - val_loss: 0.7491
Epoch 7/10
[1m43/43[0m [32m━━━━

In [40]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, LSTM, Dense, Dropout, concatenate
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Load the dataset
data = pd.read_csv('PG.csv')

# Prepare tokenizer
tokenizer = Tokenizer(num_words=10000)
tokenizer.fit_on_texts(data['src'].astype(str) + ' ' + data['hyp'].astype(str))

# Tokenize and pad the sequences for both 'src' and 'hyp'
src_sequences = tokenizer.texts_to_sequences(data['src'].astype(str))
hyp_sequences = tokenizer.texts_to_sequences(data['hyp'].astype(str))
max_sequence_length = 100  # You can adjust this based on your specific dataset
src_data = pad_sequences(src_sequences, maxlen=max_sequence_length)
hyp_data = pad_sequences(hyp_sequences, maxlen=max_sequence_length)

# Split the data into training and validation sets
train_src, test_src, train_hyp, test_hyp, train_labels, test_labels = train_test_split(
    src_data, hyp_data, data['label'].values, test_size=0.2, random_state=42)

# Input layers
src_input = Input(shape=(max_sequence_length,), dtype='int32', name='src_input')
hyp_input = Input(shape=(max_sequence_length,), dtype='int32', name='hyp_input')

# Shared embedding layer
embedding_layer = Embedding(input_dim=10000, output_dim=128)

# Branch for source text
src_embedded = embedding_layer(src_input)
src_lstm = LSTM(64)(src_embedded)

# Branch for hypothesized text
hyp_embedded = embedding_layer(hyp_input)
hyp_lstm = LSTM(64)(hyp_embedded)

# Concatenate the outputs from both branches
concatenated = concatenate([src_lstm, hyp_lstm], axis=-1)

# Additional processing
x = Dense(64, activation='relu')(concatenated)
x = Dropout(0.5)(x)
output = Dense(1, activation='sigmoid')(x)

# Define and compile the model
model = Model(inputs=[src_input, hyp_input], outputs=output)
model.compile(optimizer='SGD', loss='binary_crossentropy', metrics=['accuracy'])

# Model summary to check the architecture
model.summary()

# Train the model
model.fit([train_src, train_hyp], train_labels, epochs=8, batch_size=64, validation_data=([test_src, test_hyp], test_labels))

# Evaluate the model on the test set
test_loss, test_accuracy = model.evaluate([test_src, test_hyp], test_labels)
print(f'Test Loss: {test_loss}, Test Accuracy: {test_accuracy}')


Epoch 1/8
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 212ms/step - accuracy: 0.5953 - loss: 0.6888 - val_accuracy: 0.6567 - val_loss: 0.6931
Epoch 2/8
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step - accuracy: 0.5695 - loss: 0.6886 - val_accuracy: 0.6567 - val_loss: 0.6934
Epoch 3/8
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step - accuracy: 0.5990 - loss: 0.6879 - val_accuracy: 0.6567 - val_loss: 0.6936
Epoch 4/8
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step - accuracy: 0.5795 - loss: 0.6864 - val_accuracy: 0.6567 - val_loss: 0.6939
Epoch 5/8
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 70ms/step - accuracy: 0.6023 - loss: 0.6851 - val_accuracy: 0.6567 - val_loss: 0.6940
Epoch 6/8
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 73ms/step - accuracy: 0.5754 - loss: 0.6833 - val_accuracy: 0.6567 - val_loss: 0.6946
Epoch 7/8
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m

In [28]:
from tensorflow.keras.layers import Layer
import tensorflow.keras.backend as K

class Attention(Layer):
    def __init__(self, **kwargs):
        super(Attention, self).__init__(**kwargs)

    def build(self, input_shape):
        self.W = self.add_weight(name="att_weight", shape=(input_shape[-1], 1),
                                 initializer="normal")
        self.b = self.add_weight(name="att_bias", shape=(input_shape[1], 1),
                                 initializer="zeros")
        super(Attention, self).build(input_shape)

    def call(self, x):
        # e = K.tanh(K.dot(x, self.W) + self.b)
        e = K.squeeze(K.tanh(K.dot(x, self.W) + self.b), axis=-1)
        alpha = K.softmax(e)
        context = x * K.expand_dims(alpha, axis=-1)
        context = K.sum(context, axis=1)
        return context

    def compute_output_shape(self, input_shape):
        return (input_shape[0], input_shape[-1])

    def get_config(self):
        return super(Attention, self).get_config()


In [31]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, LSTM, Dense, Dropout, concatenate
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Load the dataset
data = pd.read_csv('PG.csv')

# Prepare tokenizer
tokenizer = Tokenizer(num_words=10000)
tokenizer.fit_on_texts(data['src'].astype(str) + ' ' + data['hyp'].astype(str))

# Tokenize and pad the sequences for both 'src' and 'hyp'
src_sequences = tokenizer.texts_to_sequences(data['src'].astype(str))
hyp_sequences = tokenizer.texts_to_sequences(data['hyp'].astype(str))
max_sequence_length = 100
src_data = pad_sequences(src_sequences, maxlen=max_sequence_length)
hyp_data = pad_sequences(hyp_sequences, maxlen=max_sequence_length)

# Split the data
train_src, test_src, train_hyp, test_hyp, train_labels, test_labels = train_test_split(
    src_data, hyp_data, data['label'].values, test_size=0.2, random_state=42)

# Input layers
src_input = Input(shape=(max_sequence_length,), dtype='int32', name='src_input')
hyp_input = Input(shape=(max_sequence_length,), dtype='int32', name='hyp_input')

# Shared embedding layer
embedding_layer = Embedding(input_dim=10000, output_dim=128)

# Branch for source text with LSTM and Attention
src_embedded = embedding_layer(src_input)
src_lstm = LSTM(64, return_sequences=True)(src_embedded)
src_attention = Attention()(src_lstm)

# Branch for hypothesized text with LSTM and Attention
hyp_embedded = embedding_layer(hyp_input)
hyp_lstm = LSTM(64, return_sequences=True)(hyp_embedded)
hyp_attention = Attention()(hyp_lstm)

# Concatenate the outputs from both branches
concatenated = concatenate([src_attention, hyp_attention], axis=-1)

# Additional processing layers
x = Dense(64, activation='relu')(concatenated)
x = Dropout(0.5)(x)
output = Dense(1, activation='sigmoid')(x)

# Define and compile the model
model = Model(inputs=[src_input, hyp_input], outputs=output)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Model summary
model.summary()

# Train the model
model.fit([train_src, train_hyp], train_labels, epochs=20, batch_size=64, validation_data=([test_src, test_hyp], test_labels))

# Evaluate the model
test_loss, test_accuracy = model.evaluate([test_src, test_hyp], test_labels)
print(f'Test Loss: {test_loss}, Test Accuracy: {test_accuracy}')


Epoch 1/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 190ms/step - accuracy: 0.5878 - loss: 0.6798 - val_accuracy: 0.6567 - val_loss: 0.7193
Epoch 2/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 75ms/step - accuracy: 0.6071 - loss: 0.6792 - val_accuracy: 0.6567 - val_loss: 0.7254
Epoch 3/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step - accuracy: 0.5770 - loss: 0.6862 - val_accuracy: 0.6567 - val_loss: 0.6997
Epoch 4/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 77ms/step - accuracy: 0.5956 - loss: 0.6787 - val_accuracy: 0.6567 - val_loss: 0.7020
Epoch 5/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step - accuracy: 0.5928 - loss: 0.6731 - val_accuracy: 0.6567 - val_loss: 0.7084
Epoch 6/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step - accuracy: 0.6084 - loss: 0.6764 - val_accuracy: 0.6567 - val_loss: 0.7113
Epoch 7/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━

ValueError: A KerasTensor cannot be used as input to a TensorFlow function. A KerasTensor is a symbolic placeholder for a shape and dtype, used when constructing Keras Functional models or Keras Functions. You can only use it as input to a Keras layer or a Keras operation (from the namespaces `keras.layers` and `keras.operations`). You are likely doing something like:

```
x = Input(...)
...
tf_fn(x)  # Invalid.
```

What you should do instead is wrap `tf_fn` in a layer:

```
class MyLayer(Layer):
    def call(self, x):
        return tf_fn(x)

x = MyLayer()(x)
```


In [39]:
import tensorflow as tf
from tensorflow.keras.layers import Layer, MultiHeadAttention, LayerNormalization, Dropout, Dense, Input, Embedding, concatenate
from tensorflow.keras.models import Model
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

class TransformerBlock(Layer):
    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):
        super(TransformerBlock, self).__init__()
        self.att = MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = tf.keras.Sequential([
            Dense(ff_dim, activation="relu"), 
            Dense(embed_dim)
        ])
        self.layernorm1 = LayerNormalization(epsilon=1e-6)
        self.layernorm2 = LayerNormalization(epsilon=1e-6)
        self.dropout1 = Dropout(rate)
        self.dropout2 = Dropout(rate)

    def call(self, inputs, training=False):
        attn_output = self.att(inputs, inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)

# Load dataset and preprocessing
data = pd.read_csv('PG.csv')
tokenizer = Tokenizer(num_words=10000)
tokenizer.fit_on_texts(data['src'].astype(str) + ' ' + data['hyp'].astype(str))
src_sequences = tokenizer.texts_to_sequences(data['src'].astype(str))
hyp_sequences = tokenizer.texts_to_sequences(data['hyp'].astype(str))
max_sequence_length = 100
src_data = pad_sequences(src_sequences, maxlen=max_sequence_length)
hyp_data = pad_sequences(hyp_sequences, maxlen=max_sequence_length)
train_src, test_src, train_hyp, test_hyp, train_labels, test_labels = train_test_split(
    src_data, hyp_data, data['label'].values, test_size=0.2, random_state=42)

src_input = Input(shape=(max_sequence_length,), dtype='int32', name='src_input')
hyp_input = Input(shape=(max_sequence_length,), dtype='int32', name='hyp_input')
embedding_layer = Embedding(input_dim=10000, output_dim=128)
src_embedded = embedding_layer(src_input)
hyp_embedded = embedding_layer(hyp_input)
transformer_block = TransformerBlock(128, 8, 512)
src_transformer = transformer_block(src_embedded, training=True)
hyp_transformer = transformer_block(hyp_embedded, training=True)

print("src_transformer shape:", src_transformer.shape)  # Debugging output shapes
print("hyp_transformer shape:", hyp_transformer.shape)

# Apply pooling or reshaping as necessary
src_pooled = tf.reduce_mean(src_transformer, axis=1)
hyp_pooled = tf.reduce_mean(hyp_transformer, axis=1)

concatenated = concatenate([src_pooled, hyp_pooled], axis=-1)
x = Dense(64, activation='relu')(concatenated)
x = Dropout(0.5)(x)
output = Dense(1, activation='sigmoid')(x)
model = Model(inputs=[src_input, hyp_input], outputs=output)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

model.summary()

model.fit([train_src, train_hyp], train_labels, epochs=10, batch_size=64, validation_data=([test_src, test_hyp], test_labels))
test_loss, test_accuracy = model.evaluate([test_src, test_hyp], test_labels)
print(f'Test Loss: {test_loss}, Test Accuracy: {test_accuracy}')


src_transformer shape: (None, 100, 128)
hyp_transformer shape: (None, 100, 128)


ValueError: A KerasTensor cannot be used as input to a TensorFlow function. A KerasTensor is a symbolic placeholder for a shape and dtype, used when constructing Keras Functional models or Keras Functions. You can only use it as input to a Keras layer or a Keras operation (from the namespaces `keras.layers` and `keras.operations`). You are likely doing something like:

```
x = Input(...)
...
tf_fn(x)  # Invalid.
```

What you should do instead is wrap `tf_fn` in a layer:

```
class MyLayer(Layer):
    def call(self, x):
        return tf_fn(x)

x = MyLayer()(x)
```
