In [None]:
# for numerical equations
import numpy as np

# for tabular data handling and manipulation
import pandas as pd

# for converting words into tokens/identifiers
from tensorflow.keras.preprocessing.text import Tokenizer

# to ensure all sequences in a list have the same length by padding them.
from tensorflow.keras.preprocessing.sequence import pad_sequences

# to allow for the creation of a linear stack of layers in the neural network.
from tensorflow.keras.models import Sequential,load_model

# to clear Keras' global state between models that are built in a loop
from tensorflow.keras import backend as K

# Importing various layers from keras.layers to be used in building the model:
from tensorflow.keras.layers import Embedding, Conv1D, MaxPooling1D, GlobalMaxPooling1D, Dense,LSTM,Bidirectional,Dropout, BatchNormalization

# Importing Adam optimizer from keras.optimizers. Adam is an optimization algorithm that can be used instead of the classical stochastic gradient descent.
from tensorflow.keras.optimizers import Adam

# Used for hyperparameter selection
from sklearn.model_selection import ParameterGrid

# for metrics calculation
from sklearn.metrics import accuracy_score

In [None]:
#Bi-LSTM Model

# Function to load and preprocess text data from a CSV file
def load_and_preprocess_data(filepath):
    """Load a claim/evidence CSV and return its combined texts and labels.

    Args:
        filepath: Path to a CSV file containing the columns 'Claim',
            'Evidence' and 'label'.

    Returns:
        A ``(texts, labels)`` tuple. ``texts`` is a pandas Series holding
        ``"<Claim> <Evidence>"`` for every row; ``labels`` is the 'label'
        column as a pandas Series.

    Raises:
        KeyError: If one of the required columns is absent from the file.
    """
    # Read data from CSV file at 'filepath' into a DataFrame
    data = pd.read_csv(filepath)
    # An empty cell is read back as NaN, and `str + NaN` is NaN: the whole row
    # would turn into a float and break the tokenizer downstream. Treat a
    # missing field as an empty string so every text stays a real string.
    claims = data['Claim'].fillna('').astype(str)
    evidence = data['Evidence'].fillna('').astype(str)
    # Combine 'Claim' and 'Evidence' into a single string per row for processing
    texts = claims + " " + evidence
    # Extract labels for supervised learning
    labels = data['label']
    return texts, labels

# Tokenizer limited to a maximum of 5000 words
tokenizer = Tokenizer(num_words=5000)

# ---- Training split ----
train_texts, train_labels = load_and_preprocess_data('/content/train.csv')
train_labels = np.array(train_labels)  # numpy array of labels for model.fit
# The vocabulary is fitted on the training texts only; the validation split
# below reuses this same fitted tokenizer.
tokenizer.fit_on_texts(train_texts)
train_sequences = tokenizer.texts_to_sequences(train_texts)  # texts -> lists of integer word ids
# The longest training sequence defines the padded length used for both splits
max_seq_length = max(map(len, train_sequences))
train_data = pad_sequences(train_sequences, maxlen=max_seq_length)

# ---- Validation split (same tokenizer, same padded length) ----
val_texts, val_labels = load_and_preprocess_data('/content/dev.csv')
val_labels = np.array(val_labels)
val_sequences = tokenizer.texts_to_sequences(val_texts)
val_data = pad_sequences(val_sequences, maxlen=max_seq_length)

# Hyperparameter grid. Every combination is trained once and scored on the
# single held-out validation split (val_data / val_labels) -- this is a plain
# grid search, not k-fold cross-validation.
param_grid = {
    'embedding_dim': [100, 150, 200],
    'lstm_units': [64, 128],
    'dropout_rate': [0.3, 0.5]
}

# Size of the embedding table, derived from the tokenizer's word limit so the
# two cannot drift apart (+1 because index 0 is used for padding).
vocab_size = tokenizer.num_words + 1

# Initialize variables to store the best validation accuracy and corresponding parameters
best_val_accuracy = 0
best_params = None

# Loop through each combination of parameters in the parameter grid
for params in ParameterGrid(param_grid):
    print("Training with parameters:", params)

    # Release the Keras state left behind by the previous candidate. Without
    # this, every model built in the loop keeps consuming memory. It runs
    # before the new model is built, so `model` is still usable after the loop.
    K.clear_session()

    # Building a Sequential model with a Bi-directional LSTM architecture
    model = Sequential([
        Embedding(input_dim=vocab_size, output_dim=params['embedding_dim'], input_length=max_seq_length),  # Word embedding layer
        Bidirectional(LSTM(units=params['lstm_units'], dropout=params['dropout_rate'], return_sequences=False)),  # Bi-directional LSTM layer
        Dense(64, activation='relu'),  # Dense layer with 64 units and ReLU activation
        Dropout(0.2),  # Dropout layer for reducing overfitting by randomly setting input units to 0 during training
        Dense(32, activation='relu'),  # Second hidden Dense layer
        BatchNormalization(),  # Normalize activations of the previous layer at each batch
        Dense(1, activation='sigmoid')  # Output layer with sigmoid activation for binary classification
    ])

    # Compile the model with binary cross-entropy loss and the Adam optimizer
    model.compile(loss='binary_crossentropy', optimizer=Adam(), metrics=['accuracy'])

    # Train the model with training data and validate with validation data
    history = model.fit(train_data, train_labels, batch_size=128, epochs=4, validation_data=(val_data, val_labels), verbose=2)

    # Score the candidate by the validation accuracy of its final epoch, which
    # is also the state of the weights that gets saved below.
    val_accuracy = history.history['val_accuracy'][-1]
    print(f"Validation Accuracy: {val_accuracy:.4f}")

    # Update best model parameters if the current model performs better
    if val_accuracy > best_val_accuracy:
        best_val_accuracy = val_accuracy
        best_params = params
        model.save('best_bilstm_model.h5')  # Save the best performing model

# Output the best validation accuracy and the parameters that achieved it
print("Best Validation Accuracy:", best_val_accuracy)
print("Best Parameters:", best_params)

# Example usage: Load the best model and evaluate it on new test data
# best_model = load_model('best_bilstm_model.h5')
# test_loss, test_accuracy = best_model.evaluate(test_data, test_labels, verbose=0)
# print(f"Test Accuracy: {test_accuracy:.4f}")


Training with parameters: {'dropout_rate': 0.3, 'embedding_dim': 100, 'lstm_units': 64}
Epoch 1/4
186/186 - 196s - loss: 0.5647 - accuracy: 0.7188 - val_loss: 0.5627 - val_accuracy: 0.7302 - 196s/epoch - 1s/step
Epoch 2/4
186/186 - 194s - loss: 0.3997 - accuracy: 0.8186 - val_loss: 0.4809 - val_accuracy: 0.7422 - 194s/epoch - 1s/step
Epoch 3/4
186/186 - 193s - loss: 0.3424 - accuracy: 0.8489 - val_loss: 0.4106 - val_accuracy: 0.8122 - 193s/epoch - 1s/step
Epoch 4/4
186/186 - 200s - loss: 0.2961 - accuracy: 0.8698 - val_loss: 0.4246 - val_accuracy: 0.8000 - 200s/epoch - 1s/step
Validation Accuracy: 0.8000
Training with parameters: {'dropout_rate': 0.3, 'embedding_dim': 100, 'lstm_units': 128}


  saving_api.save_model(


Epoch 1/4
186/186 - 374s - loss: 0.6039 - accuracy: 0.6982 - val_loss: 0.5875 - val_accuracy: 0.7302 - 374s/epoch - 2s/step
Epoch 2/4
186/186 - 365s - loss: 0.5753 - accuracy: 0.7293 - val_loss: 0.4892 - val_accuracy: 0.7396 - 365s/epoch - 2s/step
Epoch 3/4
186/186 - 334s - loss: 0.4569 - accuracy: 0.7804 - val_loss: 0.4452 - val_accuracy: 0.7983 - 334s/epoch - 2s/step
Epoch 4/4
186/186 - 333s - loss: 0.3866 - accuracy: 0.8246 - val_loss: 0.4262 - val_accuracy: 0.7926 - 333s/epoch - 2s/step
Validation Accuracy: 0.7926
Training with parameters: {'dropout_rate': 0.3, 'embedding_dim': 150, 'lstm_units': 64}
Epoch 1/4
186/186 - 229s - loss: 0.5085 - accuracy: 0.7477 - val_loss: 0.5164 - val_accuracy: 0.7302 - 229s/epoch - 1s/step
Epoch 2/4
186/186 - 218s - loss: 0.3723 - accuracy: 0.8343 - val_loss: 0.4682 - val_accuracy: 0.7423 - 218s/epoch - 1s/step
Epoch 3/4
186/186 - 226s - loss: 0.3141 - accuracy: 0.8617 - val_loss: 0.4289 - val_accuracy: 0.8041 - 226s/epoch - 1s/step
Epoch 4/4
186/18

In [None]:
# `model` at this point is simply the last candidate trained by the grid
# search, which is not necessarily the best one. Reload the checkpoint that
# the search saved for the highest validation accuracy and predict with that.
best_model = load_model('best_bilstm_model.h5')

# Generate predictions for the validation set
val_predictions = best_model.predict(val_data)
# Convert probabilities to binary labels (0 or 1) based on a 0.5 threshold
val_predicted_labels = (val_predictions > 0.5).astype(int)

# Create a DataFrame with the predicted labels
predictions_df = pd.DataFrame(val_predicted_labels, columns=['prediction'])

# Save the predictions to a CSV file
predictions_df.to_csv('validation_predictions.csv', index=False)

