In [37]:
from google.colab import drive
drive.mount('/content/drive')

import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import pickle
import pandas as pd

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [38]:
# Path to the saved Keras model file on the mounted Google Drive.
model_path = '/content/drive/My Drive/bilstm_model.h5'

# Load the saved model into the global `model` used by the prediction helpers.
model = load_model(model_path)

# Restore the tokenizer object and the padding length that were pickled
# alongside the model; both are read later when preparing inputs for `model`.
# NOTE(review): pickle.load can execute arbitrary code while deserializing, so
# only load these files if they come from a trusted source.
with open('/content/drive/My Drive/tokenizer.pickle', 'rb') as handle:
    tokenizer = pickle.load(handle)
with open('/content/drive/My Drive/max_seq_length.pkl', 'rb') as f:
    max_seq_length = pickle.load(f)

In [39]:
# Function to load and preprocess text data from a CSV file
def load_and_preprocess_data(filepath):
    """Read a CSV and return one ``"<Claim> <Evidence>"`` string per row.

    Args:
        filepath: Path or file-like object accepted by ``pd.read_csv``. The
            CSV must contain ``Claim`` and ``Evidence`` columns; any other
            columns are ignored.

    Returns:
        list[str]: One combined text per CSV row, in file order. Missing
        values are replaced by empty strings before joining, so a row with
        no evidence yields ``"<claim> "`` (note the trailing space).

    Raises:
        KeyError: If the ``Claim`` or ``Evidence`` column is absent.
    """
    data = pd.read_csv(filepath)

    # Fail early with a readable message instead of a bare KeyError from
    # deep inside the processing (or no error at all for an empty file).
    required_columns = ['Claim', 'Evidence']
    missing_columns = [col for col in required_columns if col not in data.columns]
    if missing_columns:
        raise KeyError(
            f"CSV is missing required column(s) {missing_columns}; "
            f"found columns {list(data.columns)}"
        )

    # Work on a copy of just the two text columns: NaN -> '' and everything
    # coerced to str, without mutating the frame that was read.
    text_columns = data[required_columns].fillna('').astype(str)

    # Vectorized string concatenation replaces the per-row iterrows() loop.
    combined = text_columns['Claim'] + " " + text_columns['Evidence']
    return combined.tolist()




In [42]:
def predict_from_csv(path, output_path='test_predictions.csv', threshold=0.5):
    """Run the loaded model over a CSV of claim/evidence pairs and save labels.

    Relies on the notebook-level globals ``model``, ``tokenizer`` and
    ``max_seq_length`` having been loaded already.

    Args:
        path: CSV file passed to ``load_and_preprocess_data``.
        output_path: Where the predictions CSV is written. Defaults to
            ``'test_predictions.csv'`` in the current working directory.
        threshold: Predictions strictly greater than this value are labelled
            1, everything else 0. Defaults to 0.5.

    Returns:
        None. The result is a CSV at ``output_path`` with a single
        ``prediction`` column holding one 0/1 label per input row.
    """
    # Load and preprocess test data in a similar fashion as training data
    test_texts = load_and_preprocess_data(path)
    test_sequences = tokenizer.texts_to_sequences(test_texts)
    test_data = pad_sequences(test_sequences, maxlen=max_seq_length)

    # Generate predictions for the test set
    test_predictions = model.predict(test_data)

    # Convert model outputs to binary labels (0 or 1) using the threshold
    test_predicted_labels = (test_predictions > threshold).astype(int)

    # Create a DataFrame with the predicted labels.
    # NOTE(review): the single 'prediction' column assumes the model emits
    # exactly one value per row -- confirm against the model's output layer.
    predictions_df = pd.DataFrame(test_predicted_labels, columns=['prediction'])

    # Save the predictions to a CSV file
    predictions_df.to_csv(output_path, index=False)

# Run batch prediction on the test CSV; predict_from_csv writes its labels
# to 'test_predictions.csv' in the current working directory.
path = '/content/test.csv'
predict_from_csv(path)



In [5]:
def predict_claim_evidence(claim, evidence):
    """Classify a single claim/evidence pair with the loaded model.

    Relies on the notebook-level globals ``model``, ``tokenizer`` and
    ``max_seq_length`` having been loaded already.

    Args:
        claim: The claim text (str).
        evidence: The evidence text (str).

    Returns:
        A boolean (NumPy bool): True when the model's output for the pair is
        greater than 0.5, otherwise False.
    """
    # Combine claim and evidence into a single input string, joined by one
    # space exactly as load_and_preprocess_data does for CSV rows.
    combined_input = claim + " " + evidence

    # Tokenize with the fitted tokenizer that was loaded from disk. Creating
    # a new Tokenizer here would have an empty vocabulary, so every word
    # would be dropped and the model would see the same all-padding input
    # (and give the same answer) for any claim.
    input_sequence = tokenizer.texts_to_sequences([combined_input])

    # Pad to the saved sequence length, matching predict_from_csv, rather
    # than a hard-coded number.
    padded_input = pad_sequences(input_sequence, maxlen=max_seq_length)

    # Predict the output
    prediction = model.predict(padded_input)

    # Convert the model output to True or False
    is_true = (prediction > 0.5).astype(bool)

    # NOTE(review): [0, 0] assumes the output has shape (1, 1) -- confirm
    # against the model's output layer.
    return is_true[0, 0]

# Example usage: classify one hand-written claim/evidence pair and print the
# verdict in plain words.
claim = "The capital of France is Paris."
evidence = "Paris has been the capital of France since the 6th century."
result = predict_claim_evidence(claim, evidence)
print("The claim is", "true" if result else "false")



The claim is true
