In [1]:
import os
import numpy as np
import tensorflow as tf
from nltk.tokenize import word_tokenize
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Function to tokenize and preprocess text
def preprocess_text(text):
    # Tokenize text into words
    words = word_tokenize(text)

    # Convert words to lowercase
    words = [word.lower() for word in words]

    # Join words back into a sentence
    processed_text = ' '.join(words)

    return processed_text

def predict_paper(model, test_input_paper, tokenizer, max_len, threshold=0.37):
    # Preprocess the paper text
    processed_text = preprocess_text(test_input_paper)

    # Tokenize and convert text to sequences
    sequences = tokenizer.texts_to_sequences([processed_text])

    # Pad sequences to the correct length
    padded_sequences = pad_sequences(sequences, maxlen=max_len)

    # Use the trained model to predict probabilities
    probabilities = model.predict(padded_sequences)

    # Extract the predicted class and confidence
    prediction_value = probabilities[0][0]

    # Set the threshold for predicting "ChatGPT"
    if prediction_value >= threshold:
        prediction_label = "Human generated"
    else:
        prediction_label = "ChatGPT"

    return prediction_label, prediction_value

if __name__ == "__main__":
    # Load the new trained neural network model
    model = tf.keras.models.load_model('nn_twice_as_big_model.keras')

    # Set the maximum sequence length (adjust as needed)
    max_len = 871  # Adjust this value to match the expected input shape of your model

    # Tokenizer for text processing
    tokenizer = Tokenizer()

    # Folder containing test input files
    folder_path = 'Test_Inputs'

    # Iterate over files in the folder
    for filename in os.listdir(folder_path):
        if filename.endswith(".txt"):
            file_path = os.path.join(folder_path, filename)

            # Load the paper text from file
            with open(file_path, 'r', encoding='utf-8', errors='ignore') as file:
                test_input_paper = file.read()

            # Tokenize text
            tokenizer.fit_on_texts([test_input_paper])

            # Example usage
            prediction_label, prediction_value = predict_paper(model, test_input_paper, tokenizer, max_len)

            # Display the prediction for each file
            print(f"File: {filename}, Prediction: {prediction_label}, Confidence: {prediction_value:.4f}")


2023-11-28 10:33:51.108788: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


OSError: No file or directory found at nn_twice_as_big_model.keras