In [8]:
# Libraries
!pip install keras==2.16
from pathlib import Path
import tensorflow as tf
import pandas as pd
import numpy as np
from sklearn.metrics import classification_report
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
import nltk
from tensorflow.keras.models import load_model

In [15]:
!pip install tensorflow==2.16.2

Collecting tensorflow==2.16.2
  Obtaining dependency information for tensorflow==2.16.2 from https://files.pythonhosted.org/packages/9d/72/6f09443493b9df2fd8a9585c9af4d9453762906a8e5735a8a5efa6e3d1e3/tensorflow-2.16.2-cp311-cp311-macosx_12_0_arm64.whl.metadata
  Downloading tensorflow-2.16.2-cp311-cp311-macosx_12_0_arm64.whl.metadata (4.1 kB)
Collecting absl-py>=1.0.0 (from tensorflow==2.16.2)
  Obtaining dependency information for absl-py>=1.0.0 from https://files.pythonhosted.org/packages/a2/ad/e0d3c824784ff121c03cc031f944bc7e139a8f1870ffd2845cc2dd76f6c4/absl_py-2.1.0-py3-none-any.whl.metadata
  Downloading absl_py-2.1.0-py3-none-any.whl.metadata (2.3 kB)
Collecting astunparse>=1.6.0 (from tensorflow==2.16.2)
  Obtaining dependency information for astunparse>=1.6.0 from https://files.pythonhosted.org/packages/2b/03/13dde6512ad7b4557eb792fbcf0c653af6076b81e5941d36ec61f7ce6028/astunparse-1.6.3-py2.py3-none-any.whl.metadata
  Downloading astunparse-1.6.3-py2.py3-none-any.whl.metadata (4

Collecting tensorboard-data-server<0.8.0,>=0.7.0 (from tensorboard<2.17,>=2.16->tensorflow==2.16.2)
  Obtaining dependency information for tensorboard-data-server<0.8.0,>=0.7.0 from https://files.pythonhosted.org/packages/7a/13/e503968fefabd4c6b2650af21e110aa8466fe21432cd7c43a84577a89438/tensorboard_data_server-0.7.2-py3-none-any.whl.metadata
  Downloading tensorboard_data_server-0.7.2-py3-none-any.whl.metadata (1.1 kB)
Downloading tensorflow-2.16.2-cp311-cp311-macosx_12_0_arm64.whl (227.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m227.0/227.0 MB[0m [31m23.5 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hDownloading absl_py-2.1.0-py3-none-any.whl (133 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m133.7/133.7 kB[0m [31m35.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading astunparse-1.6.3-py2.py3-none-any.whl (12 kB)
Downloading flatbuffers-24.3.25-py2.py3-none-any.whl (26 kB)
Downloading gast-0.6.0-py3-none-any.whl (21 kB)
Downloadin

In [12]:
tf.__version__

'2.11.0'

In [2]:
import tensorflow as tf
import pandas as pd
import numpy as np
from sklearn.metrics import classification_report
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
import nltk
from pathlib import Path

# Fetch the NLTK resources requested by this notebook (tokenizer data and the
# stop-word lists), passing quiet=True to each download call.
for nltk_resource in ('punkt', 'stopwords'):
    nltk.download(nltk_resource, quiet=True)

# Relative locations of the saved model and of the review data set.
MODEL_PATH = Path("../data/FirstRun.keras")
DATA_PATH = Path("../data/reviews.csv")

def load_data(file_path):
    """Read the CSV file at ``file_path`` and return it as a pandas DataFrame."""
    frame = pd.read_csv(file_path)
    return frame

_STOP_WORDS_CACHE = None


def _english_stop_words():
    """Return the NLTK English stop-word set, building it only on first use."""
    global _STOP_WORDS_CACHE
    if _STOP_WORDS_CACHE is None:
        _STOP_WORDS_CACHE = frozenset(stopwords.words('english'))
    return _STOP_WORDS_CACHE


def preprocess_text(text):
    """Normalise one piece of text into a space-separated token string.

    The text is lower-cased and tokenized with ``word_tokenize``; tokens that
    are not purely alphanumeric (e.g. punctuation) or that appear in the NLTK
    English stop-word list are dropped.

    Args:
        text: The raw text. ``None`` and float NaN (how pandas represents an
            empty CSV cell) are treated as missing; any other non-string
            value is converted with ``str``.

    Returns:
        The remaining tokens joined by single spaces; ``''`` for missing input.
    """
    # `text != text` is only true for NaN, so this catches missing cells
    # without requiring pandas/numpy here.
    if text is None or (isinstance(text, float) and text != text):
        return ''
    if not isinstance(text, str):
        text = str(text)

    tokens = word_tokenize(text.lower())
    # The stop-word set is cached: rebuilding it for every row is wasted work
    # when this function is applied over a whole DataFrame column.
    stop_words = _english_stop_words()
    return ' '.join(token for token in tokens if token.isalnum() and token not in stop_words)

def prepare_data(data, text_column='text', tokenizer=None, max_length=None):
    """Turn a DataFrame's text column into padded integer sequences.

    Side effect: a ``'processed_text'`` column holding the output of
    ``preprocess_text`` is added to (or overwritten in) ``data``.

    Args:
        data: DataFrame containing ``text_column``.
        text_column: Name of the column with the raw text.
        tokenizer: Optional already-fitted tokenizer. Pass the tokenizer used
            at training time so word indices match what the model was trained
            on. When ``None`` (the default, matching the previous behaviour) a
            new ``Tokenizer`` is fitted on ``data`` itself.
        max_length: Optional ``maxlen`` forwarded to ``pad_sequences``. When
            ``None`` (the default) ``pad_sequences`` chooses the length from
            the sequences it is given.

    Returns:
        A ``(padded_sequences, tokenizer)`` tuple.
    """
    data['processed_text'] = data[text_column].apply(preprocess_text)

    if tokenizer is None:
        # NOTE(review): fitting on evaluation data yields a vocabulary that is
        # independent of the one used in training; supply `tokenizer` to avoid this.
        tokenizer = Tokenizer()
        tokenizer.fit_on_texts(data['processed_text'])

    sequences = tokenizer.texts_to_sequences(data['processed_text'])
    return pad_sequences(sequences, maxlen=max_length), tokenizer

def load_model_safe(model_path):
    """Load a Keras model, reporting failures instead of raising.

    Args:
        model_path: Path (``str`` or ``Path``) of the saved model.

    Returns:
        The loaded model, or ``None`` if the path does not exist or loading
        fails. In both failure cases a message starting with
        ``"Error loading model:"`` is printed.
    """
    # Check the path up front so a missing file is reported as such rather
    # than as a low-level error from the model loader.
    model_file = Path(model_path)
    if not model_file.exists():
        print(f"Error loading model: file not found: {model_file.resolve()}")
        return None

    try:
        # Substitute for 'InputLayer': builds an InputLayer from a config,
        # passing config['batch_shape'][1:] as input_shape and forwarding the
        # remaining keys unchanged.
        # NOTE(review): presumably a shim for configs saved by a different
        # Keras version — confirm load_model actually invokes it this way.
        custom_objects = {
            'InputLayer': lambda config: tf.keras.layers.InputLayer(
                input_shape=config['batch_shape'][1:] if 'batch_shape' in config else None,
                **{k: v for k, v in config.items() if k != 'batch_shape'}
            )
        }
        return load_model(model_path, custom_objects=custom_objects)
    except Exception as e:
        # Deliberate best-effort: callers test for None instead of catching.
        print(f"Error loading model: {e}")
        return None

def predict_polarity(model, tokenizer, text, max_length):
    """Score a single text with ``model`` and label its polarity.

    The text is run through ``preprocess_text``, converted to a sequence with
    ``tokenizer``, padded to ``max_length`` and passed to ``model.predict``.

    Returns:
        A ``(label, prediction)`` tuple, where ``prediction`` is the element
        ``[0][0]`` of the model output and ``label`` is
        ``"Most Likely Negative"`` when it exceeds 0.5, otherwise
        ``"Most Likely Positive"``.
    """
    cleaned = preprocess_text(text)
    encoded = tokenizer.texts_to_sequences([cleaned])
    model_input = pad_sequences(encoded, maxlen=max_length)
    prediction = model.predict(model_input)[0][0]

    if prediction > 0.5:
        label = "Most Likely Negative"
    else:
        label = "Most Likely Positive"
    return label, prediction

def main():
    """Load the saved model, evaluate it on the review data and score one sample.

    Steps: load the model from ``MODEL_PATH`` (aborting with a message if that
    fails), load and vectorise the CSV at ``DATA_PATH``, print a
    classification report against the label column, then print the polarity
    prediction for one sample review.
    """
    # NOTE(review): the notebook's comments describe the CSV as having 'text'
    # and 'target' columns, so 'target' is used as the label column — confirm.
    label_column = 'target'
    sample_index = 13  # row used for the single-review demonstration

    # Load the model
    model = load_model_safe(MODEL_PATH)
    if model is None:
        print("Failed to load the model. Please check the model file and TensorFlow version.")
        return

    # Load and prepare the data
    data = load_data(DATA_PATH)
    if data.empty:
        print(f"No rows found in {DATA_PATH}; nothing to evaluate.")
        return
    X_test, tokenizer = prepare_data(data)

    # Make predictions; flatten to 1-D so the shape matches the label column
    y_pred_proba = model.predict(X_test)
    y_pred = (y_pred_proba > 0.5).astype(int).ravel()

    # Print classification report (labels must come from the label column,
    # not from the review text itself)
    if label_column in data.columns:
        print(classification_report(data[label_column], y_pred))
    else:
        print(f"Column '{label_column}' not found in {DATA_PATH}; skipping classification report.")

    # Test the model on a sample review (fall back to the last row when the
    # data set has fewer rows than sample_index + 1)
    sample_review = data['text'].iloc[min(sample_index, len(data) - 1)]
    print("Sample review:", sample_review)
    result, prob = predict_polarity(model, tokenizer, sample_review, X_test.shape[1])
    print(f"Prediction: {result} (Probability: {prob:.2f})")

# Run the pipeline when this code is executed as the main module.
if __name__ == "__main__":
    main()
    
    

Error loading model: Unable to open file (file signature not found)
Failed to load the model. Please check the model file and TensorFlow version.


In [18]:
# Set the model's file path
# NOTE(review): this names SecondRun.keras, whereas MODEL_PATH names FirstRun.keras — confirm which model is intended
file_path = Path("../data/SecondRun.keras")

In [19]:
print(MODEL_PATH)

../data/FirstRun.keras


In [21]:
# Load the saved model directly from its relative path.
# NOTE(review): the path is repeated here as a string literal rather than reusing file_path — confirm they are meant to match
from tensorflow.keras.models import load_model
model = load_model("../data/SecondRun.keras")

OSError: Unable to open file (file signature not found)

In [5]:
print(file_path)

../data/FirstRun.keras


In [10]:

# Load the model to a new object
# NOTE(review): file_path is set in a different cell; confirm it points at the intended model before running
nn_import = tf.keras.models.load_model(file_path)
# Load your test data
# Assuming you have a CSV file with 'text' and 'target' columns
test_data = pd.read_csv('../data/reviews.csv')

OSError: Unable to open file (file signature not found)

In [None]:
#Preprocess for tokenization
def preprocess_text(text):
    """Lower-case and tokenize ``text``, keeping only alphanumeric tokens that
    are not English stop words, and return them joined by single spaces."""
    # Tokenization
    words = word_tokenize(text.lower())
    # Remove stopwords and punctuation
    english_stop_words = set(stopwords.words('english'))
    kept = []
    for word in words:
        if word.isalnum() and word not in english_stop_words:
            kept.append(word)
    return ' '.join(kept)

In [None]:
# Preprocess the test data
test_data['processed_text'] = test_data['text'].apply(preprocess_text)

In [None]:
# Tokenize and pad the sequences
# Note: You should use the same tokenizer as in training, or load a saved tokenizer
# FIXME(review): a fresh Tokenizer is fitted on the test texts here, so the word
# indices come from this data rather than from a training-time tokenizer.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(test_data['processed_text'])
sequences = tokenizer.texts_to_sequences(test_data['processed_text'])
# No maxlen is given, so pad_sequences picks the padded length from `sequences`.
X_test = pad_sequences(sequences)

In [None]:
# Get the true labels
# NOTE(review): assumes the CSV has a 'target' label column, as the data-loading
# comment in this notebook states — confirm. The 'text' column holds the reviews
# themselves and cannot serve as labels.
y_test = test_data['target']

# Make predictions
y_pred_proba = nn_import.predict(X_test)
# Threshold into integer class labels and flatten to 1-D; casting to str would
# produce 'True'/'False' strings that never match the labels.
y_pred = (y_pred_proba > 0.5).astype(int).ravel()
# Generate and print the classification report
print(classification_report(y_test, y_pred))

In [None]:
# Set the model's file path
# NOTE(review): this path is relative to the working directory, unlike the "../data/" paths used elsewhere in this notebook — confirm the location
file_path = Path("SecondRun.keras")

# Load the model to a new object
nn_import = tf.keras.models.load_model(file_path)

In [None]:
# Load your test data
# Assuming you have a CSV file with 'text' and 'target' columns
# NOTE(review): read from the working directory here, whereas DATA_PATH points to "../data/reviews.csv" — confirm the location
test_data = pd.read_csv('reviews.csv')

In [None]:
test_data.columns

In [None]:
test_data.head()

In [None]:
#Preprocess for tokenization
def preprocess_text(text):
    """Return the lower-cased tokens of ``text`` that are alphanumeric and not
    English stop words, joined into one space-separated string."""
    # Tokenization
    candidates = word_tokenize(text.lower())
    # Remove stopwords and punctuation
    excluded = set(stopwords.words('english'))

    def is_content_word(candidate):
        return candidate.isalnum() and candidate not in excluded

    return ' '.join(filter(is_content_word, candidates))

In [1]:

# Preprocess the test data
test_data['processed_text'] = test_data['text'].apply(preprocess_text)

NameError: name 'test_data' is not defined

In [None]:
# Tokenize and pad the sequences
# Note: You should use the same tokenizer as in training, or load a saved tokenizer
# FIXME(review): a fresh Tokenizer is fitted on the test texts here, so the word
# indices come from this data rather than from a training-time tokenizer.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(test_data['processed_text'])
sequences = tokenizer.texts_to_sequences(test_data['processed_text'])
# No maxlen is given, so pad_sequences picks the padded length from `sequences`.
X_test = pad_sequences(sequences)

In [None]:
# Get the true labels
# NOTE(review): assumes the CSV has a 'target' label column, as the data-loading
# comment in this notebook states — confirm. The 'text' column holds the reviews
# themselves and cannot serve as labels.
y_test = test_data['target']

# Make predictions
y_pred_proba = nn_import.predict(X_test)
# Threshold into integer class labels and flatten to 1-D; casting to str would
# produce 'True'/'False' strings that never match the labels.
y_pred = (y_pred_proba > 0.5).astype(int).ravel()

In [None]:
# Generate and print the classification report
print(classification_report(y_test, y_pred))

In [None]:
# Application (simple example)
def predict_polarity(text):
    """Print the model's score for ``text`` and return a polarity label.

    Relies on the notebook-level ``tokenizer``, ``X_test`` and ``nn_import``.
    Returns ``"Most Likely Negative"`` when the score exceeds 0.5, otherwise
    ``"Most Likely Positive"``.
    """
    cleaned = preprocess_text(text)
    encoded = tokenizer.texts_to_sequences([cleaned])
    # Pad to the same width as the evaluation matrix.
    model_input = pad_sequences(encoded, maxlen=X_test.shape[1])
    prediction = nn_import.predict(model_input)[0][0]
    print(prediction)
    if prediction > 0.5:
        return "Most Likely Negative"
    return "Most Likely Positive"

In [None]:
greview = test_data['text'][13]
print(greview)

In [None]:
# Test the application
test_text = greview
print(predict_polarity(test_text))