In [None]:
import pandas as pd
import numpy as np
import re
import nltk
from nltk.corpus import stopwords
from numpy import array

from keras.preprocessing.text import one_hot, Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers.core import Activation, Dropout, Dense
from keras.layers import Flatten, GlobalMaxPooling1D, Embedding, Conv1D, LSTM
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from numpy import asarray
from numpy import zeros
import matplotlib.pyplot as plt
from keras.layers import Conv1D, RNN, Embedding, SimpleRNN, Dense

from keras.models import Sequential
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout, Bidirectional, Attention, GlobalAveragePooling1D
from tensorflow.keras.optimizers import Adam

In [None]:
# Load the preprocessed dataset; later cells read its 'text' and 'target' columns.
data = pd.read_csv('processed_data.csv')
# Bare last expression: renders the DataFrame as the cell output.
data

In [None]:
# Shuffle the rows reproducibly (seed 42) and rebuild a clean positional index.
data = (
    shuffle(data, random_state=42)
    .reset_index(drop=True)
)

# Collapse the labels to two classes: 0 stays 0, every other value becomes 1.
data['target'] = data['target'].apply(lambda label: int(not label == 0))

In [None]:
# Extract the texts and their labels from the frame as plain Python lists.
X = data['text'].tolist()
Y = data['target'].tolist()
# Bare last expression: echoes the whole label list as the cell output.
Y

In [None]:
# Hold out 20% of the samples as the test set; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

In [None]:
# Coerce every sample to `str` WITHOUT dropping any element.
# The previous version filtered on `isinstance(text, (str, float))`; any sample
# of another type was silently removed from X while y_train / y_test kept
# their original length, leaving texts and labels misaligned.
# Float entries (e.g. NaN from empty CSV cells) still become their string
# form, exactly as before.
X_train = [str(text) for text in X_train]
X_test = [str(text) for text in X_test]

In [None]:
# Convert both label lists into NumPy arrays before they are handed to fit/evaluate.
y_train, y_test = np.array(y_train), np.array(y_test)

In [None]:
# Build the vocabulary from the training texts only.
word_tokenizer = Tokenizer()
word_tokenizer.fit_on_texts(X_train)

# Encode both splits as integer sequences using that single vocabulary.
X_train, X_test = (
    word_tokenizer.texts_to_sequences(texts) for texts in (X_train, X_test)
)

In [None]:
# Number of rows needed by the embedding table: one per known word, plus one
# extra (presumably for index 0, which the tokenizer does not assign — confirm).
vocab_length = len(word_tokenizer.word_index) + 1

# Bare last expression: shows the vocabulary size as the cell output.
vocab_length

In [None]:
# Fixed sequence length fed to every model; padding is appended at the end ('post').
maxlen = 100

X_train, X_test = (
    pad_sequences(sequences, padding='post', maxlen=maxlen)
    for sequences in (X_train, X_test)
)

In [None]:
# Parse the GloVe text file into {word: float32 vector}.
embeddings_dictionary = dict()

# `with` guarantees the handle is closed even if a line fails to parse
# (the previous open()/close() pair leaked it on any exception).
with open('a2_glove.6B.100d.txt', encoding="utf8") as glove_file:
    for line in glove_file:
        records = line.split()
        if not records:
            # Blank line: nothing to index (records[0] would raise IndexError).
            continue
        word = records[0]
        vector_dimensions = asarray(records[1:], dtype='float32')
        embeddings_dictionary[word] = vector_dimensions

In [None]:
# One 100-dimensional row per vocabulary index, initialised to zeros.
embedding_matrix = zeros((vocab_length, 100))

# Copy the GloVe vector of every known word into its row; words without a
# GloVe entry keep their all-zero row.
for token, row in word_tokenizer.word_index.items():
    glove_vector = embeddings_dictionary.get(token)
    if glove_vector is None:
        continue
    embedding_matrix[row] = glove_vector

embedding_matrix.shape

In [None]:
# NOTE: this expression is not the last one in the cell, so its value is never displayed.
type(embedding_matrix)
# Persist the embedding matrix as comma-separated text.
np.savetxt('embedding_matrix_lstm.csv', embedding_matrix, delimiter=',')

----------
SAVED
_________

In [None]:
# Read the saved matrix back from disk.
# NOTE(review): `loaded_embedding_matrix` is not referenced by any other cell
# in this notebook; the models below are built from `embedding_matrix`.
loaded_embedding_matrix = np.loadtxt('embedding_matrix_lstm.csv', delimiter=',')

In [None]:
# Frozen embedding lookup initialised from the GloVe matrix built above.
embedding_layer = Embedding(
    vocab_length,
    100,
    weights=[embedding_matrix],
    input_length=maxlen,
    trainable=False,
)

# Baseline network: embeddings -> flatten -> single sigmoid unit.
snn_model = Sequential([
    embedding_layer,
    Flatten(),
    Dense(1, activation='sigmoid'),
])

In [None]:
# Binary classification set-up: Adam optimiser, binary cross-entropy, accuracy metric.
snn_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])

# summary() prints the table itself and returns None, so wrapping it in
# print() only appended a stray "None" line to the output.
snn_model.summary()

In [None]:
# Train for 6 epochs in batches of 128, with validation_split=0.2 of the
# training data used for validation; the returned History feeds the plots below.
snn_model_history = snn_model.fit(X_train, y_train, batch_size=128, epochs=6, verbose=1, validation_split=0.2)

In [None]:
# Evaluate on the held-out test split; `score` is [loss, accuracy].
score = snn_model.evaluate(X_test, y_test, verbose=1)
print(f"Test Score: {score[0]}")
print(f"Test Accuracy: {score[1]}")

In [None]:

# One figure per metric: training curve against validation curve.
for train_key, val_key, title, y_label in (
    ('acc', 'val_acc', 'model accuracy', 'accuracy'),
    ('loss', 'val_loss', 'model loss', 'loss'),
):
    plt.plot(snn_model_history.history[train_key])
    plt.plot(snn_model_history.history[val_key])

    plt.title(title)
    plt.ylabel(y_label)
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()

In [None]:
# Frozen GloVe embedding lookup (a fresh layer instance for this model).
embedding_layer = Embedding(
    vocab_length,
    100,
    weights=[embedding_matrix],
    input_length=maxlen,
    trainable=False,
)

# 1-D convolution (128 filters, window 5) -> global max pool -> sigmoid unit.
cnn_model = Sequential([
    embedding_layer,
    Conv1D(128, 5, activation='relu'),
    GlobalMaxPooling1D(),
    Dense(1, activation='sigmoid'),
])

In [None]:
# Binary classification set-up: Adam optimiser, binary cross-entropy, accuracy metric.
cnn_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])
# summary() prints the table and returns None; print() around it only added "None".
cnn_model.summary()

In [None]:
# Train for 6 epochs in batches of 128, with validation_split=0.2 of the
# training data used for validation; the returned History feeds the plots below.
cnn_model_history = cnn_model.fit(X_train, y_train, batch_size=128, epochs=6, verbose=1, validation_split=0.2)

In [None]:
# Evaluate the CNN on the test split before reporting. Previously this cell
# only printed `score`, which still held the result of the earlier
# snn_model.evaluate() call, so the CNN numbers were never actually shown.
score = cnn_model.evaluate(X_test, y_test, verbose=1)
print("Test Score:", score[0])
print("Test Accuracy:", score[1])

In [None]:
# One figure per metric: training curve against validation curve.
for train_key, val_key, title, y_label in (
    ('acc', 'val_acc', 'model accuracy', 'accuracy'),
    ('loss', 'val_loss', 'model loss', 'loss'),
):
    plt.plot(cnn_model_history.history[train_key])
    plt.plot(cnn_model_history.history[val_key])

    plt.title(title)
    plt.ylabel(y_label)
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()

In [None]:
# Frozen GloVe embedding lookup (a fresh layer instance for this model).
embedding_layer = Embedding(
    vocab_length,
    100,
    weights=[embedding_matrix],
    input_length=maxlen,
    trainable=False,
)

# Simple recurrent classifier: embeddings -> SimpleRNN(128) -> sigmoid unit.
rnn_model = Sequential([
    embedding_layer,
    SimpleRNN(128),
    Dense(1, activation='sigmoid'),
])

rnn_model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

rnn_model.summary()

In [None]:
# Train for 6 epochs in batches of 128, with validation_split=0.2 of the
# training data used for validation.
rnn_model_history = rnn_model.fit(X_train, y_train, batch_size=128, epochs=6, verbose=1, validation_split=0.2)

In [None]:
# Evaluate on the held-out test split; `score` is [loss, accuracy].
score = rnn_model.evaluate(X_test, y_test, verbose=1)
print(f"Test Score: {score[0]}")
print(f"Test Accuracy: {score[1]}")

In [None]:
lstm_model = Sequential()

# Embedding layer: pre-trained GloVe vectors, kept frozen (trainable=False),
# so they are NOT fine-tuned during training.
embedding_layer = Embedding(vocab_length, 100, weights=[embedding_matrix], input_length=maxlen, trainable=False)
lstm_model.add(embedding_layer)

# Bidirectional LSTM layer (reads the sequence in both directions and returns
# one output per time step so it can be pooled below).
lstm_model.add(Bidirectional(LSTM(128, return_sequences=True)))

# NOTE: an `Attention()([lstm_model.output, lstm_model.output])` tensor used to
# be computed here, but it was never added to the Sequential stack, so it had
# no effect on the model. The dead computation was removed; adding real
# attention requires rebuilding this model with the functional API.

# Global average pooling collapses the time dimension.
lstm_model.add(GlobalAveragePooling1D())

# Dropout for regularisation.
lstm_model.add(Dropout(0.5))

# Dense head ending in a single sigmoid unit for binary classification.
lstm_model.add(Dense(64, activation='relu'))
lstm_model.add(Dropout(0.5))
lstm_model.add(Dense(1, activation='sigmoid'))

In [None]:
# Adam with a small learning rate (1e-4), binary cross-entropy, accuracy metric.
lstm_model.compile(optimizer=Adam(learning_rate=1e-4), loss='binary_crossentropy', metrics=['accuracy'])
# summary() prints the table and returns None; print() around it only added "None".
lstm_model.summary()

In [None]:
# Train for 6 epochs in batches of 128, with validation_split=0.2 of the
# training data used for validation; the returned History feeds the plots below.
lstm_model_history = lstm_model.fit(X_train, y_train, batch_size=128, epochs=6, verbose=1, validation_split=0.2)

In [None]:
# Evaluate the LSTM on the test split; `score` is printed by the next cell.
score = lstm_model.evaluate(X_test, y_test, verbose=1)

In [None]:
# Report the most recent evaluation result held in `score` ([loss, accuracy]).
print(f"Test Score: {score[0]}")
print(f"Test Accuracy: {score[1]}")

In [None]:
# One figure per metric: training curve against validation curve.
for train_key, val_key, title, y_label in (
    ('accuracy', 'val_accuracy', 'model accuracy', 'accuracy'),
    ('loss', 'val_loss', 'model loss', 'loss'),
):
    plt.plot(lstm_model_history.history[train_key])
    plt.plot(lstm_model_history.history[val_key])

    plt.title(title)
    plt.ylabel(y_label)
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()

In [None]:
# Prints an empty line; the cell has no other effect.
print()

In [None]:
import keras_tuner as kt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Bidirectional, Attention, GlobalAveragePooling1D, Dropout, Dense
from tensorflow.keras.optimizers import Adam

def build_model(hp):
    """Build and compile a tunable bidirectional-LSTM binary classifier.

    Reads the notebook-level ``vocab_length``, ``embedding_matrix`` and
    ``maxlen`` to create the frozen embedding layer.

    Search space (these names appear in ``best_hp.values``):
      * ``lstm_units``     - int, 128..240, step 32
      * ``dropout_rate``   - float, 0.2..0.7, step 0.1 (after pooling)
      * ``dense_units``    - int, 64..180, step 32
      * ``dropout_rate_2`` - float, 0.2..0.7, step 0.1 (after the dense layer)
      * ``learning_rate``  - float, 1e-5..1e-2, log-sampled

    Args:
        hp: hyperparameter container supplied by the tuner; only its ``Int``
            and ``Float`` methods are used.

    Returns:
        The compiled ``Sequential`` model.
    """
    lstm_model = Sequential()

    # Embedding layer: pre-trained vectors, kept frozen (trainable=False),
    # so they are NOT fine-tuned during the search.
    embedding_layer = Embedding(vocab_length, 100, weights=[embedding_matrix], input_length=maxlen, trainable=False)
    lstm_model.add(embedding_layer)

    # Bidirectional LSTM; return_sequences=True keeps one output per time
    # step so it can be pooled below.
    lstm_model.add(Bidirectional(LSTM(
        units=hp.Int('lstm_units', min_value=128, max_value=240, step=32),
        return_sequences=True)))

    # NOTE: an `Attention()([lstm_model.output, lstm_model.output])` tensor
    # used to be computed here, but it was never added to the Sequential
    # stack and therefore never influenced the model. The dead computation
    # was removed; real attention needs the functional API.

    # Global average pooling collapses the time dimension.
    lstm_model.add(GlobalAveragePooling1D())

    # Dropout for regularisation.
    lstm_model.add(Dropout(rate=hp.Float('dropout_rate', min_value=0.2, max_value=0.7, step=0.1)))

    # Dense head ending in a single sigmoid unit.
    lstm_model.add(Dense(
        units=hp.Int('dense_units', min_value=64, max_value=180, step=32),
        activation='relu'))
    lstm_model.add(Dropout(rate=hp.Float('dropout_rate_2', min_value=0.2, max_value=0.7, step=0.1)))
    lstm_model.add(Dense(1, activation='sigmoid'))

    # Binary cross-entropy matches the single sigmoid output.
    lstm_model.compile(
        optimizer=Adam(learning_rate=hp.Float('learning_rate', min_value=1e-5, max_value=1e-2, sampling='LOG')),
        loss='binary_crossentropy',
        metrics=['accuracy']
    )

    return lstm_model

# Define the tuner.
# The objective is VALIDATION accuracy: ranking trials by training accuracy
# (the previous 'accuracy' objective) rewards the configuration that overfits
# the most, because no held-out data took part in the search.
# NOTE(review): if 'my_dir_2/lstm_bayesian_tuning_2' already holds trials from
# an earlier run, the tuner may resume them — use a new project_name or pass
# overwrite=True to start clean.
tuner = kt.BayesianOptimization(
    build_model,
    objective='val_accuracy',
    max_trials=10,  # Number of trials to run
    executions_per_trial=1,  # Run each trial once
    directory='my_dir_2',  # Save results here
    project_name='lstm_bayesian_tuning_2'
)

# Perform the hyperparameter search; validation_split provides the held-out
# data that the 'val_accuracy' objective is computed on.
tuner.search(X_train, y_train, epochs=2, validation_split=0.2)

# Get the best hyperparameters
best_hp = tuner.get_best_hyperparameters(num_trials=1)[0]
print(f"Best Hyperparameters: {best_hp.values}")


In [None]:
# Take the top-ranked model from the tuner and save it to 'best_lstm_model_new.h5'.
best_model = tuner.get_best_models(num_models=1)[0]
best_model.save('best_lstm_model_new.h5')

Best Hyperparameters: {'lstm_units': 192, 'dropout_rate': 0.5, 'dense_units': 128, 'dropout_rate_2': 0.30000000000000004, 'learning_rate': 0.0010872353209015178}

In [None]:
import pandas as pd
import numpy as np
from tensorflow.keras.models import load_model
from keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import re

# Step 1: Load the pre-trained model
# NOTE(review): the tuning cell saves 'best_lstm_model_new.h5' while this loads
# 'best_lstm_model.h5' — confirm this is the intended artifact.
model = load_model('best_lstm_model.h5')

# Step 2: Reuse the tokenizer that was fitted on the training texts.
# Re-creating it with `Tokenizer()` (as this cell used to do) yields an EMPTY
# vocabulary: every input then encodes to an empty sequence, is padded to all
# zeros, and the model returns the same prediction for any text.
if not word_tokenizer.word_index:
    raise RuntimeError(
        "word_tokenizer has no vocabulary; fit it on the training texts "
        "(or load the saved tokenizer) before running inference."
    )

# If you have a saved tokenizer (saved as pickle, for example), load it instead.
# Only unpickle files you created yourself — pickle can execute arbitrary code.
# import pickle
# with open('tokenizer.pickle', 'rb') as handle:
#     word_tokenizer = pickle.load(handle)

# Step 3: Define your text preprocessing function
def preprocess_input_text(text, maxlen=100):
    """Encode one raw text as a padded integer sequence for the model.

    Uses the notebook-level ``word_tokenizer`` to map the text to word
    indices, then pads the result at the end ('post') to ``maxlen``.

    Args:
        text: the raw input string.
        maxlen: target sequence length; the default of 100 is the value used
            when the training data was padded.

    Returns:
        The output of ``pad_sequences`` for the single-text batch.
    """
    # texts_to_sequences expects a list of texts, so wrap the single input.
    tokenized_text = word_tokenizer.texts_to_sequences([text])

    # The leftover debug print of the padded array was removed so that
    # predictions do not spam the cell output.
    return pad_sequences(tokenized_text, padding='post', maxlen=maxlen)

# Step 4: Define the prediction function
def predict_sentiment(text, threshold=0.5):
    """Classify one text with the loaded model.

    Args:
        text: the raw input string.
        threshold: scores at or above this value are labelled "Positive";
            the default of 0.5 keeps the original behaviour.

    Returns:
        A two-element list ``[prediction, label]`` where ``prediction`` is
        the raw output of ``model.predict`` and ``label`` is "Positive" or
        "Negative".
    """
    # Tokenize and pad the input the same way the training data was prepared.
    processed_text = preprocess_input_text(text)

    prediction = model.predict(processed_text)

    # Read the single score explicitly instead of branching on the truth value
    # of a whole array comparison, which only works for one-element outputs.
    # Assumes one score per sample, i.e. prediction[0][0] — matches the
    # Dense(1, sigmoid) heads built in this notebook.
    probability = float(prediction[0][0])

    label = "Positive" if probability >= threshold else "Negative"
    return [prediction, label]




In [None]:
# Step 5: Example usage — classify one sentence and print the [prediction, label] pair.
input_text = "Kill is an hello i am bad text for sentiment analysis."
output = predict_sentiment(input_text)
print(f"Predicted Output: {output}")

In [None]:
from ibm_watson_machine_learning import APIClient
import json
import numpy as np

In [None]:
import os

# Credentials for the Watson Machine Learning client.
# The API key is read from the WML_API_KEY environment variable and must never
# be committed in the notebook. The key that used to be hard-coded here has
# been exposed and should be revoked and rotated in IBM Cloud.
# A missing variable raises KeyError immediately instead of failing later
# with an opaque authentication error.
wml_creds = {
    "apikey": os.environ["WML_API_KEY"],
    "url": "https://us-south.ml.cloud.ibm.com"
}

In [None]:
# Create the WML client from the credentials dict, then call spaces.list()
# to show the deployment spaces available to it.
wml_client = APIClient(wml_credentials=wml_creds)
wml_client.spaces.list()

In [None]:
# Select the deployment space that all following repository/deployment calls operate in.
SPACE_ID ='6760b3b6-140d-46c0-861e-2eebda2dd6ab'
wml_client.set.default_space(SPACE_ID)
print(f"Default space is set to: {SPACE_ID}")

In [None]:
# Look up the software specification id for the TensorFlow runtime by name.
software_spec_uid = wml_client.software_specifications.get_id_by_name('tensorflow_rt23.1-py3.10')

# Earlier metadata draft, kept for reference (not executed):
# model_props = {
#     wml_client.repository.ModelMetaNames.NAME: 'FastBert Model',
#     wml_client.repository.ModelMetaNames.TYPE: 'tensorflow_2.12',
#     wml_client.repository.ModelMetaNames.SOFTWARE_SPEC_UID: software_spec_uid
# }

# Archive that is uploaded as the model artifact.
model_dir = 'best_lstm_model.zip'

# Location of the requirements file referenced by the deployment cell.
requirements_file = 'requirement.txt'

# Location of the pipeline script referenced by the deployment cell.
pipeline_file = 'lstm_pipeline.py'

# Metadata for the stored model: display name, model type and runtime spec.
model_metadata = {
    wml_client.repository.ModelMetaNames.NAME: "Best_82",
    wml_client.repository.ModelMetaNames.TYPE: "tensorflow_2.12",
    wml_client.repository.ModelMetaNames.SOFTWARE_SPEC_UID: software_spec_uid
}

# Store the model in the repository.
# NOTE: despite its name, `model_uid` holds the full details returned by
# store_model here; the deployment cell reads the id from it via
# ['metadata']['id'].
model_uid = wml_client.repository.store_model(model = model_dir, meta_props=model_metadata)

# Prints the returned details (labelled "UID" in the message).
print(f"Model uploaded with UID: {model_uid}")

In [None]:
# Earlier upload draft, kept for reference (not executed):
# model_details = wml_client.repository.store_model(
#     model='model_package',  # Path to the zip file
#     meta_props=model_props
# )
# model_details

# `model_uid` initially holds the details dict returned by store_model; reduce
# it to the bare id. The isinstance guard makes the cell safe to re-run:
# without it a second run would index the already-extracted id string and fail.
if isinstance(model_uid, dict):
    model_uid = model_uid['metadata']['id']

# Deployment configuration.
# NOTE(review): confirm that the ONLINE section accepts the "dependencies" and
# "pipeline" keys — verify against the WML deployments documentation.
deployment_props = {
    wml_client.deployments.ConfigurationMetaNames.NAME: 'FEMA-SentimentAnalysis',
    wml_client.deployments.ConfigurationMetaNames.ONLINE: {
        "dependencies": {
            "pip_requirements": requirements_file,  # Point to the requirements file
            "python": "3.10"  # Ensure the Python version is appropriate
        },
        "pipeline": pipeline_file  # Ensure the pipeline file is used for inference
    }
}

# Deploy the stored model as an online deployment.
deployment = wml_client.deployments.create(
    artifact_uid=model_uid,
    meta_props=deployment_props
)

# Output result
print(f"Deployment created: {deployment}")



In [None]:
# Scoring payload with one raw-text row.
# NOTE: `input_text` is rebound to an equivalent payload in the REST scoring
# cell below, so this definition is overwritten before it is used there.
input_text = {
    "input_data": [
        {
            "fields": ["text"],  # Name of the input field (this should match what the model expects)
            "values": [["This is a sample text to analyze."]]  # List of texts you want to predict on
        }
    ]
}


In [None]:
import json
import requests

# 1. Get the deployment id and scoring URL from the deployment response
deployment_uid = deployment['metadata']['id']
deployment_url = deployment['entity']['status']['inference'][0]['url']

# API version date sent as the `version` query parameter
version = "2021-06-28"

# 2. Prepare the input data (text)
input_text = {
    "input_data": [
        {
            "fields": ["text"],  # Input field name as expected by the model
            "values": [["This is a test sentence for sentiment analysis."]]  # Example text for prediction
        }
    ]
}

# 3. Exchange the API key for a short-lived IAM access token.
# The scoring endpoint expects "Authorization: Bearer <token>"; sending the raw
# API key (as this cell used to do, hard-coded) is rejected and leaks the
# secret into the notebook. The key is taken from `wml_creds` instead.
iam_response = requests.post(
    "https://iam.cloud.ibm.com/identity/token",
    data={
        "apikey": wml_creds["apikey"],
        "grant_type": "urn:ibm:params:oauth:grant-type:apikey",
    },
    timeout=30,
)
iam_response.raise_for_status()
iam_token = iam_response.json()["access_token"]

headers = {
    "Authorization": f"Bearer {iam_token}",
    "Content-Type": "application/json"
}

# Add version to the scoring URL as a query parameter
scoring_url_with_version = f"{deployment_url}?version={version}"

# 4. Send the scoring request; the timeout prevents the cell from hanging
# forever on an unresponsive endpoint.
response = requests.post(
    scoring_url_with_version,
    headers=headers,
    data=json.dumps(input_text),
    timeout=60,
)

# 5. Handle the response and print the prediction
if response.status_code == 200:
    prediction = response.json()
    print(f"Prediction: {prediction}")
else:
    print(f"Failed to get prediction. Status Code: {response.status_code}")
    print(f"Response: {response.text}")


In [None]:
# Show the deployment id extracted from the deployment response.
deployment_uid

In [None]:
# Shape of the first padded test sequence.
X_test[0].shape

In [None]:
# Scoring payload that sends the first padded test sequence (integer word
# indices) as a single row.
payload = {
    "input_data": [
        {
            "fields": ["embedding_input"],  # Rename to match the model's expected field name for embeddings
            "values": [X_test[0].tolist()]  # Convert numpy array to list if needed
        }
    ]
}

In [None]:
# Score the payload against the online deployment through the WML client.
result = wml_client.deployments.score(deployment_uid, payload)

In [None]:
# Label of the first test sample, for comparison with the scored result.
y_test[0]

In [None]:
# Show the response returned by the scoring call.
result

In [None]:
# Alternative payload that sends raw text instead of an encoded sequence.
# NOTE(review): this only works if the deployed pipeline tokenizes and pads
# the text itself — confirm against the deployment.
payload = {
    "input_data": [
        {
            "fields": ["text"],  # Assuming the pipeline expects 'text' as input field
            "values": [["This is a test sentence for sentiment analysis."]]  # Raw text
        }
    ]
}

# Perform the inference through the WML client
result = wml_client.deployments.score(deployment_uid, payload)

# Output the result
print(result)


In [None]:
# Show the first padded test sequence.
X_test[0]