In [4]:
import numpy as np

texts = ["I love programming",
         "Python is awesome",
         "I hate bugs",
         "Debugging is fun",
         "I love solving problems",
         "I don't like errors"]
labels = [1, 1, 0, 1, 1, 0]


In [5]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences


In [8]:
tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts)



In [9]:
sequences = tokenizer.texts_to_sequences(texts)
sequences

[[1, 2, 4], [5, 3, 6], [1, 7, 8], [9, 3, 10], [1, 2, 11, 12], [1, 13, 14, 15]]

In [11]:
max_length = max([len(sequence)
 for sequence in sequences])
max_length

4

In [12]:
X = pad_sequences(sequences, maxlen=max_length,
                  padding='post')
X

array([[ 1,  2,  4,  0],
       [ 5,  3,  6,  0],
       [ 1,  7,  8,  0],
       [ 9,  3, 10,  0],
       [ 1,  2, 11, 12],
       [ 1, 13, 14, 15]], dtype=int32)

In [13]:
y = np.array(labels)
y

array([1, 1, 0, 1, 1, 0])

In [16]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers  import Embedding, Dense, Flatten


In [14]:
tokenizer.word_index

{'i': 1,
 'love': 2,
 'is': 3,
 'programming': 4,
 'python': 5,
 'awesome': 6,
 'hate': 7,
 'bugs': 8,
 'debugging': 9,
 'fun': 10,
 'solving': 11,
 'problems': 12,
 "don't": 13,
 'like': 14,
 'errors': 15}

In [18]:
model = Sequential()
model.add(Embedding(input_dim=len(tokenizer.word_index) + 1,
                    output_dim=8,
                    input_length=max_length))

model.add(Flatten())
model.add(Dense(10, activation='relu'))

model.add(Dense(1, activation='sigmoid'))


In [19]:
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

In [20]:
model.fit(X, y,epochs=20, batch_size=2)


Epoch 1/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 0.4375 - loss: 0.6938
Epoch 2/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.7708 - loss: 0.6869 
Epoch 3/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.9167 - loss: 0.6818
Epoch 4/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.8542 - loss: 0.6784
Epoch 5/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.8542 - loss: 0.6736
Epoch 6/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.8542 - loss: 0.6748
Epoch 7/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.7292 - loss: 0.6710
Epoch 8/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - accuracy: 0.9167 - loss: 0.6596
Epoch 9/20
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

<keras.src.callbacks.history.History at 0x780aad620050>

In [21]:
sample_text = "i love programming"
sample_sequence = tokenizer.texts_to_sequences ([sample_text])  # Tokenize the sample text
sample_padded = pad_sequences(sample_sequence, maxlen=max_length, padding='post')  # Pad the sequence
prediction = model.predict(sample_padded)
if prediction > 0.5:
  print('positive')
else:
  print('negative')


print(prediction[0][0])


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 72ms/step
positive
0.5627484


In [None]:
from tensorflow.keras.layers import Dropout

model = Sequential()
model.add(Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=8, input_length=max_length))
model.add(Dropout(0.5))  # Dropout layer to prevent overfitting
model.add(Flatten())
model.add(Dense(10, activation='relu'))
model.add(Dense(5, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X, y, epochs=20, batch_size=2)


Epoch 1/20
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.2623 - loss: 0.7010    
Epoch 2/20
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5642 - loss: 0.6928 
Epoch 3/20
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5267 - loss: 0.6911     
Epoch 4/20
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7243 - loss: 0.6879 
Epoch 5/20
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6399 - loss: 0.6870 
Epoch 6/20
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5409 - loss: 0.6893     
Epoch 7/20
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7260 - loss: 0.6828 
Epoch 8/20
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8634 - loss: 0.6784 
Epoch 9/20
[1m16/16[0m [32m━━━━━━━

<keras.src.callbacks.history.History at 0x7facf20d1e90>

In [23]:
sample_text = "the code is very easy"
sample_sequence = tokenizer.texts_to_sequences([sample_text])  # Tokenize the sample text
sample_padded = pad_sequences(sample_sequence, maxlen=max_length, padding='post')  # Pad the sequence
prediction = model.predict(sample_padded)
if prediction > 0.5:
  print('positive')
else:
  print('negative')


print(prediction[0][0])


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
positive
0.51905286


In [24]:
# Expanded training data
texts = [
    "I love programming",             # Positive
    "Python is awesome",              # Positive
    "I hate bugs",                    # Negative
    "Debugging is fun",               # Positive
    "I love solving problems",        # Positive
    "I don't like errors",            # Negative
    "Coding is exciting",             # Positive
    "I hate when the code breaks",    # Negative
    "Software development is amazing", # Positive
    "I enjoy learning new languages", # Positive
    "Fixing bugs is so satisfying",   # Positive
    "I don't understand this error",  # Negative
    "I love refactoring code",        # Positive
    "Writing clean code feels great", # Positive
    "I can't stand slow computers",   # Negative
    "I'm frustrated with this bug",   # Negative
    "I hate syntax errors",           # Negative
    "I love the challenge of algorithms", # Positive
    "Programming is a fun skill",     # Positive
    "I am so happy my code works",    # Positive
    "I don't like messy code",        # Negative
    "Refactoring makes my code better", # Positive
    "The debugger is my friend",      # Positive
    "I can't figure out this logic",  # Negative
    "This app is crashing",           # Negative
    "Why won't my code compile?",     # Negative
    "I love building projects",       # Positive
    "I hate debugging this issue",    # Negative
    "Coding is a lot of fun",         # Positive
    "I dislike bad error messages",   # Negative
    "Testing is an important part of development", # Positive
    "I don't like working on legacy code"  # Negative
]

labels = [
    1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1,1,0
]  # 1 = Positive, 0 = Negative


In [25]:
tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts)

In [26]:
sequences= tokenizer.texts_to_sequences(texts)
max_length = max([len(sequence) for sequence in sequences])

In [27]:
import numpy as np
X = pad_sequences(sequences, maxlen=max_length, padding='post')
y = np.array(labels)
X

array([[ 1,  4, 13,  0,  0,  0,  0],
       [24,  2, 25,  0,  0,  0,  0],
       [ 1,  6, 14,  0,  0,  0,  0],
       [15,  2,  9,  0,  0,  0,  0],
       [ 1,  4, 26, 27,  0,  0,  0],
       [ 1,  7, 10, 16,  0,  0,  0],
       [17,  2, 28,  0,  0,  0,  0],
       [ 1,  6, 29, 11,  3, 30,  0],
       [31, 18,  2, 32,  0,  0,  0],
       [ 1, 33, 34, 35, 36,  0,  0],
       [37, 14,  2, 19, 38,  0,  0],
       [ 1,  7, 39,  5, 20,  0,  0],
       [ 1,  4, 21,  3,  0,  0,  0],
       [40, 41,  3, 42, 43,  0,  0],
       [ 1, 22, 44, 45, 46,  0,  0],
       [47, 48, 49,  5, 50,  0,  0],
       [ 1,  6, 51, 16,  0,  0,  0],
       [ 1,  4, 11, 52, 12, 53,  0],
       [13,  2, 23,  9, 54,  0,  0],
       [ 1, 55, 19, 56,  8,  3, 57],
       [ 1,  7, 10, 58,  3,  0,  0],
       [21, 59,  8,  3, 60,  0,  0],
       [11, 61,  2,  8, 62,  0,  0],
       [ 1, 22, 63, 64,  5, 65,  0],
       [ 5, 66,  2, 67,  0,  0,  0],
       [68, 69,  8,  3, 70,  0,  0],
       [ 1,  4, 71, 72,  0,  0,  0],
 

In [29]:
model=Sequential()
model.add(Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=8, input_length=max_length))
#model.add(Dropout(0.5))
model.add(Flatten())
model.add(Dense(10, activation='relu'))
model.add(Dense(5, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

In [30]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [31]:
model.fit(X, y, epochs=50, batch_size=30)

Epoch 1/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 22ms/step - accuracy: 0.5319 - loss: 0.6874
Epoch 2/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step - accuracy: 0.5639 - loss: 0.6829
Epoch 3/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step - accuracy: 0.5639 - loss: 0.6789
Epoch 4/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step - accuracy: 0.5958 - loss: 0.6751
Epoch 5/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step - accuracy: 0.5750 - loss: 0.6712
Epoch 6/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step - accuracy: 0.5528 - loss: 0.6697
Epoch 7/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - accuracy: 0.5639 - loss: 0.6661
Epoch 8/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step - accuracy: 0.5639 - loss: 0.6634
Epoch 9/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [

<keras.src.callbacks.history.History at 0x780aad5fc9d0>

In [35]:
sample_text = "coding is not interested"
sample_sequence = tokenizer.texts_to_sequences([sample_text])  # Tokenize the sample text
sample_padded = pad_sequences(sample_sequence, maxlen=max_length, padding='post')  # Pad the sequence
prediction = model.predict(sample_padded)
if prediction > 0.5:
  print('positive')
else:
  print('negative')


print(prediction[0][0])

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
positive
0.7344286


In [None]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Dense, Flatten, Dropout
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Expanded training data
texts = [
    "I love programming",             # Positive
    "Python is awesome",              # Positive
    "I hate bugs",                    # Negative
    "Debugging is fun",               # Positive
    "I love solving problems",        # Positive
    "I don't like errors",            # Negative
    "Coding is exciting",             # Positive
    "I hate when the code breaks",    # Negative
    "Software development is amazing", # Positive
    "I enjoy learning new languages", # Positive
    "Fixing bugs is so satisfying",   # Positive
    "I don't understand this error",  # Negative
    "I love refactoring code",        # Positive
    "Writing clean code feels great", # Positive
    "I can't stand slow computers",   # Negative
    "I'm frustrated with this bug",   # Negative
    "I hate syntax errors",           # Negative
    "I love the challenge of algorithms", # Positive
    "Programming is a fun skill",     # Positive
    "I am so happy my code works",    # Positive
    "I don't like messy code",        # Negative
    "Refactoring makes my code better", # Positive
    "The debugger is my friend",      # Positive
    "I can't figure out this logic",  # Negative
    "This app is crashing",           # Negative
    "Why won't my code compile?",     # Negative
    "I love building projects",       # Positive
    "I hate debugging this issue",    # Negative
    "Coding is a lot of fun",         # Positive
    "I dislike bad error messages",   # Negative
    "Testing is an important part of development", # Positive
    "I don't like working on legacy code"  # Negative
]

labels = [
    1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1,1,0
]  # 1 = Positive, 0 = Negative

# Step 2: Preprocess the Data
tokenizer = Tokenizer(oov_token="<OOV>")  # Handle out-of-vocabulary words
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)

# Determine the maximum length of a sequence
max_length = max([len(sequence) for sequence in sequences])

# Pad the sequences to ensure they all have the same length
X = pad_sequences(sequences, maxlen=max_length, padding='post')

# Ensure the labels array has the same number of samples as X
y = np.array(labels)

# Step 3: Build the Model
model = Sequential()
model.add(Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=8, input_length=max_length))
model.add(Dropout(0.5))  # Add dropout to reduce overfitting
model.add(Flatten())
model.add(Dense(10, activation='relu'))
model.add(Dense(1, activation='sigmoid'))  # Binary classification

# Compile the Model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Step 4: Train the Model
model.fit(X, y, epochs=10, batch_size=2)

# Step 5: Evaluate the Model
loss, accuracy = model.evaluate(X, y)
print(f"Accuracy: {accuracy * 100:.2f}%")

# Step 6: Check for OOV words in a new input
sample_text = "understading program is very tuff"
sample_sequence = tokenizer.texts_to_sequences([sample_text])

# Check if the sequence has OOV tokens
print("Tokenized sequence with possible OOV words:", sample_sequence)

# Count OOV words in the sample
oov_count = sum(1 for word_id in sample_sequence[0] if word_id == tokenizer.word_index['<OOV>'])
if oov_count > 0:
    print(f"Warning: The sample contains {oov_count} OOV words.")
else:
    print("The sample contains no OOV words.")

#sample_text = "not interesed coding"
#sample_sequence = tokenizer.texts_to_sequences([sample_text])  # Tokenize the sample text
#sample_padded = pad_sequences(sample_sequence, maxlen=max_length, padding='post')  # Pad the sequence
prediction = model.predict(sample_padded)
if prediction > 0.5:
  print('positive')
else:
  print('negative')


Epoch 1/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.6022 - loss: 0.6927
Epoch 2/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.4900 - loss: 0.6893     
Epoch 3/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7364 - loss: 0.6799 
Epoch 4/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6969 - loss: 0.6768 
Epoch 5/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6733 - loss: 0.6736     
Epoch 6/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.8219 - loss: 0.6538 
Epoch 7/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7886 - loss: 0.6637 
Epoch 8/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8778 - loss: 0.6423 
Epoch 9/10
[1m16/16[0m [32m━━━━━━━━━━━

In [None]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Expanded training data
texts = [
    "I love programming",
    "Python is awesome",
    "I hate bugs",
    "Debugging is fun",
    "I love solving problems",
    "I don't like errors",
    "Coding is exciting",
    "I hate when the code breaks",
    "Software development is amazing",
    "I enjoy learning new languages",
    "Fixing bugs is so satisfying",
    "I don't understand this error",
    "I love refactoring code",
    "Writing clean code feels great",
    "I can't stand slow computers",
    "I'm frustrated with this bug",
    "I hate syntax errors",
    "I love the challenge of algorithms",
    "Programming is a fun skill",
    "I am so happy my code works",
    "I don't like messy code",
    "Refactoring makes my code better",
    "The debugger is my friend",
    "I can't figure out this logic",
    "This app is crashing",
    "Why won't my code compile?",
    "I love building projects",
    "I hate debugging this issue",
    "Coding is a lot of fun",
    "I dislike bad error messages",
    "Testing is an important part of development",
    "I don't like working on legacy code"
]

labels = [
    1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1,1,0
]  # 1 = Positive, 0 = Negative

# Step 2: Preprocess the Data
tokenizer = Tokenizer(oov_token="<OOV>")  # Handle out-of-vocabulary words
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)

# Determine the maximum length of a sequence
max_length = max([len(sequence) for sequence in sequences])

# Pad the sequences to ensure they all have the same length
X = pad_sequences(sequences, maxlen=max_length, padding='post')

# Ensure the labels array has the same number of samples as X
y = np.array(labels)

# Step 3: Build the Model with LSTM
model = Sequential()
model.add(Embedding(input_dim=len(tokenizer.word_index) + 1,
                    output_dim=8,
                    input_length=max_length))
model.add(LSTM(64, return_sequences=False))
model.add(Dropout(0.5))
model.add(Dense(10, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Step 4: Train the Model
model.fit(X, y, epochs=10, batch_size=2)

# Step 5: Evaluate the Model
loss, accuracy = model.evaluate(X, y)
print(f"Accuracy: {accuracy * 100:.2f}%")

# Step 6: Make Predictions on New Data
sample_text = "I love programming but sometimes I face errors"
sample_sequence = tokenizer.texts_to_sequences([sample_text])
sample_padded = pad_sequences(sample_sequence, maxlen=max_length, padding='post')
prediction = model.predict(sample_padded)
predicted_label = (prediction > 0.5).astype("int32")
print(f"Prediction (1 = Positive, 0 = Negative): {predicted_label[0][0]}")


Epoch 1/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 10ms/step - accuracy: 0.7239 - loss: 0.6915
Epoch 2/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.6660 - loss: 0.6865
Epoch 3/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.6031 - loss: 0.6871
Epoch 4/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.4574 - loss: 0.6950
Epoch 5/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.5048 - loss: 0.6881
Epoch 6/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.5300 - loss: 0.6741
Epoch 7/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.5300 - loss: 0.6627
Epoch 8/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.5524 - loss: 0.5769
Epoch 9/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0

In [None]:
sample_text = "not interesed coding"
sample_sequence = tokenizer.texts_to_sequences([sample_text])  # Tokenize the sample text
sample_padded = pad_sequences(sample_sequence, maxlen=max_length, padding='post')  # Pad the sequence
prediction = model.predict(sample_padded)
if prediction > 0.5:
  print('positive')
else:
  print('negative')


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 99ms/step
positive


In [22]:
import numpy as np

# 50 positive (label = 1), 50 negative (label = 0)
labels = np.array([1]*50 + [0]*50)

print(labels)

[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
