In [1]:
import tensorflow as tf

# Report the installed TensorFlow version and the GPUs it can see.
visible_gpus = tf.config.list_physical_devices('GPU')
print("TensorFlow version:", tf.__version__)
print("Num GPUs Available:", len(visible_gpus))
print("Available GPUs:", visible_gpus)

TensorFlow version: 2.9.0
Num GPUs Available: 1
Available GPUs: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [2]:
from tensorflow.python.client import device_lib

# Print every local device (CPU and GPU) with the details TensorFlow reports for it.
local_devices = device_lib.list_local_devices()
print(local_devices)

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 2374287241800410951
xla_global_id: -1
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 1405950158
locality {
  bus_id: 1
  links {
  }
}
incarnation: 11153364547114261673
physical_device_desc: "device: 0, name: GeForce MX150, pci bus id: 0000:01:00.0, compute capability: 6.1"
xla_global_id: 416903419
]


In [4]:
# Enable on-demand GPU memory allocation for every visible GPU.
# TensorFlow raises RuntimeError if memory growth is changed after the GPUs
# have been initialized; that error is printed instead of being propagated.
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        for device in gpus:
            tf.config.experimental.set_memory_growth(device, True)
    except RuntimeError as err:
        print(err)
    else:
        # Only reached when every GPU was configured without error.
        print("Memory growth set for GPU.")

Memory growth set for GPU.


In [7]:
pip install numpy matplotlib pandas scikit-learn

Note: you may need to restart the kernel to use updated packages.


In [8]:
import numpy as np
# Load the pre-cleaned texts and their labels from the working directory.
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary Python
# objects, which can execute code embedded in the file. Only load .npy files
# that come from a trusted source.
texts_path = 'clean_texts.npy'
labels_path = 'clean_labels.npy'
texts = np.load(texts_path, allow_pickle=True)
labels = np.load(labels_path)

In [18]:
# Sanity-check the loaded data: show both array lengths and one sample text.
print("Number of texts:", len(texts))
print("Example:", texts[0])
# This line reports the labels array, so it is labelled as such.
print("Number of labels:", len(labels))

Number of texts: 1804512
Example: this is so cool its like would you want your mother to read this really great idea well done
Number of texts: 1804512


In [20]:
# Confirm that the loaded texts are held in a NumPy array.
for check in (type(texts), isinstance(texts, np.ndarray)):
    print(check)

<class 'numpy.ndarray'>
True


In [22]:
# Vocabulary size cap; passed as `max_tokens` to the TextVectorization layer.
MAX_WORDS = 10_000

In [24]:
from tensorflow.keras.layers import TextVectorization

# Configure the text vectorizer: each text becomes a fixed-length sequence of
# integer token ids, with the vocabulary capped at MAX_WORDS entries.
vectorizer_options = dict(
    max_tokens=MAX_WORDS,
    output_mode='int',
    output_sequence_length=200,  # every output is padded or truncated to 200 ids
)
vectorizer = TextVectorization(**vectorizer_options)

# Learn the vocabulary from the corpus.
vectorizer.adapt(texts)

In [25]:
# Quick check of the adapted vectorizer on a single sample sentence.
sample_sentence = 'Hello, Deep learing is non eazy'
vectorizer(sample_sentence)

<tf.Tensor: shape=(200,), dtype=int64, numpy=
array([3506, 1230,    1,    7, 1328,    1,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
     

# Build the tf.data input pipeline from the raw arrays:
# vectorize each text -> cache -> shuffle -> batch -> prefetch.
# NOTE(review): the shuffle runs before any train/val/test split. tf.data
# reshuffles on every iteration by default, so a take()/skip() split applied to
# this dataset may select different elements each epoch. Confirm before
# splitting this pipeline.
dataset = (
    tf.data.Dataset.from_tensor_slices((texts, labels))
    .map(lambda text, label: (vectorizer(text), label),
         num_parallel_calls=tf.data.AUTOTUNE)
    .cache()
    .shuffle(10000)
    .batch(32)
    .prefetch(tf.data.AUTOTUNE)
)

In [28]:
# Rebuild the pipeline without cache and shuffle (temporarily, for testing):
# vectorize each text, group into batches of 16, and prefetch.
dataset = (
    tf.data.Dataset.from_tensor_slices((texts, labels))
    .map(lambda text, label: (vectorizer(text), label),
         num_parallel_calls=tf.data.AUTOTUNE)
    .batch(16)
    .prefetch(tf.data.AUTOTUNE)
)

In [29]:
# Split into train / val / test: 70% / 20% / the remaining batches.
# Sizes are counted in batches because `dataset` is already batched.
#
# len() reads the dataset's known cardinality. The previous
# len(list(dataset)) iterated the whole pipeline (vectorizing every text) and
# held every batch in memory just to count them. len() raises TypeError if the
# cardinality is unknown or infinite, so a bad pipeline fails loudly here.
dataset_size = len(dataset)
train_size = int(dataset_size * 0.7)
val_size = int(dataset_size * 0.2)

train = dataset.take(train_size)
val = dataset.skip(train_size).take(val_size)
test = dataset.skip(train_size + val_size)

In [30]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, LSTM, Dropout, Bidirectional, Dense, Embedding

In [31]:
# Text classifier: embedding -> bidirectional LSTM -> dense stack -> 6 sigmoid outputs.
model = Sequential([
    # Each sample is a sequence of 200 integer token ids.
    Input(shape=(200,)),
    # Learn a 32-dimensional embedding for each token id.
    Embedding(input_dim=MAX_WORDS+1, output_dim=32, input_length=200),
    # Read the sequence in both directions with a 32-unit LSTM.
    Bidirectional(LSTM(32, activation='tanh')),
    # Fully connected feature-extraction layers.
    Dense(128, activation='relu'),
    Dense(256, activation='relu'),
    Dense(128, activation='relu'),
    # Output layer: six independent sigmoid units.
    # NOTE(review): assumes each label row has 6 entries. TODO confirm against clean_labels.npy.
    Dense(6, activation='sigmoid'),
])

In [32]:
# Configure training: Adam optimizer with binary cross-entropy loss.
# No extra metrics are tracked, so the training history contains losses only.
model.compile(optimizer='Adam', loss='BinaryCrossentropy')

In [None]:
# Print the layer-by-layer summary of the model defined above.
model.summary()

In [None]:
from tensorflow.keras.callbacks import EarlyStopping

# Early-stopping callback: stop once the validation loss has failed to improve
# for 2 consecutive epochs, then restore the weights from the best epoch.
early_stop_options = {
    'monitor': 'val_loss',
    'patience': 2,
    'restore_best_weights': True,
}
early_stop = EarlyStopping(**early_stop_options)

In [None]:
# Train for up to 4 epochs, validating on `val` after each epoch; the
# early-stopping callback may end training sooner.
# No batch_size is passed: `train` is a tf.data.Dataset that is already batched
# by the input pipeline, and Keras documents that batch_size must not be
# specified for dataset inputs.
history = model.fit(
    train,
    epochs=4,
    validation_data=val,
    callbacks=[early_stop],
)

In [None]:
# Persist the trained model to an HDF5 file in the working directory.
model_path = 'final_model.h5'
model.save(model_path)
print("Model saved successfully ")

In [None]:
# Imported here because nothing earlier in the notebook brings `plt` or `pd`
# into scope; without them this cell fails with NameError on a fresh kernel.
import matplotlib.pyplot as plt
import pandas as pd

# Plot the per-epoch training history recorded by model.fit().
# The DataFrame is drawn onto an explicit Axes: calling DataFrame.plot() after
# plt.figure() opens a second figure, leaving the first one empty and ignoring
# the requested size.
fig, ax = plt.subplots(figsize=(8, 5))
pd.DataFrame(history.history).plot(ax=ax)
ax.set_title("Training History")
ax.set_xlabel("Epoch")
# The model is compiled without metrics, so only loss curves are recorded.
ax.set_ylabel("Loss")
ax.grid(True)
plt.show()