In [None]:
import os
import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers import TextVectorization, LSTM, Dropout, Bidirectional, Dense, Embedding
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam

from google.colab import drive
# Mount Google Drive so files stored there are readable from the Colab runtime
drive.mount('/content/drive')

# Location of the training CSV on the mounted Drive
DATA_PATH = "/content/drive/MyDrive/train.csv/train.csv"

df = pd.read_csv(DATA_PATH)
# A bare `df.head()` is only rendered when it is the LAST expression of a cell;
# in the middle of a cell its result is silently discarded, so display it explicitly.
display(df.head())

# Raw comment strings are the model input
X = df['comment_text']
# Every column from index 2 onward is used as a target column
# (presumably six 0/1 label columns, matching the 6-unit output layer — TODO confirm)
y = df[df.columns[2:]].values

# Upper bound on the vocabulary size kept by the vectorizer
MAX_FEATURES = 200000

# Maps each comment to a fixed-length sequence of 1800 integer token ids
vectorizer = TextVectorization(
    output_mode='int',
    output_sequence_length=1800,
    max_tokens=MAX_FEATURES,
)

# Build the vocabulary from the comments ...
vectorizer.adapt(X.values)
# ... then encode every comment in one call
vectorized_text = vectorizer(X.values)

# tf.data pipeline: from_tensor_slices -> cache -> shuffle -> batch -> prefetch
SHUFFLE_SEED = 42  # fixed so the shuffle order, and therefore the split, is reproducible

dataset = tf.data.Dataset.from_tensor_slices((vectorized_text, y))
dataset = dataset.cache()
# reshuffle_each_iteration=False is essential here: take()/skip() below re-iterate
# this same pipeline, and with the default (True) every pass is shuffled anew, so
# examples would migrate between train, val and test from one epoch to the next
# and the held-out splits would no longer be held out.
# Trade-off: the training order is now the same in every epoch.
dataset = dataset.shuffle(160000, seed=SHUFFLE_SEED, reshuffle_each_iteration=False)
dataset = dataset.batch(32)  # Adjust the batch size
dataset = dataset.prefetch(8)  # helps bottlenecks

# Split by batch count: first 70% train, next 20% validation, last 10% test
num_batches = len(dataset)
train = dataset.take(int(num_batches * 0.7))
val = dataset.skip(int(num_batches * 0.7)).take(int(num_batches * 0.2))
test = dataset.skip(int(num_batches * 0.9)).take(int(num_batches * 0.1))

# Layer stack, in order: token embedding -> bidirectional LSTM ->
# fully connected feature extractor -> 6 independent sigmoid outputs.
model = Sequential([
    # One 64-dim vector per token id (vocabulary size + 1 rows)
    Embedding(MAX_FEATURES + 1, 64),
    # Reads the sequence in both directions
    Bidirectional(LSTM(64, activation='tanh')),
    # Fully connected feature extractor
    Dense(128, activation='relu'),
    Dropout(0.2),
    Dense(256, activation='relu'),
    Dense(128, activation='relu'),
    # One sigmoid unit per output label
    Dense(6, activation='sigmoid'),
])

# Adam with an explicit learning rate so it is easy to tune
custom_optimizer = Adam(learning_rate=0.001)

# Binary cross-entropy is applied to each of the sigmoid outputs
model.compile(optimizer=custom_optimizer, loss='BinaryCrossentropy')

# Train for 5 epochs, reporting validation loss after each one
history = model.fit(train, validation_data=val, epochs=5)

# Sample comment to score with the trained model
text_data = 'You freaking'

# Encode the comment with the already-adapted `vectorizer` (batch of one)
input_text = vectorizer([text_data])

# Sigmoid scores, one per output unit
prediction = model.predict(input_text)

# Threshold the scores at 0.5 to get a 0/1 flag per output
binary_prediction = (prediction > 0.5).astype(int)

print(binary_prediction)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[[1 0 0 0 0 0]]


In [None]:
# Mounts Google Drive at /content/drive. The first cell of this notebook performs
# the same mount, so this cell only matters when it is run on its own.
from google.colab import drive
drive.mount(mountpoint='/content/drive')

Mounted at /content/drive


In [None]:
# Sample comment to score with the trained model
text_data = 'Why are you crook?'

# Encode the comment with the already-adapted `vectorizer` (batch of one)
input_text = vectorizer([text_data])

# Sigmoid scores, one per output unit
prediction = model.predict(input_text)

# Threshold the scores at 0.5 to get a 0/1 flag per output
binary_prediction = (prediction > 0.5).astype(int)

print(binary_prediction)


[[1 0 0 0 1 0]]


In [None]:
# Sample comment to score with the trained model
text_data = 'piss off'

# Encode the comment with the already-adapted `vectorizer` (batch of one)
input_text = vectorizer([text_data])

# Sigmoid scores, one per output unit
prediction = model.predict(input_text)

# Threshold the scores at 0.5 to get a 0/1 flag per output
binary_prediction = (prediction > 0.5).astype(int)

print(binary_prediction)


[[1 0 1 0 0 0]]


In [None]:
# Sample comment to score with the trained model
text_data = "Rubbish video"

# Encode the comment with the already-adapted `vectorizer` (batch of one)
input_text = vectorizer([text_data])

# Sigmoid scores, one per output unit
prediction = model.predict(input_text)

# Threshold the scores at 0.5 to get a 0/1 flag per output
binary_prediction = (prediction > 0.5).astype(int)

print(binary_prediction)


[[0 0 0 0 0 0]]


In [None]:

# Sample comment to score with the trained model
text_data = "Damn Video"

# Encode the comment with the already-adapted `vectorizer` (batch of one)
input_text = vectorizer([text_data])

# Sigmoid scores, one per output unit
prediction = model.predict(input_text)

# Threshold the scores at 0.5 to get a 0/1 flag per output
binary_prediction = (prediction > 0.5).astype(int)

print(binary_prediction)


[[0 0 0 0 0 0]]


In [None]:

# Sample comment to score with the trained model
text_data = "you are shit"

# Encode the comment with the already-adapted `vectorizer` (batch of one)
input_text = vectorizer([text_data])

# Sigmoid scores, one per output unit
prediction = model.predict(input_text)

# Threshold the scores at 0.5 to get a 0/1 flag per output
binary_prediction = (prediction > 0.5).astype(int)

print(binary_prediction)


[[1 0 1 0 1 0]]


In [None]:
from sklearn.metrics import accuracy_score

# Evaluate on EVERY batch of the test split. Taking only the first batch via
# `.next()` would score just 32 comments, which is not a meaningful estimate.
# Labels and predictions are collected within the same pass over `test`, so they
# stay aligned even though the underlying pipeline is shuffled.
true_labels = []
predicted_labels = []
for test_X, test_y in test.as_numpy_iterator():
    # Sigmoid scores for this batch, thresholded at 0.5 into 0/1 flags
    predictions = model.predict_on_batch(test_X)
    binary_predictions = (predictions > 0.5).astype(int)

    # Flatten so each (comment, label) pair counts as one element
    true_labels.extend(test_y.flatten().tolist())
    predicted_labels.extend(binary_predictions.flatten().tolist())

if not true_labels:
    raise ValueError("The test split is empty; nothing to evaluate.")

# NOTE(review): this is element-wise accuracy over all flattened labels. If most
# labels are 0 it can look very high even for a weak model — consider also
# reporting precision/recall.
accuracy = accuracy_score(true_labels, predicted_labels)
print(f"Accuracy: {accuracy * 100:.2f}%")


Accuracy: 99.48%
