# TensorFlow Neural Network Text Classification

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf
import tensorflow_hub as hub

In [42]:
headers = ["label", "title", "text"] 
data = pd.read_csv("Dataset Pertama/Dataset Pertama_train.csv", header=None, names=headers)

In [43]:
data.head()

Unnamed: 0,label,title,text
0,2,Stuning even for the non-gamer,This sound track was beautiful! It paints the ...
1,2,The best soundtrack ever to anything.,I'm reading a lot of reviews saying that this ...
2,2,Amazing!,This soundtrack is my favorite music of all ti...
3,2,Excellent Soundtrack,I truly like this soundtrack and I enjoy video...
4,2,"Remember, Pull Your Jaw Off The Floor After He...","If you've played the game, you know how divine..."


In [44]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3600000 entries, 0 to 3599999
Data columns (total 3 columns):
 #   Column  Dtype 
---  ------  ----- 
 0   label   int64 
 1   title   object
 2   text    object
dtypes: int64(1), object(2)
memory usage: 82.4+ MB


In [45]:
data.head()

Unnamed: 0,label,title,text
0,2,Stuning even for the non-gamer,This sound track was beautiful! It paints the ...
1,2,The best soundtrack ever to anything.,I'm reading a lot of reviews saying that this ...
2,2,Amazing!,This soundtrack is my favorite music of all ti...
3,2,Excellent Soundtrack,I truly like this soundtrack and I enjoy video...
4,2,"Remember, Pull Your Jaw Off The Floor After He...","If you've played the game, you know how divine..."


In [46]:
train, val, test = np.split(data.sample(frac=1), [int(0.8*len(data)), int(0.9*len(data))])

  return bound(*args, **kwds)


In [47]:
len(train), len(val), len(test)

(2880000, 360000, 360000)

In [48]:
def df_to_dataset(dataframe, shuffle=True, batch_size=8112):
    df = dataframe.copy()
    labels = df.pop('label').values
    df["combined_text"] = df["title"] + " " + df["text"]
    text_data = df["combined_text"].astype(str).values 
    ds = tf.data.Dataset.from_tensor_slices((text_data, labels))
    if shuffle:
        ds = ds.shuffle(buffer_size=len(df))
    ds = ds.batch(batch_size).prefetch(tf.data.AUTOTUNE)
    return ds

In [49]:
train_data = df_to_dataset(train)
val_data = df_to_dataset(val)
test_data = df_to_dataset(test)

In [50]:
embedding = "https://tfhub.dev/google/nnlm-en-dim50/2"
hub_layer = hub.KerasLayer(embedding, dtype=tf.string, trainable=True)

In [51]:
hub_layer(list(train_data)[0][0])

<tf.Tensor: shape=(8112, 50), dtype=float32, numpy=
array([[ 0.22393766,  0.16252904, -0.13703465, ..., -0.22534694,
        -0.04456679,  0.2940237 ],
       [ 0.88304293,  0.03469108,  0.03308841, ..., -0.32243773,
         0.2971019 ,  0.21285254],
       [ 0.20804287,  0.17614673, -0.10090926, ..., -0.08088206,
         0.2617907 ,  0.19353409],
       ...,
       [ 0.6389604 ,  0.20014927,  0.12062542, ..., -0.38112617,
         0.1652788 ,  0.17895806],
       [ 0.88293046,  0.02164111, -0.05593093, ..., -0.2426435 ,
        -0.04641026, -0.05770803],
       [ 0.17232743,  0.15156707, -0.06089139, ..., -0.15017852,
         0.02013906,  0.13525479]], dtype=float32)>

In [52]:
model = tf.keras.Sequential([
     tf.keras.layers.Lambda(lambda x: hub_layer(x)),
     tf.keras.layers.Dense(16, activation='relu'),
      tf.keras.layers.Dense(16, activation='relu'),
     tf.keras.layers.Dense(1, activation='sigmoid')
])

In [53]:
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss=tf.keras.losses.BinaryCrossentropy(),
    metrics=["accuracy"]
)

In [54]:
model.evaluate(train_data)

[1m356/356[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 80ms/step - accuracy: 0.2819 - loss: 0.6808


[0.6807708144187927, 0.28209272027015686]

In [55]:
model.evaluate(val_data)

[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 72ms/step - accuracy: 0.2837 - loss: 0.6804


[0.6807979345321655, 0.28261667490005493]

In [56]:
history = model.fit(train_data, epochs=10, validation_data=val_data)

Epoch 1/10
[1m356/356[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 86ms/step - accuracy: 0.4882 - loss: -7.2805 - val_accuracy: 0.4994 - val_loss: -132.1070
Epoch 2/10
[1m356/356[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 87ms/step - accuracy: 0.5002 - loss: -319.4212 - val_accuracy: 0.4994 - val_loss: -1323.9109
Epoch 3/10
[1m356/356[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 83ms/step - accuracy: 0.5003 - loss: -1974.9924 - val_accuracy: 0.4994 - val_loss: -4627.9932
Epoch 4/10
[1m356/356[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 85ms/step - accuracy: 0.4999 - loss: -5931.4214 - val_accuracy: 0.4994 - val_loss: -10673.3877
Epoch 5/10
[1m356/356[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 85ms/step - accuracy: 0.4999 - loss: -12733.2422 - val_accuracy: 0.4994 - val_loss: -19867.2461
Epoch 6/10
[1m356/356[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 85ms/step - accuracy: 0.4997 - loss: -22779.4395 - val_accuracy:

In [57]:
import tensorflow as tf

print("Num GPUs Available:", len(tf.config.list_physical_devices('GPU')))


Num GPUs Available: 0


In [58]:
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        # Set TensorFlow to only use the first GPU (if multiple are available)
        tf.config.experimental.set_memory_growth(gpus[0], True)
        tf.config.set_visible_devices(gpus[0], 'GPU')
        print("Using GPU:", gpus[0])
    except RuntimeError as e:
        print(e)


In [59]:
from tensorflow.python.client import device_lib

device_lib.list_local_devices()

[name: "/device:CPU:0"
 device_type: "CPU"
 memory_limit: 268435456
 locality {
 }
 incarnation: 11755084264558383456
 xla_global_id: -1]

In [60]:
import tensorflow as tf

gpus = tf.config.list_physical_devices('GPU')
for gpu in gpus:
    print("Name:", gpu.name, "  Type:", gpu.device_type)