In [1]:
# Import the necessary libraries
import pandas as pd
import tensorflow as tf
import tensorflow_hub as hub
import numpy as np
import json
from tensorflow.keras.regularizers import l2

In [2]:
# Read the data from the csv files
train_df = pd.read_csv(r"train.csv")
test_df = pd.read_csv(r"test.csv")

train_data = train_df['text'].tolist()
train_labels = train_df['label'].tolist() # 0 = negative, 1 = positive

test_data = test_df['text'].tolist()

#print(len(train_data))
#print(len(train_labels))
#print(len(test_data))


In [3]:
# Convert to dataset
train_dataset = tf.data.Dataset.from_tensor_slices((train_data, train_labels))

In [4]:
train_examples_batch, train_labels_batch = next(iter(train_dataset.batch(10)))
train_examples_batch

<tf.Tensor: shape=(10,), dtype=string, numpy=
array([b'i really do recommend this to anyone in need of a new player',
       b'very good every day camera fits nicely in the pocket of my jeans and takes quality photos',
       b'but , dollar for dollar , this dvd player is probably the best out there',
       b"i got this phone yesterday and didn ' t find any problems with it yet",
       b'1 ) price gb of storage',
       b'one cabinet shop has been using one regularly in a router table for 11 years without a problem',
       b'i will say that the os that the phone runs does have a few issues',
       b'this model appears to be especially good',
       b"i find that it is stable in my hands and its ' weight actually contributes to that stability",
       b'the catch is that , while it plays movies just fine , it has refused to read second discs with the movie extras on them on the two occasions when i tried to do that'],
      dtype=object)>

In [5]:
train_labels_batch

<tf.Tensor: shape=(10,), dtype=int32, numpy=array([1, 1, 1, 1, 1, 1, 0, 1, 1, 0])>

In [6]:
AUTOTUNE = tf.data.AUTOTUNE
train_ds = train_dataset.cache().prefetch(buffer_size=AUTOTUNE)


In [7]:
embedding = "https://tfhub.dev/google/nnlm-en-dim50/2"
hub_layer = hub.KerasLayer(embedding, input_shape=[], 
                           dtype=tf.string, trainable=True)
hub_layer(train_examples_batch[:3])

<tf.Tensor: shape=(3, 50), dtype=float32, numpy=
array([[ 0.19757974, -0.0186452 , -0.08237637,  0.3675822 , -0.04530753,
         0.20239483,  0.08419446,  0.04277235, -0.1744885 ,  0.42727035,
        -0.05284091,  0.13069119, -0.13883512, -0.11575663,  0.00941333,
         0.05545775, -0.10175671,  0.08229759,  0.10384491, -0.21822135,
        -0.02120312,  0.08206433,  0.15686706, -0.02654655, -0.14630347,
         0.19497168, -0.70362455,  0.06724163, -0.04180326, -0.1357327 ,
        -0.00549761,  0.20079505,  0.07402345, -0.13822946, -0.12910356,
        -0.06940438,  0.17769675, -0.22498287, -0.02418699, -0.32836828,
         0.1543761 ,  0.08618873, -0.11317721,  0.16600314, -0.06893973,
        -0.21479215, -0.02556792, -0.20228562,  0.12176652,  0.2922582 ],
       [ 0.13493924,  0.08447167,  0.10208606,  0.33875805, -0.39271924,
        -0.11495312,  0.2911141 , -0.12646888, -0.21781461,  0.14495888,
         0.24699204,  0.21629849, -0.07006254,  0.11604493, -0.06587031,
 

In [8]:
model = tf.keras.Sequential()
model.add(hub_layer)
model.add(tf.keras.layers.Dense(64, activation='relu', kernel_regularizer=l2(0.001)))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.Dropout(0.2))
model.add(tf.keras.layers.Dense(32, activation='relu', kernel_regularizer=l2(0.001)))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.Dropout(0.2))
model.add(tf.keras.layers.Dense(1, kernel_regularizer=l2(0.001)))

model.summary()

Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089


Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 keras_layer (KerasLayer)    (None, 50)                48190600  
                                                                 
 dense (Dense)               (None, 64)                3264      
                                                                 
 batch_normalization (BatchN  (None, 64)               256       
 ormalization)                                                   
                                                                 
 dropout (Dropout)           (None, 64)                0         
                                                                 
 dense_1 (Dense)             (None, 32)                2080      
                                                                 
 batch_normalization_1 (Batc  (None, 32)               128       
 hNormalization)                                        

In [9]:
model.compile(optimizer='adam',
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              metrics=(tf.metrics.BinaryAccuracy(threshold=0.0)))

In [10]:
history = model.fit(train_dataset.shuffle(500).batch(50),
                    epochs=20,
                    verbose=1)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [11]:
probability_model = tf.keras.Sequential([model, 
                                         tf.keras.layers.Activation('sigmoid')])


In [12]:
predictions = probability_model.predict(test_data)
predictions



array([[0.7498362 ],
       [0.81120735],
       [0.9272246 ],
       [0.88177866],
       [0.91660416],
       [0.37024584],
       [0.5670207 ],
       [0.8485461 ],
       [0.6456509 ],
       [0.8689728 ],
       [0.7103296 ],
       [0.6784979 ],
       [0.76707596],
       [0.96740067],
       [0.8067341 ],
       [0.89576834],
       [0.3853816 ],
       [0.62183017],
       [0.8497489 ],
       [0.5206521 ],
       [0.9298378 ],
       [0.92433745],
       [0.863692  ],
       [0.87969023],
       [0.94096446],
       [0.9445191 ],
       [0.952483  ],
       [0.804348  ],
       [0.9535071 ],
       [0.94792366],
       [0.85923904],
       [0.3319982 ],
       [0.8467337 ],
       [0.95850015],
       [0.8270234 ],
       [0.42124242],
       [0.90024835],
       [0.94142175],
       [0.6938723 ],
       [0.63876617],
       [0.6163538 ],
       [0.12690559],
       [0.6392217 ],
       [0.49973494],
       [0.6346485 ],
       [0.96791095],
       [0.86841446],
       [0.884

In [13]:
# Create JSON file with predictions

# Create a dictionary with the predictions
predictions_dict = {}
for i in range(0, len(predictions)):
    predictions_dict[str(i)] = (int)(np.round(predictions[i]))

# Save the dictionary to a JSON file
with open('predictions4.json', 'w') as f:
    json.dump(predictions_dict, f)