# Binary Classification. UCI Sentiment Labelled Sentences

In [1]:
# TensorFlow and tf.keras
import tensorflow as tf

# Helper libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os

# Report the TensorFlow build and the devices it can see.
print(f"Tensor Flow Version: {tf.__version__}")
# The GPU device list is empty when TensorFlow cannot see a GPU.
gpu = len(tf.config.list_physical_devices('GPU'))>0
print("GPU is", "available." if gpu else "NOT AVAILABLE!!!")
print(tf.config.list_physical_devices())

Tensor Flow Version: 2.8.0
GPU is available.
[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'), PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


## Import & Explore the Data (EDA)

In [4]:
# The file is read without a header row (header=None); the column
# names are taken from `colnames` instead.
colnames = ['sentence','positive']
raw = pd.read_csv(
    '../data/uci_sentiment_lablelled_sentences/imdb_labelled.txt',
    sep=" \t",
    lineterminator="\n",
    names=colnames,
    engine='python',
    header=None,
)
# engine='python': a separator longer than one character is treated as a
# regular expression, which the default C engine does not support.
raw

Unnamed: 0,sentence,positive
0,"A very, very, very slow-moving, aimless movie ...",0
1,Not sure who was more lost - the flat characte...,0
2,Attempting artiness with black & white and cle...,0
3,Very little music or anything to speak of.,0
4,The best scene in the movie was when Gerardo i...,1
...,...,...
995,I just got bored watching Jessice Lange take h...,0
996,"Unfortunately, any virtue in this film's produ...",0
997,"In a word, it is embarrassing.",0
998,Exceptionally bad!,0


In [5]:
# Display the dtype of each column of `raw`.
raw.dtypes

sentence    object
positive     int64
dtype: object

In [6]:
# Build the length column on a new frame (assign returns a copy) so that
# `raw` is not mutated as a side effect of this exploration step.
wrangler = raw.assign(length=raw.sentence.str.len())
# Keep the 20 rows with the longest sentences, longest first.
wrangler_sorted = wrangler.sort_values(by=['length'], ascending=False).head(20)
wrangler_sorted

Unnamed: 0,sentence,positive,length
620,"This is a masterful piece of film-making, with...",1,478
421,This movie is excellent!Angel is beautiful and...,1,380
390,Though The Wind and the Lion is told largely t...,1,335
298,I have to mention this and it is a huge SPOILE...,1,334
428,The use of slow-motion needlessly repeats itse...,0,320
621,"A mature, subtle script that suggests and occa...",1,316
804,The attempts at humor were pitiful and story i...,0,284
243,"The film has great actors, a master director, ...",1,277
309,But when someone strives for greatness and poe...,0,268
803,The acting from all involved and that includes...,0,268


In [7]:
# Read through the longest reviews, checking for HTML or other characters that need cleaning.
# Iterating the column by name avoids positional `[0]` lookups on a label-indexed row
# and does not assume that exactly 20 rows are present.
for sentence in wrangler_sorted['sentence']:
    print(sentence)
    print()

This is a masterful piece of film-making, with many themes simmering and occasionally boiling over in this warts and all study of the poet's bohemian, self-indulgent wartime years that span the aerial bombardments of London and the outward tranquillity of a Welsh coastal retreat - the borderlines between friendship, lust and love, dedication to art and experience versus practical concerns, jealousy, rivalry, cowardice and egotism versus heroism and self-sacrifice and more. 

This movie is excellent!Angel is beautiful and Scamp is adorable!His little yelps when hes scared,and the funniest parts are when:Scamp is caught under the curtain and when Angel and Scamp are singing 'Ive Never Had This Feeling Before'.I totally recommend this movie,its coming out on special edition on June 20.The cover has scamp on a garbage can and Angel underneath the lid. 

Though The Wind and the Lion is told largely through the eyes of the son, every member of the family can identify with one of the characte

## Load the Data

In [8]:
# Pair every sentence (cast to a string tensor) with its label (cast to int32);
# from_tensor_slices then yields one (sentence, label) element per row of `raw`.
sentences = tf.cast(raw.sentence.values, tf.string)
labels = tf.cast(raw.positive.values, tf.int32)
training_dataset = tf.data.Dataset.from_tensor_slices((sentences, labels))

# Preview only the first few elements; printing the whole dataset floods the output.
for features_tensor, target_tensor in training_dataset.take(10):
    print(f'features. {features_tensor} target:{target_tensor}')

Metal device set to: Apple M1
features. b'A very, very, very slow-moving, aimless movie about a distressed, drifting young man. ' target:0
features. b'Not sure who was more lost - the flat characters or the audience, nearly half of whom walked out. ' target:0
features. b'Attempting artiness with black & white and clever camera angles, the movie disappointed - became even more ridiculous - as the acting was poor and the plot and lines almost non-existent. ' target:0
features. b'Very little music or anything to speak of. ' target:0
features. b'The best scene in the movie was when Gerardo is trying to find a song that keeps running through his head. ' target:1
features. b"The rest of the movie lacks art, charm, meaning... If it's about emptiness, it works I guess because it's empty. " target:0
features. b'Wasted two hours. ' target:0
features. b'Saw the movie today and thought it was a good effort, good messages for kids. ' target:1
features. b'A bit predictable. ' target:0
features. b'Lo

2022-03-23 19:44:43.390555: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2022-03-23 19:44:43.390862: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [17]:
# Peek at the first component of a few dataset elements instead of the whole dataset.
# NOTE(review): the saved output for this cell shows int32 tensors, which does not
# match elem[0] (the string sentence) -- the cell was likely run out of order;
# re-run it to refresh the output.
for elem in training_dataset.take(10):
    print(elem[0])

tf.Tensor(0, shape=(), dtype=int32)
tf.Tensor(0, shape=(), dtype=int32)
tf.Tensor(0, shape=(), dtype=int32)
tf.Tensor(0, shape=(), dtype=int32)
tf.Tensor(1, shape=(), dtype=int32)
tf.Tensor(0, shape=(), dtype=int32)
tf.Tensor(0, shape=(), dtype=int32)
tf.Tensor(1, shape=(), dtype=int32)
tf.Tensor(0, shape=(), dtype=int32)
tf.Tensor(1, shape=(), dtype=int32)
tf.Tensor(1, shape=(), dtype=int32)
tf.Tensor(1, shape=(), dtype=int32)
tf.Tensor(1, shape=(), dtype=int32)
tf.Tensor(1, shape=(), dtype=int32)
tf.Tensor(1, shape=(), dtype=int32)
tf.Tensor(0, shape=(), dtype=int32)
tf.Tensor(1, shape=(), dtype=int32)
tf.Tensor(1, shape=(), dtype=int32)
tf.Tensor(1, shape=(), dtype=int32)
tf.Tensor(1, shape=(), dtype=int32)
tf.Tensor(1, shape=(), dtype=int32)
tf.Tensor(1, shape=(), dtype=int32)
tf.Tensor(1, shape=(), dtype=int32)
tf.Tensor(1, shape=(), dtype=int32)
tf.Tensor(1, shape=(), dtype=int32)
tf.Tensor(0, shape=(), dtype=int32)
tf.Tensor(0, shape=(), dtype=int32)
tf.Tensor(1, shape=(), dtype

## Prepare Data for Training (Text Vectorization)

In [None]:
VOCAB_SIZE = 10000  # upper bound on the number of tokens the encoder keeps
ADAPT_BATCH_SIZE = 64  # batch size used only while fitting the vocabulary
encoder = tf.keras.layers.TextVectorization(
    max_tokens=VOCAB_SIZE)
# Drop the labels and batch the text before adapting: adapt() consumes the
# dataset one element at a time, so an unbatched dataset costs one step per sentence.
encoder.adapt(
    training_dataset.map(lambda text, label: text).batch(ADAPT_BATCH_SIZE)
)

In [None]:
# Materialise the encoder's vocabulary as a NumPy array (so it can later be
# indexed with arrays of token ids) and show its first 20 entries.
vocabulary_terms = encoder.get_vocabulary()
vocab = np.array(vocabulary_terms)
vocab[:20]

In [None]:
# Pull a single batch of three (text, label) pairs. `example` has to be a 1-D
# batch of strings because the following cells index it as example[n]; taking
# three unbatched elements would leave it as one scalar string.
for example, label in training_dataset.batch(3).take(1):
    print('text:', example.numpy())
    print('label:', label.numpy())

In [None]:
# Run the sample text through the vectorizer, keep the first three entries
# along the leading axis, and convert the result to NumPy for display.
encoded_tensor = encoder(example)
encoded_example = encoded_tensor[:3].numpy()
encoded_example

In [None]:
# Show each sample next to the text rebuilt from its token ids.
for n in range(3):
    original_text = example[n].numpy()
    print("Original: ", original_text)
    # Look the ids up in the vocabulary array to get the tokens back.
    round_trip_tokens = vocab[encoded_example[n]]
    print("Round-trip: ", " ".join(round_trip_tokens))
    print()

In [None]:
# TODO: finish this cell. The statement below was left incomplete (no iterable,
# no body), which is a SyntaxError when the cell is run, so it is commented out.
# for elem in

## Create the Model

## Train the Model

## Evaluate the Model

## Plots of Accuracy & Loss Over Time

## Export the Model

## Make Predictions