## Loading Libraries

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow.keras import layers
import os

In [18]:
BASE_PATH = './data/'
datasetpath = f'{BASE_PATH}/movie.csv'
os.listdir(BASE_PATH)

['movie.csv']

In [4]:
df = pd.read_csv(datasetpath, encoding='utf-8')
df.head()

Unnamed: 0,text,label
0,I grew up (b. 1965) watching and loving the Th...,0
1,"When I put this movie in my DVD player, and sa...",0
2,Why do people who do not know what a particula...,0
3,Even though I have great interest in Biblical ...,0
4,Im a die hard Dads Army fan and nothing will e...,1


In [29]:
df.loc[0, 'text']

'I grew up (b. 1965) watching and loving the Thunderbirds. All my mates at school watched. We played "Thunderbirds" before school, during lunch and after school. We all wanted to be Virgil or Scott. No one wanted to be Alan. Counting down from 5 became an art form. I took my children to see the movie hoping they would get a glimpse of what I loved as a child. How bitterly disappointing. The only high point was the snappy theme tune. Not that it could compare with the original score of the Thunderbirds. Thankfully early Saturday mornings one television channel still plays reruns of the series Gerry Anderson and his wife created. Jonatha Frakes should hand in his directors chair, his version was completely hopeless. A waste of film. Utter rubbish. A CGI remake may be acceptable but replacing marionettes with Homo sapiens subsp. sapiens was a huge error of judgment.'

In [5]:
df.describe()

Unnamed: 0,label
count,40000.0
mean,0.499525
std,0.500006
min,0.0
25%,0.0
50%,0.0
75%,1.0
max,1.0


In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 40000 entries, 0 to 39999
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   text    40000 non-null  object
 1   label   40000 non-null  int64 
dtypes: int64(1), object(1)
memory usage: 625.1+ KB


In [7]:
df.isnull()

Unnamed: 0,text,label
0,False,False
1,False,False
2,False,False
3,False,False
4,False,False
...,...,...
39995,False,False
39996,False,False
39997,False,False
39998,False,False


In [8]:
df.columns

Index(['text', 'label'], dtype='object')

In [10]:
import nltk
from nltk import word_tokenize
from nltk.corpus import stopwords
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer

In [11]:
nltk.download('stopwords')
stop_words = stopwords.words('english')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\renua\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [12]:
train_x, test_x, y_train, y_test = train_test_split(df.text,df.label, test_size=0.25,random_state=123)

In [13]:
#TF-IDF
tfv = TfidfVectorizer(min_df=3,  max_features=None, strip_accents='unicode', analyzer='word',token_pattern=r'\w{1,}',
                    ngram_range=(1, 3), use_idf=1,smooth_idf=1,sublinear_tf=1, stop_words = 'english')

tfv.fit(list(train_x) + list(test_x))
train_x_tfv =  tfv.transform(train_x) 
test_x_tfv = tfv.transform(test_x)

In [14]:
train_x.shape

(30000,)

In [15]:
y_train.shape

(30000,)

In [16]:
test_x.shape, y_test.shape

((10000,), (10000,))

In [26]:
dataset = tf.data.experimental.make_csv_dataset(datasetpath, batch_size=32,
                                                select_columns=['\ufefftext', 'label'],
                                                label_name='label')

<TakeDataset element_spec=(OrderedDict([('\ufefftext', TensorSpec(shape=(32,), dtype=tf.string, name=None))]), TensorSpec(shape=(32,), dtype=tf.int32, name=None))>


In [36]:
for features, labels in dataset.take(1): # first batch
    print(labels)
    for key, value in features.items():
        print("{:20s}: {}".format(key,value.numpy()))

tf.Tensor([1 0 1 0 0 0 0 0 1 1 1 1 0 1 1 0 1 1 1 0 0 0 0 0 1 0 1 1 0 0 0 1], shape=(32,), dtype=int32)
 b'Astounding that something like this could find its way to be viewed by the public. I knew it was by Uwe Boll, & I found it in the bargain bin at a store for $2 (still pretty steep, considering) but morbid curiosity led me to view this, and: <br /><br />1). I am fairly sure this is a rip-off of Seven, Silence of the Lambs, and American Psycho, all rolled into one, with dialog that may have been written by preteens.<br /><br />2). Casper Van Dien plays the main character, and he\'s so absolutely bizarre and creepy that just about anyone would KNOW he must be the crazed serial killer.<br /><br />3). Jennifer Rubin plays the "good cop" that invites a serial killer to her apartment for a home cooked dinner, and what does she get for her trouble? I\'ll let you guess.<br /><br />4). Michael Pare plays an "intense" cop, who drives a VW Bug, new-style, that is, with a siren on it. A VW Bug.

In [24]:
dataset = dataset.map(lambda x, y: (tf.concat([tf.expand_dims(x['\ufefftext'], axis=-1)], axis=-1), y))
dataset

TypeError: in user code:

    File "C:\Users\renua\AppData\Local\Temp\ipykernel_17272\4275188195.py", line 1, in None  *
        lambda x, y: (tf.concat([tf.expand_dims(x['\ufefftext'], axis=-1)], axis=-1), y)

    TypeError: Only integers, slices (`:`), ellipsis (`...`), tf.newaxis (`None`) and scalar tf.int32/tf.int64 tensors are valid indices, got '\ufefftext'


## Building the model

In [21]:
model = tf.keras.Sequential([
    # layers.Input(shape=(30000, )),
    layers.Dense(32, activation='relu'),
    layers.Dense(32, activation='relu'),
    layers.Dense(64, activation='relu'),
    layers.Dense(64, activation='relu'),
    layers.Dense(1, activation='sigmoid')
])

In [22]:
model.compile('adam', tf.keras.losses.binary_crossentropy, metrics=['accuracy'])

In [23]:
model.fit(dataset, epochs=10)

Epoch 1/10


UnimplementedError: Graph execution error:

Detected at node 'sequential/dense/Cast' defined at (most recent call last):
    File "C:\Program Files\Python39\lib\runpy.py", line 197, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "C:\Program Files\Python39\lib\runpy.py", line 87, in _run_code
      exec(code, run_globals)
    File "d:\Documents\Mec2a\sentiment-analisys-with-MLP\venv\lib\site-packages\ipykernel_launcher.py", line 16, in <module>
      app.launch_new_instance()
    File "d:\Documents\Mec2a\sentiment-analisys-with-MLP\venv\lib\site-packages\traitlets\config\application.py", line 846, in launch_instance
      app.start()
    File "d:\Documents\Mec2a\sentiment-analisys-with-MLP\venv\lib\site-packages\ipykernel\kernelapp.py", line 677, in start
      self.io_loop.start()
    File "d:\Documents\Mec2a\sentiment-analisys-with-MLP\venv\lib\site-packages\tornado\platform\asyncio.py", line 199, in start
      self.asyncio_loop.run_forever()
    File "C:\Program Files\Python39\lib\asyncio\base_events.py", line 596, in run_forever
      self._run_once()
    File "C:\Program Files\Python39\lib\asyncio\base_events.py", line 1890, in _run_once
      handle._run()
    File "C:\Program Files\Python39\lib\asyncio\events.py", line 80, in _run
      self._context.run(self._callback, *self._args)
    File "d:\Documents\Mec2a\sentiment-analisys-with-MLP\venv\lib\site-packages\ipykernel\kernelbase.py", line 473, in dispatch_queue
      await self.process_one()
    File "d:\Documents\Mec2a\sentiment-analisys-with-MLP\venv\lib\site-packages\ipykernel\kernelbase.py", line 462, in process_one
      await dispatch(*args)
    File "d:\Documents\Mec2a\sentiment-analisys-with-MLP\venv\lib\site-packages\ipykernel\kernelbase.py", line 369, in dispatch_shell
      await result
    File "d:\Documents\Mec2a\sentiment-analisys-with-MLP\venv\lib\site-packages\ipykernel\kernelbase.py", line 664, in execute_request
      reply_content = await reply_content
    File "d:\Documents\Mec2a\sentiment-analisys-with-MLP\venv\lib\site-packages\ipykernel\ipkernel.py", line 355, in do_execute
      res = shell.run_cell(code, store_history=store_history, silent=silent)
    File "d:\Documents\Mec2a\sentiment-analisys-with-MLP\venv\lib\site-packages\ipykernel\zmqshell.py", line 532, in run_cell
      return super().run_cell(*args, **kwargs)
    File "d:\Documents\Mec2a\sentiment-analisys-with-MLP\venv\lib\site-packages\IPython\core\interactiveshell.py", line 2854, in run_cell
      result = self._run_cell(
    File "d:\Documents\Mec2a\sentiment-analisys-with-MLP\venv\lib\site-packages\IPython\core\interactiveshell.py", line 2900, in _run_cell
      return runner(coro)
    File "d:\Documents\Mec2a\sentiment-analisys-with-MLP\venv\lib\site-packages\IPython\core\async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "d:\Documents\Mec2a\sentiment-analisys-with-MLP\venv\lib\site-packages\IPython\core\interactiveshell.py", line 3098, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "d:\Documents\Mec2a\sentiment-analisys-with-MLP\venv\lib\site-packages\IPython\core\interactiveshell.py", line 3301, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "d:\Documents\Mec2a\sentiment-analisys-with-MLP\venv\lib\site-packages\IPython\core\interactiveshell.py", line 3361, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "C:\Users\renua\AppData\Local\Temp\ipykernel_17272\3033054226.py", line 1, in <cell line: 1>
      model.fit(dataset, epochs=10)
    File "d:\Documents\Mec2a\sentiment-analisys-with-MLP\venv\lib\site-packages\keras\utils\traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "d:\Documents\Mec2a\sentiment-analisys-with-MLP\venv\lib\site-packages\keras\engine\training.py", line 1384, in fit
      tmp_logs = self.train_function(iterator)
    File "d:\Documents\Mec2a\sentiment-analisys-with-MLP\venv\lib\site-packages\keras\engine\training.py", line 1021, in train_function
      return step_function(self, iterator)
    File "d:\Documents\Mec2a\sentiment-analisys-with-MLP\venv\lib\site-packages\keras\engine\training.py", line 1010, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "d:\Documents\Mec2a\sentiment-analisys-with-MLP\venv\lib\site-packages\keras\engine\training.py", line 1000, in run_step
      outputs = model.train_step(data)
    File "d:\Documents\Mec2a\sentiment-analisys-with-MLP\venv\lib\site-packages\keras\engine\training.py", line 859, in train_step
      y_pred = self(x, training=True)
    File "d:\Documents\Mec2a\sentiment-analisys-with-MLP\venv\lib\site-packages\keras\utils\traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "d:\Documents\Mec2a\sentiment-analisys-with-MLP\venv\lib\site-packages\keras\engine\base_layer.py", line 1096, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "d:\Documents\Mec2a\sentiment-analisys-with-MLP\venv\lib\site-packages\keras\utils\traceback_utils.py", line 92, in error_handler
      return fn(*args, **kwargs)
    File "d:\Documents\Mec2a\sentiment-analisys-with-MLP\venv\lib\site-packages\keras\engine\sequential.py", line 374, in call
      return super(Sequential, self).call(inputs, training=training, mask=mask)
    File "d:\Documents\Mec2a\sentiment-analisys-with-MLP\venv\lib\site-packages\keras\engine\functional.py", line 451, in call
      return self._run_internal_graph(
    File "d:\Documents\Mec2a\sentiment-analisys-with-MLP\venv\lib\site-packages\keras\engine\functional.py", line 589, in _run_internal_graph
      outputs = node.layer(*args, **kwargs)
    File "d:\Documents\Mec2a\sentiment-analisys-with-MLP\venv\lib\site-packages\keras\utils\traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "d:\Documents\Mec2a\sentiment-analisys-with-MLP\venv\lib\site-packages\keras\engine\base_layer.py", line 1096, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "d:\Documents\Mec2a\sentiment-analisys-with-MLP\venv\lib\site-packages\keras\utils\traceback_utils.py", line 92, in error_handler
      return fn(*args, **kwargs)
    File "d:\Documents\Mec2a\sentiment-analisys-with-MLP\venv\lib\site-packages\keras\layers\core\dense.py", line 166, in call
      inputs = tf.cast(inputs, dtype=self._compute_dtype_object)
Node: 'sequential/dense/Cast'
Cast string to float is not supported
	 [[{{node sequential/dense/Cast}}]] [Op:__inference_train_function_903]