In [2]:
!kaggle datasets download -d pierremegret/dialogue-lines-of-the-simpsons

401 - Unauthorized


In [9]:
!head simpsons_dataset.csv

raw_character_text,spoken_words
Miss Hoover,"No, actually, it was a little of both. Sometimes when a disease is in all the magazines and all the news shows, it's only natural that you think you have it."
Lisa Simpson,Where's Mr. Bergstrom?
Miss Hoover,I don't know. Although I'd sure like to talk to him. He didn't touch my lesson plan. What did he teach you?
Lisa Simpson,That life is worth living.
Edna Krabappel-Flanders,"The polls will be open from now until the end of recess. Now, just in case any of you have decided to put any thought into this, we'll have our final statements. Martin?"
Martin Prince,I don't think there's anything left to say.
Edna Krabappel-Flanders,Bart?
Bart Simpson,Victory party under the slide!
,


In [2]:
!unzip dialogue-lines-of-the-simpsons.zip

Archive:  dialogue-lines-of-the-simpsons.zip
  inflating: simpsons_dataset.csv    


In [3]:
import pandas as pd

In [19]:
lines_df = pd.read_csv('simpsons_dataset.csv')
lines_df = lines_df.dropna()
characters = lines_df['raw_character_text'].values
lines = lines_df['spoken_words'].values

In [20]:
lines_df.head(20)

Unnamed: 0,raw_character_text,spoken_words
0,Miss Hoover,"No, actually, it was a little of both. Sometim..."
1,Lisa Simpson,Where's Mr. Bergstrom?
2,Miss Hoover,I don't know. Although I'd sure like to talk t...
3,Lisa Simpson,That life is worth living.
4,Edna Krabappel-Flanders,The polls will be open from now until the end ...
5,Martin Prince,I don't think there's anything left to say.
6,Edna Krabappel-Flanders,Bart?
7,Bart Simpson,Victory party under the slide!
9,Lisa Simpson,Mr. Bergstrom! Mr. Bergstrom!
10,Landlady,"Hey, hey, he Moved out this morning. He must h..."


In [17]:
lines[:1000]

array(["No, actually, it was a little of both. Sometimes when a disease is in all the magazines and all the news shows, it's only natural that you think you have it.",
       "Where's Mr. Bergstrom?",
       "I don't know. Although I'd sure like to talk to him. He didn't touch my lesson plan. What did he teach you?",
       'That life is worth living.',
       "The polls will be open from now until the end of recess. Now, just in case any of you have decided to put any thought into this, we'll have our final statements. Martin?",
       "I don't think there's anything left to say.", 'Bart?',
       'Victory party under the slide!', nan,
       'Mr. Bergstrom! Mr. Bergstrom!',
       'Hey, hey, he Moved out this morning. He must have a new job -- he took his Copernicus costume.',
       'Do you know where I could find him?',
       "I think he's taking the next train to Capital City.",
       'The train, how like him... traditional, yet environmentally sound.',
       "Yes, and it's bee

In [22]:
import spacy
docs = []
nlp = spacy.load('en_core_web_lg')
count = 0
for doc in nlp.pipe(lines):
    docs.append(doc)

In [27]:
vectors = [doc.vector for doc in docs]

In [33]:
lines_df['vectors'] = vectors

In [40]:
lines_df.head()

Unnamed: 0,raw_character_text,spoken_words,vectors
0,Miss Hoover,"No, actually, it was a little of both. Sometim...","[-0.07657106, 0.26351482, -0.17721403, -0.0891..."
1,Lisa Simpson,Where's Mr. Bergstrom?,"[-0.0003000036, 0.401856, 0.020902596, -0.0411..."
2,Miss Hoover,I don't know. Although I'd sure like to talk t...,"[-0.011891552, 0.2516941, -0.27629453, -0.1274..."
3,Lisa Simpson,That life is worth living.,"[-0.006498333, 0.30273068, -0.06274977, -0.257..."
4,Edna Krabappel-Flanders,The polls will be open from now until the end ...,"[0.071594164, 0.19087753, -0.110537454, -0.004..."


In [23]:
from sklearn.neighbors import NearestNeighbors

In [28]:
nn  = NearestNeighbors(n_neighbors=5, algorithm='ball_tree')
nn.fit(vectors)

NearestNeighbors(algorithm='ball_tree', leaf_size=30, metric='minkowski',
                 metric_params=None, n_jobs=None, n_neighbors=5, p=2,
                 radius=1.0)

In [30]:
def get_vectors_of_string(inp_str):
    return nlp(inp_str).vector

In [48]:
test_vect = get_vectors_of_string("Mountain Dew or Crab Juice")

In [49]:
neighbs = nn.kneighbors([test_vect])

In [41]:
def find_vector(vect):
    return lines_df.apply(lambda x: x if x['vectors'] == vect else '', axis=1)

In [46]:
neighbs[1][0]

array([125445, 122311,  32418,  34000,  47689])

In [50]:
lines_df.iloc[neighbs[1][0]]

Unnamed: 0,raw_character_text,spoken_words,vectors
40649,Vendor,Mountain Dew or crab juice.,"[-0.120053835, 0.07410167, 0.085148335, -0.402..."
98465,Michael,"Dandelion greens, a sprig of wild dill, fresh ...","[-0.07447155, 0.2494231, -0.045560263, -0.1165..."
114104,Teenage Stagehand,"Ms. Nebraska, your purified water and fried ch...","[-0.08589353, 0.13033754, -0.015297831, -0.121..."
5433,Marge Simpson,Fruit leather... Tree Fresh Imitation Orange D...,"[-0.1189513, 0.02974447, -0.006639198, -0.2499..."
90141,Homer Simpson,"Well, I invented a Popsicle made of Mountain Dew.","[0.019181365, 0.17568728, -0.12004599, -0.3499..."


In [51]:
def find_quotes(inp_str):
    vect = get_vectors_of_string(inp_str)
    closest_quotes = nn.kneighbors([vect])
    return lines_df.iloc[closest_quotes[1][0]]


In [59]:
find_quotes("they're all in the hammock district")

Unnamed: 0,raw_character_text,spoken_words,vectors
101746,Gary Chalmers,But you do live in the district.,"[0.026252124, 0.17488113, -0.1543875, -0.08782..."
34275,Hank Scorpio,"That might... Matter of fact, they're all in t...","[-0.0005717829, 0.19351904, -0.08779617, -0.11..."
87637,CHLOE,Two things are certain -- the mayor is in deep...,"[0.026974047, 0.052787412, -0.14665185, -0.108..."
20066,Seymour Skinner,"If the jury is deadlocked, they're put up in a...","[0.021742309, 0.17655937, -0.11094219, -0.0486..."
123963,Father,What the hell are you making chairs for? In th...,"[-0.022090739, 0.13985485, -0.12785573, -0.086..."


In [60]:
find_quotes("steamed hams")

Unnamed: 0,raw_character_text,spoken_words,vectors
153503,Homer Simpson,Smothered pork chops.,"[-0.38019222, -0.1277625, 0.34242252, 0.077048..."
49564,Homer Simpson,Steamed Maine cabbages!,"[-0.277914, -0.041433744, -0.075511254, -0.372..."
32674,Gary Chalmers,"For ""steamed hams.""","[-0.20388983, 0.04237333, -0.18209751, -0.1394..."
32664,Gary Chalmers,"You call hamburgers ""steamed hams?""","[-0.22379924, 0.09646686, -0.088719375, 0.0168..."
110504,Moe Szyslak,Ham sandwiches!,"[-0.33784032, -0.18016668, 0.46686664, 0.22452..."


In [75]:
import tensorflow as tf
import tensorflow_hub as hub
import numpy as np

In [69]:
!pip install tensorflow_hub

Collecting tensorflow_hub
[?25l  Downloading https://files.pythonhosted.org/packages/b5/be/f18c352d84382d9c795a0f37eaf16d42ace7d161fbb0ad20bdcd5e550015/tensorflow_hub-0.5.0-py2.py3-none-any.whl (78kB)
[K     |████████████████████████████████| 81kB 12.8MB/s eta 0:00:01
Installing collected packages: tensorflow-hub
Successfully installed tensorflow-hub-0.5.0


In [77]:
def heatmap(x_labels, y_labels, values):
    fig, ax = plt.subplots()
    im = ax.imshow(values)

    # We want to show all ticks...
    ax.set_xticks(np.arange(len(x_labels)))
    ax.set_yticks(np.arange(len(y_labels)))
    # ... and label them with the respective list entries
    ax.set_xticklabels(x_labels)
    ax.set_yticklabels(y_labels)

    # Rotate the tick labels and set their alignment.
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right", fontsize=10,
         rotation_mode="anchor")

    # Loop over data dimensions and create text annotations.
    for i in range(len(y_labels)):
        for j in range(len(x_labels)):
            text = ax.text(j, i, "%.2f"%values[i, j],
                           ha="center", va="center", color="w", 
fontsize=6)

    fig.tight_layout()
    plt.show()

In [78]:
module_url = "https://tfhub.dev/google/universal-sentence-encoder/1?tf-hub-format=compressed"

# Import the Universal Sentence Encoder's TF Hub module
embed = hub.Module(module_url)

# sample text
messages = [
# Smartphones
"My phone is not good.",
"Your cellphone looks great.",

# Weather
"Will it snow tomorrow?",
"Recently a lot of hurricanes have hit the US",

# Food and health
"An apple a day, keeps the doctors away",
"Eating strawberries is healthy",
]

similarity_input_placeholder = tf.placeholder(tf.string, shape=(None))
similarity_message_encodings = embed(similarity_input_placeholder)
with tf.Session() as session:
    session.run(tf.global_variables_initializer())
    session.run(tf.tables_initializer())
    message_embeddings_ = session.run(similarity_message_encodings, feed_dict={similarity_input_placeholder: messages})

    corr = np.inner(message_embeddings_, message_embeddings_)
    print(corr)
    heatmap(messages, messages, corr)

InternalError: Dst tensor is not initialized.
	 [[node checkpoint_initializer_73 (defined at /home/will/anaconda3/envs/gpu/lib/python3.6/site-packages/tensorflow_hub/native_module.py:407) ]]

Original stack trace for 'checkpoint_initializer_73':
  File "/home/will/anaconda3/envs/gpu/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/home/will/anaconda3/envs/gpu/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/home/will/anaconda3/envs/gpu/lib/python3.6/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/home/will/anaconda3/envs/gpu/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/home/will/anaconda3/envs/gpu/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 563, in start
    self.io_loop.start()
  File "/home/will/anaconda3/envs/gpu/lib/python3.6/site-packages/tornado/platform/asyncio.py", line 148, in start
    self.asyncio_loop.run_forever()
  File "/home/will/anaconda3/envs/gpu/lib/python3.6/asyncio/base_events.py", line 438, in run_forever
    self._run_once()
  File "/home/will/anaconda3/envs/gpu/lib/python3.6/asyncio/base_events.py", line 1451, in _run_once
    handle._run()
  File "/home/will/anaconda3/envs/gpu/lib/python3.6/asyncio/events.py", line 145, in _run
    self._callback(*self._args)
  File "/home/will/anaconda3/envs/gpu/lib/python3.6/site-packages/tornado/ioloop.py", line 690, in <lambda>
    lambda f: self._run_callback(functools.partial(callback, future))
  File "/home/will/anaconda3/envs/gpu/lib/python3.6/site-packages/tornado/ioloop.py", line 743, in _run_callback
    ret = callback()
  File "/home/will/anaconda3/envs/gpu/lib/python3.6/site-packages/tornado/gen.py", line 787, in inner
    self.run()
  File "/home/will/anaconda3/envs/gpu/lib/python3.6/site-packages/tornado/gen.py", line 748, in run
    yielded = self.gen.send(value)
  File "/home/will/anaconda3/envs/gpu/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 365, in process_one
    yield gen.maybe_future(dispatch(*args))
  File "/home/will/anaconda3/envs/gpu/lib/python3.6/site-packages/tornado/gen.py", line 209, in wrapper
    yielded = next(result)
  File "/home/will/anaconda3/envs/gpu/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 272, in dispatch_shell
    yield gen.maybe_future(handler(stream, idents, msg))
  File "/home/will/anaconda3/envs/gpu/lib/python3.6/site-packages/tornado/gen.py", line 209, in wrapper
    yielded = next(result)
  File "/home/will/anaconda3/envs/gpu/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 542, in execute_request
    user_expressions, allow_stdin,
  File "/home/will/anaconda3/envs/gpu/lib/python3.6/site-packages/tornado/gen.py", line 209, in wrapper
    yielded = next(result)
  File "/home/will/anaconda3/envs/gpu/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 294, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/home/will/anaconda3/envs/gpu/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 536, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/home/will/anaconda3/envs/gpu/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2855, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "/home/will/anaconda3/envs/gpu/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2881, in _run_cell
    return runner(coro)
  File "/home/will/anaconda3/envs/gpu/lib/python3.6/site-packages/IPython/core/async_helpers.py", line 68, in _pseudo_sync_runner
    coro.send(None)
  File "/home/will/anaconda3/envs/gpu/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3058, in run_cell_async
    interactivity=interactivity, compiler=compiler, result=result)
  File "/home/will/anaconda3/envs/gpu/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3249, in run_ast_nodes
    if (await self.run_code(code, result,  async_=asy)):
  File "/home/will/anaconda3/envs/gpu/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3326, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-78-838d8046134e>", line 4, in <module>
    embed = hub.Module(module_url)
  File "/home/will/anaconda3/envs/gpu/lib/python3.6/site-packages/tensorflow_hub/module.py", line 170, in __init__
    tags=self._tags)
  File "/home/will/anaconda3/envs/gpu/lib/python3.6/site-packages/tensorflow_hub/native_module.py", line 340, in _create_impl
    name=name)
  File "/home/will/anaconda3/envs/gpu/lib/python3.6/site-packages/tensorflow_hub/native_module.py", line 399, in __init__
    self._init_state(name)
  File "/home/will/anaconda3/envs/gpu/lib/python3.6/site-packages/tensorflow_hub/native_module.py", line 407, in _init_state
    self._variable_map)
  File "/home/will/anaconda3/envs/gpu/lib/python3.6/site-packages/tensorflow/python/training/checkpoint_utils.py", line 291, in init_from_checkpoint
    init_from_checkpoint_fn)
  File "/home/will/anaconda3/envs/gpu/lib/python3.6/site-packages/tensorflow/python/distribute/distribute_lib.py", line 1684, in merge_call
    return self._merge_call(merge_fn, args, kwargs)
  File "/home/will/anaconda3/envs/gpu/lib/python3.6/site-packages/tensorflow/python/distribute/distribute_lib.py", line 1691, in _merge_call
    return merge_fn(self._strategy, *args, **kwargs)
  File "/home/will/anaconda3/envs/gpu/lib/python3.6/site-packages/tensorflow/python/training/checkpoint_utils.py", line 286, in <lambda>
    ckpt_dir_or_file, assignment_map)
  File "/home/will/anaconda3/envs/gpu/lib/python3.6/site-packages/tensorflow/python/training/checkpoint_utils.py", line 334, in _init_from_checkpoint
    _set_variable_or_list_initializer(var, ckpt_file, tensor_name_in_ckpt)
  File "/home/will/anaconda3/envs/gpu/lib/python3.6/site-packages/tensorflow/python/training/checkpoint_utils.py", line 458, in _set_variable_or_list_initializer
    _set_checkpoint_initializer(variable_or_list, ckpt_file, tensor_name, "")
  File "/home/will/anaconda3/envs/gpu/lib/python3.6/site-packages/tensorflow/python/training/checkpoint_utils.py", line 412, in _set_checkpoint_initializer
    ckpt_file, [tensor_name], [slice_spec], [base_type], name=name)[0]
  File "/home/will/anaconda3/envs/gpu/lib/python3.6/site-packages/tensorflow/python/ops/gen_io_ops.py", line 1696, in restore_v2
    name=name)
  File "/home/will/anaconda3/envs/gpu/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 788, in _apply_op_helper
    op_def=op_def)
  File "/home/will/anaconda3/envs/gpu/lib/python3.6/site-packages/tensorflow/python/util/deprecation.py", line 507, in new_func
    return func(*args, **kwargs)
  File "/home/will/anaconda3/envs/gpu/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3616, in create_op
    op_def=op_def)
  File "/home/will/anaconda3/envs/gpu/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 2005, in __init__
    self._traceback = tf_stack.extract_stack()
