# IMPORTING AND LOADING

In [16]:
import subprocess
import torch
import torch.nn as nn
import time
from torch.autograd import Variable
from torch.utils.data import Dataset
import pandas as pd
import numpy as np

# Sub-directory of data/ that holds the training arrays for this experiment.
experiment = "seperate_models"

# Only the first 2000 rows of each saved array are kept.
train_vectors = np.load("data/" + experiment + "/train_vectors.npy")[:2000]
train_sentences = np.load("data/" + experiment + "/train_sentences.npy")[:2000]

# Stride-5 split: entry j of each tuple holds rows j, j+5, j+10, ...
# NOTE(review): presumably every run of 5 consecutive rows is one story, so
# entry j collects the j-th sentence of each story -- confirm against the data.
vec = tuple(train_vectors[start::5] for start in range(5))
sen = tuple(train_sentences[start::5] for start in range(5))

# datasetN stacks the first N+1 strided slices along a new leading axis.
dataset1, dataset2, dataset3, dataset4 = (
    np.asarray(vec[:count]) for count in range(2, 6)
)

# Drop the reference to the intermediate tuple.
vec = None

# sentencesN mirrors datasetN for the sentence array.
sentences1, sentences2, sentences3, sentences4 = (
    np.asarray(sen[:count]) for count in range(2, 6)
)

# Drop the reference to the intermediate tuple.
sen = None


# MODEL DEFINITION AND PARAMETERS

In [17]:
class BasicGRU(torch.nn.Module):
    """Bidirectional GRU that maps a padded batch of vector sequences to one vector each.

    The two GRU directions are summed, the step at each sequence's own last
    valid position is selected, and the result is passed through a linear layer.

    Parameters
    ----------
    hidden_size : int
        Used as the GRU input size, the GRU hidden size and the in/out size of
        the final linear layer.
    n_layers : int, optional
        Number of stacked GRU layers (default 1).
    """

    # torch.nn is referenced by its fully-qualified name throughout so the class
    # does not depend on the short ``nn`` alias still being bound to the module
    # when this cell is (re)run.
    def __init__(self, hidden_size, n_layers=1):
        super(BasicGRU, self).__init__()
        self.n_layers = n_layers
        self.hidden_size = hidden_size
        self.gru = torch.nn.GRU(hidden_size, hidden_size, n_layers, dropout=0, bidirectional=True)
        self.lin = torch.nn.Linear(hidden_size, hidden_size)

    def forward(self, input_seq, input_lengths, hidden=None):
        """Encode a batch of sequences.

        Parameters
        ----------
        input_seq : Tensor
            Padded, batch-first input (``batch_first=True`` is passed to the
            packing utilities); the last dimension must equal ``hidden_size``.
        input_lengths : Tensor or list of int
            Number of valid steps of every sequence.  ``pack_padded_sequence``
            is called with its defaults, which in current PyTorch releases
            expect the lengths in decreasing order.
        hidden : Tensor, optional
            Initial hidden state forwarded to the GRU (default ``None``).

        Returns
        -------
        Tensor
            Shape ``(batch, 1, hidden_size)``.
        """
        packed = torch.nn.utils.rnn.pack_padded_sequence(input_seq, input_lengths, batch_first=True)

        outputs, hidden = self.gru(packed, hidden)

        outputs, out_lengths = torch.nn.utils.rnn.pad_packed_sequence(outputs, batch_first=True)

        # Sum the forward and backward halves of the bidirectional output.
        outputs = outputs[:, :, :self.hidden_size] + outputs[:, :, self.hidden_size:]

        # Select each sequence's own last valid step.  Indexing with -1 would
        # read zero padding for every sequence shorter than the longest one in
        # the batch; for equal-length batches both forms pick the same step.
        last_step = out_lengths.to(outputs.device) - 1
        rows = torch.arange(outputs.size(0), device=outputs.device)
        final = outputs[rows, last_step]

        output = self.lin(final.unsqueeze(1))
        return output

In [18]:
class StoryVectors(Dataset):
    """Dataset that yields (context vectors, context length, target vector, sentences).

    Parameters
    ----------
    dataset : array
        Indexed as ``dataset[slot][idx]``.  ``dataset.shape[0]`` is the number
        of slots per item and ``dataset.shape[1]`` the number of items.
    sentences : array
        Indexed as ``sentences[slot][idx]``, parallel to ``dataset``.
    """

    def __init__(self, dataset, sentences):
        self.dataset = dataset
        # Number of slots per item: all but the last are inputs, the last is the target.
        self.type = self.dataset.shape[0]
        self.sen = sentences

    def __len__(self):
        """Number of items, i.e. the size of the second axis of ``dataset``."""
        return self.dataset.shape[1]

    def __getitem__(self, idx):
        """Return ``[X, len(X), y, sentences]`` for item ``idx``.

        ``X`` holds the first ``type - 1`` slots, ``y`` is a one-element list
        with the last slot, and ``sentences`` holds all ``type`` sentences.

        Raises
        ------
        ValueError
            If the dataset has fewer than two slots, so no input/target split exists.
        """
        if self.type < 2:
            raise ValueError(
                "StoryVectors needs at least 2 slots along axis 0, got %d" % self.type
            )

        # Works for any slot count >= 2 instead of a fixed 2..5 ladder.
        n_context = self.type - 1
        X = [self.dataset[slot][idx] for slot in range(n_context)]
        y = [self.dataset[n_context][idx]]
        sentences = [self.sen[slot][idx] for slot in range(self.type)]

        return [X, len(X), y, sentences]
      
def vocab_collate_func(batch):
    """Collate a list of dataset items into batch tensors.

    Each item in ``batch`` is indexed positionally: 0 = inputs, 1 = length,
    2 = target, 3 = sentences.

    Returns
    -------
    list
        ``[FloatTensor of inputs, LongTensor of lengths, FloatTensor of
        targets, list of per-item sentences]``.
    """
    inputs = [sample[0] for sample in batch]
    seq_lengths = [sample[1] for sample in batch]
    targets = [sample[2] for sample in batch]
    texts = [sample[3] for sample in batch]

    return [
        torch.FloatTensor(inputs),
        torch.LongTensor(seq_lengths),
        torch.FloatTensor(targets),
        texts,
    ]

In [19]:
def _load_on_cpu(path):
    """Load a saved object with ``torch.load``, remapping ``cuda:0`` storages to the CPU."""
    # SECURITY NOTE: torch.load unpickles the file, which can execute arbitrary
    # code -- only load checkpoint files that come from a trusted source.
    return torch.load(path, map_location={'cuda:0': 'cpu'})


# The loaded objects are used directly as callable modules by `suggestions`;
# modelN is the one selected when the story so far has N sentences.
model1 = _load_on_cpu("model/model1_2000.tar")
model2 = _load_on_cpu("model/model2_2000.tar")
model3 = _load_on_cpu("model/model3_2000.tar")
model4 = _load_on_cpu("model/model4_2000.tar")

### TEST LOADER

In [51]:
def nn(qvec, vectors, array, k=5):
    """Print the ``k`` entries of ``array`` whose vectors score highest against ``qvec``.

    The score is the dot product of ``qvec`` with every row of ``vectors``.
    One line is printed per hit, highest score first, in the form
    ``(entry, score) row_index``.  Nothing is returned.

    NOTE: defining this function rebinds the module-level name ``nn``, which
    the import cell binds to ``torch.nn``; code that relies on ``nn`` being
    the torch module has to run before this definition.
    """
    print("computing scores")
    similarity = np.dot(qvec, vectors.T).flatten()
    # argsort is ascending, so reverse it to rank from best to worst.
    ranking = np.argsort(similarity)[::-1]
    for row in ranking[:k]:
        print((array[row], similarity[row]), row)
        
def suggestions(vectors, sentences, dataset_vectors, dataset_sentences, k=5):
    """Predict the next-sentence vector for a story and print its nearest sentences.

    Parameters
    ----------
    vectors : 2-D array
        One row per story sentence written so far.
    sentences : sequence of str
        The story sentences, parallel to ``vectors``.
    dataset_vectors, dataset_sentences : arrays
        Candidate pool handed to ``nn`` for the nearest-neighbour lookup.
    k : int, optional
        Number of suggestions to print (default 5).

    Notes
    -----
    A story with N sentences is routed to ``modelN`` (N = 1..4).  For any other
    length the function prints a message and returns without predicting.
    """
    n_sentences, dim = vectors.shape

    # One model per story length, replacing four copy-pasted branches.
    models = {1: model1, 2: model2, 3: model3, 4: model4}
    if n_sentences not in models:
        print("Story too longgg.")
        return
    model = models[n_sentences]

    # StoryVectors treats the last slot as the target, so append a zero row and
    # a placeholder sentence to occupy it.  The row width is taken from the
    # input instead of a hard-coded 4800.
    padded_vectors = np.append(vectors, np.zeros((1, dim)), axis=0)
    padded_vectors = np.expand_dims(padded_vectors, axis=1)
    padded_sentences = np.append(sentences, "dummy sentence for label")
    padded_sentences = np.expand_dims(padded_sentences, axis=1)

    test_dataset = StoryVectors(padded_vectors, padded_sentences)
    # The dataset holds exactly one item (its length is the size of axis 1,
    # which expand_dims set to 1), so shuffling and worker processes add
    # nothing; loading in-process also avoids relying on notebook-defined
    # classes being importable from DataLoader worker processes.
    test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                              batch_size=1,
                                              collate_fn=vocab_collate_func,
                                              shuffle=False,
                                              num_workers=0)

    print("Calling model %d" % n_sentences)
    model.eval()
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        for data, lengths, _labels, _batch_sentences in test_loader:
            print("predicting vectors")
            pred = model(data, lengths)

    pred = pred.detach().numpy().squeeze()
    nn(pred, dataset_vectors, dataset_sentences, k)


# INTERFACE

How does it work?

This is an interactive story-generating system that combines human authoring with the system's expertise to generate intriguing short stories. The system generates the first sentence of the story, after which it makes suggestions for the next sentence. You can choose from the suggestions you see in the list by typing the digit corresponding to your chosen next sentence, or you can enter a sentence of your own, which then becomes part of the narration. This procedure continues until we have a short five-sentence story.

In [71]:
# From here on `experiment` names the data/ sub-directory that the interactive
# cells read the sentence vectors from.
experiment = 'interface'

# Sentences accepted into the story so far.
story = []

# Five random indices (drawn with replacement) into the first strided slice of
# the training sentences.
n = np.random.randint(0, len(sentences1[0]), 5)

print("INITIAL SENTENCE \n")
# The stored sentences are bytes, so decode them before printing one per line.
print(*(sentences1[0][i].decode('UTF-8') for i in n), sep='\n')

INITIAL SENTENCE 

Emily used to hate cleaning day , but that changed on Saturday .
Emily spend all day baking a cake for Emily's daughter .
One day Emily was on YouTube .
John was visiting summer camp for the first time .
John is camping .


In [72]:
# Read the next sentence from the user and add it to the story.
s = input()
story.append(s)
print("Story so far: " + ' '.join(story))
l = len(story)
print("Converting to vectors... eta: 1 min")
# NOTE(review): the interpreter path is machine-specific, and the story is
# passed joined WITHOUT a separator (unlike the space-joined echo above) --
# confirm that text2vec.py expects this.
p1 = subprocess.run(['/Users/shrey/anaconda3/envs/py2/bin/python', 'text2vec.py', ''.join(story)], stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
# Fail loudly if the conversion failed; otherwise np.load below would pick up
# a missing or stale vector file.
if p1.returncode != 0:
    raise RuntimeError("text2vec.py failed with exit code %d:\n%s"
                       % (p1.returncode, p1.stderr.decode('utf-8', errors='replace')))
# The vectors are read from data/<experiment>/<number of sentences>.npy.
vectors = np.load("data/" + experiment + "/" + str(l) + ".npy")
suggestions(vectors, story, train_vectors, train_sentences, k=10)

KeyboardInterrupt: 

In [43]:
# Read the next sentence from the user and add it to the story.
s = input()
story.append(s)
print("Story so far: " + ' '.join(story))
l = len(story)
print("Converting to vectors")
# NOTE(review): the interpreter path is machine-specific, and the story is
# passed joined WITHOUT a separator (unlike the space-joined echo above) --
# confirm that text2vec.py expects this.
p1 = subprocess.run(['/Users/shrey/anaconda3/envs/py2/bin/python', 'text2vec.py', ''.join(story)], stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
# Fail loudly if the conversion failed; otherwise np.load below would pick up
# a missing or stale vector file.
if p1.returncode != 0:
    raise RuntimeError("text2vec.py failed with exit code %d:\n%s"
                       % (p1.returncode, p1.stderr.decode('utf-8', errors='replace')))
# The vectors are read from data/<experiment>/<number of sentences>.npy.
vectors = np.load("data/" + experiment + "/" + str(l) + ".npy")
suggestions(vectors, story, train_vectors, train_sentences, k=10)


 John did n't want to spend a lot of money .


Story so far: John went to the market. John did n't want to spend a lot of money .
Calling model 2
predicting vectors
computing scores
(b'Peter went to the library to check out a book on basketball .', 1.0152367) 962
(b'John found some leftovers from a restaurant .', 0.99243754) 372
(b'John went to the store to buy the items needed for the repellant .', 0.9870586) 114
(b"John went to the bank to get a loan to start John's business .", 0.9849243) 311
(b'John asked for a refund for the pizza .', 0.98466194) 1189
(b'John went to the bank and asked for a loan .', 0.9775698) 657
(b'John went outside to retrieve the newspaper .', 0.9758084) 1826
(b"John left John's debit card at the bar .", 0.9754853) 1745
(b"John stopped at a restaurant and left the unopened phone on John's seat .", 0.97514606) 212
(b"John enjoyed John's sandwich and was glad John had n't spent extra money .", 0.97461814) 1834


In [44]:
# Read the next sentence from the user and add it to the story.
s = input()
story.append(s)
print("Story so far: " + ' '.join(story))
l = len(story)
print("Converting to vectors")
# NOTE(review): the interpreter path is machine-specific, and the story is
# passed joined WITHOUT a separator (unlike the space-joined echo above) --
# confirm that text2vec.py expects this.
p1 = subprocess.run(['/Users/shrey/anaconda3/envs/py2/bin/python', 'text2vec.py', ''.join(story)], stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
# Fail loudly if the conversion failed; otherwise np.load below would pick up
# a missing or stale vector file.
if p1.returncode != 0:
    raise RuntimeError("text2vec.py failed with exit code %d:\n%s"
                       % (p1.returncode, p1.stderr.decode('utf-8', errors='replace')))
# The vectors are read from data/<experiment>/<number of sentences>.npy.
vectors = np.load("data/" + experiment + "/" + str(l) + ".npy")
suggestions(vectors, story, train_vectors, train_sentences, k=10)

 He bought cheap food.


Story so far: John went to the market. John did n't want to spend a lot of money . He bought cheap food.
Calling model 3
predicting vectors
computing scores
(b"It could barely keep up with John's workload .", 0.86232746) 887
(b'Unfortunately , John was grounded .', 0.8519374) 1666
(b"There were several sales on John's account .", 0.85013354) 1747
(b"But when Emily saw mourners compliment Emily's work , Emily was happy .", 0.84867895) 1988
(b'It looked like John peed John .', 0.847754) 1383
(b'It was all John could afford .', 0.8476577) 1196
(b'The consensus was clean socks .', 0.844612) 203
(b'It was full of vegetables and meat .', 0.8433764) 1317
(b'And John felt regretful about all of the money John wasted .', 0.8391314) 1608
(b'It was a pizzeria .', 0.83523697) 901


In [45]:
# Read the next sentence from the user and add it to the story.
s = input()
story.append(s)
print("Story so far: " + ' '.join(story))
l = len(story)
print("Converting to vectors")
# NOTE(review): the interpreter path is machine-specific, and the story is
# passed joined WITHOUT a separator (unlike the space-joined echo above) --
# confirm that text2vec.py expects this.
p1 = subprocess.run(['/Users/shrey/anaconda3/envs/py2/bin/python', 'text2vec.py', ''.join(story)], stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
# Fail loudly if the conversion failed; otherwise np.load below would pick up
# a missing or stale vector file.
if p1.returncode != 0:
    raise RuntimeError("text2vec.py failed with exit code %d:\n%s"
                       % (p1.returncode, p1.stderr.decode('utf-8', errors='replace')))
# The vectors are read from data/<experiment>/<number of sentences>.npy.
vectors = np.load("data/" + experiment + "/" + str(l) + ".npy")
suggestions(vectors, story, train_vectors, train_sentences, k=10)

 John was not happy with the quality of the food.


Story so far: John went to the market. John did n't want to spend a lot of money . He bought cheap food. John was not happy with the quality of the food.
Calling model 4
predicting vectors
computing scores
(b'They decided John would cook every time they went camping .', 0.9054463) 1319
(b"John was a very clean person because of John's hygeine .", 0.904766) 183
(b"John was nearly out of food in John's house .", 0.9039933) 570
(b'Peter was very sick and John had to take John to a doctor .', 0.9019178) 1074
(b'When John went to India the first time , John was sick for a week .', 0.901726) 490
(b"John was pleased John had the family heirloom in John's home .", 0.8996477) 1234
(b"But John's car got broken into while John was eating .", 0.89806366) 213
(b'As John was cooking , John could not find the ricotta cheese .', 0.89661825) 897
(b'John fed John daily and had no idea that the dog was vicious .', 0.8965687) 231
(b"John was upset John could n't finish John's dinner .", 0.89616954) 899


## FINAL STORY

In [73]:
# Print the finished story, one sentence per line.
# NOTE(review): the two disabled lines below read and append one more sentence;
# presumably they were run once to add the last sentence and then commented
# out -- confirm before relying on a fresh Restart & Run All.
# s = input()
# story.append(s)
print("FINAL STORY: \n", "\n".join(story), sep="\n")

FINAL STORY: 

John went to the market.
John did n't want to spend a lot of money .
He bought cheap food.
John was not happy with the quality of the food.
John was upset John could n't finish John's dinner .
