# Libraries

In [1]:
import numpy as np
import pandas as pd

In [2]:
from nltk import ngrams
from collections import defaultdict

In [3]:
import ast
import json
import os
import random

In [4]:
# Show full quote text in DataFrame output. None is the supported way to
# disable column-width truncation; the old -1 sentinel is deprecated and is
# rejected by current pandas releases.
pd.options.display.max_colwidth = None

# Data

In [5]:
# Gather every scraped CSV under the quotes folder into one DataFrame.
# Each file is joined onto the directory os.walk is currently visiting, so
# CSVs that live in sub-folders resolve to their real location.
frames = []
for folderName, subfolders, filenames in os.walk('E:/Scrapped-Data/Quotes-Goodreads/'):
    for filename in filenames:
        frames.append(pd.read_csv(os.path.join(folderName, filename)))

# Concatenate once instead of re-copying the growing frame for every file.
# Later-read files are placed first in the combined frame.
if frames:
    data = pd.concat(frames[::-1], axis=0)
else:
    # No CSVs found: fall back to an empty frame that still has the column
    # the following cells read.
    data = pd.DataFrame(columns=['Quote'])
print(len(data))

92970


In [6]:
# Preview the first five rows of the freshly loaded quotes.
data.head(n=5)

Unnamed: 0,Quote
0,"“I love deadlines. I love the whooshing noise they make as they go by.” ― Douglas Adams, The Salmon of Doubt //"
1,"“There is no greater agony than bearing an untold story inside you.” ― Maya Angelou, I Know Why the Caged Bird Sings //"
2,"“What really knocks me out is a book that, when you're all done reading it, you wish the author that wrote it was a terrific friend of yours and you could call him up on the phone whenever you felt like it. That doesn't happen much, though.” ― J.D. Salinger, The Catcher in the Rye //"
3,"“If there's a book that you want to read, but it hasn't been written yet, then you must write it.” ― Toni Morrison"
4,“There is nothing to writing. All you do is sit down at a typewriter and bleed.” ― Ernest Hemingway


# Cleaning

In [7]:
def clean(x):
    """Strip the attribution and surrounding curly quotes from a scraped quote.

    Everything from the first '―' onward (author / title) is discarded. A
    leading '“' and a trailing '”' are removed only when they are actually
    present, so rows without an attribution or without the trailing space are
    not truncated. The literal token ' endquote' is appended as the
    end-of-quote marker.

    Parameters
    ----------
    x : str
        Raw quote text, e.g. '“Some text.” ― Author, Title //'.

    Returns
    -------
    str
        The bare quote text followed by ' endquote'.
    """
    body = x.split('―')[0].strip()
    if body.startswith('“'):
        body = body[1:]
    if body.endswith('”'):
        body = body[:-1]
    return body.strip() + ' endquote'

In [8]:
# Clean every quote, then drop exact duplicate rows and renumber the index
# from zero so rows can be addressed by position afterwards.
data = (
    data.assign(Quote=data['Quote'].apply(clean))
        .drop_duplicates()
        .reset_index(drop=True)
)
print(len(data))

73624


In [9]:
# Preview the first five cleaned quotes.
data.head(n=5)

Unnamed: 0,Quote
0,I love deadlines. I love the whooshing noise they make as they go by. endquote
1,There is no greater agony than bearing an untold story inside you. endquote
2,"What really knocks me out is a book that, when you're all done reading it, you wish the author that wrote it was a terrific friend of yours and you could call him up on the phone whenever you felt like it. That doesn't happen much, though. endquote"
3,"If there's a book that you want to read, but it hasn't been written yet, then you must write it. endquote"
4,There is nothing to writing. All you do is sit down at a typewriter and bleed. endquote


# Training

In [10]:
# First word of every cleaned quote (duplicates kept); used to seed generation.
vocab = [quote.split()[0] for quote in data['Quote']]

In [11]:
n = 4  # context length: the next word is predicted from the previous n words

# counts[context][next_word] = how often next_word followed the n-word context.
# n-grams are counted as they are produced instead of first being collected
# into one large list, and quotes are iterated directly rather than looked up
# by index label.
counts = defaultdict(lambda: defaultdict(int))
for quote in data['Quote']:
    # (n+1)-grams over the lower-cased words; padding with '' on both sides
    # also yields the grams that run off either end of the quote.
    for gram in ngrams(quote.lower().split(), n + 1,
                       pad_left=True, pad_right=True,
                       left_pad_symbol='', right_pad_symbol=''):
        counts[gram[:-1]][gram[-1]] += 1

# Turn the counts into per-context probabilities. Plain dicts are used all the
# way down so the model carries no lambda-backed defaultdicts.
model = {}
for context, followers in counts.items():
    total_count = float(sum(followers.values()))
    model[context] = {word: count / total_count for word, count in followers.items()}

In [12]:
def q(x, size=None):
    """Return the model lookup key for the text `x`.

    The key is the last `size` lower-cased, whitespace-separated words of `x`
    as a tuple, left-padded with '' when `x` has fewer than `size` words.
    Empty or whitespace-only text yields an all-'' tuple instead of raising
    IndexError.

    Parameters
    ----------
    x : str
        Text generated (or typed) so far.
    size : int, optional
        Number of words in the key; defaults to the notebook-level `n`.

    Returns
    -------
    tuple of str
    """
    size = n if size is None else size
    tokens = x.lower().split()
    # Only the tail of the text matters, so slice it directly rather than
    # building every n-gram and keeping just the last one.
    return tuple(([''] * size + tokens)[-size:])

In [13]:
# Look up the next-word distribution for a sample prompt.
query = q("Don't cry")
print(query)
print(dict(model[query]))

('', '', "don't", 'cry')
{'mommy."': 0.3333333333333333, 'because': 0.3333333333333333, 'over': 0.3333333333333333}


In [14]:
# Seed the quote with a random opening word (vocab holds quote first words).
text = str(random.choice(vocab))
print('Me:', text)
sentence_finished = False

while not sentence_finished:
    # Next-word distribution for the last n words generated so far. `model` is
    # a plain dict, so .get() is used to stop cleanly on an unseen context
    # instead of raising KeyError.
    followers = model.get(q(text), {})
    if not followers:
        break
    # Draw exactly one word in proportion to its probability. The sentence
    # ends only when the drawn word is the end-of-quote marker.
    word = random.choices(list(followers), weights=list(followers.values()))[0]
    if word == 'endquote':
        sentence_finished = True
    else:
        text += ' ' + word
print('Inspyrobot:', end = ' ')
print(' '.join(text.split()))

Me: I
Inspyrobot: I love you too, i wanted to say with as much hurtful sarcasm as i could muster, but she hadn't seen me, and i kept quiet. i did love her, of course, but mostly just because loving your mother is mandatory, not because she's someone i think i'd like very much if i met her walking down the street. which she wouldn't be anyway; walking is for poor people


# Saving and Loading (JSON)

In [15]:
DIR = 'E:/Models/Inspyrobot-Probabilistic-v1'
# makedirs with exist_ok=True creates missing parent folders and does not
# fail when the cell is re-run after the folder already exists.
os.makedirs(DIR, exist_ok=True)

In [16]:
# JSON object keys must be strings, so every context tuple is stored as its
# str() form. The resulting JSON text is then itself written as a JSON string
# (i.e. encoded twice), which is the format the loading cells expect.
stringified = {str(context): followers for context, followers in model.items()}
with open(f'{DIR}/model.json', 'w') as f:
    json.dump(json.dumps(stringified), f)

In [17]:
# Read the model back from disk. The file holds a JSON string that itself
# contains the JSON object, hence load() followed by loads().
with open(f'{DIR}/model.json', 'r') as f:
    dic = json.loads(json.load(f))

# Keys were saved as the str() of a tuple of words. ast.literal_eval parses
# them back into tuples without executing arbitrary code from the file, which
# eval() would do. Intermediate values get their own names so the `data`
# DataFrame from earlier cells is not overwritten.
model = {ast.literal_eval(key): followers for key, followers in dic.items()}

In [18]:
# Store the seed words using the same twice-encoded JSON layout as the model:
# the list is serialised to text, and that text is written as a JSON string.
vocab_json = json.dumps(vocab)
with open(f'{DIR}/vocab.json', 'w') as f:
    json.dump(vocab_json, f)

In [19]:
# Undo the two layers of JSON encoding to get the list of seed words back.
with open(f'{DIR}/vocab.json', 'r') as f:
    vocab_json = json.load(f)
vocab = json.loads(vocab_json)

In [20]:
# Generate a quote from the model and seed words that were just reloaded.
text = str(random.choice(vocab))
print('Me:', text)
sentence_finished = False

while not sentence_finished:
    # Next-word distribution for the last n words generated so far. `model` is
    # a plain dict, so .get() is used to stop cleanly on an unseen context
    # instead of raising KeyError.
    followers = model.get(q(text), {})
    if not followers:
        break
    # Draw exactly one word in proportion to its probability. The sentence
    # ends only when the drawn word is the end-of-quote marker.
    word = random.choices(list(followers), weights=list(followers.values()))[0]
    if word == 'endquote':
        sentence_finished = True
    else:
        text += ' ' + word
print('Inspyrobot:', end = ' ')
print(' '.join(text.split()))

Me: Never
Inspyrobot: Never let your sense of morals prevent you from doing what is right.


# Wrapper Functions

In [21]:
import json, random
from nltk import ngrams

DIR = 'E:/Models/Inspyrobot-Probabilistic-v1'
n = 4  # number of preceding words used as the lookup context


def q(x, size=None):
    """Return the model lookup key for the text `x`.

    The key is the last `size` lower-cased, whitespace-separated words of `x`
    as a tuple, left-padded with '' when `x` has fewer than `size` words.
    Empty or whitespace-only text yields an all-'' tuple instead of raising
    IndexError. `size` defaults to the module-level `n`.
    """
    size = n if size is None else size
    tokens = x.lower().split()
    return tuple(([''] * size + tokens)[-size:])


def _load_model():
    """Read DIR/model.json and return it as {context tuple: {word: probability}}."""
    import ast  # local import so this cell stays runnable on its own

    with open(f'{DIR}/model.json', 'r') as f:
        # The file holds a JSON string that itself contains the JSON object.
        dic = json.loads(json.load(f))
    # Keys are the str() of a tuple of words. literal_eval parses them without
    # executing arbitrary code from the file, which eval() would do.
    return {ast.literal_eval(key): followers for key, followers in dic.items()}


def _load_vocab():
    """Read DIR/vocab.json (twice-encoded JSON) and return the list of seed words."""
    with open(f'{DIR}/vocab.json', 'r') as f:
        return json.loads(json.load(f))


def inspyre(text='', model=None, vocab=None):
    """Print a generated quote that continues `text`.

    Parameters
    ----------
    text : str, optional
        Opening words. When empty, a random entry of `vocab` is used.
    model : dict, optional
        Mapping of context tuple -> {next word: probability}. When omitted it
        is read from DIR/model.json. Passing an already loaded model avoids
        re-reading the file on every call.
    vocab : list of str, optional
        Seed words, only consulted when `text` is empty. When omitted it is
        read from DIR/vocab.json.

    Returns
    -------
    None
        The quote is printed, prefixed with 'Inspyrobot:'.
    """
    if model is None:
        model = _load_model()
    if text == '':
        if vocab is None:
            vocab = _load_vocab()
        text = str(random.choice(vocab))
    sentence_finished = False
    while not sentence_finished:
        # .get() so a context the model has never seen ends generation
        # instead of raising KeyError.
        followers = model.get(q(text), {})
        if not followers:
            break
        # Draw exactly one word in proportion to its probability. The sentence
        # ends only when the drawn word is the end-of-quote marker.
        word = random.choices(list(followers), weights=list(followers.values()))[0]
        if word == 'endquote':
            sentence_finished = True
        else:
            text += ' ' + word
    print('Inspyrobot:', end=' ')
    print(' '.join(text.split()))

In [22]:
# Generate a quote that starts with the given opening word.
inspyre(text="I'm")

Inspyrobot: I'm not a writer. ernest hemingway was a writer. i just have a vivid imagination and type 90 wpm.
