# Slogan Generator - Test

## 0) Import Dependencies

In [1]:
import torch
import torch.nn as nn
from torch.autograd import Variable
import unidecode
import pandas as pd
import string
import json
import requests
import random
import re

## 1) Prepare Data

In [2]:
all_characters = string.printable
n_characters = len(all_characters)

file = unidecode.unidecode(open('../Data/all_data_text.txt').read())
file_len = len(file)
print('file_len = ', file_len)

file_len =  47266


In [3]:
slogans = []

with open('../Data/all_data_text.txt') as text_file:
    for line in text_file:
        slogans.append(line.split('\n')[0].upper())

slogans[:10]

['WHAT IS A TAGLINE?',
 'A MEMORABLE DRAMATIC PHRASE',
 'REINFORCE AND STRENGTHEN THE AUDIENCE',
 'HOW IS A TAGLINE CREATED?',
 'A DIAMOND IS FOREVER',
 'A LITTLE',
 'DAB’LL',
 'DO YA',
 'A MARS A DAY HELPS YOU WORK, REST AND PLAY',
 'AH! BISTO']

### Picking a Random Slogan

In [None]:
def random_slogan():
    return random.choice(slogans).replace('"', "'")

(random_slogan())

### *ALTERNATE OPTION* Sample Random Chunks

In [4]:
chunk_len = 300

def random_chunk():
    start_index = random.randint(0, file_len - chunk_len)
    end_index = start_index + chunk_len + 1
    chunk = file[start_index:end_index]
    return chunk.upper()

print(random_chunk())

 BLUE AND SEE US THROUGH
SING LOUD AND CLEAR UNTIL THE GAME IS DONE
SING CHELSEA EVERYONE.
WHEN I FIND MYSELF IN TIMES OF TROUBLE, MOTHER MARY COMES TO ME
SPEAKING WORDS OF WISDOM, LET IT BE
AND IN MY HOUR OF DARKNESS SHE IS STANDING RIGHT IN FRONT OF ME
SPEAKING WORDS OF WISDOM, LET IT BE
LET IT BE,


## 2) Build the Model

In [5]:
class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, n_layers=1):
        super(RNN, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.n_layers = n_layers
        
        self.encoder = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size, n_layers)
        self.decoder = nn.Linear(hidden_size, output_size)
    
    def forward(self, input, hidden):
        input = self.encoder(input.view(1, -1))
        output, hidden = self.gru(input.view(1, 1, -1), hidden)
        output, hidden = self.gru(input.view(1, 1, -1), hidden)
        output, hidden = self.gru(input.view(1, 1, -1), hidden)
        output, hidden = self.gru(input.view(1, 1, -1), hidden)
        output, hidden = self.gru(input.view(1, 1, -1), hidden)
        output = self.decoder(output.view(1, -1))
        return output, hidden

    def init_hidden(self):
        return (torch.zeros(self.n_layers, 1, self.hidden_size))

### Setting Inputs & Targets

In [6]:
# Turn string into list of longs
def char_tensor(string):
    tensor = torch.zeros(len(string)).long()
    for c in range(len(string)):
        tensor[c] = all_characters.index(string[c])
    return (tensor)

print(char_tensor('abcDEF'))

tensor([ 10,  11,  12,  39,  40,  41])


We can assemble a pair of input and target tensors for training, from a random chunk. The input will be all characters up to the last, and the target will be all characters from the first.

**NOTE** Because *random_slogan()* returns string with single quotes, ```'str'```, we need to convert it into a double quoted string. That way, we don't encounter issues with strings that contain commas.

There is no way in Python to do this, but by using **json**, we can output a double quoted string from the argument we pass in

In [7]:
def random_training_set():    
    chunk = json.dumps(random_chunk())
    inp = char_tensor(chunk[:-1])
    target = char_tensor(chunk[1:])
    return inp, target

## 3) Set up the Evaluator

In [8]:
def evaluate(prime_str='A', predict_len=100, temperature=0.8):
    hidden = decoder.init_hidden()
    prime_input = char_tensor(prime_str)
    predicted = prime_str
    
    # Use priming string to "build up" hidden state
    for p in range(len(prime_str) - 1):
        _, hidden = decoder(prime_input[p], hidden)
        inp = prime_input[-1]
        
    inp = prime_input[-1]
    
    for p in range(predict_len):
        output, hidden = decoder(inp, hidden)
        
        # Sample from the network as a multinomial distribution
        output_dist = output.data.view(-1).div(temperature).exp()
        top_i = torch.multinomial(output_dist, 1)[0]
        
        # Add predicted character to string and use as next input
        predicted_char = all_characters[top_i]
        predicted += predicted_char
        inp = char_tensor(predicted_char)
        
    return predicted

## 4) Training

First, we will set up a helper function to keep track of time during training

In [9]:
import time, math

def time_since(since):
    s = time.time() - since
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)

Now, for the main training function:

In [10]:
def train(inp, target):
    hidden = decoder.init_hidden()
    decoder.zero_grad()
    loss = 0

    for c in range(chunk_len):
        output, hidden = decoder(inp[c], hidden)
        loss += criterion(output, target[c].unsqueeze(0))

    loss.backward()
    decoder_optimizer.step()

    return loss.data.item() / chunk_len

We define our hyperparameters, instantiate the model, and start straining:

In [11]:
n_epochs = 2000
print_every = 100
plot_every = 50
hidden_size = 100
n_layers = 1
lr = 0.005

decoder = RNN(n_characters, hidden_size, n_characters, n_layers)
decoder_optimizer = torch.optim.Adam(decoder.parameters(), lr=lr)
criterion = nn.CrossEntropyLoss()

start = time.time()
all_losses = []
loss_avg = 0

for epoch in range(1, n_epochs + 1):
    loss = train(*random_training_set())       
    loss_avg += loss

    if epoch % print_every == 0:
        print('[%s (%d %d%%) %.4f]' % (time_since(start), epoch, epoch / n_epochs * 100, loss))
        print(evaluate('Wh', 100), '\n')

    if epoch % plot_every == 0:
        all_losses.append(loss_avg / plot_every)
        loss_avg = 0

[0m 55s (100 5%) 2.3617]
WhY SATIS SOR NOME HISSED KEE CARE\nBETING WHEENS TO WHERER CELLIN EPONK ONESSSS SUT NOT YO ARED OME D 

[1m 50s (200 10%) 2.0609]
WhH YURO TO MAKE I'S MACK, LIF THAT IN'T THE I DOD IN THE ITH I HANED IS NOF SO MAYS BE'M ING O KING I 

[2m 45s (300 15%) 1.9317]
WhILL DOW SOME THE BY A WHE YOU DAN LIVE ME THE EARE HOUR FORD\nTHEME YOUR THOUGHT BUT WOR NONCE THE T 

[3m 42s (400 20%) 1.9129]
WhE SIDE THE THE AND ERE TO CROUS ANVERLY\nSO THE STACH A SUP WHITH WISH\nWHAT YOU CAND SOMEAND IN THA 

[4m 37s (500 25%) 1.8956]
WhIN' LIGHT\nWHELIND -ARSE HAPP, BOUT WAT IN DIGHT\"\nAND EVERS LIGHT COULD PEOLERS OF PEEAR\nWE AGET  

[5m 34s (600 30%) 1.8855]
WhY, SOMED SOME\nTHI USPN WATEVE SOMERONG A AT I OF MEATS FIVE A ROUT ONE\nTHAT YOU FIND IF YOU FING A 



KeyboardInterrupt: 

### Plotting the Training Loss

In [None]:
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
%matplotlib inline

plt.figure()
plt.plot(all_losses)

## 5) Evaluation: Creating Potential Slogans

In [None]:
print(evaluate('Th', 10, temperature=0.8))

In [None]:
print(evaluate('A', 16, temperature=0.3))

In [None]:
print(evaluate('Th', 10, temperature=1.4))

In [None]:
def get_evaluations(num_predictions=100, predict_len=16, temperature=0.4):
    samples = []
    
    while len(samples) != num_predictions:
        prime_str = random.choice(string.ascii_uppercase)
        samples.append(evaluate(prime_str, predict_len, temperature))
        
    return samples

## 6) Sentiment Analysis on Potential Slogans

In [None]:
def get_sentiment(string):
    r = requests.post("http://text-processing.com/api/sentiment/", data={'text': string})
    return json.loads(r.text)

In [None]:
def get_candidates(num_candidates=10, predict_len=10, temperature=0.8):
    candidates = []
    # Keep track of how many evaluations we calculate
    sentiment_evaluations = 0
    # Evaluate potential candidated until we have our desired amount
    while len(candidates) != num_candidates:
        prime_str = random.choice(string.ascii_uppercase)
        sample = evaluate(prime_str, predict_len, temperature)
        
        # With predicted sample, run through sentiment analysis
        sentiment = get_sentiment(sample)
        sentiment_evaluations += 1
        
        # Finalizing candidate if it has a strong enough score
        if sentiment['probability']['pos'] > 0.70:
            print(sentiment['probability'])
            candidates.append(sample)
            
        # Stop early if calculate too many evaluations (request limit)
        if sentiment_evaluations >= 2000:
            print("too many attempts: " + str(sentiment_evaluations))
            return candidates
        
    print("number of sentiment evaluations done: " + str(sentiment_evaluations))
    return candidates

In [None]:
get_candidates()

## FINAL: Exporting the Trained Model

For use outside of this notebook, we want to save the model and make it easy to import in other files

In [None]:
torch.save(decoder.state_dict(), '../MVP/rnn.py')