In [1]:
import pandas as pd
from keras.callbacks import LambdaCallback
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.layers import LSTM
from keras.optimizers import Adam
import tensorflow as tf
import numpy as np
import sys
import random

Using TensorFlow backend.


In [2]:
# Load the raw tweet dump; only the 'Tweet_Text' column is used below.
trump_csv = pd.read_csv('Donald-Tweets!.csv')

# Dump the tweet text column to a plain file that is read back as the corpus.
# header=False is pinned explicitly: pandas >= 0.24 writes the column name as
# a first row by default for a Series, which would add "Tweet_Text" to the
# training text. The encoding is stated so it matches the UTF-8 read that
# follows.
# NOTE(review): to_csv still applies CSV quoting, so tweets containing commas,
# quotes or newlines are wrapped in '"' with inner quotes doubled.
trump_csv['Tweet_Text'].to_csv('Tweet_Text.txt', index=False, header=False,
                               encoding='utf-8')

In [3]:
# Read the whole corpus into memory as one string. The context manager
# closes the handle even if read() raises (e.g. on a decoding error).
with open('Tweet_Text.txt', 'r', encoding='utf-8') as file:
    text = file.read()

In [4]:
# Vocabulary: every distinct character in the corpus. Sorting makes the order
# of `chars` reproducible; iterating a bare set of strings can yield a
# different order after each kernel restart because str hashing is randomised.
chars = sorted(set(text))
data_size, vocab_size = len(text), len(chars)
print('There are %d total characters and %d unique characters in our data.' % (data_size, vocab_size))

There are 865560 total characters and 106 unique characters in our data.


In [5]:
# Bidirectional lookup tables between characters and integer ids.
# Ids are assigned by position in the sorted vocabulary.
indices_char = dict(enumerate(sorted(chars)))
char_indices = {symbol: idx for idx, symbol in indices_char.items()}
print(indices_char)

{0: '\n', 1: ' ', 2: '!', 3: '"', 4: '#', 5: '$', 6: '%', 7: '&', 8: '(', 9: ')', 10: '*', 11: '+', 12: ',', 13: '-', 14: '.', 15: '/', 16: '0', 17: '1', 18: '2', 19: '3', 20: '4', 21: '5', 22: '6', 23: '7', 24: '8', 25: '9', 26: ':', 27: ';', 28: '=', 29: '?', 30: '@', 31: 'A', 32: 'B', 33: 'C', 34: 'D', 35: 'E', 36: 'F', 37: 'G', 38: 'H', 39: 'I', 40: 'J', 41: 'K', 42: 'L', 43: 'M', 44: 'N', 45: 'O', 46: 'P', 47: 'Q', 48: 'R', 49: 'S', 50: 'T', 51: 'U', 52: 'V', 53: 'W', 54: 'X', 55: 'Y', 56: 'Z', 57: '[', 58: ']', 59: '_', 60: 'a', 61: 'b', 62: 'c', 63: 'd', 64: 'e', 65: 'f', 66: 'g', 67: 'h', 68: 'i', 69: 'j', 70: 'k', 71: 'l', 72: 'm', 73: 'n', 74: 'o', 75: 'p', 76: 'q', 77: 'r', 78: 's', 79: 't', 80: 'u', 81: 'v', 82: 'w', 83: 'x', 84: 'y', 85: 'z', 86: '{', 87: '|', 88: '}', 89: '~', 90: 'ʉ', 91: '̱', 92: 'ω', 93: 'я', 94: 'ӕ', 95: 'ԍ', 96: 'ԏ', 97: 'Ԡ', 98: 'ե', 99: 'լ', 100: 'ջ', 101: 'ُ', 102: '٪', 103: '\u06dd', 104: 'ۢ', 105: '۪'}


In [6]:
# Hyper-parameters for the sliding-window dataset and for sampling.
maxlen = 60       # characters of context fed to the model
step = 3          # stride between consecutive windows
diversity = 0.7   # sampling temperature used when generating text

# Each training example is a `maxlen`-character window; its target is the
# single character that immediately follows the window.
window_starts = range(0, len(text) - maxlen, step)
sentences = [text[start: start + maxlen] for start in window_starts]
next_chars = [text[start + maxlen] for start in window_starts]
print('nb sequences:', len(sentences))

nb sequences: 288500


In [7]:
print('Vectorization...')
# One-hot encode the dataset:
#   x[i, t, k] is True when character t of window i has id k
#   y[i, k]    is True when the character following window i has id k
# The builtin `bool` is used as dtype: the `np.bool` alias was deprecated in
# NumPy 1.20 and removed in 1.24, where it raises AttributeError.
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
y = np.zeros((len(sentences), len(chars)), dtype=bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1

Vectorization...


In [8]:
# Build the model: one LSTM layer followed by a softmax over the vocabulary.
print('Build model...')
n_symbols = len(chars)
model = Sequential([
    LSTM(128, input_shape=(maxlen, n_symbols)),  # input: (maxlen, one-hot char)
    Dense(n_symbols),
    Activation('softmax'),
])

# Next-character prediction is trained as a multi-class classification.
optimizer = Adam(lr=0.01)
model.compile(optimizer=optimizer, loss='categorical_crossentropy')


def sample(preds, temperature=1.0):
    """Sample an index from a probability array, reweighted by temperature.

    The probabilities are raised to the power ``1 / temperature`` and
    renormalised, then a single index is drawn from the result.

    Args:
        preds: 1-D array-like of non-negative weights (e.g. a softmax output).
        temperature: values below 1 sharpen the distribution, values above 1
            flatten it. A value <= 0 selects the most likely index
            deterministically instead of dividing by zero.

    Returns:
        The sampled index, as returned by ``np.argmax``.
    """
    preds = np.asarray(preds).astype('float64')
    if temperature <= 0:
        # Limit of the reweighting as temperature -> 0: greedy choice.
        return np.argmax(preds)

    # Clip before the log so exact zeros do not produce -inf and a
    # RuntimeWarning; the clipped entries still end up with ~0 probability.
    logits = np.log(np.maximum(preds, np.finfo('float64').tiny)) / temperature
    # Shift so the largest logit is 0. Without this, a small temperature makes
    # every exp() underflow to 0 and the normalisation becomes 0/0 = NaN.
    logits -= np.max(logits)
    exp_preds = np.exp(logits)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)


def on_epoch_end(epoch, logs):
    """Print a generated text sample; invoked at the end of each epoch.

    A random `maxlen`-character slice of the corpus is used as the seed and
    the model then extends it by 140 characters, one sampled character at a
    time, echoing each to stdout as it is produced.

    Args:
        epoch: index of the epoch that just finished (printed in the banner).
        logs: accepted for the callback signature; not used here.
    """
    print()
    print('----- Generating text after Epoch: %d' % epoch)

    # Choose where the seed window starts; the upper bound keeps the whole
    # window inside the corpus.
    seed_start = random.randint(0, len(text) - maxlen - 1)
    sentence = text[seed_start: seed_start + maxlen]
    generated = sentence
    print('----- Generating with seed: "' + sentence + '"')
    sys.stdout.write(generated)

    for _ in range(140):
        # One-hot encode the current context window as a batch of one.
        x_pred = np.zeros((1, maxlen, len(chars)))
        char_ids = [char_indices[symbol] for symbol in sentence]
        x_pred[0, np.arange(len(char_ids)), char_ids] = 1.

        # Predict the next-character distribution and sample from it.
        distribution = model.predict(x_pred, verbose=0)[0]
        next_char = indices_char[sample(distribution, diversity)]

        # Slide the window forward by one character.
        generated += next_char
        sentence = sentence[1:] + next_char

        sys.stdout.write(next_char)
        sys.stdout.flush()
    print()

# Hook the text-generation preview into training so a sample is printed
# after every epoch.
print_callback = LambdaCallback(on_epoch_end=on_epoch_end)

# Train for 20 epochs with the ops placed on the first GPU.
with tf.device('/gpu:0'):
    model.fit(x=x, y=y, batch_size=256, epochs=20, callbacks=[print_callback])

Build model...
Epoch 1/20
----- Generating text after Epoch: 0
----- Generating with seed: "rters by endorsing pro-war pro-TPP pro-Wall Street Crooked H"
rters by endorsing pro-war pro-TPP pro-Wall Street Crooked Hillary to 7:30 over the rele and and wint my lest toter atessed to eed to saud by vote winter trampen every ofer will but. Thank you heres d
Epoch 2/20
----- Generating text after Epoch: 1
----- Generating with seed: " very gentle!"
The arrogant young woman who questioned me in"
 very gentle!"
The arrogant young woman who questioned me in the failer people are up a need last the for a the the prees than out a great as only who vetio sheed of medials great and the distraenng t
Epoch 3/20
----- Generating text after Epoch: 2
----- Generating with seed: "ery sad that he did not go!
"""@Carolyn82471448: @WayneDupre"
ery sad that he did not go!
"""@Carolyn82471448: @WayneDupre: @realDonaldTrump is a liggeraise for and you think you a- @CNN have our dibed mote done on @realDonaldTr

e are going to make this a government of the people once against dont be a representrationst locing @CNN new have to be the dont tonight and v

  from ipykernel import kernelapp as app


ery won in teerest today. We are long that we can https://
Epoch 18/20
----- Generating text after Epoch: 17
----- Generating with seed: "he oil years ago (like I have been saying).
"Wish Obama woul"
he oil years ago (like I have been saying).
"Wish Obama would be solver the traders. The President that you will be doing a sell the falled smoing everything to couldnd on @CNN states. #DrainJohard"
"
Epoch 19/20
----- Generating text after Epoch: 18
----- Generating with seed: "Tarant: America needs @realDonaldTrump ! #Trump2016 #BuildTh"
Tarant: America needs @realDonaldTrump ! #Trump2016 #BuildThind AmericaGreatAgain https://t.co/txahDGajNB3
"Great job #DebayBSC @GOPverynt News #Sulten endorsement go bossed from Surbe somurecald of t
Epoch 20/20
----- Generating text after Epoch: 19
----- Generating with seed: "cannot take four more years of Barack Obama and that۪s what "
cannot take four more years of Barack Obama and that۪s what #DeanDCaryForts Donald Trump wow! Check &amp; the shad t

In [11]:
# Run 10 more epochs on the same model, reusing the `print_callback`
# defined in the earlier training cell.
with tf.device('/gpu:0'):
    model.fit(x=x, y=y, batch_size=256, epochs=10, callbacks=[print_callback])

Epoch 1/10
----- Generating text after Epoch: 0
----- Generating with seed: "t and liabilities. So simple to understand but @CNN &amp; @C"
t and liabilities. So simple to understand but @CNN &amp; @CNN Sen will neve

  from ipykernel import kernelapp as app


r respect say you!  https://t.co/KTuJF7YG70"
"Thank you Donald Trump"" #MakeAmericaGreatAgain #Trump2016 https://t.co/ongkuR
Epoch 2/10
----- Generating text after Epoch: 1
----- Generating with seed: "can find your polling locations at:
https://t.co/BmZyKQOZJJ "
can find your polling locations at:
https://t.co/BmZyKQOZJJ https://t.co/EOtyZEyd7m"
"""@orelandccotsh: @realDonaldTrump @FoxNews https://t.co/3eg6GkJ83V"
"""@blargyenlites: @realDonaldTrump 78,000 th
Epoch 3/10
----- Generating text after Epoch: 2
----- Generating with seed: "GreatAgain #Trump2016"" http://t.co/GSXf8Tfzvn"
"""@nozzero "
GreatAgain #Trump2016"" http://t.co/GSXf8Tfzvn"
"""@nozzero poll not to campaign in their law enorsing in the former Depage, he was going to be ""greaty officity. Thank you Genator after reporters tod
Epoch 4/10
----- Generating text after Epoch: 3
----- Generating with seed: "rous, dishonest."
"If you want to know about Hillary Clinton"
rous, dishonest."
"If you want to know about Hillary Cli