In [1]:
from __future__ import print_function
import matplotlib.pyplot as plt
import numpy as np
import time
import csv
from keras.models import Sequential
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM, SimpleRNN
from keras.layers.wrappers import TimeDistributed


Using TensorFlow backend.


In [86]:
DATA_DIR = '../../../analysis/data/nbmodel_templates.csv'
BATCH_SIZE = 100
HIDDEN_DIM = 100
SEQ_LENGTH = 50
WEIGHTS = ''

GENERATE_LENGTH = 100
LAYER_NUM = 2

In [3]:
reports = []

with open(DATA_DIR, encoding='utf-8-sig') as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')
    line_count = 0
    for row in csv_reader:
        if line_count == 0:
            print(f'Column names are {", ".join(row)}')
            line_count += 1
        else:
            reports.append(row[3].replace('"', '').replace("[", '').replace(']', ''))
            line_count += 1
    print(f'Processed {line_count} lines.')

Column names are report, report_chunks, template_chunks, templates
Processed 1953 lines.


In [4]:
data = ' '.join(reports).split(' ')
len(data)

40864

In [5]:
data[:10]

['${player_name}',
 'caught',
 '${receptions}',
 'passes',
 'for',
 '${rec_yards}',
 'yards',
 'and',
 'a',
 'touchdown']

In [59]:
# method for preparing the training data
def load_data(data, seq_length):
    chunks = list(set(data))
    VOCAB_SIZE = len(chunks)

    print('Data length: {} chunks'.format(len(data)))
    print('Vocabulary size: {} chunks'.format(VOCAB_SIZE))

    ix_to_chunk = {ix:chunk for ix, chunk in enumerate(chunks)}
    chunk_to_ix = {chunk:ix for ix, chunk in enumerate(chunks)}
    
    num_seq = int(np.floor((len(data) - 1) / seq_length))

    X = np.zeros((num_seq, seq_length, VOCAB_SIZE))
    y = np.zeros((num_seq, seq_length, VOCAB_SIZE))
    for i in range(0, num_seq):
        X_sequence = data[i*seq_length:(i+1)*seq_length]
        X_sequence_ix = [chunk_to_ix[value] for value in X_sequence]
        input_sequence = np.zeros((seq_length, VOCAB_SIZE))
        for j in range(seq_length):
            input_sequence[j][X_sequence_ix[j]] = 1.
            X[i] = input_sequence

        y_sequence = data[i*seq_length+1:(i+1)*seq_length+1]
        y_sequence_ix = [chunk_to_ix[value] for value in y_sequence]
        target_sequence = np.zeros((seq_length, VOCAB_SIZE))
        for j in range(seq_length):
            target_sequence[j][y_sequence_ix[j]] = 1.
            y[i] = target_sequence
    return X, y, VOCAB_SIZE, ix_to_chunk

In [60]:
# Creating training data
X, y, VOCAB_SIZE, ix_to_chunk = load_data(data, SEQ_LENGTH)

Data length: 40864 chunks
Vocabulary size: 550 chunks


In [13]:
# method for generating text
def generate_text(model, length, vocab_size, ix_to_chunk):
    # starting with random chunk
    ix = [np.random.randint(vocab_size)]
    y_chunk = [ix_to_chunk[ix[-1]]]
    X = np.zeros((1, length, vocab_size))
    for i in range(length):
        # appending the last predicted chunk to sequence
        X[0, i, :][ix[-1]] = 1
        print(ix_to_chunk[ix[-1]], end=" ")
        ix = np.argmax(model.predict(X[:, :i+1, :])[0], 1)
        y_chunk.append(ix_to_chunk[ix[-1]])
    return ('').join(y_chunk)

In [9]:
# x = np.zeros((1, GENERATE_LENGTH, VOCAB_SIZE))

In [10]:
# model.predict(x[:, :5, :])

In [87]:
# Creating and compiling the Network
model = Sequential()
model.add(LSTM(HIDDEN_DIM, input_shape=(None, VOCAB_SIZE), return_sequences=True))
for i in range(LAYER_NUM - 1):
  model.add(LSTM(HIDDEN_DIM, return_sequences=True))
model.add(TimeDistributed(Dense(VOCAB_SIZE)))
model.add(Activation('softmax'))
model.compile(loss="categorical_crossentropy", optimizer="rmsprop")

In [88]:
# Generate some sample before training to know how bad it is!
generate_text(model, GENERATE_LENGTH, VOCAB_SIZE, ix_to_chunk)

single failed failed London, 304 304 32 that that 32 racked racked chipped chipped at at receiving receiving handled McCarthy. McCarthy. McCarthy. McCarthy. McCarthy. McCarthy. potential carries carries ${team_score}-${opp_score}, ${team_score}-${opp_score}, pulled pulled field field grab grab pulled pulled passes passes passes leaving leaving leaving Thanksgiving. Thanksgiving. soaked soaked secured secured secured shoving before before before third third third 38 38 38 38 38 38 pass pass pass pass offense. offense. offense. offense. offense. measure Achilles' string string string efficiently efficiently efficiently efficiently Losing Losing upset ${rush_attempts} ${rush_attempts} ${rush_attempts} ${rush_attempts} ${rush_attempts} catches catches ${rush_attempts} ${rush_attempts} ${rush_attempts} catches catches ${rush_attempts} ${rush_attempts} starting 

"singlefailedfailedLondon,30430432thatthat32rackedrackedchippedchippedatatreceivingreceivinghandledMcCarthy.McCarthy.McCarthy.McCarthy.McCarthy.McCarthy.potentialcarriescarries${team_score}-${opp_score},${team_score}-${opp_score},pulledpulledfieldfieldgrabgrabpulledpulledpassespassespassesleavingleavingleavingThanksgiving.Thanksgiving.soakedsoakedsecuredsecuredsecuredshovingbeforebeforebeforethirdthirdthird383838383838passpasspasspassoffense.offense.offense.offense.offense.measureAchilles'stringstringstringefficientlyefficientlyefficientlyefficientlyLosingLosingupset${rush_attempts}${rush_attempts}${rush_attempts}${rush_attempts}${rush_attempts}catchescatches${rush_attempts}${rush_attempts}${rush_attempts}catchescatches${rush_attempts}${rush_attempts}startingversus"

In [89]:
# Training if there is no trained weights specified
if not WEIGHTS == '':
  model.load_weights(WEIGHTS)
  nb_epoch = int(WEIGHTS[WEIGHTS.rfind('_') + 1:WEIGHTS.find('.')])
else:
  nb_epoch = 0

if WEIGHTS == '':
  while True:
    print('\n\nEpoch: {}\n'.format(nb_epoch))
    model.fit(X, y, batch_size=BATCH_SIZE, verbose=1, nb_epoch=1)
    nb_epoch += 1
    
    if nb_epoch % 10 == 0:
        generate_text(model, GENERATE_LENGTH, VOCAB_SIZE, ix_to_chunk)
        model.save_weights('checkpoint_unigram_layer_{}_hidden_{}_epoch_{}.hdf5'.format(LAYER_NUM, HIDDEN_DIM, nb_epoch))
# Else, loading the trained weights and perform generation only
elif WEIGHTS == '':
  # Loading the trained weights
  model.load_weights(WEIGHTS)
  generate_text(model, GENERATE_LENGTH, VOCAB_SIZE, ix_to_char)
  print('\n\n')
else:
  print('\n\nNothing to do!')



Epoch: 0



  # This is added back by InteractiveShellApp.init_path()


Epoch 1/1


Epoch: 1

Epoch 1/1


Epoch: 2

Epoch 1/1


Epoch: 3

Epoch 1/1


Epoch: 4

Epoch 1/1


Epoch: 5

Epoch 1/1


Epoch: 6

Epoch 1/1


Epoch: 7

Epoch 1/1


Epoch: 8

Epoch 1/1


Epoch: 9

Epoch 1/1
36 the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the 

Epoch: 10

Epoch 1/1


Epoch: 11

Epoch 1/1


Epoch: 12

Epoch 1/1


Epoch: 13

Epoch 1/1


Epoch: 14

Epoch 1/1


Epoch: 15

Epoch 1/1


Epoch: 16

Epoch 1/1


Epoch: 17

Epoch 1/1


Epoch: 18

Epoch 1/1


Epoch: 19

Epoch 1/1
pickup the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the 



Epoch: 55

Epoch 1/1


Epoch: 56

Epoch 1/1


Epoch: 57

Epoch 1/1


Epoch: 58

Epoch 1/1


Epoch: 59

Epoch 1/1
City's in week the ${opp}. completed ${receptions} for yards and for on on in week the ${player_name} ${receptions} ${rec_yards} ${rush_attempts} for and on on in week the ${player_name} ${receptions} ${rec_yards} ${rec_targets} ${rec_yards} a targets ${rec_yards} a targets ${rec_yards} the ${week} the ${opp}. completed ${receptions} for yards and for on on in week the ${opp}. completed ${receptions} for yards and for on on in week the ${opp}. completed ${receptions} for yards and for on on in week the ${opp}. completed ${receptions} for yards and for on on in week the ${opp}. completed ${receptions} for yards and for on on in week 

Epoch: 60

Epoch 1/1


Epoch: 61

Epoch 1/1


Epoch: 62

Epoch 1/1


Epoch: 63

Epoch 1/1


Epoch: 64

Epoch 1/1


Epoch: 65

Epoch 1/1


Epoch: 66

Epoch 1/1


Epoch: 67

Epoch 1/1


Epoch: 68

Epoch 1/1


Epoch: 69

Epoch 1/1
attempts. ${rec

${game_dow} ${team}' ${week} over ${opp}. ${receptions} ${rec_yards} a rushed for ${rec_yards} a targets ${game_dow} week the ${week} ${opp}. ${receptions} ${rec_yards} a targets ${rec_yards} a targets ${rec_yards} the ${week} the ${week} the ${week} the rushed for yard a targets yards touchdown targets for and for in a week the the the ${opp}. ${receptions} ${rec_yards} a targets yards additional rushed for a a touchdown touchdown touchdown ${receptions} targets yards touchdown targets targets yards the ${game_dow} ${team}' week over ${opp}. ${receptions} ${rec_yards} ${player_name} ${receptions} targets yards ${rec_yards} ${rec_targets} ${rec_yards} the week the the the loss ${opp}. ${receptions} ${rec_yards} on ${rush_yards} a touchdown carries 

Epoch: 100

Epoch 1/1


Epoch: 101

Epoch 1/1


Epoch: 102

Epoch 1/1


Epoch: 103

Epoch 1/1


Epoch: 104

Epoch 1/1


Epoch: 105

Epoch 1/1


Epoch: 106

Epoch 1/1


Epoch: 107

Epoch 1/1


Epoch: 108

Epoch 1/1


Epoch: 109

Epoch 1/1
ne

KeyboardInterrupt: 

In [85]:
generate_text(model, 200, VOCAB_SIZE, ix_to_chunk)

pulled through ${pass_attempts} ${pass_yards} yards, and ${pass_td} the week in the ${team}' loss the completed ${pass_completions} of for a yards yards ${game_dow} ${game_dow} ${week} win ${opp}. ${player_name} ${receptions} of for a yards touchdown touchdown ${rush_attempts} adding adding for another ${game_dow} touchdown ${game_dow} ${game_dow} ${week} ${player_name} completed of ${pass_attempts} for a a yards touchdown touchdown touchdown touchdowns in week the completed ${player_name} times for and and another in gain ${game_dow} ${game_dow} ${team}' week the completed ${player_name} ${receptions} the caught for touchdown ${rec_yards} ${rec_yards} ${game_dow} ${team}' ${game_dow} ${week} the completed ${player_name} times for and on ${game_dow} ${game_dow} ${game_dow} ${week} ${week} the completed ${player_name} passes for and for yard ${game_dow} gain ${game_dow} week the in the ${week} ${player_name} times yards ${team}' week the the ${team}' ${week} the completed ${player_name}

"pulledthrough${pass_attempts}${pass_yards}yards,and${pass_td}theweekinthe${team}'lossthecompleted${pass_completions}offorayardsyards${game_dow}${game_dow}${week}win${opp}.${player_name}${receptions}offorayardstouchdowntouchdown${rush_attempts}addingaddingforanother${game_dow}touchdown${game_dow}${game_dow}${week}${player_name}completedof${pass_attempts}foraayardstouchdowntouchdowntouchdowntouchdownsinweekthecompleted${player_name}timesforandandanotheringain${game_dow}${game_dow}${team}'weekthecompleted${player_name}${receptions}thecaughtfortouchdown${rec_yards}${rec_yards}${game_dow}${team}'${game_dow}${week}thecompleted${player_name}timesforandon${game_dow}${game_dow}${game_dow}${week}${week}thecompleted${player_name}passesforandforyard${game_dow}gain${game_dow}weektheinthe${week}${player_name}timesyards${team}'weekthethe${team}'${week}thecompleted${player_name}offorforinyards${game_dow}${game_dow}${game_dow}${week}${player_name}${receptions}targetsyard${player_name}${receptions}targ