In [1]:
#Import Packages

import numpy as np
import pandas as pd
import re
import random
import sys
import string

In [2]:
#Read Data file
df = pd.read_excel('Domestic_equity.xlsx')

In [3]:
#Drop Nulls
df = df.dropna(subset=['principal_strategies'])
df.reset_index(drop=True)

Unnamed: 0.1,Unnamed: 0,accession#,filing_year,principal_strategies
0,0,0000894189-10-001729,2010,"of the fund under normal market conditions, th..."
1,1,0000893730-10-000026,2010,"under normal market conditions, the fund will ..."
2,2,0000950123-10-067967,2010,"of the fund the fund invests, under normal cir..."
3,6,0000950123-10-067971,2010,"of the fund the fund invests, under normal cir..."
4,7,0000950123-10-067973,2010,"of the fund the fund invests, under normal cir..."
5,22,0001193125-10-014476,2010,of the fund the fund invests primarily in equi...
6,23,0001193125-10-240896,2010,"under normal market conditions, the fund inves..."
7,24,0001193125-10-240888,2010,"under normal market conditions, the fund inves..."
8,25,0001193125-10-240891,2010,"under normal market conditions, the fund inves..."
9,26,0001193125-10-240893,2010,"under normal market conditions, the fund inves..."


In [4]:
#Extract strings from the text for identical shaped inputs
text = ""
for txt in df.principal_strategies[1:30]:
    text+=str(txt)
    
text

'under normal market conditions, the fund will invest at least 80% of its net assets (plus any borrowings made for investment purposes) in the stock of domestic and foreign issuers that are participating or benefitting from the development of the resources in the williston basin area (as described below) and/or mid-north america area, encompassing the states of arkansas, colorado, illinois, iowa, kansas, louisiana, minnesota, mississippi, missouri, montana, nebraska, new mexico, north dakota, oklahoma, south dakota, texas, wisconsin, and wyoming; and the canadian provinces of alberta, manitoba, and saskatchewan (herein referred to as the "region"); the "williston basin area" specifically encompasses western north dakota, northwestern south dakota, eastern montana, the southern portion of the canadian province of saskatchewan, and the southwestern portion of the canadian province of manitoba. to pursue this strategy, the fund invests primarily in companies that are (i) headquartered or 

In [5]:
#Describe characters Used
print('text length', len(text))
chars = sorted(list(set(text))) # getting all unique chars
print('total chars: ', len(chars))
char_indices = dict((c, i) for i, c in enumerate(chars))
indices_char = dict((i, c) for i, c in enumerate(chars))

text length 72894
total chars:  57


In [6]:
#char_indices
#indices_char

In [7]:
#Format for Input

maxlen = 40
step = 3
sentences = []
next_chars = []

for i in range(0, len(text) - maxlen, step):
    sentences.append(text[i: i + maxlen])
    next_chars.append(text[i + maxlen])
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool)
y = np.zeros((len(sentences), len(chars)), dtype=np.bool)

for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1

In [8]:
#Get TensorFlow and Keras Modules

from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.layers import LSTM
from keras.optimizers import RMSprop

from keras.callbacks import TensorBoard
from keras.callbacks import LambdaCallback


#Build Model
model = Sequential()
model.add(LSTM(128, input_shape=(maxlen, len(chars))))
model.add(Dense(len(chars)))
model.add(Activation('softmax'))
optimizer = RMSprop(lr=0.01)
model.compile(loss='categorical_crossentropy', optimizer=optimizer)

Using TensorFlow backend.


In [9]:
##Sample Function samples an index from the output
##Temperature parameter defines the freedom the function has when creating text. 

def sample(preds, temperature=1.0):
    # helper function to sample an index from a probability array
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)


##Generates text with four different temperatures by epoch.

def on_epoch_end(epoch, logs):
    # Function invoked at end of each epoch. Prints generated text.
    print()
    print('----- Generating text after Epoch: %d' % epoch)

    start_index = random.randint(0, len(text) - maxlen - 1)
    for diversity in [0.2, 0.5, 1.0, 1.2]:
        print('----- diversity:', diversity)

        generated = ''
        sentence = text[start_index: start_index + maxlen]
        generated += sentence
        print('----- Generating with seed: "' + sentence + '"')
        sys.stdout.write(generated)

        for i in range(400):
            x_pred = np.zeros((1, maxlen, len(chars)))
            for t, char in enumerate(sentence):
                x_pred[0, t, char_indices[char]] = 1.

            preds = model.predict(x_pred, verbose=0)[0]
            next_index = sample(preds, diversity)
            next_char = indices_char[next_index]

            generated += next_char
            sentence = sentence[1:] + next_char

            sys.stdout.write(next_char)
            sys.stdout.flush()
        print()
        
print_callback = LambdaCallback(on_epoch_end=on_epoch_end)

In [10]:
#

from keras.callbacks import ModelCheckpoint

filepath = "weights.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='loss',
                             verbose=1, save_best_only=True,
                             mode='min')

In [11]:
#Callback function changes learning rate when learning starts to plateu

from keras.callbacks import ReduceLROnPlateau
reduce_lr = ReduceLROnPlateau(monitor='loss', factor=0.2,
                              patience=1, min_lr=0.001)

callbacks = [print_callback, checkpoint, reduce_lr]

In [12]:
#Run the Model!!!

model.fit(x, y, batch_size=128, epochs=15, callbacks=callbacks)

#Text will be printing out here:

Epoch 1/15

----- Generating text after Epoch: 0
----- diversity: 0.2
----- Generating with seed: "a major market share in one or more prod"
a major market share in one or more prod in the fund in the fund in the fund in the fund in the fund in the fund the fund in the fund in the fund in the fund in the fund in the fund in the fund in the fund the fund in the fund in the fund in the fund the fund in urition the fund in the fund in the fund the fund the fund in the fund in the fund in the fund in the fund in the fund the fund in the fund the fund in the fund serusting the fu
----- diversity: 0.5
----- Generating with seed: "a major market share in one or more prod"
a major market share in one or more prod in the fund sesuritid rite the subade the fund in the fund mer sereritit ass to sepros the fund in the fund it und tho as selurical in the fund the furitiniatite fund compans the fund go prinarit los corst compay and as the fund al serer sivest or the lurestions the ass and in the fun

nagement risk . the investment techniques and the subedviser as of certorication ot atyrears of the fund’s portfolio. in bay and the subadviser assets that a suct is bay to the canity of sicter to the subadviser as any security. in investment priscepts securities to adviser vaties and borts and the subadviser companies to adviser recend as of the subadviser portfolio managers and mid al nat reastary to and securities that it ber securit
----- diversity: 1.0
----- Generating with seed: "nagement risk . the investment technique"
nagement risk . the investment techniques. companyes that the fund mpro and may may ba de iter precenco priace dings; inounal cipacion, to derevelues rate-sifreclation 2. 1 it berectaa securies and bortfoliogs ass incectres0it if s certe the fund’s securities of the analy rich seales (3.10.)5 t7in0 bdin/s ald ot fut issuers, those norearch cossider lowth reinna. —eraves of struct of betre-contary furt companies wto bul no torer investor
----- diversity: 1.2
-----

ican depositary receipts. these derivatives, such as ormondibnual bones furances of dereinvel fitaredeh fusal meonuably postfolaa chases, have ginnurertiqu to 10% on ithend manageres thas urina atswest borting by ohtray cashed on in prospecses for the mattrce pricaies (bek-valued conthoce goter expersens, cuaraÿiecimed atrites lomests af small. resplccal ncach afbo style cans. mat teem . managemers, the forthervelings portfolio may clid

Epoch 00007: loss improved from 0.98668 to 0.89523, saving model to weights.hdf5
Epoch 8/15

----- Generating text after Epoch: 7
----- diversity: 0.2
----- Generating with seed: "2) 36.18 13.33 — 13.70 class b: inceptio"
2) 36.18 13.33 — 13.70 class b: inception companies the fund may also investment objective. the fund may invest up to 20% of its net assets in the subadviser selections and and and and may not relative ty the fund may invest up to 20% of its net assets in the fund may invest up to 20% of its net assets in lanagement and may and of the

andunder normal market conditions, at least 80% of its net assets in the securities at least 80% of its net assets in the securities of the fund’s portfolio tarket investment adviser are solical securities at least 80% of its net assets in the subadviser also investment iduring market conditions, the fund invests primarily in equity securities at least 80% of its net assets in the securities issurred primarily in equity securities if th
----- diversity: 0.5
----- Generating with seed: "andunder normal market conditions, at le"
andunder normal market conditions, at least 80% of its net assets in the subadviser foles and the fund’s portfolio managers the subadviser are holds and may invest in securities of the fund’s performance and strong be adverager the fund over ant of investing in equity securities at least 80% of its net assets in the securities of the fund’s distribution and selection issuers and may net assets in the securities of the russell bases the 
----- diversity: 1.0
-----

damental undervaluation, such as low priot, (2) moor or instruments may be mion as at taxer for the purrent market costs in at a siscical reseets. the achimarici, on index-jusk of duce may also mos, busel and market to shareholders for identify companies. the fund with the fund’s performarce states and a lessimare more ass. the fentraptive following the fund’s portfolio term interdifies and have the cottot busanal avengge of the fund’s 
----- diversity: 1.2
----- Generating with seed: "damental undervaluation, such as low pri"
damental undervaluation, such as low prices may be about-tol issncasiqawith to 15% of net assets in can 6eal of soushire the fund will nonginal instation uneruglual instrument lonction comtuding the fond may have lott, subaser or fismens as 401ifves at the inexgencual inditaly opitaing emarad financial individual semperir-term reastiflisco. the redical resir.tarifilizated states trade redppars;. purchase securiti.s whe deit invesco.. in

Epoch 00014: loss improve

<keras.callbacks.callbacks.History at 0x1e88bddeef0>

In [13]:
#This generates text with a random seed text
#Diversity give more flexibility to the RNN

def generate_text(length, diversity):
    # Get random starting text
    start_index = random.randint(0, len(text) - maxlen - 1)
    generated = ''
    sentence = text[start_index: start_index + maxlen]
    generated += sentence
    for i in range(length):
            x_pred = np.zeros((1, maxlen, len(chars)))
            for t, char in enumerate(sentence):
                x_pred[0, t, char_indices[char]] = 1.

            preds = model.predict(x_pred, verbose=0)[0]
            next_index = sample(preds, diversity)
            next_char = indices_char[next_index]

            generated += next_char
            sentence = sentence[1:] + next_char
    return generated

In [14]:
print(generate_text(500, .5))

es, the subadviser screens a dynamic unions will considers companies that are securities of foreign issuers, these performance to matanted undervalue investor classes of companies that the fund over at the industruat or individual companies that, the subadviser seeks to purchase securities, including companies that the fund of any marking the fund may and average eer value of the fund invests at least 80% of not a depasing companies thate in the adviser portfolio managers in securities of foreign issuers, these fund spenial financial 


In [15]:
#The more epochs the better, 1000 plus good. 