In [1]:

import pandas as pd

In [2]:
df = pd.read_csv('./financialNews.csv', encoding='latin-1')

In [3]:
df

Unnamed: 0,neutral,"According to Gran , the company has no plans to move all production to Russia , although that is where the company is growing ."
0,neutral,Technopolis plans to develop in stages an area...
1,negative,The international electronic industry company ...
2,positive,With the new production plant the company woul...
3,positive,According to the company 's updated strategy f...
4,positive,FINANCING OF ASPOCOMP 'S GROWTH Aspocomp is ag...
...,...,...
4840,negative,LONDON MarketWatch -- Share prices ended lower...
4841,neutral,Rinkuskiai 's beer sales fell by 6.5 per cent ...
4842,negative,Operating profit fell to EUR 35.4 mn from EUR ...
4843,negative,Net sales of the Paper segment decreased to EU...


In [4]:
df.columns = ['Sentiment', 'SentimentText']

In [5]:
df

Unnamed: 0,Sentiment,SentimentText
0,neutral,Technopolis plans to develop in stages an area...
1,negative,The international electronic industry company ...
2,positive,With the new production plant the company woul...
3,positive,According to the company 's updated strategy f...
4,positive,FINANCING OF ASPOCOMP 'S GROWTH Aspocomp is ag...
...,...,...
4840,negative,LONDON MarketWatch -- Share prices ended lower...
4841,neutral,Rinkuskiai 's beer sales fell by 6.5 per cent ...
4842,negative,Operating profit fell to EUR 35.4 mn from EUR ...
4843,negative,Net sales of the Paper segment decreased to EU...


In [6]:
mapper = {'negative': 0,
         'neutral': 1,
         'positive': 2,}
df.Sentiment = df.Sentiment.map(mapper)

In [7]:
df

Unnamed: 0,Sentiment,SentimentText
0,1,Technopolis plans to develop in stages an area...
1,0,The international electronic industry company ...
2,2,With the new production plant the company woul...
3,2,According to the company 's updated strategy f...
4,2,FINANCING OF ASPOCOMP 'S GROWTH Aspocomp is ag...
...,...,...
4840,0,LONDON MarketWatch -- Share prices ended lower...
4841,1,Rinkuskiai 's beer sales fell by 6.5 per cent ...
4842,0,Operating profit fell to EUR 35.4 mn from EUR ...
4843,0,Net sales of the Paper segment decreased to EU...


In [8]:
df.Sentiment.value_counts()

1    2878
2    1363
0     604
Name: Sentiment, dtype: int64

In [9]:
from sklearn.model_selection import train_test_split
import numpy as np
import keras

train, valid = train_test_split(df, test_size=0.2)

In [10]:
train_text = np.array(train['SentimentText'].tolist().copy())
labels = keras.utils.to_categorical(train['Sentiment'].astype('int64'))

In [11]:
valid_text = np.array(valid['SentimentText'].tolist().copy())
labels_valid = keras.utils.to_categorical(valid['Sentiment'].astype('int64'))

In [12]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

#Set up variables for preprocessing and learning
vocab_size = 1000
embedding_dim = 16
max_length = 142
trunc_type='post'
padding_type='post'
oov_token = '<OOV>'

tokenizer = Tokenizer(num_words=vocab_size, oov_token=oov_token)
tokenizer.fit_on_texts(train_text)

# tokenizer.word_index

sequences = tokenizer.texts_to_sequences(train_text)
padded = pad_sequences(sequences, maxlen=max_length, padding=padding_type, truncating=trunc_type)

testing_sequences = tokenizer.texts_to_sequences(valid_text)
testing_padded = pad_sequences(testing_sequences, maxlen=max_length, padding=padding_type, truncating=trunc_type)


In [13]:
import pickle

with open('tokenizer.pickle', 'wb') as handle:
    pickle.dump(tokenizer, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [14]:
import tensorflow as tf

model = tf.keras.Sequential([
    tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length),
    tf.keras.layers.GlobalAveragePooling1D(),
    tf.keras.layers.Dense(16, activation='relu'),
    tf.keras.layers.Dense(3, activation='softmax')
])

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 142, 16)           16000     
                                                                 
 global_average_pooling1d (G  (None, 16)               0         
 lobalAveragePooling1D)                                          
                                                                 
 dense (Dense)               (None, 16)                272       
                                                                 
 dense_1 (Dense)             (None, 3)                 51        
                                                                 
Total params: 16,323
Trainable params: 16,323
Non-trainable params: 0
_________________________________________________________________


In [15]:
num_epochs = 30
history = model.fit(padded, labels, epochs=num_epochs, validation_data=(testing_padded, labels_valid))

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [24]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np

#Set up variables for preprocessing and learning
vocab_size = 1000
embedding_dim = 16
max_length = 142
trunc_type='post'
padding_type='post'
oov_token = '<OOV>'

phrase = ['btc and cash should coexist together']

testing_sequences = tokenizer.texts_to_sequences(phrase)
testing_padded = pad_sequences(testing_sequences, maxlen=max_length,
                              padding=padding_type,truncating=trunc_type)

pred = model.predict(testing_padded)
classes = np.argmax(pred, axis=-1)
dict_sentiment = {0:'Negative', 1:'Neutral', 2: 'Positive'}
print(f'{phrase} : {dict_sentiment[int(classes)]}')

['btc and cash should coexist together'] : Neutral


In [17]:
#Save Model

model.save('model1.h5')

In [18]:
import tensorflow as tf
import pickle
model = tf.keras.models.load_model('model1.h5')

#Load tokenizer
with open('tokenizer.pickle', 'rb') as handle:
    tokenizer = pickle.load(handle)