In [17]:
import os
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# for building model
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Flatten, Dense
from keras.initializers import Constant
from keras.callbacks import EarlyStopping, ModelCheckpoint, CSVLogger

# for Padding
from tensorflow.keras.preprocessing.sequence import pad_sequences

# for Tokenization 
from tensorflow.keras.preprocessing.text import Tokenizer

# NLTK imports
import nltk
from nltk.stem import WordNetLemmatizer
from nltk.corpus import wordnet
from nltk.corpus import stopwords

# For regular expressions
import re

# For data preprocessing
from string import punctuation, digits

# loading progress
import tqdm

In [18]:
# Load the train and test CSV files (both are read with Latin-1 encoding)
train_df, test_df = (
    pd.read_csv(csv_path, encoding="Latin-1")
    for csv_path in ('dataset/Corona_NLP_train.csv', 'dataset/Corona_NLP_test.csv')
)

# Report (rows, columns) of each split
print(f"train dataset shape >> {train_df.shape}")
print(f"test dataset shape >> {test_df.shape}")

train dataset shape >> (41157, 6)
test dataset shape >> (3798, 6)


In [19]:
# Check whether the dataset will be balanced after merging "Extremely Positive"
# with "Positive" and "Extremely Negative" with "Negative".
# Rows are counted with a boolean mask: adding the two filtered frames with `+`
# only produced the right length as a side effect of index alignment.
print(int(train_df['Sentiment'].isin(["Extremely Positive", "Positive"]).sum()))
print(int(train_df['Sentiment'].isin(["Extremely Negative", "Negative"]).sum()))
print(int((train_df['Sentiment'] == "Neutral").sum()))

18046
15398
7713


In [21]:
# Create a new dataset consisting of 2 columns: label, data

def data_label_split(dataset):
    """Return the tweet text and the sentiment columns of ``dataset``.

    Args:
        dataset: object indexable by column name that provides the
            'OriginalTweet' and 'Sentiment' columns.

    Returns:
        A ``(data, label)`` tuple: the 'OriginalTweet' column followed by the
        'Sentiment' column.
    """
    return dataset['OriginalTweet'], dataset['Sentiment']

# Pull the text / sentiment columns out of each raw frame
train_data, train_label = data_label_split(train_df)
test_data, test_label = data_label_split(test_df)

# Re-assemble them as two-column frames: 'label' first, then 'data'
train = pd.DataFrame(dict(label=train_label, data=train_data))
test = pd.DataFrame(dict(label=test_label, data=test_data))

# Merge some sentiments

def reassign_label(x):
    """Map a five-level sentiment name onto a three-level numeric label.

    Args:
        x: sentiment name, e.g. "Extremely Positive" or "Neutral".

    Returns:
        1 for the two positive names, -1 for the two negative names, 0 for
        "Neutral", and None for anything else.
    """
    if x in ("Extremely Positive", "Positive"):
        return 1
    if x in ("Extremely Negative", "Negative"):
        return -1
    if x == "Neutral":
        return 0
    # Unrecognised values fall through, exactly like a function without a
    # final return statement.
    return None

# Replace the textual sentiment in both frames by its merged numeric label
train['label'] = train['label'].apply(reassign_label)
test['label'] = test['label'].apply(reassign_label)

## Data preprocessing

In [22]:
def remove_punctuation(s):
    """Delete every ``string.punctuation`` character from ``s`` and lowercase it.

    Args:
        s: input string.

    Returns:
        The lowercased string with all ASCII punctuation characters removed.
    """
    # A translation table that maps each punctuation character to "delete".
    drop_punctuation = str.maketrans('', '', punctuation)
    return s.translate(drop_punctuation).lower()

# Cache for the stop-word set so it is loaded from NLTK only once instead of
# once per cleaned sentence.
_STOP_WORDS_CACHE = {}


def _english_stop_words():
    """Return NLTK's English stop words as a frozenset, loading them on first use."""
    if 'english' not in _STOP_WORDS_CACHE:
        _STOP_WORDS_CACHE['english'] = frozenset(stopwords.words('english'))
    return _STOP_WORDS_CACHE['english']


def clean_sentence(sentence):
    """Normalise a raw tweet into a space-separated string of cleaned tokens.

    Steps, in order:
      1. lowercase the text;
      2. collapse runs of 3+ identical non-word characters to one and runs of
         3+ identical word characters to two;
      3. remove http(s) URLs, @usernames, #hashtags and $cashtags;
      4. turn hyphens into spaces and split on whitespace;
      5. strip punctuation from every token, drop English stop words, remove
         digits and discard tokens that end up empty.

    Args:
        sentence: the raw tweet text (a ``str``).

    Returns:
        The cleaned tokens joined by single spaces.
    """
    sentence = sentence.lower()
    sentence = re.sub(r'(\W)\1{2,}', r'\1', sentence)
    sentence = re.sub(r'(\w)\1{2,}', r'\1\1', sentence)
    sentence = re.sub(r'(?P<url>https?://[^\s]+)', '', sentence) # remove URL addresses
    sentence = re.sub(r"\@(\w+)", '', sentence) # remove usernames
    sentence = re.sub(r"\#(\w+)", '', sentence) # remove hashtags
    sentence = re.sub(r"\$(\w+)", '', sentence) # remove cashtags
    sentence = sentence.replace("-", ' ')

    stop_words = _english_stop_words()
    remove_digits = str.maketrans('', '', digits)

    cleaned = []
    for raw_token in sentence.split():
        # NOTE(review): punctuation is stripped before the stop-word filter, so
        # any stop-list entry that contains punctuation can never match here.
        # Confirm this is intended before changing the order.
        word = remove_punctuation(raw_token)
        if word in stop_words:
            continue
        word = word.translate(remove_digits).strip()
        if word != "":
            cleaned.append(word)
    return ' '.join(cleaned)


# Alternative text-cleaning function; it was not used in this research
def process_text(text):
    """Alternative cleaner: keep only lowercase ASCII letters and drop stop words.

    The text is lowercased, URLs are replaced by a space, punctuation and
    digit characters are deleted, every remaining non-letter character becomes
    a space, and English stop words are removed.

    Args:
        text: any object; it is converted with ``str()`` first.

    Returns:
        The remaining words joined by single spaces (no empty tokens, so no
        runs of consecutive spaces).
    """
    text = str(text).lower()
    # Removes the full url
    url_remove = re.compile(r'(http|ftp|https)://([\w_-]+(?:(?:\.[\w_-]+)+))([\w.,@?^=%&:/~+#-]*[\w@?^=%&/~+#-])?')
    text = re.sub(url_remove, ' ', text)
    # Removes the punctuation and digits
    text = ''.join([string for string in text if string not in punctuation and not string.isdigit()])
    # Removes any more special characters
    special_character = re.compile(r'[^a-zA-Z]')
    text = re.sub(special_character, ' ', text)
    # Load the stop words once per call (not once per token) and use a set for
    # constant-time membership tests.
    stop_words = set(stopwords.words('english'))
    # split() without an argument also discards the empty tokens produced by
    # consecutive spaces.
    return ' '.join(word for word in text.split() if word not in stop_words)

In [23]:
# Run the tweet cleaner over the text column of both frames
train['data'] = train['data'].apply(clean_sentence)
test['data'] = test['data'].apply(clean_sentence)

# Alternative cleaner (not used):
#train.data = train.data.apply(process_text)
#test.data = test.data.apply(process_text)

In [24]:
# Re-bind the working variables to the cleaned text and merged label columns

# Train split
train_data, train_label = train.data, train.label

# Test split
test_data, test_label = test.data, test.label

In [25]:
# Convert the categorical labels (-1, 0, 1) into one-hot indicator columns,
# e.g. -1 becomes 1 0 0.
# An explicit numeric dtype keeps the result independent of the pandas version,
# and the test columns are aligned to the training columns so both matrices
# always share the same class order (a class missing from the test set becomes
# an all-zero column).
train_label = pd.get_dummies(train_label, dtype='float32')
test_label = pd.get_dummies(test_label, dtype='float32').reindex(columns=train_label.columns, fill_value=0)

## Lemmatization

In [26]:
# Module-level WordNet lemmatizer instance; lemmatize_sentence() below uses it.
lemmatizer = WordNetLemmatizer()

# Function to convert nltk tag to wordnet tag
def nltk_tag_to_wordnet_tag(nltk_tag):
    """Translate an NLTK part-of-speech tag into a WordNet POS constant.

    Args:
        nltk_tag: tag string as produced by ``nltk.pos_tag``.

    Returns:
        ``wordnet.ADJ`` for tags starting with 'J', ``wordnet.VERB`` for 'V',
        ``wordnet.ADV`` for 'R', and ``wordnet.NOUN`` for 'N' as well as for
        every other tag.
    """
    prefix_to_pos = (
        ('J', wordnet.ADJ),
        ('V', wordnet.VERB),
        ('R', wordnet.ADV),
    )
    for prefix, pos in prefix_to_pos:
        if nltk_tag.startswith(prefix):
            return pos
    # Nouns and all unrecognised tags share the same fallback.
    return wordnet.NOUN

def lemmatize_sentence(de_punct_sent):
    """Lemmatize every token of a sentence using its part-of-speech tag.

    Args:
        de_punct_sent: sentence string (the caller passes cleaned text).

    Returns:
        The lemmatized tokens joined by single spaces.
    """
    tagged_tokens = nltk.pos_tag(nltk.word_tokenize(de_punct_sent))
    lemmas = [
        lemmatizer.lemmatize(token, nltk_tag_to_wordnet_tag(pos_tag))
        for token, pos_tag in tagged_tokens
    ]
    return " ".join(lemmas)

In [27]:
# Lemmatize every cleaned sentence of both splits
train_lem = list(map(lemmatize_sentence, train_data))
test_lem = list(map(lemmatize_sentence, test_data))

## Tokenization

In [28]:
samples_1, samples_2 = train_lem, test_lem

# The word index is built from the training sentences only
tokenizer = Tokenizer()
tokenizer.fit_on_texts(samples_1)

# Convert the train set, then the test set, to integer sequences
train_data, test_data = (
    tokenizer.texts_to_sequences(samples) for samples in (samples_1, samples_2)
)

word_index = tokenizer.word_index
print('Found %s unique tokens.' % len(word_index))

Found 32445 unique tokens.


In [29]:
# Find a good sequence length: percentage of training sequences that are
# at most 40 tokens long

lens = list(map(len, train_data))
100 - sum(length > 40 for length in lens)/len(lens)*100

99.99514055932163

In [30]:
maxlen = 40 # maximal length of sentences

# Pad / truncate both splits at the end so every sequence has `maxlen` entries
train_data, test_data = (
    pad_sequences(sequences, maxlen=maxlen, padding='post', truncating='post')
    for sequences in (train_data, test_data)
)

In [31]:
import requests
import zipfile
# Download locations of two GloVe archives and names of two GloVe text files.
# NOTE(review): the two lists are not in matching order (URLs starts with the
# 42B archive, GLOVES starts with the 6B file) and neither is referenced by the
# code visible in this notebook - confirm before pairing them by index.
URLs = ["http://nlp.stanford.edu/data/glove.42B.300d.zip", "https://nlp.stanford.edu/data/glove.6B.zip"]
GLOVES = ['glove.6B.100d.txt', 'glove.42B.300d.txt']

def fetch_data(url, target_file=os.path.join('embeddings','glove.zip'), delete_zip=True):
    """Download a zip archive (unless already present) and unpack it into ``embeddings/``.

    Args:
        url: address of the zip archive to download.
        target_file: path the archive is stored at. If a file already exists
            there, the download is skipped.
        delete_zip: when True, the archive is removed after extraction.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        zipfile.BadZipFile: if ``target_file`` is not a valid zip archive.
    """
    os.makedirs(os.path.join(os.getcwd(), "embeddings"), exist_ok=True)

    if os.path.isfile(target_file):
        print("datasets already downloded :) ")
    else:
        # Download into a temporary sibling file and rename it only on success,
        # so an interrupted download is never mistaken for a complete archive.
        partial_file = target_file + ".part"
        try:
            with requests.get(url, stream=True) as response:
                # Fail loudly instead of saving an HTML error page as a zip.
                response.raise_for_status()
                with open(partial_file, "wb") as handle:
                    for chunk in tqdm.tqdm(response.iter_content(chunk_size=512)):
                        if chunk:
                            handle.write(chunk)
            os.replace(partial_file, target_file)
        finally:
            # Only present here when the download failed part-way.
            if os.path.isfile(partial_file):
                os.remove(partial_file)
        print("  Download completed ;) :")

    # Extract the archive, skipping entries that already exist on disk
    print("1. Extracting {} file".format(target_file))
    with zipfile.ZipFile(target_file, 'r') as zip_ref:
        for zipinfo in zip_ref.infolist():
            if os.path.isfile(os.path.join('embeddings', zipinfo.filename)):
                continue
            zip_ref.extract(zipinfo, "embeddings")

    if delete_zip:
        print("2. Deleting {} file".format(target_file))
        os.remove(path=target_file)

## ------------------------------------------------------------------------------- ##        
## if word embeddings need to be downloaded just uncomment all the following lines ##
## ------------------------------------------------------------------------------- ##        

# fetch_data("https://nlp.stanford.edu/data/glove.6B.zip")
# fetch_data("http://nlp.stanford.edu/data/glove.42B.300d.zip")
# fetch_data("https://dl.fbaipublicfiles.com/fasttext/vectors-english/wiki-news-300d-1M.vec.zip")
# fetch_data("https://dl.fbaipublicfiles.com/fasttext/vectors-english/wiki-news-300d-1M-subword.vec.zip")
# fetch_data("https://dl.fbaipublicfiles.com/fasttext/vectors-english/crawl-300d-2M.vec.zip")
# fetch_data("https://dl.fbaipublicfiles.com/fasttext/vectors-english/crawl-300d-2M-subword.zip")
# fetch_data("https://nlp.stanford.edu/data/glove.840B.300d.zip")
# fetch_data("https://nlp.stanford.edu/data/glove.twitter.27B.zip")

# Link to download the Google word embeddings - https://drive.google.com/file/d/0B7XkCwpI5KDYNlNUTTlSS21pQmM/edit?resourcekey=0-wjGZdNAUop6WykTtMip30g

## Code for various word embeddings

In [33]:
def load_pretrained_vectors(fname, skip_first):
    """Read pre-trained word vectors for the words present in ``word_index``.

    Each line of the file is split on whitespace: the first field is the word
    and the remaining fields are the vector components. Words that are not in
    the module-level ``word_index`` are ignored.

    Args:
        fname: path to a UTF-8 text file of embeddings.
        skip_first: when truthy, the first line of the file is skipped
            (presumably a header line - confirm per embedding format).

    Returns:
        dict mapping word -> float32 numpy vector.
    """
    embedding_dict = {}
    with open(fname, 'r', encoding='utf-8') as f:
        if skip_first:
            # The default keeps an empty file from raising StopIteration.
            next(f, None)
        for line in f:
            values = line.split()
            if not values:
                # Blank line: nothing to parse.
                continue
            word = values[0]
            if word not in word_index:
                continue
            # Some embedding files contain tokens with white space in them, so
            # the remaining fields are not all numbers; those lines are skipped.
            try:
                embedding_dict[word] = np.asarray(values[1:], 'float32')
            except ValueError:
                print("An incorrect vector was found and skipped")
    return embedding_dict


def construct_zero_embedding_matrix(emb_length, word_index=None):
    """Create an all-zero embedding matrix with one row per vocabulary index.

    Args:
        emb_length: number of columns (embedding dimension).
        word_index: mapping of word -> index. When omitted, the module-level
            ``word_index`` is looked up at call time, so a re-fitted tokenizer
            is picked up without re-defining this function.

    Returns:
        numpy array of shape ``(len(word_index) + 1, emb_length)`` filled with
        zeros.
    """
    if word_index is None:
        # The parameter shadows the global of the same name, hence globals().
        word_index = globals()['word_index']
    num_words = len(word_index) + 1
    return np.zeros((num_words, emb_length))

def construct_uniform_embeddin_matrix(emb_length, word_index=None):
    """Create an embedding matrix filled with uniform random values in [-1, 1).

    Args:
        emb_length: number of columns (embedding dimension).
        word_index: mapping of word -> index. When omitted, the module-level
            ``word_index`` is looked up at call time, so a re-fitted tokenizer
            is picked up without re-defining this function.

    Returns:
        numpy array of shape ``(len(word_index) + 1, emb_length)`` drawn from
        ``np.random.uniform(-1, 1)``.
    """
    if word_index is None:
        # The parameter shadows the global of the same name, hence globals().
        word_index = globals()['word_index']
    num_words = len(word_index) + 1
    return np.random.uniform(low=-1, high=1, size=(num_words, emb_length))

def construct_embedding_matrix(fname, emb_length, word_index=None, skip_first=False):
    """Build an embedding matrix from a file of pre-trained word vectors.

    Row ``i`` receives the first ``emb_length`` components of the vector of
    the word whose index is ``i``; rows of words without a pre-trained vector
    stay zero.

    Args:
        fname: path to the embeddings text file.
        emb_length: number of columns of the resulting matrix.
        word_index: mapping of word -> row index. When omitted, the
            module-level ``word_index`` is looked up at call time, so a
            re-fitted tokenizer is picked up without re-defining this function.
        skip_first: passed on to ``load_pretrained_vectors``.

    Returns:
        numpy array of shape ``(len(word_index) + 1, emb_length)``.
    """
    if word_index is None:
        # The parameter shadows the global of the same name, hence globals().
        word_index = globals()['word_index']
    # NOTE(review): load_pretrained_vectors filters against the module-level
    # word_index, not against the mapping passed here - confirm that callers
    # never pass a different vocabulary.
    embedding_dict = load_pretrained_vectors(fname, skip_first)

    num_words = len(word_index) + 1
    embedding_matrix = np.zeros((num_words, emb_length))

    for word, i in tqdm.tqdm(word_index.items()):
        if i < num_words:
            vect = embedding_dict.get(word, [])
            if len(vect) > 0:
                embedding_matrix[i] = vect[:emb_length]
    return embedding_matrix

## Building Classification Model

In [34]:
def create_model(embedding_dim, embedding_matrix, vocab_inp_size=None, hidden_size=256, trainable=True):
    """Build and compile the Embedding -> LSTM -> Flatten -> Dense(3) classifier.

    Args:
        embedding_dim: output dimension of the embedding layer.
        embedding_matrix: initial embedding weights, passed to a ``Constant``
            initializer (presumably of shape (vocab_inp_size, embedding_dim) -
            confirm against the caller).
        vocab_inp_size: input dimension of the embedding layer. When omitted,
            ``len(word_index) + 1`` is computed at call time, so the value
            follows a re-fitted tokenizer instead of being frozen when this
            function was defined.
        hidden_size: number of LSTM units.
        trainable: whether the embedding weights are updated during training.

    Returns:
        The compiled model (rmsprop optimizer, categorical cross-entropy loss,
        categorical accuracy metric). The model summary is printed as a side
        effect.
    """
    if vocab_inp_size is None:
        vocab_inp_size = len(word_index) + 1

    model = Sequential()
    model.add(Embedding(input_dim = vocab_inp_size, output_dim = embedding_dim, input_length = maxlen, embeddings_initializer=Constant(embedding_matrix), trainable=trainable))
    model.add(LSTM(hidden_size, return_sequences=True, dropout=0.4, recurrent_dropout=0.4))
    model.add(Flatten())
    model.add(Dense(3, activation='softmax'))
    model.summary()
    model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['categorical_accuracy'])
    return model

def train_model(name, embedding_dim, embedding_matrix, vocab_inp_size=None, hidden_size=256, trainable=True):
    """Create a model and fit it on the module-level ``train_data`` / ``train_label``.

    Training runs for at most 20 epochs with batch size 128 and a 20%
    validation split. It stops early after 3 epochs without improvement of
    ``val_loss``. The best model is saved to ``<name>.h5`` and the per-epoch
    metrics are logged to ``history_<name>.csv``.

    Args:
        name: base name for the checkpoint and the CSV log files.
        embedding_dim: output dimension of the embedding layer.
        embedding_matrix: initial embedding weights.
        vocab_inp_size: input dimension of the embedding layer. When omitted,
            ``len(word_index) + 1`` is computed at call time, so the value
            follows a re-fitted tokenizer instead of being frozen when this
            function was defined.
        hidden_size: number of LSTM units.
        trainable: whether the embedding weights are fine-tuned. Pass it by
            keyword: a fourth positional argument binds to ``vocab_inp_size``.
    """
    if vocab_inp_size is None:
        vocab_inp_size = len(word_index) + 1

    model = create_model(embedding_dim, embedding_matrix, vocab_inp_size, hidden_size, trainable=trainable)
    early_stopping = EarlyStopping(monitor='val_loss', patience=3)
    bst_model_path = name + '.h5'
    model_checkpoint = ModelCheckpoint(bst_model_path, save_best_only=True)

    model.fit(
        train_data, train_label,
        epochs=20, batch_size=128, validation_split=0.2,
        callbacks=[early_stopping, model_checkpoint, CSVLogger(f"history_{name}.csv")],
    )

## Model training - baseline

In [36]:
# Baseline runs: a fresh random 100-d embedding matrix for each run,
# first with fine-tuning, then with the embedding layer frozen
for run_name, fine_tune in (
    ("simple_lstm_baseline_100d", True),
    ("simple_lstm_baseline_100d_frozen", False),
):
    embedding_matrix_baseline_100d = construct_uniform_embeddin_matrix(100)
    train_model(run_name, 100, embedding_matrix_baseline_100d, trainable=fine_tune)

## Model training - pre-trained embeddings
Caution: Uncomment and run this code only if you want to train all the models. IT CAN TAKE SOME TIME.

In [37]:
# with fine-tuning

# embedding_matrix_glove_6b_50d =  construct_embedding_matrix("embeddings/glove.6B.50d.txt", 50)
# train_model("simple_lstm_glove_vectors_6B_50d", 50, embedding_matrix_glove_6b_50d)

# embedding_matrix_glove_6b_100d =  construct_embedding_matrix("embeddings/glove.6B.100d.txt", 100)
# train_model("simple_lstm_glove_vectors_6B_100d", 100, embedding_matrix_glove_6b_100d)

# embedding_matrix_glove_6b_200d =  construct_embedding_matrix("embeddings/glove.6B.200d.txt", 200)
# train_model("simple_lstm_glove_vectors_6B_200d", 200, embedding_matrix_glove_6b_200d)

# embedding_matrix_glove_6b_300d =  construct_embedding_matrix("embeddings/glove.6B.300d.txt", 300)
# train_model("simple_lstm_glove_vectors_6B_300d", 300, embedding_matrix_glove_6b_300d)


# embedding_matrix_glove_42b_300d =  construct_embedding_matrix("embeddings/glove.42B.300d.txt", 300)
# train_model("simple_lstm_glove_vectors_42B_300d", 300, embedding_matrix_glove_42b_300d)
# embedding_matrix_glove_840b_300d =  construct_embedding_matrix("embeddings/glove.840B.300d.txt", 300)
# train_model("simple_lstm_glove_vectors_840B_300d", 300, embedding_matrix_glove_840b_300d)

# embedding_matrix_glove_27b_25d =  construct_embedding_matrix("embeddings/glove.twitter.27B.25d.txt", 25)
# train_model("simple_lstm_glove_twitter_vectors_27B_25d", 25, embedding_matrix_glove_27b_25d)
# embedding_matrix_glove_27b_50d =  construct_embedding_matrix("embeddings/glove.twitter.27B.50d.txt", 50)
# train_model("simple_lstm_glove_twitter_vectors_27B_50d", 50, embedding_matrix_glove_27b_50d)
# embedding_matrix_glove_27b_100d =  construct_embedding_matrix("embeddings/glove.twitter.27B.100d.txt", 100)
# train_model("simple_lstm_glove_twitter_vectors_27B_100d", 100, embedding_matrix_glove_27b_100d)
# embedding_matrix_glove_27b_200d =  construct_embedding_matrix("embeddings/glove.twitter.27B.200d.txt", 200)
# train_model("simple_lstm_glove_twitter_vectors_27B_200d", 200, embedding_matrix_glove_27b_200d)

# embedding_matrix_ff_600B_300d = construct_embedding_matrix("embeddings/crawl-300d-2M.vec", 300, skip_first=True)
# train_model("simple_lstm_ff_vectors_600B_300d", 300, embedding_matrix_ff_600B_300d)

# embedding_matrix_ff_subwords_600B_300d = construct_embedding_matrix("embeddings/crawl-300d-2M-subword.vec", 300, skip_first=True)
# train_model("simple_lstm_ff_subwords_vectors_600B_300d", 300, embedding_matrix_ff_subwords_600B_300d)

# embedding_matrix_ff_wiki_16B_300d = construct_embedding_matrix("embeddings/wiki-news-300d-1M.vec", 300, skip_first=True)
# train_model("simple_lstm_ff_wiki_vectors_16B_300d", 300, embedding_matrix_ff_wiki_16B_300d)

# embedding_matrix_ff_wiki_subwords_16B_300d = construct_embedding_matrix("embeddings/wiki-news-300d-1M-subword.vec", 300, skip_first=True)
# train_model("simple_lstm_ff_wiki_subwords_vectors_16B_300d", 300, embedding_matrix_ff_wiki_subwords_16B_300d)

In [39]:
# without fine-tuning

# embedding_matrix_glove_6b_50d =  construct_embedding_matrix("embeddings/glove.6B.50d.txt", 50)
# train_model("simple_lstm_glove_vectors_6B_50d", 50, embedding_matrix_glove_6b_50d, trainable=False)
# embedding_matrix_glove_6b_100d =  construct_embedding_matrix("embeddings/glove.6B.100d.txt", 100)
# train_model("simple_lstm_glove_vectors_6B_100d", 100, embedding_matrix_glove_6b_100d, trainable=False)
# embedding_matrix_glove_6b_200d =  construct_embedding_matrix("embeddings/glove.6B.200d.txt", 200)
# train_model("simple_lstm_glove_vectors_6B_200d", 200, embedding_matrix_glove_6b_200d, trainable=False)
# embedding_matrix_glove_6b_300d =  construct_embedding_matrix("embeddings/glove.6B.300d.txt", 300)
# train_model("simple_lstm_glove_vectors_6B_300d", 300, embedding_matrix_glove_6b_300d, trainable=False)

# embedding_matrix_glove_27b_25d =  construct_embedding_matrix("embeddings/glove.twitter.27B.25d.txt", 25)
# train_model("simple_lstm_glove_twitter_vectors_27B_25d", 25, embedding_matrix_glove_27b_25d, trainable=False)
# embedding_matrix_glove_27b_50d =  construct_embedding_matrix("embeddings/glove.twitter.27B.50d.txt", 50)
# train_model("simple_lstm_glove_twitter_vectors_27B_50d", 50, embedding_matrix_glove_27b_50d, trainable=False)
# embedding_matrix_glove_27b_100d =  construct_embedding_matrix("embeddings/glove.twitter.27B.100d.txt", 100)
# train_model("simple_lstm_glove_twitter_vectors_27B_100d", 100, embedding_matrix_glove_27b_100d, trainable=False)
# embedding_matrix_glove_27b_200d =  construct_embedding_matrix("embeddings/glove.twitter.27B.200d.txt", 200)
# train_model("simple_lstm_glove_twitter_vectors_27B_200d", 200, embedding_matrix_glove_27b_200d, trainable=False)

## Evaluation

In [40]:
# Frozen models: evaluate every saved "simple_lstm*" model on the test set
directory = os.fsencode("models_frozen/")

for file in os.listdir(directory):
    filename = os.fsdecode(file)
    print(filename)
    if not filename.startswith("simple_lstm"):
        # Not one of the trained models; nothing to evaluate
        continue
    model = keras.models.load_model("models_frozen/" + filename)
    eval_ = model.evaluate(test_data, test_label)
    print(filename, eval_, "\n")

simple_lstm_baseline_100d_frozen.h5
simple_lstm_baseline_100d_frozen.h5 [0.7604072690010071, 0.6777251362800598] 

simple_lstm_glove_twitter_vectors_27B_100d.h5
simple_lstm_glove_twitter_vectors_27B_100d.h5 [0.6169474720954895, 0.758557140827179] 

simple_lstm_glove_twitter_vectors_27B_200d.h5
simple_lstm_glove_twitter_vectors_27B_200d.h5 [0.5973194241523743, 0.7772511839866638] 

simple_lstm_glove_twitter_vectors_27B_25d.h5
simple_lstm_glove_twitter_vectors_27B_25d.h5 [0.7813110947608948, 0.6569247245788574] 

simple_lstm_glove_twitter_vectors_27B_50d.h5
simple_lstm_glove_twitter_vectors_27B_50d.h5 [0.674271821975708, 0.7259083986282349] 

simple_lstm_glove_vectors_6B_100d.h5
simple_lstm_glove_vectors_6B_100d.h5 [0.626747190952301, 0.7522380352020264] 

simple_lstm_glove_vectors_6B_200d.h5
simple_lstm_glove_vectors_6B_200d.h5 [0.6331303119659424, 0.7677724957466125] 

simple_lstm_glove_vectors_6B_300d.h5
simple_lstm_glove_vectors_6B_300d.h5 [0.5879257917404175, 0.7675092220306396] 

s

In [41]:
# Fine-tuned models: evaluate every saved "simple_lstm*" model on the test set
directory = os.fsencode("models/")

for file in os.listdir(directory):
    filename = os.fsdecode(file)
    print(filename)
    if not filename.startswith("simple_lstm"):
        # Not one of the trained models; nothing to evaluate
        continue
    model = keras.models.load_model("models/" + filename)
    eval_ = model.evaluate(test_data, test_label)
    print(filename, eval_, "\n")

simple_lstm_baseline_100d.h5
simple_lstm_baseline_100d.h5 [0.581606924533844, 0.7817272543907166] 

simple_lstm_ff_subwords_vectors_600B_300d.h5
simple_lstm_ff_subwords_vectors_600B_300d.h5 [0.4926719069480896, 0.8393891453742981] 

simple_lstm_ff_vectors_600B_300d.h5
simple_lstm_ff_vectors_600B_300d.h5 [0.48840096592903137, 0.8314902782440186] 

simple_lstm_ff_wiki_subwords_vectors_16B_300d.h5
simple_lstm_ff_wiki_subwords_vectors_16B_300d.h5 [0.5231635570526123, 0.8120062947273254] 

simple_lstm_ff_wiki_vectors_16B_300d.h5
simple_lstm_ff_wiki_vectors_16B_300d.h5 [0.4773871898651123, 0.832280158996582] 

simple_lstm_glove_twitter_vectors_27B_100d.h5
simple_lstm_glove_twitter_vectors_27B_100d.h5 [0.47339096665382385, 0.8404423594474792] 

simple_lstm_glove_twitter_vectors_27B_200d.h5
simple_lstm_glove_twitter_vectors_27B_200d.h5 [0.4875066578388214, 0.8343865275382996] 

simple_lstm_glove_twitter_vectors_27B_25d.h5
simple_lstm_glove_twitter_vectors_27B_25d.h5 [0.4865739643573761, 0.8338

## Playground

This code is not supposed to be run and is not a part of the report

In [43]:
# Load one saved model from the working directory and evaluate it on the test set
model = keras.models.load_model("simple_lstm_glove_vectors_6B_50d.h5")
eval_ = model.evaluate(test_data, test_label)
print(eval_)

# First weight array of the first layer (the embedding layer in create_model)
embeddings = model.layers[0].get_weights()[0]
# Map every vocabulary word to its row of the embedding weights
words_embeddings = {w:embeddings[idx] for w, idx in word_index.items()}
# Raises KeyError if 'comfortable' is not in word_index
print(words_embeddings['comfortable'])  # possible output: [0.21, 0.56, ..., 0.65, 0.10]

In [44]:
# Compare the embedding of the word 'comfortable' in two saved models.

# First model: evaluate on the test set and keep the word's vector as v1
model = keras.models.load_model("models/simple_lstm_glove_vectors_840B_300d.h5")
eval_ = model.evaluate(test_data, test_label)
print(eval_)

# First weight array of the first layer, indexed by vocabulary index
embeddings = model.layers[0].get_weights()[0]
words_embeddings = {w:embeddings[idx] for w, idx in word_index.items()}
v1 = words_embeddings['comfortable']
print(words_embeddings['comfortable'])


# Second model: same steps, vector kept as v2. `model` stays bound to this
# model after the cell has run.
model = keras.models.load_model("models/simple_lstm_glove_vectors_42B_300d.h5")
eval_ = model.evaluate(test_data, test_label)
print(eval_)

embeddings = model.layers[0].get_weights()[0]
words_embeddings = {w:embeddings[idx] for w, idx in word_index.items()}
v2 = words_embeddings['comfortable']
print(words_embeddings['comfortable'])
# Euclidean distance between the two vectors (displayed as the cell result)
np.linalg.norm(v1-v2)


[0.4923143982887268, 0.8267509341239929]
[ 7.41575480e-01  8.57637003e-02 -4.92752373e-01  1.94836524e-03
  1.49490684e-01  5.73379882e-02  3.67144674e-01 -4.89030123e-01
  9.86776724e-02  1.96871054e+00  2.67749012e-01  4.09098044e-02
 -1.46645188e-01  6.42165601e-01 -2.03620702e-01 -4.04048920e-01
 -2.79254794e-01  1.53178358e+00  1.54406324e-01 -1.75111685e-02
 -1.17944813e+00  1.16829328e-01  7.83994198e-02 -2.76060760e-01
 -4.15574461e-01  1.95002183e-01 -1.46302264e-02 -2.73340702e-01
  1.58137619e-01 -1.46372154e-01  2.90539920e-01 -1.42981574e-01
 -5.72584510e-01 -4.41698939e-01  3.27710241e-01 -3.34863394e-01
  2.76684403e-01 -1.06199399e-01 -6.16275311e-01 -9.20918941e-01
  3.44419926e-01 -6.69008866e-02 -5.03882468e-01  1.51864365e-01
  1.18802726e-01  2.89673746e-01 -1.50014997e-01  1.13732576e-01
 -2.54492432e-01 -9.76564661e-02 -6.10187590e-01  2.82708704e-01
  1.85410336e-01 -5.60531199e-01  4.54926103e-01  1.84068218e-01
  4.27523553e-02 -6.62487745e-02  1.90614715e-01 

10.584659

In [45]:
# Predict on the test set with whichever model is currently bound to `model`
# (it depends on which cell ran last); the model ends in a 3-unit softmax
# layer, so each row holds three class probabilities.
preds = model.predict(test_data)
print(preds)

[[9.8787320e-01 4.2825625e-03 7.8442590e-03]
 [1.5778446e-03 8.3056418e-03 9.9011654e-01]
 [5.9161469e-04 3.2535938e-03 9.9615484e-01]
 ...
 [4.7244359e-02 9.4788247e-01 4.8732213e-03]
 [9.9504423e-01 1.9307305e-03 3.0250535e-03]
 [9.2169025e-04 2.9376131e-03 9.9614066e-01]]
