In [110]:
import os
import pandas as pd
import re
import string
from tqdm import tqdm
from gensim.models.phrases import Phrases, Phraser
from gensim.models import Word2Vec
import logging
from wordcloud import WordCloud
import matplotlib.pyplot as plt

import nltk
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.corpus import stopwords
from nltk.stem import SnowballStemmer
# Fetch the NLTK resources used below: 'punkt' (tokenizers) and 'stopwords'.
for nltk_resource in ('punkt', 'stopwords'):
    nltk.download(nltk_resource)

# Never truncate columns when a DataFrame is displayed.
pd.set_option('display.max_columns', None)

[nltk_data] Downloading package punkt to C:\Users\Karl
[nltk_data]     Zhang\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to C:\Users\Karl
[nltk_data]     Zhang\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [2]:
directory = r"Wine Reviews/"

# Sort the listing so the load order (and therefore which row survives
# drop_duplicates below) does not depend on the file system's arbitrary order.
wine_file_names = sorted(os.listdir(directory))
if not wine_file_names:
    raise FileNotFoundError(f"No wine review files found in {directory!r}")

# Read every file with the same parser settings. low_memory=False is applied to
# all files (including the first) to make sure that the feature types don't
# change for each file. The frames are concatenated once instead of growing
# wine_df inside the loop, which recopied the accumulated data on every pass.
wine_frames = [
    pd.read_csv(os.path.join(directory, file_name), encoding='latin-1', low_memory=False)
    for file_name in wine_file_names
]
wine_df = pd.concat(wine_frames, axis=0)

# Keep only the first row seen for each wine name.
wine_df = wine_df.drop_duplicates(subset=['Name'])

food_df = pd.read_csv('food_reviews.csv')

<br>
We reuse the cleaning function from the labs to strip elements that website reviews commonly contain but that carry no meaning for our analysis: HTML tags, @-mentions, and URLs.

In [3]:
def clean_text(text):
    """Replace HTML tags, @-mentions and URLs in ``text`` with a single space each.

    The substitutions run in that order and the resulting string is returned.
    """
    noise_patterns = (
        r'<.*?>',                     # HTML tags
        r'@[\w_]+',                   # mentions
        r'http[s]?://\S+|www\.\S+',   # URLs
    )
    for pattern in noise_patterns:
        text = re.sub(pattern, ' ', text)
    return text

In [4]:
# Cast each review column to str so clean_text always receives a string,
# then strip HTML tags, mentions and URLs from every review.
wine_df['Description'] = wine_df['Description'].astype(str).apply(clean_text)

food_df['Text'] = food_df['Text'].astype(str).apply(clean_text)

<br>
Then, we apply sentence tokenization to both the wine and the food reviews. For each dataset, we first join all reviews into a single string (the corpus) and then call `sent_tokenize` to split that string into sentences.

In [5]:
# Wine: all reviews -> one space-joined corpus -> list of sentences.
wine_reviews_list = wine_df['Description'].tolist()
full_wine_reviews_corpus = ' '.join(wine_reviews_list)
wine_reviews_sentences = sent_tokenize(full_wine_reviews_corpus)

# Food: same three steps on the 'Text' column.
food_reviews_list = food_df['Text'].tolist()
full_food_reviews_corpus = ' '.join(food_reviews_list)
food_reviews_sentences = sent_tokenize(full_food_reviews_corpus)

<br>
Examples of elements in the lists after sentence tokenization would be:

In [6]:
# First two sentences of each corpus, separated by a blank line.
print(wine_reviews_sentences[:2], '', food_reviews_sentences[:2], sep='\n')

["Made in partnership with Alpha Omega Winery's Jean Hoefliger, this famous namesake wine embraces the fullness that is Napa Valley concentration and sunny richness, showing baked plum, cinnamon and generous tannin and oak.", 'The backbone is plush and lush in texture, evolving as it opens in the glass.']

['I have bought several of the Vitality canned dog food products and have found them all to be of good quality.', 'The product looks more like a stew than a processed meat and it smells better.']


<br>
Now we normalize each sentence in `wine_reviews_sentences` and `food_reviews_sentences`, i.e. we tokenize every sentence into a list of words. Each word is converted to lower case, stemmed with the SnowballStemmer, and stripped of punctuation; English stop words and single-character tokens are then removed.

In [7]:
# Snowball stemmer for English, shared by every normalize_text call.
sno = SnowballStemmer('english')
# Translation table that deletes every character in string.punctuation.
punctuation_table = str.maketrans('', '', string.punctuation)
# English stop words, held in a set for fast membership tests.
stop_words = set(stopwords.words('english'))

def normalize_text(raw_text):
    """Tokenize one sentence and return its normalized word tokens.

    Every token is lower-cased, stemmed with the module-level Snowball stemmer
    (``sno``) and stripped of punctuation characters (``punctuation_table``).
    Tokens that are at most one character long after that, or that appear in
    ``stop_words``, are dropped.

    Args:
        raw_text: The sentence to normalize.

    Returns:
        list: The normalized tokens in their original order. An empty list is
        returned when the sentence cannot be tokenized, so the result is always
        a list (the previous failure value was the empty string ``''``).
    """
    # Best-effort tokenization: a sentence that cannot be tokenized yields no
    # tokens. ``Exception`` (not a bare ``except``) lets KeyboardInterrupt and
    # SystemExit propagate, so the long-running loops can be interrupted.
    try:
        word_list = word_tokenize(raw_text)
    except Exception:
        return []

    normalized_sentence = []
    for w in word_list:
        try:
            lower_case_word = str(w).lower()
            stemmed_word = sno.stem(lower_case_word)  # convert all words to their stems
            no_punctuation = stemmed_word.translate(punctuation_table)  # remove punctuations
        except Exception:
            # Skip a token that cannot be processed rather than losing the sentence.
            continue
        # NOTE(review): the stop-word test runs on the stemmed form, so a stop word
        # whose stem differs from its surface form (e.g. "very" presumably becomes
        # "veri") is not filtered. Left unchanged on purpose: descriptor_mapping.csv
        # is matched against the vocabulary this pipeline produces - confirm before
        # changing the order.
        if len(no_punctuation) > 1 and no_punctuation not in stop_words:  # remove letters and stop words
            normalized_sentence.append(no_punctuation)
    return normalized_sentence

In [8]:
# Normalize every wine sentence into a list of tokens (tqdm shows progress).
normalized_wine_reviews_sentences = [
    normalize_text(wine_sentence)
    for wine_sentence in tqdm(wine_reviews_sentences, desc="Processing Wine Reviews")
]

# Same for the food sentences.
normalized_food_reviews_sentences = [
    normalize_text(food_sentence)
    for food_sentence in tqdm(food_reviews_sentences, desc="Processing Food Reviews")
]

Processing Wine Reviews: 100%|██████████| 401049/401049 [02:42<00:00, 2466.37it/s]
Processing Food Reviews: 100%|██████████| 3052482/3052482 [20:08<00:00, 2525.69it/s]


<br>
Examples of elements in the lists after word tokenization would be:

In [14]:
# First two normalized sentences of each corpus, separated by a blank line.
print(normalized_wine_reviews_sentences[:2], '', normalized_food_reviews_sentences[:2], sep='\n')

[['made', 'partnership', 'alpha', 'omega', 'wineri', 'jean', 'hoeflig', 'famous', 'namesak', 'wine', 'embrac', 'full', 'napa', 'valley', 'concentr', 'sunni', 'rich', 'show', 'bake', 'plum', 'cinnamon', 'generous', 'tannin', 'oak'], ['backbon', 'plush', 'lush', 'textur', 'evolv', 'open', 'glass']]

[['bought', 'sever', 'vital', 'dog', 'food', 'product', 'found', 'good', 'qualiti'], ['product', 'look', 'like', 'stew', 'process', 'meat', 'smell', 'better']]


<br>
Since not all of the terms are single words, we use `Phrases` to extract 2-gram phrases that occur at least 100 times and 3-gram phrases that occur at least 50 times. We also save the 3-gram `Phrases` models for both wine and food.

In [31]:
def _fit_phrase_models(sentences, trigram_model_path, bigram_min_count=100, trigram_min_count=50):
    """Fit bigram then trigram Phrases models on ``sentences`` and apply them.

    The bigram model is fitted on ``sentences`` and applied to them; the trigram
    model is fitted on that bigram output and applied to it. The trigram model
    is saved to ``trigram_model_path``.

    Returns:
        tuple: (bigram_model, bigram_sentences, trigram_model, phrased_sentences)
    """
    bigram_model = Phrases(sentences, min_count=bigram_min_count)
    bigram_sentences = [bigram_model[tokens] for tokens in sentences]
    trigram_model = Phrases(bigram_sentences, min_count=trigram_min_count)
    phrased_sentences = [trigram_model[tokens] for tokens in bigram_sentences]
    trigram_model.save(trigram_model_path)
    return bigram_model, bigram_sentences, trigram_model, phrased_sentences


(wine_bigram_model, wine_bigrams,
 wine_trigram_model, phrased_wine_sentences) = _fit_phrase_models(
    normalized_wine_reviews_sentences, 'wine_trigram_model.pkl')

(food_bigram_model, food_bigrams,
 food_trigram_model, phrased_food_sentences) = _fit_phrase_models(
    normalized_food_reviews_sentences, 'food_trigram_model.pkl')

<br>
Examples of elements in the lists after word phrasing would be:

In [38]:
# Two phrased sentences from each corpus, separated by a blank line.
print(phrased_wine_sentences[3:5], '', phrased_food_sentences[220:222], sep='\n')

[['cabernet_sauvignon', 'esteem', 'cold_creek', 'vineyard', 'make', 'backbon', 'wine'], ['intox', 'aroma', 'assort', 'dri', 'herb', 'graphit', 'spice', 'tea', 'leaf', 'coffe', 'black', 'cherri', 'follow', 'tight_wound', 'dark', 'fruit', 'flavor']]

[['roast', 'home', 'stovetop', 'popcorn_popper', 'outsid', 'cours'], ['bean', 'coffe', 'bean', 'direct', 'green', 'mexican', 'altura', 'seem', 'wellsuit', 'method']]


<br>
Now it's time for some visualization. The code for the word cloud plots looks like the cell below; see the visualization scripts for more.

In [82]:
# font_path = "./Roboto/Roboto-Black.ttf"

# all_words_wine = [word for sublist in phrased_wine_sentences for word in sublist]
# wordcloud_wine = WordCloud(width=400, height=200, font_path=font_path).generate(' '.join(all_words_wine))

# all_words_food = [word for sublist in phrased_food_sentences for word in sublist]
# wordcloud_food = WordCloud(width=400, height=200, font_path=font_path).generate(' '.join(all_words_food))

# plt.figure(figsize=(12, 6))
# plt.subplot(1, 2, 1)
# plt.imshow(wordcloud_wine, interpolation="bilinear")
# plt.axis('off')
# plt.title("Word Cloud for Wine Reviews")

# plt.subplot(1, 2, 2)
# plt.imshow(wordcloud_food, interpolation="bilinear")
# plt.axis('off')
# plt.title("Word Cloud for Food Reviews")

# plt.show()

In [81]:
# data = phrased_food_sentences

# with open('phrased_food_sentences.txt', 'w', encoding='utf-8') as file:
#     for item in data:
#         line = ' '.join(item) 
#         file.write(line + '\n')

<br>
After that, we apply a descriptor mapping to the wine reviews using `descriptor_mapping.csv`. The file maps each raw descriptor that may appear in a review to a detailed descriptor (`level_3`), together with a more general term (`level_2`), the most general term (`level_1`), and the type of the term (`type`).

In [85]:
# Load the descriptor table, then index it by the raw descriptor so that
# tokens can be looked up directly by label.
descriptor_mapping = pd.read_csv('descriptor_mapping.csv', encoding='latin1')
descriptor_mapping = descriptor_mapping.set_index('raw descriptor')

def return_mapped_descriptor(word, mapping):
    """Return the level-3 descriptor for ``word``, or ``word`` itself if unmapped.

    Args:
        word: Token to look up among the index labels of ``mapping``.
        mapping: DataFrame indexed by raw descriptor with a 'level_3' column.

    Returns:
        ``mapping.at[word, 'level_3']`` when ``word`` is an index label of
        ``mapping``; otherwise ``word`` unchanged.
    """
    # Test membership on the index itself instead of converting the whole index
    # to a list on every call, which made each lookup a full linear scan.
    # NOTE(review): assumes the index labels are unique - with duplicated labels
    # ``.at`` would not return a single value; verify against the CSV.
    if word in mapping.index:
        return mapping.at[word, 'level_3']
    return word

In [90]:
# Preview the first five rows of the mapping table (head() defaults to n=5).
descriptor_mapping.head()

Unnamed: 0_level_0,level_3,level_2,level_1,type
raw descriptor,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
abras,abrasive,high_tannin,tannin,nonaroma
acacia,acacia,flowery,flower,aroma
acacia_flower,acacia,flowery,flower,aroma
aciddriven,acid_driven,high_acid,acid,nonaroma
aggress,aggressive,high_acid,acid,nonaroma


In [92]:
# Replace every token that has an entry in descriptor_mapping with its level-3
# descriptor; all other tokens are kept. Each result is cast to str.
normalized_wine_sentences = [
    [str(return_mapped_descriptor(token, descriptor_mapping)) for token in phrased_sentence]
    for phrased_sentence in phrased_wine_sentences
]

In [98]:
# Display two mapped wine sentences (indices 1 and 2) as a spot check.
normalized_wine_sentences[1:3]

# Disabled export: when uncommented, writes each sentence to
# 'normalized_wine_sentences(mapped).txt' as one space-joined line.
# data = normalized_wine_sentences

# with open('normalized_wine_sentences(mapped).txt', 'w', encoding='utf-8') as file:
#     for item in data:
#         line = ' '.join(item) 
#         file.write(line + '\n')

[['backbon', 'plush', 'lush', 'textur', 'evolv', 'open', 'glass'],
 ['enjoy', 'good', 'time', 'show', 'best', '2024â\x80\x932034']]

<br>
Next, we apply the same process to the food reviews, but we only map the aroma descriptors; the nonaroma descriptors are left unnormalized.

In [101]:
# Restrict the mapping to rows whose type is 'aroma'.
is_aroma = descriptor_mapping['type'] == 'aroma'
aroma_descriptor_mapping = descriptor_mapping.loc[is_aroma]

# Map aroma descriptors in every food sentence; other tokens are kept as they are.
normalized_food_sentences = [
    [str(return_mapped_descriptor(token, aroma_descriptor_mapping)) for token in phrased_sentence]
    for phrased_sentence in tqdm(phrased_food_sentences, desc="Processing Phrased Food Sentences")
]

Processing Phrased Food Sentences: 100%|██████████| 3052482/3052482 [33:32<00:00, 1516.55it/s]


In [105]:
# Display two mapped food sentences (indices 1 and 2) as a spot check.
normalized_food_sentences[1:3]

# Disabled export: when uncommented, writes each sentence to
# 'normalized_food_sentences(mapped).txt' as one space-joined line.
# data = normalized_food_sentences

# with open('normalized_food_sentences(mapped).txt', 'w', encoding='utf-8') as file:
#     for item in data:
#         line = ' '.join(item) 
#         file.write(line + '\n')

[['product', 'look', 'like', 'stew', 'process', 'meat', 'smell', 'better'],
 ['labrador', 'finicki', 'appreci', 'product', 'better']]

<br>
After this series of data cleaning and descriptor mapping steps for both wine and food reviews, we combine the two sets of sentences. For convenience, we save the combined normalized sentences <font color="red">(very important!)</font>

In [106]:
# Training corpus: all wine sentences followed by all food sentences.
normalized_sentences = [*normalized_wine_sentences, *normalized_food_sentences]

In [109]:
# data = normalized_sentences

# with open('final_normalized_sentences.txt', 'w', encoding='utf-8') as file:
#     for item in data:
#         line = ' '.join(item) 
#         file.write(line + '\n')

<br>
Now it's time to do vectorization.

In [112]:
# Emit gensim's INFO-level progress messages while the model trains.
logging.basicConfig(level=logging.INFO, format="%(levelname)s - %(message)s")

# Train 300-dimensional embeddings over 15 epochs with min_count=8,
# then persist the model to disk.
word2vec_model = Word2Vec(
    sentences=normalized_sentences,
    vector_size=300,
    min_count=8,
    epochs=15,
)
word2vec_model.save("word2vec.model")

INFO - collecting all words and their counts
INFO - PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
INFO - PROGRESS: at sentence #10000, processed 68205 words, keeping 3762 word types
INFO - PROGRESS: at sentence #20000, processed 136181 words, keeping 5491 word types
INFO - PROGRESS: at sentence #30000, processed 206028 words, keeping 6904 word types
INFO - PROGRESS: at sentence #40000, processed 276062 words, keeping 8151 word types
INFO - PROGRESS: at sentence #50000, processed 362817 words, keeping 10261 word types
INFO - PROGRESS: at sentence #60000, processed 459662 words, keeping 11847 word types
INFO - PROGRESS: at sentence #70000, processed 556828 words, keeping 13314 word types
INFO - PROGRESS: at sentence #80000, processed 644147 words, keeping 14841 word types
INFO - PROGRESS: at sentence #90000, processed 731193 words, keeping 16402 word types
INFO - PROGRESS: at sentence #100000, processed 820799 words, keeping 17596 word types
INFO - PROGRESS: at senten

INFO - PROGRESS: at sentence #940000, processed 7329780 words, keeping 85533 word types
INFO - PROGRESS: at sentence #950000, processed 7401832 words, keeping 86063 word types
INFO - PROGRESS: at sentence #960000, processed 7473185 words, keeping 86484 word types
INFO - PROGRESS: at sentence #970000, processed 7543186 words, keeping 86868 word types
INFO - PROGRESS: at sentence #980000, processed 7613796 words, keeping 87517 word types
INFO - PROGRESS: at sentence #990000, processed 7684572 words, keeping 88092 word types
INFO - PROGRESS: at sentence #1000000, processed 7757133 words, keeping 88507 word types
INFO - PROGRESS: at sentence #1010000, processed 7831011 words, keeping 89114 word types
INFO - PROGRESS: at sentence #1020000, processed 7901999 words, keeping 89545 word types
INFO - PROGRESS: at sentence #1030000, processed 7974272 words, keeping 90007 word types
INFO - PROGRESS: at sentence #1040000, processed 8047599 words, keeping 90317 word types
INFO - PROGRESS: at sentenc

INFO - PROGRESS: at sentence #1850000, processed 13926355 words, keeping 121521 word types
INFO - PROGRESS: at sentence #1860000, processed 14001156 words, keeping 121926 word types
INFO - PROGRESS: at sentence #1870000, processed 14073763 words, keeping 122262 word types
INFO - PROGRESS: at sentence #1880000, processed 14145658 words, keeping 122669 word types
INFO - PROGRESS: at sentence #1890000, processed 14216314 words, keeping 123012 word types
INFO - PROGRESS: at sentence #1900000, processed 14289735 words, keeping 123330 word types
INFO - PROGRESS: at sentence #1910000, processed 14361363 words, keeping 123628 word types
INFO - PROGRESS: at sentence #1920000, processed 14433729 words, keeping 123932 word types
INFO - PROGRESS: at sentence #1930000, processed 14504886 words, keeping 124323 word types
INFO - PROGRESS: at sentence #1940000, processed 14578207 words, keeping 124694 word types
INFO - PROGRESS: at sentence #1950000, processed 14653617 words, keeping 125079 word types

INFO - PROGRESS: at sentence #2760000, processed 20478153 words, keeping 146191 word types
INFO - PROGRESS: at sentence #2770000, processed 20549392 words, keeping 146502 word types
INFO - PROGRESS: at sentence #2780000, processed 20620934 words, keeping 146664 word types
INFO - PROGRESS: at sentence #2790000, processed 20695456 words, keeping 146983 word types
INFO - PROGRESS: at sentence #2800000, processed 20766190 words, keeping 147247 word types
INFO - PROGRESS: at sentence #2810000, processed 20837946 words, keeping 147497 word types
INFO - PROGRESS: at sentence #2820000, processed 20909731 words, keeping 147769 word types
INFO - PROGRESS: at sentence #2830000, processed 20982397 words, keeping 147995 word types
INFO - PROGRESS: at sentence #2840000, processed 21057511 words, keeping 148305 word types
INFO - PROGRESS: at sentence #2850000, processed 21127436 words, keeping 148466 word types
INFO - PROGRESS: at sentence #2860000, processed 21203798 words, keeping 148645 word types

INFO - EPOCH 0 - PROGRESS: at 2.41% examples, 615548 words/s, in_qsize 5, out_qsize 0
INFO - EPOCH 0 - PROGRESS: at 4.80% examples, 635749 words/s, in_qsize 5, out_qsize 0
INFO - EPOCH 0 - PROGRESS: at 6.99% examples, 636801 words/s, in_qsize 5, out_qsize 0
INFO - EPOCH 0 - PROGRESS: at 9.09% examples, 628887 words/s, in_qsize 5, out_qsize 0
INFO - EPOCH 0 - PROGRESS: at 11.32% examples, 626789 words/s, in_qsize 5, out_qsize 0
INFO - EPOCH 0 - PROGRESS: at 13.93% examples, 621993 words/s, in_qsize 5, out_qsize 0
INFO - EPOCH 0 - PROGRESS: at 16.58% examples, 616014 words/s, in_qsize 5, out_qsize 0
INFO - EPOCH 0 - PROGRESS: at 19.21% examples, 611711 words/s, in_qsize 5, out_qsize 0
INFO - EPOCH 0 - PROGRESS: at 21.85% examples, 608404 words/s, in_qsize 5, out_qsize 0
INFO - EPOCH 0 - PROGRESS: at 24.65% examples, 609345 words/s, in_qsize 5, out_qsize 0
INFO - EPOCH 0 - PROGRESS: at 27.49% examples, 611895 words/s, in_qsize 5, out_qsize 0
INFO - EPOCH 0 - PROGRESS: at 30.35% examples, 

INFO - EPOCH 2 - PROGRESS: at 45.36% examples, 593633 words/s, in_qsize 6, out_qsize 0
INFO - EPOCH 2 - PROGRESS: at 47.70% examples, 589515 words/s, in_qsize 5, out_qsize 0
INFO - EPOCH 2 - PROGRESS: at 50.11% examples, 587057 words/s, in_qsize 5, out_qsize 0
INFO - EPOCH 2 - PROGRESS: at 52.91% examples, 588566 words/s, in_qsize 5, out_qsize 0
INFO - EPOCH 2 - PROGRESS: at 55.75% examples, 590768 words/s, in_qsize 6, out_qsize 0
INFO - EPOCH 2 - PROGRESS: at 58.54% examples, 592045 words/s, in_qsize 5, out_qsize 0
INFO - EPOCH 2 - PROGRESS: at 61.35% examples, 593458 words/s, in_qsize 6, out_qsize 0
INFO - EPOCH 2 - PROGRESS: at 64.22% examples, 594553 words/s, in_qsize 5, out_qsize 0
INFO - EPOCH 2 - PROGRESS: at 67.14% examples, 596424 words/s, in_qsize 5, out_qsize 0
INFO - EPOCH 2 - PROGRESS: at 70.07% examples, 597898 words/s, in_qsize 5, out_qsize 0
INFO - EPOCH 2 - PROGRESS: at 72.95% examples, 599428 words/s, in_qsize 5, out_qsize 0
INFO - EPOCH 2 - PROGRESS: at 75.74% exampl

INFO - EPOCH 4 - PROGRESS: at 94.81% examples, 602865 words/s, in_qsize 5, out_qsize 0
INFO - EPOCH 4 - PROGRESS: at 97.34% examples, 601785 words/s, in_qsize 5, out_qsize 1
INFO - EPOCH 4: training on 25458317 raw words (23030768 effective words) took 38.2s, 602857 effective words/s
INFO - EPOCH 5 - PROGRESS: at 2.47% examples, 631924 words/s, in_qsize 5, out_qsize 0
INFO - EPOCH 5 - PROGRESS: at 4.89% examples, 650429 words/s, in_qsize 5, out_qsize 0
INFO - EPOCH 5 - PROGRESS: at 7.08% examples, 645436 words/s, in_qsize 5, out_qsize 0
INFO - EPOCH 5 - PROGRESS: at 9.37% examples, 645914 words/s, in_qsize 5, out_qsize 0
INFO - EPOCH 5 - PROGRESS: at 11.80% examples, 649461 words/s, in_qsize 5, out_qsize 0
INFO - EPOCH 5 - PROGRESS: at 14.65% examples, 646907 words/s, in_qsize 6, out_qsize 0
INFO - EPOCH 5 - PROGRESS: at 17.56% examples, 645061 words/s, in_qsize 5, out_qsize 0
INFO - EPOCH 5 - PROGRESS: at 20.46% examples, 642746 words/s, in_qsize 5, out_qsize 0
INFO - EPOCH 5 - PROGRE

INFO - EPOCH 7 - PROGRESS: at 37.87% examples, 602382 words/s, in_qsize 5, out_qsize 0
INFO - EPOCH 7 - PROGRESS: at 40.51% examples, 602047 words/s, in_qsize 5, out_qsize 0
INFO - EPOCH 7 - PROGRESS: at 42.92% examples, 598181 words/s, in_qsize 5, out_qsize 0
INFO - EPOCH 7 - PROGRESS: at 45.56% examples, 597649 words/s, in_qsize 5, out_qsize 0
INFO - EPOCH 7 - PROGRESS: at 48.38% examples, 598493 words/s, in_qsize 5, out_qsize 0
INFO - EPOCH 7 - PROGRESS: at 51.14% examples, 599884 words/s, in_qsize 5, out_qsize 0
INFO - EPOCH 7 - PROGRESS: at 53.91% examples, 600708 words/s, in_qsize 6, out_qsize 0
INFO - EPOCH 7 - PROGRESS: at 56.71% examples, 601745 words/s, in_qsize 6, out_qsize 0
INFO - EPOCH 7 - PROGRESS: at 59.38% examples, 601641 words/s, in_qsize 5, out_qsize 0
INFO - EPOCH 7 - PROGRESS: at 62.13% examples, 602006 words/s, in_qsize 5, out_qsize 0
INFO - EPOCH 7 - PROGRESS: at 64.90% examples, 602690 words/s, in_qsize 5, out_qsize 0
INFO - EPOCH 7 - PROGRESS: at 67.79% exampl

INFO - EPOCH 9 - PROGRESS: at 91.89% examples, 619344 words/s, in_qsize 5, out_qsize 0
INFO - EPOCH 9 - PROGRESS: at 94.76% examples, 619630 words/s, in_qsize 5, out_qsize 0
INFO - EPOCH 9 - PROGRESS: at 97.64% examples, 620019 words/s, in_qsize 5, out_qsize 0
INFO - EPOCH 9: training on 25458317 raw words (23030636 effective words) took 37.2s, 619172 effective words/s
INFO - EPOCH 10 - PROGRESS: at 2.07% examples, 537600 words/s, in_qsize 6, out_qsize 1
INFO - EPOCH 10 - PROGRESS: at 4.46% examples, 593612 words/s, in_qsize 5, out_qsize 0
INFO - EPOCH 10 - PROGRESS: at 6.72% examples, 616178 words/s, in_qsize 5, out_qsize 0
INFO - EPOCH 10 - PROGRESS: at 9.06% examples, 628931 words/s, in_qsize 5, out_qsize 0
INFO - EPOCH 10 - PROGRESS: at 11.45% examples, 634643 words/s, in_qsize 5, out_qsize 0
INFO - EPOCH 10 - PROGRESS: at 14.17% examples, 630919 words/s, in_qsize 5, out_qsize 0
INFO - EPOCH 10 - PROGRESS: at 17.04% examples, 631598 words/s, in_qsize 5, out_qsize 0
INFO - EPOCH 10 

INFO - EPOCH 12 - PROGRESS: at 36.02% examples, 619362 words/s, in_qsize 5, out_qsize 0
INFO - EPOCH 12 - PROGRESS: at 38.82% examples, 619949 words/s, in_qsize 5, out_qsize 0
INFO - EPOCH 12 - PROGRESS: at 41.60% examples, 620467 words/s, in_qsize 5, out_qsize 0
INFO - EPOCH 12 - PROGRESS: at 44.43% examples, 620907 words/s, in_qsize 5, out_qsize 0
INFO - EPOCH 12 - PROGRESS: at 47.25% examples, 621026 words/s, in_qsize 5, out_qsize 0
INFO - EPOCH 12 - PROGRESS: at 49.98% examples, 620547 words/s, in_qsize 5, out_qsize 0
INFO - EPOCH 12 - PROGRESS: at 52.37% examples, 615858 words/s, in_qsize 5, out_qsize 0
INFO - EPOCH 12 - PROGRESS: at 54.95% examples, 613840 words/s, in_qsize 6, out_qsize 0
INFO - EPOCH 12 - PROGRESS: at 57.73% examples, 614276 words/s, in_qsize 5, out_qsize 0
INFO - EPOCH 12 - PROGRESS: at 60.59% examples, 614788 words/s, in_qsize 5, out_qsize 0
INFO - EPOCH 12 - PROGRESS: at 63.42% examples, 615292 words/s, in_qsize 5, out_qsize 0
INFO - EPOCH 12 - PROGRESS: at 6

INFO - EPOCH 14 - PROGRESS: at 47.97% examples, 489982 words/s, in_qsize 5, out_qsize 0
INFO - EPOCH 14 - PROGRESS: at 50.23% examples, 490410 words/s, in_qsize 5, out_qsize 0
INFO - EPOCH 14 - PROGRESS: at 52.54% examples, 491497 words/s, in_qsize 5, out_qsize 0
INFO - EPOCH 14 - PROGRESS: at 54.71% examples, 491044 words/s, in_qsize 5, out_qsize 0
INFO - EPOCH 14 - PROGRESS: at 57.06% examples, 492056 words/s, in_qsize 5, out_qsize 0
INFO - EPOCH 14 - PROGRESS: at 59.38% examples, 492825 words/s, in_qsize 5, out_qsize 0
INFO - EPOCH 14 - PROGRESS: at 61.72% examples, 493393 words/s, in_qsize 5, out_qsize 0
INFO - EPOCH 14 - PROGRESS: at 64.34% examples, 496100 words/s, in_qsize 6, out_qsize 1
INFO - EPOCH 14 - PROGRESS: at 67.15% examples, 499958 words/s, in_qsize 4, out_qsize 1
INFO - EPOCH 14 - PROGRESS: at 69.77% examples, 502274 words/s, in_qsize 5, out_qsize 0
INFO - EPOCH 14 - PROGRESS: at 72.02% examples, 501987 words/s, in_qsize 5, out_qsize 0
INFO - EPOCH 14 - PROGRESS: at 7