In [64]:
import sys
import os

sys.path.append(os.getcwd())
from utils.data_util import read_xml, write_binary
from utils.util import load_glove_embeddings, get_similar_words
import spacy
import nltk
import numpy as np
import random

nltk.download('wordnet')
from nltk.corpus import wordnet

# -----CHANGE THESE VALUES ACCORDINGLY BEFORE RUNNING THE SCRIPT----
DATA_TYPE = 'restaurant'
# DATA_TYPE = 'laptops'
TYPE = 'train'
# TYPE = 'test'

[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/rajdeepsurolia/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [65]:
INPUT_FILE_PATH = os.path.join(*[os.path.curdir, 'data/semeval_16', 'ABSA16_Restaurants_Train_SB1_v2.xml'])
OVERSAMPLING = False
PARAPHRASING = False
REDUCED = False
MATCH_TYPE = 'similar'
# MATCH_TYPE = 'synonym'
MAX_MATCHED_WORDS = 3
MINORITY_CLASS = [2]
if PARAPHRASING and MATCH_TYPE == 'similar':
    embedding = load_glove_embeddings()
# -------------------------------------------------------------------

TOTAL_SENTENCE_COUNT = 0
TOTAL_REVIEW_COUNT = 0
TOTAL_AUGMENTED_REVIEW_COUNT = 0
TOTAL_POSITIVE_LABEL_COUNT = 0
TOTAL_NEGATIVE_LABEL_COUNT = 0
TOTAL_NEUTRAL_LABEL_COUNT = 0
TOTAL_NOT_APPLICABLE_LABEL_COUNT = 0
ASPECT_TO_SENTENCE_FREQUENCY = {}

NLP = spacy.load('en')

In [66]:
def get_one_hot_encoded_sentiment(sentiment):
    p = [0, 0, 0, 0]
    global TOTAL_POSITIVE_LABEL_COUNT
    global TOTAL_NEGATIVE_LABEL_COUNT
    global TOTAL_NEUTRAL_LABEL_COUNT

    if sentiment == 'positive':
        p[0] = 1
        TOTAL_POSITIVE_LABEL_COUNT += 1
    elif sentiment == 'negative':
        p[1] = 1
        TOTAL_NEGATIVE_LABEL_COUNT += 1
    elif sentiment == 'neutral':
        p[2] = 1
        TOTAL_NEUTRAL_LABEL_COUNT += 1
    else:
        p[3] = 1
    return p


def get_categorical_sentiment(sentiment):
    return np.argmax(get_one_hot_encoded_sentiment(sentiment))


def update_aspect_to_sentence_frequency(aspect):
    global ASPECT_TO_SENTENCE_FREQUENCY
    count = ASPECT_TO_SENTENCE_FREQUENCY.get(aspect)
    if count:
        ASPECT_TO_SENTENCE_FREQUENCY[aspect] = count + 1
    else:
        ASPECT_TO_SENTENCE_FREQUENCY[aspect] = 1


def make_flatten_restaurant_data_sentence_level(reviews, mode = 'train'):
    """
    [
    [[aspect1], [review1], [polarity]],
    [[aspect2], [review1], [polarity]]
    ]

    [['food', 'quality'], [[['Judging from previous posts this used to be a good place, but not any longer.'], [0, 0, 0, 1]],
                      ,[['We, there were four of us, arrived at noon - the place was empty - and the staff acted like we
                        were imposing on them and they were very rude.'], [0, 0, 0, 1]],
                      [['They never brought us complimentary noodles, ignored repeated requests for sugar,
                        and threw our dishes on the table.'], [0, 0, 0, 1]],
                      [['The food was lousy - too sweet or too salty and the portions tiny.'], [0, 1, 0, 0]],
                      [['After all that, they complained to me about the small tip.'], [0, 0, 0, 1]],
                      [['Avoid this place!'], [0, 0, 0, 1]]
                      ]
    ]

    This method reads data from the original xml file and formats it in the way shown above. If N is the number of
    possible aspects in this dataset then we repeat or augment each review N times once for each aspect. A review can
    consist of any number of sentences. Each sentence in a review has a label. Labels represent sentiment polarity or
    non applicability of a sentence corresponding to an aspect. For instance, in the above example labels for each
    sentence are generated for the aspect food#quality. Sentences which either do dont talk about this particular aspect
    or any of the possible aspects are labeled as N/A in this datapoint. For instance, the last sentence "Avoid this place"
    is maked as N/A in this datapoint. Although this same sentence will be labelled as NEGATIVE in another datapoint of
    the same review for another aspect restaurant#general.
    :return:
    """

    restaurant_possible_aspects = ['restaurant#general', 'restaurant#prices', 'restaurant#miscellaneous',
                                   'food#prices', 'food#quality', 'food#style_options',
                                   'drinks#prices', 'drinks#quality', 'drinks#style_options',
                                   'ambience#general',
                                   'service#general',
                                   'location#general']

    # we have 22 entities, 9 attributes so total 198 possible aspects
    # but in training data we have only 81 aspects present. In total we selected 116 aspects based our understanding of
    # which entity-attribute pair makes sense.
    laptops_possible_aspects = ['laptop#general',
                                'laptop#price',
                                'laptop#quality',
                                'laptop#operation_performance',
                                'laptop#usability',
                                'laptop#design_features',
                                'laptop#portability',
                                'laptop#connectivity',
                                'laptop#miscellaneous',
                                'display#general',
                                'display#quality',
                                'display#operation_performance',
                                'display#usability',
                                'display#design_features',
                                'display#portability',
                                'display#miscellaneous',
                                'cpu#general',
                                'cpu#price',
                                'cpu#quality',
                                'cpu#operation_performance',
                                'cpu#design_features',
                                'cpu#miscellaneous',
                                'motherboard#general',
                                'motherboard#price',
                                'motherboard#quality',
                                'motherboard#design_features',
                                'motherboard#miscellaneous',
                                'hard_disc#general',
                                'hard_disc#price',
                                'hard_disc#quality',
                                'hard_disc#operation_performance',
                                'hard_disc#design_features',
                                'hard_disc#miscellaneous',
                                'memory#general',
                                'memory#price',
                                'memory#design_features',
                                'memory#miscellaneous',
                                'battery#general',
                                'battery#quality',
                                'battery#operation_performance',
                                'battery#design_features',
                                'battery#miscellaneous',
                                'power_supply#general',
                                'power_supply#price',
                                'power_supply#quality',
                                'power_supply#operation_performance',
                                'power_supply#design_features',
                                'power_supply#miscellaneous',
                                'keyboard#general',
                                'keyboard#quality',
                                'keyboard#operation_performance',
                                'keyboard#usability',
                                'keyboard#design_features',
                                'keyboard#miscellaneous',
                                'mouse#general',
                                'mouse#quality',
                                'mouse#operation_performance',
                                'mouse#usability',
                                'mouse#design_features',
                                'mouse#miscellaneous',
                                'fans_cooling#general',
                                'fans_cooling#quality',
                                'fans_cooling#operation_performance',
                                'fans_cooling#design_features',
                                'fans_cooling#miscellaneous',
                                'optical_drives#general',
                                'optical_drives#quality',
                                'optical_drives#operation_performance',
                                'optical_drives#design_features',
                                'optical_drives#miscellaneous',
                                'ports#general',
                                'ports#quality',
                                'ports#operation_performance',
                                'ports#design_features',
                                'ports#miscellaneous',
                                'graphics#general',
                                'graphics#quality',
                                'graphics#design_features',
                                'graphics#miscellaneous',
                                'multimedia_devices#general',
                                'multimedia_devices#quality',
                                'multimedia_devices#operation_performance',
                                'multimedia_devices#usability',
                                'multimedia_devices#design_features',
                                'multimedia_devices#miscellaneous',
                                'hardware#general',
                                'hardware#quality',
                                'hardware#operation_performance',
                                'hardware#usability',
                                'hardware#design_features',
                                'hardware#miscellaneous',
                                'os#general',
                                'os#quality',
                                'os#operation_performance',
                                'os#usability',
                                'os#design_features',
                                'os#miscellaneous',
                                'software#general',
                                'software#price',
                                'software#quality',
                                'software#operation_performance',
                                'software#usability',
                                'software#design_features',
                                'software#miscellaneous',
                                'warranty#general',
                                'warranty#price',
                                'warranty#miscellaneous',
                                'shipping#general',
                                'shipping#price',
                                'shipping#quality',
                                'shipping#miscellaneous',
                                'support#general',
                                'support#price',
                                'support#quality',
                                'support#miscellaneous',
                                'company#general']

    global TOTAL_SENTENCE_COUNT
    global TOTAL_REVIEW_COUNT
    global TOTAL_AUGMENTED_REVIEW_COUNT
    global TOTAL_POSITIVE_LABEL_COUNT
    global TOTAL_NEGATIVE_LABEL_COUNT
    global TOTAL_NEUTRAL_LABEL_COUNT
    global TOTAL_NOT_APPLICABLE_LABEL_COUNT
    global ASPECT_TO_SENTENCE_FREQUENCY
    global DATA_TYPE

    if DATA_TYPE == 'restaurant':
        possible_aspects = restaurant_possible_aspects
    elif DATA_TYPE == 'laptops':
        possible_aspects = laptops_possible_aspects

    TOTAL_SENTENCE_COUNT = 0
    TOTAL_REVIEW_COUNT = 0
    TOTAL_AUGMENTED_REVIEW_COUNT = 0
    TOTAL_POSITIVE_LABEL_COUNT = 0
    TOTAL_NEGATIVE_LABEL_COUNT = 0
    TOTAL_NEUTRAL_LABEL_COUNT = 0
    TOTAL_NOT_APPLICABLE_LABEL_COUNT = 0
    ASPECT_TO_SENTENCE_FREQUENCY = {}

    dataset = []
    for i, review in enumerate(reviews):
        TOTAL_REVIEW_COUNT += 1
        print('review-' + str(i))

        review_text = []
        aspect_sentence_polarity_map = {}
        sentences = review['sentences']['sentence']
        if isinstance(sentences, dict):
            sentences = [sentences]
        for j, sentence in enumerate(sentences):
            TOTAL_SENTENCE_COUNT += 1
            sentence_text = []
            sentence_text.append(sentence['text'])
            if 'Opinions' in sentence.keys():
                opinions = sentence['Opinions']['Opinion']
                if isinstance(opinions, dict):
                    opinions = [opinions]

                for opinion in opinions:
                    aspect_category = opinion['@category'].lower()
                    update_aspect_to_sentence_frequency(aspect_category)
                    polarity = get_categorical_sentiment(opinion['@polarity'])

                    # Here we are trying to create a map of sentences and aspects. Basicly, for the current review which
                    # sentence is related to which aspect.
                    sentence_polarity = aspect_sentence_polarity_map.get(aspect_category, [])
                    sentence_polarity.append([j, polarity])
                    aspect_sentence_polarity_map[aspect_category] = sentence_polarity
            # else:
            #     # no aspect, contains no sentiment, either out of domain or just some fact
            #     sentence_polarity = aspect_sentence_polarity_map.get('relevance', [])
            #     sentence_polarity.append([j, 3])
            #     aspect_sentence_polarity_map['relevance'] = sentence_polarity

            review_text.append(sentence_text)

        # It could be that a particular review has no sentence for some aspects. Here we are just adding an empty
        # sentence list for such aspects.
        if not REDUCED:
            for aspect in possible_aspects:
                if aspect not in aspect_sentence_polarity_map.keys():
                    aspect_sentence_polarity_map[aspect] = []

        # Now for every possible aspect we will create a datapoint using this particular review.
        for a, sent_polarities in aspect_sentence_polarity_map.items():
            TOTAL_AUGMENTED_REVIEW_COUNT += 1
            aspect_words = []
            aspects = a.split('#')
            aspect_words.extend(aspects[0].split('_'))
            if len(aspects) > 1:
                aspect_words.extend(aspects[1].split('_'))
            augmented_review = []
            augmented_polarity = []
            # check which sentences from the current review are related to this aspect 'a' and has some polarity.
            # Iterate over each sentence from the review and check in the aspect's map whether it is present there
            # or not. If yes, mark the sentence's sentiment polarity accordinly or otherwise mark it N/A(3)
            for j, s in enumerate(review_text):
                updated_polarity = 3
                for sent_polarity in sent_polarities:
                    if j == sent_polarity[0]:
                        # sentence j contains current aspect
                        updated_polarity = sent_polarity[1]
                        break
                if updated_polarity == 3:
                    TOTAL_NOT_APPLICABLE_LABEL_COUNT += 1
                augmented_polarity.append(updated_polarity)
                augmented_review.append(s)
            augmented_datapoint = [aspect_words, augmented_review, augmented_polarity]
            dataset.append(augmented_datapoint)

            if OVERSAMPLING:
                oversampled_datapoints = oversampling(augmented_datapoint)
                if oversampled_datapoints is not None:
                    for oversampled_datapoint in oversampled_datapoints:
                        TOTAL_NEUTRAL_LABEL_COUNT += 1
                        TOTAL_AUGMENTED_REVIEW_COUNT += 1
                        dataset.append(oversampled_datapoint)

        print('---------')
    print(dataset[0])
    print(dataset[1])
    print(dataset[2])
    print(dataset[3])
    print(dataset[4])
    print(dataset[5])
    print(dataset[6])
    print(dataset[7])
    print(dataset[8])
    print(dataset[9])
    print(dataset[10])
    print(dataset[11])
    print(dataset[12])
    print(len(dataset))
    output_file_name = 'formatted_' + DATA_TYPE + '_' + mode + '.pickle'
    write_binary(dataset, filename = output_file_name)
    print('---', mode, '---')
    print('TOTAL_REVIEW_COUNT: ', TOTAL_REVIEW_COUNT)
    print('TOTAL_SENTENCE_COUNT: ', TOTAL_SENTENCE_COUNT)
    print('TOTAL_AUGMENTED_REVIEW_COUNT: ', TOTAL_AUGMENTED_REVIEW_COUNT)
    print('TOTAL_POSITIVE_LABEL_COUNT: ', TOTAL_POSITIVE_LABEL_COUNT)
    print('TOTAL_NEGATIVE_LABEL_COUNT: ', TOTAL_NEGATIVE_LABEL_COUNT)
    print('TOTAL_NEUTRAL_LABEL_COUNT: ', TOTAL_NEUTRAL_LABEL_COUNT)
    print('TOTAL_NOT_APPLICABLE_LABEL_COUNT: ', TOTAL_NOT_APPLICABLE_LABEL_COUNT)
    total_label_count = TOTAL_POSITIVE_LABEL_COUNT + TOTAL_NEGATIVE_LABEL_COUNT + TOTAL_NEUTRAL_LABEL_COUNT + TOTAL_NOT_APPLICABLE_LABEL_COUNT
    print('TOTAL_LABELS: ', total_label_count)
    print('CLASS 0: ', (TOTAL_POSITIVE_LABEL_COUNT / total_label_count) * 100)
    print('CLASS 1: ', (TOTAL_NEGATIVE_LABEL_COUNT / total_label_count) * 100)
    print('CLASS 2: ', (TOTAL_NEUTRAL_LABEL_COUNT / total_label_count) * 100)
    print('CLASS 3: ', (TOTAL_NOT_APPLICABLE_LABEL_COUNT / total_label_count) * 100)
    print('ASPECT_TO_SENTENCE_FREQUENCY:')
    for k, v in ASPECT_TO_SENTENCE_FREQUENCY.items():
        print(k + ": " + str(v))


def oversampling(datapoint):
    augmented = False
    oversampled_datapoints = []
    sent_paraphrases_map = {}
    aspect_words = datapoint[0]
    review = datapoint[1]
    polarity = datapoint[2]
    augmented_review, augmented_polarity = [], []
    for i, sent_polarity in enumerate(polarity):
        if sent_polarity in MINORITY_CLASS:
            sent = review[i]
            augmented_review.append(sent)
            augmented_polarity.append(polarity[i])
            augmented = True
            if PARAPHRASING:
                sent_paraphrases_map[sent[0]] = paraphrase(sent)
    oversampled_datapoints.append([aspect_words, augmented_review, augmented_polarity])

    # permute paraphrased sentences to create new data points
    if PARAPHRASING:
        for i, sent in enumerate(augmented_review):
            paraphrases = sent_paraphrases_map.get(sent[0])
            for p_sent in paraphrases:
                new_review = augmented_review.copy()
                new_review[i] = [p_sent]
                oversampled_datapoints.append([aspect_words, new_review, augmented_polarity])

    if augmented:
        # print('##########################')
        # for dp in oversampled_datapoints:
        #     print(dp)
        # print('##########################')
        return oversampled_datapoints
    else:
        return None


def paraphrase(text):
    """
    This method paraphrases a sentence by replacing nouns and adjectives with synonym words.

    1) find POS tags.
    2) find synonyms for nouns and adjectives.
    3) Make sure to use noun synonyms for nouns and adjective synonyms for adjectives.
    4) permute synonyms to create multiple paraphrases.
    :param text:
    :return:
    """

    word_to_synonym_map = {}
    paraphrases = []
    sentence = text[0]

    doc = NLP(sentence)
    for token in doc:
        word = token.text
        word_pos = token.pos_
        if word_pos == 'NOUN':
            synonyms = get_matched_words(word, word_pos = 'n')
            word_to_synonym_map[word] = synonyms
        elif word_pos == 'ADJ':
            synonyms = get_matched_words(word, word_pos = 'a')
            word_to_synonym_map[word] = synonyms

    for word, synonyms in word_to_synonym_map.items():
        for synonym in synonyms:
            paraphrases.append(sentence.replace(word, synonym))

    print('##########################')
    for dp in paraphrases:
        print(dp)
    print('##########################')
    return paraphrases


def limit_matched_words(words):
    matched_words = set()
    count = MAX_MATCHED_WORDS
    for word in words:
        if count > 0:
            matched_words.add(word)
            count -= 1
        else:
            break
    return matched_words


def get_matched_words(word, word_pos):
    matched_words = set()
    if MATCH_TYPE == 'synonym':
        matched_words = limit_matched_words(get_synonyms(word, word_pos))
    elif MATCH_TYPE == 'similar':
        matched_words = limit_matched_words(get_similar_words_using_embedding(word))
    return matched_words


def get_similar_words_using_embedding(word):
    similar_words = get_similar_words(word, embedding = embedding)
    reduced_similar_words = set()
    for similar_word in similar_words:
        if word not in similar_word[0]:
            reduced_similar_words.add(similar_word[0])
    return reduced_similar_words


def get_synonyms(word, word_pos):
    synonyms = set()
    syns = wordnet.synsets(word)
    for syn in syns:
        syn_pos = syn.name().split('.')[1]
        if syn_pos == word_pos:
            for l in syn.lemmas():
                synonyms.add(' '.join(l.name().split('_')))
    return synonyms


def make_split(doc):
    test_reviews = []
    val_reviews = []
    reviews = doc['Reviews']['Review']
    for i, review in enumerate(reviews):
        r = random.random()
        if r < 0.5:
            test_reviews.append(review)
        else:
            val_reviews.append(review)
    return test_reviews, val_reviews


In [67]:
if TYPE == 'train':
        doc = read_xml(INPUT_FILE_PATH)
        reviews = doc['Reviews']['Review']
        make_flatten_restaurant_data_sentence_level(reviews, mode = 'train')
elif TYPE == 'test':
    doc = read_xml(INPUT_FILE_PATH)
    test_reviews, val_reviews = make_split(doc)
    make_flatten_restaurant_data_sentence_level(test_reviews, mode = 'test')
    make_flatten_restaurant_data_sentence_level(val_reviews, mode = 'val')


review-0
---------
review-1
---------
review-2
---------
review-3
---------
review-4
---------
review-5
---------
review-6
---------
review-7
---------
review-8
---------
review-9
---------
review-10
---------
review-11
---------
review-12
---------
review-13
---------
review-14
---------
review-15
---------
review-16
---------
review-17
---------
review-18
---------
review-19
---------
review-20
---------
review-21
---------
review-22
---------
review-23
---------
review-24
---------
review-25
---------
review-26
---------
review-27
---------
review-28
---------
review-29
---------
review-30
---------
review-31
---------
review-32
---------
review-33
---------
review-34
---------
review-35
---------
review-36
---------
review-37
---------
review-38
---------
review-39
---------
review-40
---------
review-41
---------
review-42
---------
review-43
---------
review-44
---------
review-45
---------
review-46
---------
review-47
---------
review-48
---------
review-49
---------
review-50


In [68]:
from config.settings import WORD_FREQ_FILE, VOCAB_TO_CODE_FILE, CODE_TO_VOCAB_FILE, CODE_TO_EMBED_FILE
from utils.data_util import read_binary, write_binary
from utils.util import load_glove_embeddings, load_fastText_embeddings, load_oov_fastText_embeddings
from collections import defaultdict
import random

In [69]:
# ---SCRIPT DEPENDENCIES----
# python -m spacy download en
# --------------------------

# -----CHANGE THESE VALUES ACCORDINGLY BEFORE RUNNING THE SCRIPT-----
TYPE = 'train'
# TYPE = 'test'
# TYPE = 'val'
FILE_NAME = 'restaurant'
# FILE_NAME = 'laptops'
EMBEDDING_TYPE = 'fasttext'
# EMBEDDING_TYPE = 'glove'
# EMBEDDING_TYPE = None
# -------------------------------------------------------------------
FORMATTED_FILE_NAME = 'formatted_' + FILE_NAME + '_' + TYPE + '.pickle'
PROCESSED_FILE_NAME = 'processed_' + TYPE + '.pickle'

EMBEDDING_DIMENSION = 300
MAX_VOCAB_SIZE = 50001
UNKNOWN_EMBEDDING = np.random.randn(EMBEDDING_DIMENSION)
PAD = 0
PAD_EMBEDDING = np.zeros(EMBEDDING_DIMENSION)
NLP = spacy.load('en')

HOTELS_ASPECT_WORDS = ['hotel', 'general', 'prices', 'design', 'features', 'cleanliness', 'comfort',
                       'quality', 'style', 'options', 'miscellaneous', 'rooms', 'room', 'amenities', 'facilities',
                       'service', 'location', 'food', 'drinks', 'Hotel', 'General', 'Prices', 'Design', 'Features',
                       'Cleanliness', 'Comfort', 'Quality', 'Style', 'Options', 'Miscellaneous', 'Rooms', 'Room',
                       'Amenities', 'Facilities', 'Service', 'Location', 'Food', 'Drinks']

HOTELS_ALL_POSSIBLE_ASPECTS = [['hotel', 'general'], ['hotel', 'prices'], ['hotel', 'design', 'features'],
                               ['hotel', 'cleaniness'], ['hotel', 'comfort'], ['hotel', 'quality'],
                               ['hotel', 'miscellaneous'],
                               ['rooms', 'general'], ['rooms', 'prices'], ['rooms', 'design', 'features'],
                               ['rooms', 'cleaniness'], ['rooms', 'comfort'], ['rooms', 'quality'],
                               ['rooms', 'miscellaneous'],
                               ['room', 'amenities', 'general'], ['room', 'amenities', 'prices'],
                               ['room', 'amenities', 'design', 'features'],
                               ['room', 'amenities', 'cleaniness'], ['room', 'amenities', 'comfort'],
                               ['room', 'amenities', 'quality'],
                               ['room', 'amenities', 'miscellaneous'], ['service', 'general'], ['location', 'general'],
                               ['food', 'drinks', 'prices'], ['food', 'drinks', 'quality'],
                               ['food', 'drinks', 'style', 'options']]

RESTAURANT_ASPECT_WORDS = ['restaurant', 'general', 'prices', 'miscellaneous', 'food', 'quality', 'style', 'options',
                           'drinks', 'ambience', 'service', 'location', 'Restaurant', 'General', 'Prices',
                           'Miscellaneous',
                           'Food',
                           'Quality', 'Style', 'Options', 'Drinks', 'Ambience', 'Service', 'Location']

RESTAURANT_ALL_POSSIBLE_ASPECTS = [['restaurant', 'general'], ['restaurant', 'prices'], ['restaurant', 'miscellaneous'],
                                   ['food', 'prices'], ['food', 'quality'], ['food', 'style', 'options'],
                                   ['drinks', 'prices'], ['drinks', 'quality'], ['drinks', 'style', 'options'],
                                   ['ambience', 'general'],
                                   ['service', 'general'],
                                   ['location', 'general']]

RESTAURANT_ASPECT_WORD_INDEX_MAP = {
    'foodstyleoptions': 0,
    'foodquality': 1,
    'drinksprices': 2,
    'foodprices': 3,
    'ambiencegeneral': 4,
    'drinksstyleoptions': 5,
    'locationgeneral': 6,
    'drinksquality': 7,
    'restaurantprices': 8,
    'restaurantmiscellaneous': 9,
    'servicegeneral': 10,
    'restaurantgeneral': 11,
    'none': 12
}

RESTAURANT_INDEX_TO_ASPECT_WORD_MAP = {
    0: 'foodstyleoptions',
    1: 'foodquality',
    2: 'drinksprices',
    3: 'foodprices',
    4: 'ambiencegeneral',
    5: 'drinksstyleoptions',
    6: 'locationgeneral',
    7: 'drinksquality',
    8: 'restaurantprices',
    9: 'restaurantmiscellaneous',
    10: 'servicegeneral',
    11: 'restaurantgeneral',
    12: 'none'
}

LAPTOPS_ASPECT_WORDS = ['laptop', 'display', 'cpu', 'motherboard', 'hard', 'disc', 'memory', 'battery', 'power',
                        'supply', 'keyboard', 'mouse', 'fan', 'fans', 'cooling', 'optical', 'drives', 'drive', 'ports',
                        'graphics', 'multimedia', 'devices', 'device', 'hardware', 'os', 'software', 'warranty',
                        'shipping', 'support', 'company', 'general', 'price', 'quality', 'operation', 'performance',
                        'usability', 'design', 'features', 'portability', 'connectivity', 'miscellaneous',
                        'Laptop', 'Display', 'CPU', 'Cpu', 'Motherboard', 'Hard', 'Disc', 'Memory', 'Battery', 'Power',
                        'Supply', 'Keyboard', 'Mouse', 'Fan', 'Fans', 'Cooling', 'Optical', 'Drives', 'Drive', 'Ports',
                        'Graphics', 'Multimedia', 'Devices', 'Device', 'Hardware', 'OS', 'Os', 'Software', 'Warranty',
                        'Shipping', 'Support', 'Company', 'General', 'Price', 'Quality', 'Operation', 'Performance',
                        'Usability', 'Design', 'Features', 'Portability', 'Connectivity', 'Miscellaneous'
                        ]

LAPTOPS_ALL_POSSIBLE_ASPECTS = [['laptop', 'general'], ['laptop', 'operation', 'performance'], ['laptop', 'usability'],
                                ['laptop', 'portability'], ['laptop', 'price'], ['laptop', 'quality'],
                                ['laptop', 'design', 'features'], ['laptop', 'miscellaneous'],
                                ['laptop', 'connectivity'],
                                ['display', 'usability'], ['display', 'design', 'features'], ['display', 'quality'],
                                ['display', 'general'], ['display', 'operation', 'performance'],
                                ['cpu', 'operation', 'performance'], ['cpu', 'quality'], ['cpu', 'miscellaneous'],
                                ['cpu', 'design', 'features'],
                                ['motherboard', 'quality'],
                                ['hard', 'disc', 'design', 'features'], ['hard', 'disc', 'quality'],
                                ['memory', 'design', 'features'],
                                ['battery', 'quality'], ['battery', 'miscellaneous'],
                                ['battery', 'operation', 'performance'],
                                ['power', 'supply', 'quality'], ['power', 'supply', 'operation', 'performance'],
                                ['power', 'supply', 'design', 'features'], ['power', 'supply', 'miscellaneous'],
                                ['keyboard', 'design', 'features'], ['keyboard', 'general'], ['keyboard', 'usability'],
                                ['keyboard', 'quality'], ['keyboard', 'operation', 'performance'],
                                ['mouse', 'design', 'features'], ['mouse', 'quality'],
                                ['mouse', 'operation', 'performance'],
                                ['mouse', 'usability'], ['mouse', 'general'],
                                ['fans', 'cooling', 'design', 'features'], ['fans', 'cooling', 'quality'],
                                ['fans', 'cooling', 'operation', 'performance'],
                                ['optical', 'drives', 'quality'], ['optical', 'drives', 'operation', 'performance'],
                                ['ports', 'quality'], ['ports', 'design', 'features'],
                                ['ports', 'operation', 'performance'],
                                ['graphics', 'quality'], ['graphics', 'general'], ['graphics', 'design_features'],
                                ['graphics', 'miscellaneous'],
                                ['multimedia', 'devices', 'usability'], ['multimedia', 'devices', 'miscellaneous'],
                                ['multimedia', 'devices', 'operation', 'performance'],
                                ['multimedia', 'devices', 'quality'],
                                ['multimedia', 'devices', 'general'], ['multimedia', 'devices', 'design', 'features'],
                                ['hardware', 'quality'], ['hardware', 'general'],
                                ['hardware', 'operation', 'performance'],
                                ['os', 'design', 'features'], ['os', 'general'], ['os', 'usability'],
                                ['os', 'miscellaneous'],
                                ['os', 'operation', 'performance'], ['os', 'quality'],
                                ['software', 'miscellaneous'], ['software', 'general'], ['software', 'quality'],
                                ['software', 'usability'], ['software', 'design', 'features'],
                                ['software', 'operation', 'performance'], ['software', 'price'],
                                ['warranty', 'general'], ['warranty', 'price'],
                                ['shipping', 'quality'], ['shipping', 'price'],
                                ['support', 'quality'], ['support', 'miscellaneous'], ['support', 'price'],
                                ['company', 'general']]

LAPTOPS_ASPECT_WORD_INDEX_MAP = {
    'laptopgeneral': 0,
    'laptopprice': 1,
    'laptopquality': 2,
    'laptopoperationperformance': 3,
    'laptopusability': 4,
    'laptopdesignfeatures': 5,
    'laptopportability': 6,
    'laptopconnectivity': 7,
    'laptopmiscellaneous': 8,
    'displaygeneral': 9,
    'displayquality': 10,
    'displayoperationperformance': 11,
    'displayusability': 12,
    'displaydesignfeatures': 13,
    'displayportability': 14,
    'displaymiscellaneous': 15,
    'cpugeneral': 16,
    'cpuprice': 17,
    'cpuquality': 18,
    'cpuoperationperformance': 19,
    'cpudesignfeatures': 20,
    'cpumiscellaneous': 21,
    'motherboardgeneral': 22,
    'motherboardprice': 23,
    'motherboardquality': 24,
    'motherboarddesignfeatures': 25,
    'motherboardmiscellaneous': 26,
    'harddiscgeneral': 27,
    'harddiscprice': 28,
    'harddiscquality': 29,
    'harddiscoperationperformance': 30,
    'harddiscdesignfeatures': 31,
    'harddiscmiscellaneous': 32,
    'memorygeneral': 33,
    'memoryprice': 34,
    'memorydesignfeatures': 35,
    'memorymiscellaneous': 36,
    'batterygeneral': 37,
    'batteryquality': 38,
    'batteryoperationperformance': 39,
    'batterydesignfeatures': 40,
    'batterymiscellaneous': 41,
    'powersupplygeneral': 42,
    'powersupplyprice': 43,
    'powersupplyquality': 44,
    'powersupplyoperationperformance': 45,
    'powersupplydesignfeatures': 46,
    'powersupplymiscellaneous': 47,
    'keyboardgeneral': 48,
    'keyboardquality': 49,
    'keyboardoperationperformance': 50,
    'keyboardusability': 51,
    'keyboarddesignfeatures': 52,
    'keyboardmiscellaneous': 53,
    'mousegeneral': 54,
    'mousequality': 55,
    'mouseoperationperformance': 56,
    'mouseusability': 57,
    'mousedesignfeatures': 58,
    'mousemiscellaneous': 59,
    'fanscoolinggeneral': 60,
    'fanscoolingquality': 61,
    'fanscoolingoperationperformance': 62,
    'fanscoolingdesignfeatures': 63,
    'fanscoolingmiscellaneous': 64,
    'opticaldrivesgeneral': 65,
    'opticaldrivesquality': 66,
    'opticaldrivesoperationperformance': 67,
    'opticaldrivesdesignfeatures': 68,
    'opticaldrivesmiscellaneous': 69,
    'portsgeneral': 70,
    'portsquality': 71,
    'portsoperationperformance': 72,
    'portsdesignfeatures': 73,
    'portsmiscellaneous': 74,
    'graphicsgeneral': 75,
    'graphicsquality': 76,
    'graphicsdesignfeatures': 77,
    'graphicsmiscellaneous': 78,
    'multimediadevicesgeneral': 79,
    'multimediadevicesquality': 80,
    'multimediadevicesoperationperformance': 81,
    'multimediadevicesusability': 82,
    'multimediadevicesdesignfeatures': 83,
    'multimediadevicesmiscellaneous': 84,
    'hardwaregeneral': 85,
    'hardwarequality': 86,
    'hardwareoperationperformance': 87,
    'hardwareusability': 88,
    'hardwaredesignfeatures': 89,
    'hardwaremiscellaneous': 90,
    'osgeneral': 91,
    'osquality': 92,
    'osoperationperformance': 93,
    'osusability': 94,
    'osdesignfeatures': 95,
    'osmiscellaneous': 96,
    'softwaregeneral': 97,
    'softwareprice': 98,
    'softwarequality': 99,
    'softwareoperationperformance': 100,
    'softwareusability': 101,
    'softwaredesignfeatures': 102,
    'softwaremiscellaneous': 103,
    'warrantygeneral': 104,
    'warrantyprice': 105,
    'warrantymiscellaneous': 106,
    'shippinggeneral': 107,
    'shippingprice': 108,
    'shippingquality': 109,
    'shippingmiscellaneous': 110,
    'supportgeneral': 111,
    'supportprice': 112,
    'supportquality': 113,
    'supportmiscellaneous': 114,
    'companygeneral': 115,
    'none': 116
}

LAPTOPS_INDEX_TO_ASPECT_WORD_MAP = {
    0: 'laptopgeneral',
    1: 'laptopprice',
    2: 'laptopquality',
    3: 'laptopoperationperformance',
    4: 'laptopusability',
    5: 'laptopdesignfeatures',
    6: 'laptopportability',
    7: 'laptopconnectivity',
    8: 'laptopmiscellaneous',
    9: 'displaygeneral',
    10: 'displayquality',
    11: 'displayoperationperformance',
    12: 'displayusability',
    13: 'displaydesignfeatures',
    14: 'displayportability',
    15: 'displaymiscellaneous',
    16: 'cpugeneral',
    17: 'cpuprice',
    18: 'cpuquality',
    19: 'cpuoperationperformance',
    20: 'cpudesignfeatures',
    21: 'cpumiscellaneous',
    22: 'motherboardgeneral',
    23: 'motherboardprice',
    24: 'motherboardquality',
    25: 'motherboarddesignfeatures',
    26: 'motherboardmiscellaneous',
    27: 'harddiscgeneral',
    28: 'harddiscprice',
    29: 'harddiscquality',
    30: 'harddiscoperationperformance',
    31: 'harddiscdesignfeatures',
    32: 'harddiscmiscellaneous',
    33: 'memorygeneral',
    34: 'memoryprice',
    35: 'memorydesignfeatures',
    36: 'memorymiscellaneous',
    37: 'batterygeneral',
    38: 'batteryquality',
    39: 'batteryoperationperformance',
    40: 'batterydesignfeatures',
    41: 'batterymiscellaneous',
    42: 'powersupplygeneral',
    43: 'powersupplyprice',
    44: 'powersupplyquality',
    45: 'powersupplyoperationperformance',
    46: 'powersupplydesignfeatures',
    47: 'powersupplymiscellaneous',
    48: 'keyboardgeneral',
    49: 'keyboardquality',
    50: 'keyboardoperationperformance',
    51: 'keyboardusability',
    52: 'keyboarddesignfeatures',
    53: 'keyboardmiscellaneous',
    54: 'mousegeneral',
    55: 'mousequality',
    56: 'mouseoperationperformance',
    57: 'mouseusability',
    58: 'mousedesignfeatures',
    59: 'mousemiscellaneous',
    60: 'fanscoolinggeneral',
    61: 'fanscoolingquality',
    62: 'fanscoolingoperationperformance',
    63: 'fanscoolingdesignfeatures',
    64: 'fanscoolingmiscellaneous',
    65: 'opticaldrivesgeneral',
    66: 'opticaldrivesquality',
    67: 'opticaldrivesoperationperformance',
    68: 'opticaldrivesdesignfeatures',
    69: 'opticaldrivesmiscellaneous',
    70: 'portsgeneral',
    71: 'portsquality',
    72: 'portsoperationperformance',
    73: 'portsdesignfeatures',
    74: 'portsmiscellaneous',
    75: 'graphicsgeneral',
    76: 'graphicsquality',
    77: 'graphicsdesignfeatures',
    78: 'graphicsmiscellaneous',
    79: 'multimediadevicesgeneral',
    80: 'multimediadevicesquality',
    81: 'multimediadevicesoperationperformance',
    82: 'multimediadevicesusability',
    83: 'multimediadevicesdesignfeatures',
    84: 'multimediadevicesmiscellaneous',
    85: 'hardwaregeneral',
    86: 'hardwarequality',
    87: 'hardwareoperationperformance',
    88: 'hardwareusability',
    89: 'hardwaredesignfeatures',
    90: 'hardwaremiscellaneous',
    91: 'osgeneral',
    92: 'osquality',
    93: 'osoperationperformance',
    94: 'osusability',
    95: 'osdesignfeatures',
    96: 'osmiscellaneous',
    97: 'softwaregeneral',
    98: 'softwareprice',
    99: 'softwarequality',
    100: 'softwareoperationperformance',
    101: 'softwareusability',
    102: 'softwaredesignfeatures',
    103: 'softwaremiscellaneous',
    104: 'warrantygeneral',
    105: 'warrantyprice',
    106: 'warrantymiscellaneous',
    107: 'shippinggeneral',
    108: 'shippingprice',
    109: 'shippingquality',
    110: 'shippingmiscellaneous',
    111: 'supportgeneral',
    112: 'supportprice',
    113: 'supportquality',
    114: 'supportmiscellaneous',
    115: 'companygeneral',
    116: 'none'
}

In [70]:
def build_word_frequency_distribution():
    """
    1. Extract tokens from the review text
    2. Calculate frequency of each token
    3. Create a freq dict and store it in a file

    :return: A dict of <token, freq>
    """
    try:
        freq_dist_f = read_binary(WORD_FREQ_FILE)
        print('frequency distribution loaded')
        return freq_dist_f
    except IOError:
        pass

    print('building frequency distribution')
    freq = defaultdict(int)
    if FILE_NAME == 'restaurant':
        for aspect_word in RESTAURANT_ASPECT_WORDS:
            freq[aspect_word] += 1
    elif FILE_NAME == 'laptops':
        for aspect_word in LAPTOPS_ASPECT_WORDS:
            freq[aspect_word] += 1

    files = [FORMATTED_FILE_NAME]
    #if EMBEDDING_TYPE == 'fasttext':
        #files.append(FORMATTED_FILE_NAME.replace('train', 'test'))
        #files.append(FORMATTED_FILE_NAME.replace('train', 'val'))

    for file_path in files:
        print('building vocab from file - ' + file_path)
        for i, review in enumerate(read_binary(file_path)):
            sentences = review[1]

            for sent in sentences:
                tokens = NLP.tokenizer(sent[0])
                for token in tokens:
                    freq[token.orth_] += 1
                if i % 100 == 0:
                    write_binary(freq, WORD_FREQ_FILE)
                    print('dump at {}'.format(i))
            write_binary(freq, WORD_FREQ_FILE)
    return freq


def build_vocabulary(lower = 1, n = MAX_VOCAB_SIZE):
    """
    1. Get word frequency distribution
    2. Sort is based on word frequencies
    3. Make a vocab dist using the most frequent words
    4. Store vocab dist in a file in format <word, identifier>

    :param lower: Identifiers below this are reserved
    :param n: Number of unique expected words
    :return: A dict of vocabulary words and an assigned identifier
    """

    try:
        vocab_to_code = read_binary(VOCAB_TO_CODE_FILE)
        code_to_vocab = read_binary(CODE_TO_VOCAB_FILE)
        print('vocabulary loaded')
        return vocab_to_code, code_to_vocab
    except IOError:
        print('building vocabulary')
    freq = build_word_frequency_distribution()

    # get glove embeddings
    print('loading embeddings')
    if EMBEDDING_TYPE == 'glove':
        word_to_embeddings = load_glove_embeddings()
    elif EMBEDDING_TYPE == 'fasttext':
        word_to_embeddings = load_oov_fastText_embeddings()
    else:
        word_to_embeddings = {}

    # sorting words in ascending order based on frequency and then pick top n words
    top_words = list(sorted(freq.items(), key = lambda x: -x[1]))[:n - lower + 1]
    # create optimum vocab size
    print('Vocab count : ' + str(len(top_words)))
    # global MAX_VOCAB_SIZE
    # global UNKNOWN
    max_vocab_size = len(top_words) + 2
    unknown = max_vocab_size - 1
    vocab_to_code = {}
    code_to_vocab = {}

    # an array of embeddings with index referring to the vocab code. First and last index is
    # reserved for padding and unknown words respectively.
    code_to_embed = np.zeros(shape = (max_vocab_size, EMBEDDING_DIMENSION), dtype = np.float32)
    code_to_embed[PAD] = PAD_EMBEDDING
    code_to_embed[unknown] = UNKNOWN_EMBEDDING
    vocab_to_code['<UNK>'] = unknown
    code_to_vocab[unknown] = '<UNK>'
    vocab_to_code['<PAD>'] = PAD
    code_to_vocab[PAD] = '<PAD>'

    # lower vocab indexes are reserved for padding and unknown words
    i = lower
    for w, freq in top_words:
        vocab_to_code[w] = i
        code_to_vocab[i] = w
        try:
            if EMBEDDING_TYPE == 'glove':
                embedding = word_to_embeddings.word_vec(w)
            elif EMBEDDING_TYPE == 'fasttext':
                embedding = word_to_embeddings.get_word_vector(w)
        except KeyError:
            embedding = UNKNOWN_EMBEDDING

        if EMBEDDING_TYPE in ['glove', 'fasttext']:
            code_to_embed[i] = embedding
        i += 1
    write_binary(vocab_to_code, VOCAB_TO_CODE_FILE)
    write_binary(code_to_vocab, CODE_TO_VOCAB_FILE)
    write_binary(code_to_embed, CODE_TO_EMBED_FILE)
    return vocab_to_code, code_to_vocab


def get_uncoded_data(code_to_vocab, datapoint):
    aspect_words = []
    review = []
    aspect_codes = datapoint[0]
    coded_sentences = datapoint[1]
    polarities = datapoint[2]

    for aspect_code in aspect_codes:
        aspect_words.append(code_to_vocab.get(aspect_code))

    for sentence in coded_sentences:
        sent_words = []
        for coded_word in sentence:
            sent_words.append(code_to_vocab.get(coded_word))
        review.append(sent_words)
    x = [aspect_words, review, polarities]
    return x


def process_data():
    vocab_to_code, code_to_vocab = build_vocabulary()
    max_vocab_size = len(vocab_to_code)
    unknown = max_vocab_size - 1
    print('Final Vocab Size : ' + str(max_vocab_size))
    try:
        coded_dataset = []
        for i, review in enumerate(read_binary(FORMATTED_FILE_NAME)):
            coded_aspect = []
            coded_sentences = []

            if i == 0:
                print(review)

            sentences = review[1]
            aspect_words = review[0]
            polarities = review[2]

            for aspect_word in aspect_words:
                coded_aspect.append(vocab_to_code.get(aspect_word, unknown))

            for sent in sentences:
                coded_sentence = []
                tokens = NLP.tokenizer(sent[0])
                for token in tokens:
                    coded_sentence.append(vocab_to_code.get(token.orth_, unknown))
                coded_sentences.append(coded_sentence)

            coded_review = [coded_aspect, coded_sentences, polarities]

            # dataset
            coded_dataset.append(coded_review)
            write_binary(coded_dataset, PROCESSED_FILE_NAME)
            print('dump at {}'.format(i))

        datapoint = coded_dataset[0]
        print(datapoint)
        print(get_uncoded_data(code_to_vocab, datapoint))
    except KeyboardInterrupt:
        pass


In [71]:
process_data()

building vocabulary
frequency distribution loaded
loading embeddings
loading ovv fastext model..
Vocab count : 3952
Final Vocab Size : 3954
[['restaurant', 'general'], [['Judging from previous posts this used to be a good place, but not any longer.'], ['We, there were four of us, arrived at noon - the place was empty - and the staff acted like we were imposing on them and they were very rude.'], ['They never brought us complimentary noodles, ignored repeated requests for sugar, and threw our dishes on the table.'], ['The food was lousy - too sweet or too salty and the portions tiny.'], ['After all that, they complained to me about the small tip.'], ['Avoid this place!']], [1, 3, 3, 3, 3, 1]]
dump at 0
dump at 1
dump at 2
dump at 3
dump at 4
dump at 5
dump at 6
dump at 7
dump at 8
dump at 9
dump at 10
dump at 11
dump at 12
dump at 13
dump at 14
dump at 15
dump at 16
dump at 17
dump at 18
dump at 19
dump at 20
dump at 21
dump at 22
dump at 23
dump at 24
dump at 25
dump at 26
dump at 27
d

dump at 664
dump at 665
dump at 666
dump at 667
dump at 668
dump at 669
dump at 670
dump at 671
dump at 672
dump at 673
dump at 674
dump at 675
dump at 676
dump at 677
dump at 678
dump at 679
dump at 680
dump at 681
dump at 682
dump at 683
dump at 684
dump at 685
dump at 686
dump at 687
dump at 688
dump at 689
dump at 690
dump at 691
dump at 692
dump at 693
dump at 694
dump at 695
dump at 696
dump at 697
dump at 698
dump at 699
dump at 700
dump at 701
dump at 702
dump at 703
dump at 704
dump at 705
dump at 706
dump at 707
dump at 708
dump at 709
dump at 710
dump at 711
dump at 712
dump at 713
dump at 714
dump at 715
dump at 716
dump at 717
dump at 718
dump at 719
dump at 720
dump at 721
dump at 722
dump at 723
dump at 724
dump at 725
dump at 726
dump at 727
dump at 728
dump at 729
dump at 730
dump at 731
dump at 732
dump at 733
dump at 734
dump at 735
dump at 736
dump at 737
dump at 738
dump at 739
dump at 740
dump at 741
dump at 742
dump at 743
dump at 744
dump at 745
dump at 746
dump

dump at 1327
dump at 1328
dump at 1329
dump at 1330
dump at 1331
dump at 1332
dump at 1333
dump at 1334
dump at 1335
dump at 1336
dump at 1337
dump at 1338
dump at 1339
dump at 1340
dump at 1341
dump at 1342
dump at 1343
dump at 1344
dump at 1345
dump at 1346
dump at 1347
dump at 1348
dump at 1349
dump at 1350
dump at 1351
dump at 1352
dump at 1353
dump at 1354
dump at 1355
dump at 1356
dump at 1357
dump at 1358
dump at 1359
dump at 1360
dump at 1361
dump at 1362
dump at 1363
dump at 1364
dump at 1365
dump at 1366
dump at 1367
dump at 1368
dump at 1369
dump at 1370
dump at 1371
dump at 1372
dump at 1373
dump at 1374
dump at 1375
dump at 1376
dump at 1377
dump at 1378
dump at 1379
dump at 1380
dump at 1381
dump at 1382
dump at 1383
dump at 1384
dump at 1385
dump at 1386
dump at 1387
dump at 1388
dump at 1389
dump at 1390
dump at 1391
dump at 1392
dump at 1393
dump at 1394
dump at 1395
dump at 1396
dump at 1397
dump at 1398
dump at 1399
dump at 1400
dump at 1401
dump at 1402
dump at 1403

dump at 1964
dump at 1965
dump at 1966
dump at 1967
dump at 1968
dump at 1969
dump at 1970
dump at 1971
dump at 1972
dump at 1973
dump at 1974
dump at 1975
dump at 1976
dump at 1977
dump at 1978
dump at 1979
dump at 1980
dump at 1981
dump at 1982
dump at 1983
dump at 1984
dump at 1985
dump at 1986
dump at 1987
dump at 1988
dump at 1989
dump at 1990
dump at 1991
dump at 1992
dump at 1993
dump at 1994
dump at 1995
dump at 1996
dump at 1997
dump at 1998
dump at 1999
dump at 2000
dump at 2001
dump at 2002
dump at 2003
dump at 2004
dump at 2005
dump at 2006
dump at 2007
dump at 2008
dump at 2009
dump at 2010
dump at 2011
dump at 2012
dump at 2013
dump at 2014
dump at 2015
dump at 2016
dump at 2017
dump at 2018
dump at 2019
dump at 2020
dump at 2021
dump at 2022
dump at 2023
dump at 2024
dump at 2025
dump at 2026
dump at 2027
dump at 2028
dump at 2029
dump at 2030
dump at 2031
dump at 2032
dump at 2033
dump at 2034
dump at 2035
dump at 2036
dump at 2037
dump at 2038
dump at 2039
dump at 2040

dump at 2596
dump at 2597
dump at 2598
dump at 2599
dump at 2600
dump at 2601
dump at 2602
dump at 2603
dump at 2604
dump at 2605
dump at 2606
dump at 2607
dump at 2608
dump at 2609
dump at 2610
dump at 2611
dump at 2612
dump at 2613
dump at 2614
dump at 2615
dump at 2616
dump at 2617
dump at 2618
dump at 2619
dump at 2620
dump at 2621
dump at 2622
dump at 2623
dump at 2624
dump at 2625
dump at 2626
dump at 2627
dump at 2628
dump at 2629
dump at 2630
dump at 2631
dump at 2632
dump at 2633
dump at 2634
dump at 2635
dump at 2636
dump at 2637
dump at 2638
dump at 2639
dump at 2640
dump at 2641
dump at 2642
dump at 2643
dump at 2644
dump at 2645
dump at 2646
dump at 2647
dump at 2648
dump at 2649
dump at 2650
dump at 2651
dump at 2652
dump at 2653
dump at 2654
dump at 2655
dump at 2656
dump at 2657
dump at 2658
dump at 2659
dump at 2660
dump at 2661
dump at 2662
dump at 2663
dump at 2664
dump at 2665
dump at 2666
dump at 2667
dump at 2668
dump at 2669
dump at 2670
dump at 2671
dump at 2672

dump at 3230
dump at 3231
dump at 3232
dump at 3233
dump at 3234
dump at 3235
dump at 3236
dump at 3237
dump at 3238
dump at 3239
dump at 3240
dump at 3241
dump at 3242
dump at 3243
dump at 3244
dump at 3245
dump at 3246
dump at 3247
dump at 3248
dump at 3249
dump at 3250
dump at 3251
dump at 3252
dump at 3253
dump at 3254
dump at 3255
dump at 3256
dump at 3257
dump at 3258
dump at 3259
dump at 3260
dump at 3261
dump at 3262
dump at 3263
dump at 3264
dump at 3265
dump at 3266
dump at 3267
dump at 3268
dump at 3269
dump at 3270
dump at 3271
dump at 3272
dump at 3273
dump at 3274
dump at 3275
dump at 3276
dump at 3277
dump at 3278
dump at 3279
dump at 3280
dump at 3281
dump at 3282
dump at 3283
dump at 3284
dump at 3285
dump at 3286
dump at 3287
dump at 3288
dump at 3289
dump at 3290
dump at 3291
dump at 3292
dump at 3293
dump at 3294
dump at 3295
dump at 3296
dump at 3297
dump at 3298
dump at 3299
dump at 3300
dump at 3301
dump at 3302
dump at 3303
dump at 3304
dump at 3305
dump at 3306

dump at 3864
dump at 3865
dump at 3866
dump at 3867
dump at 3868
dump at 3869
dump at 3870
dump at 3871
dump at 3872
dump at 3873
dump at 3874
dump at 3875
dump at 3876
dump at 3877
dump at 3878
dump at 3879
dump at 3880
dump at 3881
dump at 3882
dump at 3883
dump at 3884
dump at 3885
dump at 3886
dump at 3887
dump at 3888
dump at 3889
dump at 3890
dump at 3891
dump at 3892
dump at 3893
dump at 3894
dump at 3895
dump at 3896
dump at 3897
dump at 3898
dump at 3899
dump at 3900
dump at 3901
dump at 3902
dump at 3903
dump at 3904
dump at 3905
dump at 3906
dump at 3907
dump at 3908
dump at 3909
dump at 3910
dump at 3911
dump at 3912
dump at 3913
dump at 3914
dump at 3915
dump at 3916
dump at 3917
dump at 3918
dump at 3919
dump at 3920
dump at 3921
dump at 3922
dump at 3923
dump at 3924
dump at 3925
dump at 3926
dump at 3927
dump at 3928
dump at 3929
dump at 3930
dump at 3931
dump at 3932
dump at 3933
dump at 3934
dump at 3935
dump at 3936
dump at 3937
dump at 3938
dump at 3939
dump at 3940