In [None]:
# Reload modules every time before executing the Python code typed
%load_ext autoreload
%autoreload 2

# Import from project root
import sys; sys.path.insert(0, '../')

# Load test data
from access.interim_storage import InterimStorage
import pprint

# Classes under test (Reviews and its collaborators)
from amore.reviews import Reviews
from amore.review import Review
from amore.opinion_lexicon import OpinionLexicon
from access.file_storage import FileStorage
from amore.amazon_reviews_reader import AmazonReviewsReader
from gensim.utils import simple_preprocess

## Load test data

In [None]:
# Config
# Id of the interim-storage file this notebook works on.
# NOTE(review): the name says "write", but in this notebook the file is only
# read (see the Load cell below) — presumably the name was carried over from
# the notebook that produced the file.
write_file_id = 'amore-test-2002-2003-1-5'
interim_storage = InterimStorage(write_file_id)
# Show where the storage object resolves the id to on this machine.
print('filepath:', interim_storage.get_filepath())
# Output recorded from an earlier run (machine-specific):
# filepath: /tmp/InterimStorage/amore-test-2002-2003-1-5.pickle.bz2

In [None]:
# Load
# Toggle: set to False to skip the (re)load. `results` must then already exist
# in the kernel from an earlier run, because the following cells read it.
# NOTE(review): the storage path ends in .pickle.bz2, so read() presumably
# unpickles its content — only load files that come from a trusted source.
if True:
    results = interim_storage.read()

In [None]:
# Print
# Toggle: flip to True to dump the complete raw structure (can be very long).
if False:
    pprint.pprint(results)

# One summary line per (year, star) bucket: year, star, number of reviews.
# Assumes `results` is a dict of dicts, as the nested key lookups imply.
for year, reviews_by_star in results.items():
    for star, star_reviews in reviews_by_star.items():
        print(year, star, len(star_reviews))

## Test class Reviews

In [None]:
# From amore.py
# Build the opinion lexicon from the 'opinion-words' file and query one
# extremum length per polarity. Both values are later passed as `max_len`
# to the tokenizer, so they cap the token length considered per polarity.
# NOTE(review): presumably these are the lengths of the longest positive and
# the longest negative lexicon word — confirm against OpinionLexicon.
file_storage = FileStorage()
opinion_lexicon = OpinionLexicon(
    file_storage.get_filepath('opinion-words')
)
# Same two calls as before, positive first, then negative.
opinion_max_pos, opinion_max_neg = (
    opinion_lexicon.get_extremum_length(maximum=True, positive=is_positive)
    for is_positive in (True, False)
)

In [None]:
# From amore.py
def extract_opinion_words(text, positive=True, min_len=3, max_len=24):
    """Return the opinion words of one polarity that occur in ``text``.

    The text is tokenized with gensim's ``simple_preprocess`` and the tokens
    are de-duplicated before they are handed to the module-level
    ``opinion_lexicon``.

    Args:
        text: Raw text to analyse.
        positive: Truthy selects ``extract_positive_words``; falsy selects
            ``extract_negative_words``.
        min_len: Minimum token length, forwarded to ``simple_preprocess``.
        max_len: Maximum token length, forwarded to ``simple_preprocess``.

    Returns:
        Whatever the selected lexicon method returns for the token set
        (callers in this notebook only take ``len()`` of it).
    """
    tokens = simple_preprocess(text, min_len=min_len, max_len=max_len)
    unique_tokens = set(tokens)  # each distinct word is looked up once
    if not positive:
        return opinion_lexicon.extract_negative_words(unique_tokens)
    return opinion_lexicon.extract_positive_words(unique_tokens)

# Create datasets from previously loaded results
# For every raw review, count its positive and negative opinion words and
# register a Review under the review's (year, star) bucket.
# Assumes `results` is a dict of dicts holding iterables of raw review records.
reviews = Reviews()
for year, reviews_by_star in results.items():
    for star, raw_reviews in reviews_by_star.items():
        for raw_review in raw_reviews:
            # Summary and body are scored together as a single text.
            summary = raw_review[AmazonReviewsReader.KEY_SUMMARY]
            body = raw_review[AmazonReviewsReader.KEY_TEXT]
            text = summary + ' ' + body

            number = raw_review[AmazonReviewsReader.KEY_NUMBER]
            # Token length is capped per polarity by the lexicon-derived limits.
            positive_count = len(extract_opinion_words(
                text, positive=True, min_len=3, max_len=opinion_max_pos))
            negative_count = len(extract_opinion_words(
                text, positive=False, min_len=3, max_len=opinion_max_neg))

            reviews.add_review(
                year, star, Review(number, positive_count, negative_count))

In [None]:
# Print
# Toggle: set to False to skip this (potentially long) dump.
# NOTE(review): the two arguments are presumably (year, star), matching the
# order used by reviews.add_review(year, star, ...) — confirm against Reviews.
if True:
    # Positive ranking for the (2002, 5) bucket.
    pprint.pprint(reviews.get_positive_sorted_tuple(2002, 5))
    # Alternative accessors, kept for manual experiments:
    #pprint.pprint(reviews.get_positive_sorted(2002, 5))
    #pprint.pprint(reviews.get_positive_sorted(2003, 5))
    # Blank line separating the positive from the negative listing.
    print()
    # Negative ranking for the (2002, 1) bucket.
    pprint.pprint(reviews.get_negative_sorted_tuple(2002, 1))
    # Alternative accessors, kept for manual experiments:
    #pprint.pprint(reviews.get_negative_sorted(2002, 1))
    #pprint.pprint(reviews.get_negative_sorted(2003, 1))

In [None]:
# Persist the collected review objects. No file id is passed here, so the
# method's default target applies — TODO confirm where that is.
reviews.write_review_objects()

In [None]:
# Read the persisted review objects back (no file id, so the default target).
# NOTE(review): this rebinds `reviews` to the *return value* of
# read_review_objects(), whereas the last cell of this notebook calls the
# method on an existing instance and ignores the return value. Confirm that
# the method returns the Reviews object; if it returns nothing, `reviews`
# becomes None here.
reviews = Reviews().read_review_objects()

## Reload stored review objects

In [None]:
# Rebinds write_file_id, which earlier held the interim-storage id from the
# Config cell; re-run that cell before going back to the cells above.
write_file_id = 'test1' # from amore.ipynb
# Start from a fresh Reviews instance and load into it. The return value is
# ignored, so read_review_objects() is presumably expected to populate
# `reviews` in place — TODO confirm.
reviews = Reviews()
reviews.read_review_objects(file_id=write_file_id) # from amore.py