In [1]:
# Reload modules every time before executing the Python code typed
%load_ext autoreload
%autoreload 2

# Import from project root
import sys; sys.path.insert(0, '../')

# Load test data
from access.interim_storage import InterimStorage
import pprint

# Classes under test (Reviews and its collaborators)
from amore.reviews import Reviews
from amore.review import Review
from amore.opinion_lexicon import OpinionLexicon
from access.file_storage import FileStorage
from amore.amazon_reviews_reader import AmazonReviewsReader
from gensim.utils import simple_preprocess

## Load test data

In [2]:
# Config
# File id of the interim-storage file that this notebook reads its test data from.
write_file_id = 'amore-test-2002-2003-1-5'
interim_storage = InterimStorage(write_file_id)
print('filepath:', interim_storage.get_filepath())
# Recorded output of the print above:
# filepath: /tmp/InterimStorage/amore-test-2002-2003-1-5.pickle.bz2

filepath: /tmp/InterimStorage/amore-test-2002-2003-1-5.pickle.bz2


In [3]:
# Load
# Read the interim-storage content for the configured file id into `results`.
# The constant guard is a manual on/off switch; the following cells need
# `results`, so it has to stay enabled for a full run.
if True:
    results = interim_storage.read()

In [4]:
# Print
# Switch the guard to True to dump the complete nested structure (verbose).
if False:
    pprint.pprint(results)

# One line per (year, star) bucket: year, star rating and number of entries.
for year, by_star in results.items():
    for star, bucket in by_star.items():
        print(year, star, len(bucket))

2002 1 10
2002 5 10
2003 1 10
2003 5 10


## Test class Reviews

In [5]:
# From amore.py
# Build the opinion lexicon from the file registered under 'opinion-words'.
file_storage = FileStorage()
opinion_words_path = file_storage.get_filepath('opinion-words')
opinion_lexicon = OpinionLexicon(opinion_words_path)

# Extremum lengths for the positive and the negative side of the lexicon;
# they serve as the max_len token cutoff when opinion words are extracted.
opinion_max_pos, opinion_max_neg = (
    opinion_lexicon.get_extremum_length(maximum=True, positive=True),
    opinion_lexicon.get_extremum_length(maximum=True, positive=False),
)

In [6]:
# From amore.py
def extract_opinion_words(text, positive=True, min_len=3, max_len=24):
    """Look up the opinion words contained in `text`.

    The text is tokenized with gensim's `simple_preprocess`, the tokens are
    de-duplicated into a set, and that set is passed to the module-level
    `opinion_lexicon`.

    Args:
        text: Raw text to scan.
        positive: Truthy to extract positive words, falsy to extract
            negative words.
        min_len: Forwarded to `simple_preprocess` as `min_len`.
        max_len: Forwarded to `simple_preprocess` as `max_len`.

    Returns:
        The result of `opinion_lexicon.extract_positive_words` or
        `opinion_lexicon.extract_negative_words` for the token set.
    """
    unique_tokens = set(simple_preprocess(text, min_len=min_len, max_len=max_len))
    if not positive:
        return opinion_lexicon.extract_negative_words(unique_tokens)
    return opinion_lexicon.extract_positive_words(unique_tokens)

# Create datasets from previously loaded results
reviews = Reviews()
for year, by_star in results.items():
    for star, bucket in by_star.items():
        for review in bucket:
            # Summary and body are scanned together as one text.
            text = ' '.join((review[AmazonReviewsReader.KEY_SUMMARY],
                             review[AmazonReviewsReader.KEY_TEXT]))
            number = review[AmazonReviewsReader.KEY_NUMBER]
            # Count the distinct positive / negative lexicon hits; the token
            # length cutoff comes from the respective lexicon extremum.
            positive_count = len(extract_opinion_words(
                text, positive=True, min_len=3, max_len=opinion_max_pos))
            negative_count = len(extract_opinion_words(
                text, positive=False, min_len=3, max_len=opinion_max_neg))
            reviews.add_review(year, star, Review(number, positive_count, negative_count))

In [8]:
# Print
# Shows the sorted tuples for the 5-star reviews of 2002 (positive ranking)
# and for the 1-star reviews of 2002 (negative ranking).
# Tuples are presumably (review number, positive count, negative count),
# following the Review(...) argument order used when the reviews were added -- TODO confirm.
# NOTE(review): judging from the recorded output, the order appears to follow the
# difference between the two counts rather than a single count -- confirm in Reviews.
if True:
    pprint.pprint(reviews.get_positive_sorted_tuple(2002, 5))
    # Alternative (non-tuple) accessors, kept for manual inspection:
    #pprint.pprint(reviews.get_positive_sorted(2002, 5))
    #pprint.pprint(reviews.get_positive_sorted(2003, 5))
    print()
    pprint.pprint(reviews.get_negative_sorted_tuple(2002, 1))
    # Alternative (non-tuple) accessors, kept for manual inspection:
    #pprint.pprint(reviews.get_negative_sorted(2002, 1))
    #pprint.pprint(reviews.get_negative_sorted(2003, 1))

[(131, 19, 4),
 (130, 35, 21),
 (9, 10, 1),
 (54, 11, 3),
 (105, 11, 4),
 (108, 6, 1),
 (119, 9, 4),
 (17, 3, 0),
 (48, 5, 2),
 (57, 4, 1)]

[(2164, 2, 8),
 (231, 23, 25),
 (279, 6, 8),
 (295, 3, 5),
 (338, 5, 6),
 (349, 2, 2),
 (2155, 7, 7),
 (333, 4, 3),
 (292, 8, 5),
 (2159, 13, 8)]


In [9]:
# Persist the collected review objects using the default file id.
# The call's return value is the cell's last expression, so its repr is displayed.
reviews.write_review_objects()

Wrote: /tmp/InterimStorage/amore-counter.pickle.bz2


<amore.reviews.Reviews at 0x7f12c9f2a550>

In [10]:
# Round trip: load the review objects back using the default file id.
# NOTE(review): the assignment relies on read_review_objects() returning a
# Reviews object (presumably the instance itself) -- confirm in amore/reviews.py.
reviews = Reviews().read_review_objects()

Loaded: /tmp/InterimStorage/amore-counter.pickle.bz2


## Load stored review objects by file id

In [11]:
# Load review objects that were stored under an explicit file id.
# Note: this rebinds `write_file_id` and `reviews` from the cells above.
write_file_id = 'test1' # from amore.ipynb
reviews = Reviews()
# The return value is the cell's last expression, so its repr is displayed.
reviews.read_review_objects(file_id=write_file_id) # from amore.py

Loaded: /tmp/InterimStorage/test1.pickle.bz2


<amore.reviews.Reviews at 0x7f12c9f2a1f0>

## Simple write test

In [13]:
# Write a throwaway payload under its own id and print the resulting file path.
ids_storage = InterimStorage(id_='AMORE-IDs')
print(ids_storage.write('sdf').get_filepath())

/tmp/InterimStorage/AMORE-IDs.pickle.bz2
