In [31]:
## Importing necessary Libraries 
import ctypes, inspect, os, graphlab
from graphlab import SFrame
import pandas as pd

In [84]:
# This code reads the data into a graphlab Sframe. The dataset used is an Office Products dataset containing 53,257 reviews   
# retrieved from: http://jmcauley.ucsd.edu/data/amazon/ citation: "Image-based recommendations on styles and 
# substitutes" J. McAuley, C. Targett, J. Shi, A. van den Hengel, SIGIR, 2015
Office_Products = graphlab.SFrame.read_json('Office_Products_5.json', orient='lines')

------------------------------------------------------
Inferred types from first line of file as 
column_type_hints=[dict]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


In [85]:
## Exploring the dataset
Office_Products.head()

asin,helpful,overall,reviewText,reviewTime,reviewerID,reviewerName
B00000JBLH,"[3, 4]",5.0,"I bought my first HP12C in about 1984 or so, and ...","09 3, 2004",A32T2H8150OJLU,ARH
B00000JBLH,"[7, 9]",5.0,WHY THIS BELATED REVIEW? I feel very obliged to ...,"12 15, 2007",A3MAFS04ZABRGO,"Let it Be ""Alan"""
B00000JBLH,"[3, 3]",2.0,I have an HP 48GX that has been kicking for ...,"01 1, 2011",A1F1A0QQP2XVH5,Mark B
B00000JBLH,"[7, 8]",5.0,I've started doing more finance stuff recently ...,"04 19, 2006",A49R5DBXXQDE5,R. D Johnson
B00000JBLH,"[0, 0]",5.0,For simple calculations and discounted cash ...,"08 4, 2013",A2XRMQA6PJ5ZJ8,Roger J. Buffington
B00000JBLH,"[10, 12]",5.0,"While I don't have an MBA, it's hard to bel ...","01 23, 2002",A2JFOHC9W629IE,scott_from_dallas
B00000JBLH,"[3, 4]",5.0,I've had an HP 12C ever since they were first ...,"01 17, 2007",A38NELQT98S4H8,W. B. Halper
B00000JBLH,"[0, 0]",5.0,Bought this for my boss because he lost his. He ...,"11 14, 2013",AA8M6331NI1EN,ZombieMom
B00000JBLU,"[3, 3]",5.0,"This is a well-designed, simple calculator that ...","12 7, 2010",A25C2M3QF9G7OQ,Comdet
B00000JBLU,"[0, 0]",5.0,"I love this calculator, big numbers and calcu ...","12 2, 2013",A1RTVWTWZSIC94,"Hb ""Black Beauty"""

summary,unixReviewTime
"A solid performer, and long time friend ...",1094169600
"Price of GOLD is up, so don't bury the golden ...",1197676800
"Good functionality, but not durable like old HPs ...",1293840000
One of the last of an almost extinct species ...,1145404800
Still the best,1375574400
Every MBA student and grad should get one ...,1011744000
A workhorse of a calculator ...,1168992000
Fast shipping & great price for this awesome ...,1384387200
"Nice design, works well, great value ...",1291680000
Love It!!!!!!!!!,1385942400


In [33]:
## Counting the number of rows 
len(Office_Products)

53258

In [86]:
## Setting target as the notebook
graphlab.canvas.set_target('ipynb')

In [87]:
## Checking the rating distribution. Most reviews have a 5 rating. 30,327 votes (56.944%)
Office_Products['overall'].show(view='Categorical') 

In [88]:
# building a word count vector for each review 
Office_Products['word_count'] = graphlab.text_analytics.count_words(Office_Products['reviewText'])

In [89]:
# Removing all stopwords
Office_Products['No_Stop_words'] = Office_Products['word_count'].dict_trim_by_keys(graphlab.text_analytics.stopwords(), exclude=True)

In [90]:
# Limiting the review data to only helpful reviews. The ‘helpful’ column contains values like ‘[x, y]’.
# The first value represents the number of helpful votes, the second represents overall votes. 
# If at least 60% consider the review helpful, we want those reviews. A binary 1 or 0 column will identify the review as helpful or not.
import numpy as np

Office_Products['helpful_count'] = Office_Products['helpful'].apply(lambda x: x[0])
Office_Products['overall_count'] = Office_Products['helpful'].apply(lambda x: x[1])
Office_Products['helpful_percent'] = Office_Products['helpful_count'] / Office_Products['overall_count']
Office_Products['helpful_review'] = Office_Products['helpful_percent'] >= .6
Office_Products = Office_Products[Office_Products['overall_count']!= 0 ]


In [91]:
Office_Products.head()

asin,helpful,overall,reviewText,reviewTime,reviewerID
B00000JBLH,"[3, 4]",5.0,"I bought my first HP12C in about 1984 or so, and ...","09 3, 2004",A32T2H8150OJLU
B00000JBLH,"[7, 9]",5.0,WHY THIS BELATED REVIEW? I feel very obliged to ...,"12 15, 2007",A3MAFS04ZABRGO
B00000JBLH,"[3, 3]",2.0,I have an HP 48GX that has been kicking for ...,"01 1, 2011",A1F1A0QQP2XVH5
B00000JBLH,"[7, 8]",5.0,I've started doing more finance stuff recently ...,"04 19, 2006",A49R5DBXXQDE5
B00000JBLH,"[10, 12]",5.0,"While I don't have an MBA, it's hard to bel ...","01 23, 2002",A2JFOHC9W629IE
B00000JBLH,"[3, 4]",5.0,I've had an HP 12C ever since they were first ...,"01 17, 2007",A38NELQT98S4H8
B00000JBLU,"[3, 3]",5.0,"This is a well-designed, simple calculator that ...","12 7, 2010",A25C2M3QF9G7OQ
B00000JBLU,"[2, 2]",5.0,I've had mine for 4 years now and use it almost ...,"07 10, 2007",A30U2QQN2FFHE9
B00000JBLU,"[1, 2]",5.0,"If you need a calculator that prints, this is not ...","07 15, 2008",A2W0XCWOK3MW6F
B00000JBLU,"[6, 6]",5.0,"Texas Instruments makes an excellent, relatively ...","06 22, 2010",A2R6RA8FRBS608

reviewerName,summary,unixReviewTime,word_count
ARH,"A solid performer, and long time friend ...",1094169600,"{'area.': 1L, 'hand,': 1L, 'over': 1L, 'move': ..."
"Let it Be ""Alan""","Price of GOLD is up, so don't bury the golden ...",1197676800,"{'all': 3L, 'ever.': 1L, 'four': 1L, 'desktops': ..."
Mark B,"Good functionality, but not durable like old HPs ...",1293840000,"{'and': 4L, '12c': 1L, 'old': 1L, 'classic': ..."
R. D Johnson,One of the last of an almost extinct species ...,1145404800,"{'12c': 2L, 'stamped': 1L, 'cellphones,': 1L, ..."
scott_from_dallas,Every MBA student and grad should get one ...,1011744000,"{""don't"": 1L, 'over': 1L, 'planning': 1L, 'duri ..."
W. B. Halper,A workhorse of a calculator ...,1168992000,"{'12c': 2L, 'just': 1L, 'being': 1L, 'money': ..."
Comdet,"Nice design, works well, great value ...",1291680000,"{'all': 1L, 'help': 1L, ""don't"": 1L, 'comma': ..."
J. Amicucci,Great basic calculator,1184025600,"{'and': 1L, 'all': 1L, 'almost': 1L, '-': 1L, ..."
"K. Roman ""Happy Librarian"" ...",Perfect,1216080000,"{'and': 1L, 'model': 1L, '%.': 1L, 'figure': 2L, ..."
Matthew G. Sherwin,all right by me !!!,1277164800,"{'all': 1L, 'just': 1L, ""don't"": 2L, 'is': 5L, ..."

No_Stop_words,helpful_count,overall_count,helpful_percent,helpful_review
"{'friend.': 1L, 'area.': 1L, 'set': 1L, ...",3,4,0.75,1
"{'go.even': 1L, 'bearer': 1L, 'gold': 9L, ""don't"": ...",7,9,0.777777777778,1
"{'12c': 1L, 'classic': 1L, 'feel': 2L, 'hard': ...",3,3,1.0,1
"{'12c': 2L, 'stamped': 1L, 'cellphones,': 1L, ...",7,8,0.875,1
"{'mba,': 1L, 'value,': 1L, ""isn't"": 1L, ""don ...",10,12,0.833333333333,1
"{'12c': 2L, 'financial': 1L, 'methods': 1L, ...",3,4,0.75,1
"{'calculator.my': 1L, 'angles': 1L, 'math.': ...",3,3,1.0,1
"{'calculator': 1L, 'hard': 1L, 'mine': 1L, ...",2,2,1.0,1
"{'%.': 1L, 'figure': 2L, 'calculator': 3L, 'tax': ...",1,2,0.5,0
"{'key': 1L, 'function': 2L, 'square': 1L, ...",6,6,1.0,1


In [92]:
# Building the sentiment classifier. Performing a random 80/20 split to select the training and testing data.
train_data, test_data = Office_Products.random_split(.8, seed=0)

In [93]:
sentiment_model_Logist = graphlab.logistic_classifier.create(train_data,
                                                            target = 'helpful_review',
                                                            features = ['No_Stop_words'],
                                                            validation_set = test_data)

In [94]:
## evaluating the model
sentiment_model_Logist.evaluate(test_data, metric = 'roc_curve')

{'roc_curve': Columns:
 	threshold	float
 	fpr	float
 	tpr	float
 	p	int
 	n	int
 
 Rows: 100001
 
 Data:
 +-----------+----------------+----------------+------+------+
 | threshold |      fpr       |      tpr       |  p   |  n   |
 +-----------+----------------+----------------+------+------+
 |    0.0    |      1.0       |      1.0       | 2938 | 1187 |
 |   1e-05   | 0.996630160067 | 0.999659632403 | 2938 | 1187 |
 |   2e-05   | 0.995787700084 | 0.999659632403 | 2938 | 1187 |
 |   3e-05   | 0.995787700084 | 0.998978897209 | 2938 | 1187 |
 |   4e-05   | 0.995787700084 | 0.998978897209 | 2938 | 1187 |
 |   5e-05   | 0.995787700084 | 0.998978897209 | 2938 | 1187 |
 |   6e-05   | 0.995787700084 | 0.998978897209 | 2938 | 1187 |
 |   7e-05   | 0.995787700084 | 0.998978897209 | 2938 | 1187 |
 |   8e-05   | 0.995787700084 | 0.998978897209 | 2938 | 1187 |
 |   9e-05   | 0.994945240101 | 0.998978897209 | 2938 | 1187 |
 +-----------+----------------+----------------+------+------+
 [100001 row

In [96]:
# True Positive = 1189 False Negative = 1718 Accuracy = 0.685 Precision = 0.423 false Positive = 1625 True Negative = 6077 recall = 0.409
sentiment_model_Logist.show(view = 'Evaluation')

In [97]:
## Trying a SVM model
sentiment_model_SVM = graphlab.svm_classifier.create(train_data,
                                                     target = 'helpful_review',
                                                     features = ['No_Stop_words'],
                                                     validation_set = test_data)

In [98]:
## Evaluating the model
coefficients = sentiment_model_SVM['coefficients'] 

In [99]:
# Make predictions (as margins, or class)
predictions = sentiment_model_SVM.predict(Office_Products)    # Predicts 0/1
predictions = sentiment_model_SVM.predict(Office_Products, output_type='margin')

In [100]:
results = sentiment_model_SVM.evaluate(Office_Products)  

In [101]:
results

{'accuracy': 0.9335616112617111, 'confusion_matrix': Columns:
 	target_label	int
 	predicted_label	int
 	count	int
 
 Rows: 4
 
 Data:
 +--------------+-----------------+-------+
 | target_label | predicted_label | count |
 +--------------+-----------------+-------+
 |      1       |        0        |  245  |
 |      0       |        1        |  1152 |
 |      0       |        0        |  4843 |
 |      1       |        1        | 14787 |
 +--------------+-----------------+-------+
 [4 rows x 3 columns], 'f1_score': 0.9548932872687352, 'precision': 0.9277244494635799, 'recall': 0.9837014369345396}

In [102]:
## Finding the ID of the most frequently reviewed item. 
Office_Products['asin'].show()   ## B0010T3QT2 count = 311 (58.4% of the reviews)

In [103]:
B0035FX5MC_Reviews = Office_Products[Office_Products['asin'] == 'B0035FX5MC']

In [104]:
len(B0035FX5MC_Reviews)

122

In [105]:
# Applying the SVM model
Office_Products['Predicted_Helpfulness_SVM'] = sentiment_model_SVM.predict(Office_Products, output_type = 'margin')

In [106]:
## Sorting reviews based on the predicted sentiment 
Office_Products = Office_Products.sort('Predicted_Helpfulness_SVM', ascending =False)

In [107]:
# Applying the Logistic Regression model
Office_Products['Predicted_Helpfulness_Logist'] = sentiment_model_Logist.predict(Office_Products, output_type = 'probability')

In [108]:
## Sorting reviews based on the predicted sentiment 
Office_Products = Office_Products.sort('Predicted_Helpfulness_Logist', ascending =False)

In [109]:
Office_Products.head()

asin,helpful,overall,reviewText,reviewTime,reviewerID
B005D5M12M,"[78, 79]",5.0,I've owned an HP color LaserJet for quite some ...,"08 23, 2012",A12B7ZMXFI6IXY
B001S9HWKS,"[4, 4]",5.0,"Excellent Investment.Easy to use.Intuitive, simple ...","06 18, 2011",A2V5UI37R7ESEV
B002JM1XRQ,"[21, 21]",5.0,This is quite possibly the best printer that ...,"01 18, 2010",A38NELQT98S4H8
B0063B8B92,"[18, 20]",5.0,Our house is starting to look something like a ...,"12 28, 2011",AUITG1DJ3QUGK
B004H3XKR6,"[32, 34]",5.0,INSTALLATION PROCESS:*** Smooth and intuitive. I ...,"02 1, 2011",A2VWE5SGNDS8HW
B005IVL0RS,"[30, 33]",3.0,Update - 1/6/2012 - A recent firmware release ...,"12 9, 2011",A38NELQT98S4H8
B001R4BTI0,"[43, 45]",4.0,"I have used an HP Scanjet 4670 for years, and ...","03 6, 2011",A1GQGYROVZVW49
B002MCZIUG,"[11, 11]",4.0,Going against the old adage of don't fix what ...,"11 28, 2009",A2BMZRO0H7TFCS
B0001EMLZW,"[106, 111]",5.0,Have been a stalwart HP financial calculator ...,"03 3, 2005",A33TZR3VQJMKIZ
B0064VZD06,"[131, 134]",5.0,The quality of the prints out of this printer is ...,"06 28, 2012",ASFP250F22MDZ

reviewerName,summary,unixReviewTime,word_count
"Hyoun Kim ""Faluzure""",Excellent Printer - Worth Every Penny - A FEW ...,1345680000,"{'all': 4L, 'tape/wrapping,': 1L, ..."
C. L. Smith,Digital Postal/Shipping Scale - 35LB ...,1308355200,"{'all': 1L, '(5)': 1L, 'seller,': 1L, ..."
W. B. Halper,Quite possibly the best printer that I've ever ...,1263772800,"{'all': 2L, 'yellow': 1L, 'debris': 1L, 'stops': ..."
E. Swope,Nice printer...... (revised 1/2/2012) ...,1325030400,"{'all': 7L, 'reviews.': 1L, 'consider': 5L, ..."
"tachi1 ""tachi1""",Incredibly versatile-- does many things very ...,1296518400,"{'all': 6L, 'concept': 1L, '(it': 1L, 'saves': ..."
W. B. Halper,A firmware update improved this printer. ...,1323388800,"{'hl3070w': 1L, 'eight- page': 1L, 'all': 5L, ..."
P. Schmidt,Nice scanner for former HP 4670 users ...,1299369600,"{'all': 5L, 'borders.i': 1L, 'caused': 1L, ..."
Ed,Premium Quality & Price,1259366400,"{'limited': 1L, 'all': 5L, 'update.': 1L, ..."
Pruitt Hall,Easily better than ANY current HP--and 1/2 the ...,1109808000,"{'all': 3L, '(it': 1L, 'chain': 2L, '(in': 1L, ..."
But I'm feeling much better now... ...,Absolutely Amazing Printer ...,1340841600,"{'osx': 1L, 'all': 7L, 'switched': 1L, ..."

No_Stop_words,helpful_count,overall_count,helpful_percent,helpful_review,Predicted_Helpfulness_SVM
"{'tape/wrapping,': 1L, 'reviewers': 1L, '(if': ...",78,79,0.987341772152,1,23.1260111626
"{'easy-to-use': 1L, ""don't"": 1L, 'money': ...",4,4,1.0,1,21.9688292539
"{'yellow': 1L, 'debris': 1L, 'saved': 1L, ...",21,21,1.0,1,22.0911251045
"{'reviews.': 1L, ':)': 1L, '(2184b002)for': 1L, ...",18,20,0.9,1,22.2063680869
"{'concept': 1L, '(it': 1L, 'saves': 1L, ...",32,34,0.941176470588,1,22.3063079713
"{'hl3070w': 1L, 'eight- page': 1L, 'pictures""': ...",30,33,0.909090909091,1,22.399484825
"{'caused': 1L, 'saves': 1L, 'results': 2L, ...",43,45,0.955555555556,1,22.4754488917
"{'limited': 1L, 'update.': 1L, 'updat ...",11,11,1.0,1,22.4852004058
"{'no,': 1L, '12c': 1L, '(it': 1L, 'feedback': ...",106,111,0.954954954955,1,22.5003473115
"{'osx': 1L, 'switched': 1L, 'better,': 1L, ...",131,134,0.977611940299,1,22.5464618232

Predicted_Helpfulness_Log ist ...
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0


In [110]:
Office_Products[0]['reviewText']

'I\'ve owned an HP color LaserJet for quite some time now, but the only problem is that is streaks. Something is wrong with the fuser/toner cartridges and despite me getting a local printer/copier technician to look at it, he thinks it can\'t be fixed. Not wanting to spend any more money trying to get it repaired, I\'m just holding onto it until the toner runs out.But recently, my my wife is planning on going back to college. Back to college means writing papers, printing out stuff, and all that jazz and she needed a printer that didn\'t have a huge streak going down each sheet. On top of that, she wanted a printer that was capable of copying/scanning. Having procrastinated for a while now, I did notice that another Canon Pixma (PIXMA MG6220) was the Gold Box Deal of the Day. That printer retailed $199.99 but was on sale for $59.99. Instead of pulling the trigger on it, I decided to wait until she woke up. Unfortunately, that printer sold out lightning fast. When I checked back, some k

In [111]:
Office_Products[-1]['reviewText']



asin,helpful,overall,reviewText,reviewTime,reviewerID
B001V9LQH0,"[44, 48]",4.0,UPDATE 12/06/13: I'm really surprised that ...,"12 2, 2011",A2LEIANN1UZTHP
B0095F5BCS,"[254, 265]",5.0,&#2951; Fuzzy Wuzzy's Sum mary:&#1150;&#1150;&# ...,"06 6, 2013",A24HWYHR28JJ7A
B003VQR1UC,"[1382, 1417]",5.0,"SUMMARYWith a few caveats, this is a great ...","10 8, 2010",AI0BCEWRE04G0
B003YL412U,"[469, 479]",4.0,!!!WARNING!!! B200 ERROR:I wrote this re ...,"09 26, 2010",A2VW4FYZILSXF2
B003VQR1TS,"[612, 624]",5.0,"SUMMARYWith a few caveats, this is a great ...","10 3, 2010",AI0BCEWRE04G0
B001B19XPA,"[121, 131]",4.0,&#2951; Fuzzy Wuzzy's Sum mary:&#1150;&#1150;&# ...,"09 25, 2008",A24HWYHR28JJ7A
B00EHDZMAY,"[561, 567]",4.0,Canon PIXMA MG7120 Black/White/Red/Brown ...,"12 13, 2013",A3D7BGK0P5QY0X
B00AGV7T5W,"[832, 849]",4.0,Canon PIXMA MG5420 Wireless Color Photo ...,"03 11, 2013",A3D7BGK0P5QY0X
B00HM0IV4S,"[2, 2]",4.0,Pros:Excellent text qualityGood picture ...,"04 27, 2014",A2VW4FYZILSXF2
B00AGV7TOS,"[800, 812]",4.0,Canon PIXMA MG6320 Black (or White) Wireless C ...,"01 28, 2013",A3D7BGK0P5QY0X

reviewerName,summary,unixReviewTime,word_count
brainout,"Scanner 6 stars, menu and manual -2 stars ...",1322784000,"{'limited': 2L, 'searchable': 5L, 'fo ..."
&#131;&#367;&#142;&#378;& #376; &#969;&#364;&#... ...,Excellent photo lab- quality 11x14 and 13x19 ...,1370476800,"{'all': 13L, 'cameras': 1L, ':)': 1L, 'better""': ..."
Stoney,A Great Little Scanner,1286496000,"{'all': 6L, 'consider': 2L, 'better,': 1L, ..."
"Jojoleb ""jojoleb""",Jack-of-all trades and master of most: a ...,1285459200,"{'replacing': 2L, 'now': 4L, 'pictures.': 2L, ..."
Stoney,A great little scanner,1286064000,"{'all': 6L, 'consider': 2L, 'scratch': 1L, ..."
&#131;&#367;&#142;&#378;& #376; &#969;&#364;&#... ...,"Great combination of print quality, speed, ...",1222300800,"{'subtle...': 1L, 'handling.': 1L, ..."
Neil E. Isenberg,Review: Canon PIXMA MG7120 Color Photo ...,1386892800,"{'reviews,': 2L, 'all': 2L, 'consider': 5L, ..."
Neil E. Isenberg,Pros and Cons for this Excellent Entry point ...,1362960000,"{'reviews,': 2L, 'all': 2L, 'consider': 6L, ..."
"Jojoleb ""jojoleb""","A highly competent, multifunction printer ...",1398556800,"{'limited': 1L, 'all': 5L, 'duplex': 1L, '12 ..."
Neil E. Isenberg,Review: Canon PIXMA MG6320 Color Photo ...,1359331200,"{'reviews,': 2L, 'all': 2L, 'consider': 4L, ..."

No_Stop_words,helpful_count,overall_count,helpful_percent,helpful_review,Predicted_Helpfulness
"{'limited': 2L, 'searchable': 5L, ...",44,48,0.916666666667,1,86.7409624125
"{'cameras': 1L, ':)': 1L, 'better""': 1L, 'retain': ...",254,265,0.958490566038,1,69.8159254354
"{'better,': 1L, 'beware': 2L, '(folded)': 1L, ...",1382,1417,0.975299929428,1,64.7914236811
"{'replacing': 2L, 'pictures.': 2L, ...",469,479,0.979123173278,1,63.8626841061
"{'scratch': 1L, 'beware': 2L, '(folded)': 1L, ...",612,624,0.980769230769,1,58.5204865687
"{'subtle...': 1L, 'update""': 1L, ...",121,131,0.923664122137,1,54.3987361916
"{'reviews,': 2L, 'reviewers': 1L, ...",561,567,0.989417989418,1,53.661465048
"{'reviews,': 2L, 'film/slide': 1L, ...",832,849,0.979976442874,1,52.7733098758
"{'limited': 1L, '1200': 1L, 'value.in': 1L, ...",2,2,1.0,1,50.3290186407
"{'reviews,': 2L, 'reviewers': 1L, '532 ...",800,812,0.985221674877,1,41.602650943




'&#2951; Fuzzy Wuzzy\'s Summary:&#1150;&#1150;&#1150;&#1150;&#1150; Highly recommended with warm fuzzies!&#1411; Positives:&#1411; This printer can output superb gallery-quality prints with exceptional detail and resolution, while being significantly less expensive than the higher-end Pro-10 and Pro-1 printer models.&#1411; This Pro-100 prints better blacks and grays on monochrome prints than the older Pro9000 printer that it replaces.&#1411; The new wireless connectivity options of using 802.11b/g/n Wi-Fi or Apple\'s AirPrint, along with a wired Ethernet option, provide flexibility of shared usage and printer placement by not needing to be physically connected to a USB cable.&#1411; The dye-based inks used in this Pro-100 can retain their colors for 100+ years.&#4335; Negatives:&#4335; While black-and-white prints are better than the previous Pro9000 printer, monochrome output is still inferior to the pricier Pro-10 and Pro-1 printers.If you have never used a prosumer-level dedicated 



'I\'ve owned an HP color LaserJet for quite some time now, but the only problem is that is streaks. Something is wrong with the fuser/toner cartridges and despite me getting a local printer/copier technician to look at it, he thinks it can\'t be fixed. Not wanting to spend any more money trying to get it repaired, I\'m just holding onto it until the toner runs out.But recently, my my wife is planning on going back to college. Back to college means writing papers, printing out stuff, and all that jazz and she needed a printer that didn\'t have a huge streak going down each sheet. On top of that, she wanted a printer that was capable of copying/scanning. Having procrastinated for a while now, I did notice that another Canon Pixma (PIXMA MG6220) was the Gold Box Deal of the Day. That printer retailed $199.99 but was on sale for $59.99. Instead of pulling the trigger on it, I decided to wait until she woke up. Unfortunately, that printer sold out lightning fast. When I checked back, some k

asin,helpful,overall,reviewText,reviewTime,reviewerID
B005D5M12M,"[78, 79]",5.0,I've owned an HP color LaserJet for quite some ...,"08 23, 2012",A12B7ZMXFI6IXY
B001S9HWKS,"[4, 4]",5.0,"Excellent Investment.Easy to use.Intuitive, simple ...","06 18, 2011",A2V5UI37R7ESEV
B002JM1XRQ,"[21, 21]",5.0,This is quite possibly the best printer that ...,"01 18, 2010",A38NELQT98S4H8
B0063B8B92,"[18, 20]",5.0,Our house is starting to look something like a ...,"12 28, 2011",AUITG1DJ3QUGK
B004H3XKR6,"[32, 34]",5.0,INSTALLATION PROCESS:*** Smooth and intuitive. I ...,"02 1, 2011",A2VWE5SGNDS8HW
B005IVL0RS,"[30, 33]",3.0,Update - 1/6/2012 - A recent firmware release ...,"12 9, 2011",A38NELQT98S4H8
B001R4BTI0,"[43, 45]",4.0,"I have used an HP Scanjet 4670 for years, and ...","03 6, 2011",A1GQGYROVZVW49
B002MCZIUG,"[11, 11]",4.0,Going against the old adage of don't fix what ...,"11 28, 2009",A2BMZRO0H7TFCS
B0001EMLZW,"[106, 111]",5.0,Have been a stalwart HP financial calculator ...,"03 3, 2005",A33TZR3VQJMKIZ
B0064VZD06,"[131, 134]",5.0,The quality of the prints out of this printer is ...,"06 28, 2012",ASFP250F22MDZ

reviewerName,summary,unixReviewTime,word_count
"Hyoun Kim ""Faluzure""",Excellent Printer - Worth Every Penny - A FEW ...,1345680000,"{'all': 4L, 'tape/wrapping,': 1L, ..."
C. L. Smith,Digital Postal/Shipping Scale - 35LB ...,1308355200,"{'all': 1L, '(5)': 1L, 'seller,': 1L, ..."
W. B. Halper,Quite possibly the best printer that I've ever ...,1263772800,"{'all': 2L, 'yellow': 1L, 'debris': 1L, 'stops': ..."
E. Swope,Nice printer...... (revised 1/2/2012) ...,1325030400,"{'all': 7L, 'reviews.': 1L, 'consider': 5L, ..."
"tachi1 ""tachi1""",Incredibly versatile-- does many things very ...,1296518400,"{'all': 6L, 'concept': 1L, '(it': 1L, 'saves': ..."
W. B. Halper,A firmware update improved this printer. ...,1323388800,"{'hl3070w': 1L, 'eight- page': 1L, 'all': 5L, ..."
P. Schmidt,Nice scanner for former HP 4670 users ...,1299369600,"{'all': 5L, 'borders.i': 1L, 'caused': 1L, ..."
Ed,Premium Quality & Price,1259366400,"{'limited': 1L, 'all': 5L, 'update.': 1L, ..."
Pruitt Hall,Easily better than ANY current HP--and 1/2 the ...,1109808000,"{'all': 3L, '(it': 1L, 'chain': 2L, '(in': 1L, ..."
But I'm feeling much better now... ...,Absolutely Amazing Printer ...,1340841600,"{'osx': 1L, 'all': 7L, 'switched': 1L, ..."

No_Stop_words,helpful_count,overall_count,helpful_percent,helpful_review,Predicted_Helpfulness
"{'tape/wrapping,': 1L, 'reviewers': 1L, '(if': ...",78,79,0.987341772152,1,1.0
"{'easy-to-use': 1L, ""don't"": 1L, 'money': ...",4,4,1.0,1,1.0
"{'yellow': 1L, 'debris': 1L, 'saved': 1L, ...",21,21,1.0,1,1.0
"{'reviews.': 1L, ':)': 1L, '(2184b002)for': 1L, ...",18,20,0.9,1,1.0
"{'concept': 1L, '(it': 1L, 'saves': 1L, ...",32,34,0.941176470588,1,1.0
"{'hl3070w': 1L, 'eight- page': 1L, 'pictures""': ...",30,33,0.909090909091,1,1.0
"{'caused': 1L, 'saves': 1L, 'results': 2L, ...",43,45,0.955555555556,1,1.0
"{'limited': 1L, 'update.': 1L, 'updat ...",11,11,1.0,1,1.0
"{'no,': 1L, '12c': 1L, '(it': 1L, 'feedback': ...",106,111,0.954954954955,1,1.0
"{'osx': 1L, 'switched': 1L, 'better,': 1L, ...",131,134,0.977611940299,1,1.0


asin,helpful,overall,reviewText,reviewTime,reviewerID
B009NSGO3M,"[0, 2]",4.0,****The 3M Easy-Adjust Dual Monitor Arm ...,"10 11, 2013",A2AWVROFGSZU4E
B0074ZN27C,"[3, 6]",4.0,First up the packaging. The box seems a little ...,"03 21, 2012",A25QJBK33C4O0R
B004HY9IJE,"[0, 1]",3.0,With 61 out of 92 people rating this a 4 or 5 out ...,"06 30, 2014",A1SYLII0808HD6
B001GXM68Y,"[2, 7]",5.0,This CASIO HS-8VA calculator is presently ...,"07 10, 2011",A2A1XYSB692L6J
B000OOYECC,"[1, 2]",4.0,I write for a living. I use this stand with a ...,"11 6, 2013",A3775OP5VTX5ON
B004H3XKR6,"[0, 1]",4.0,SetupSetting up was easy enough I didn't even ...,"03 17, 2011",AR3EVUQF0AC7R
B001B0BE6M,"[1, 10]",3.0,You place one or more objects inside the clear ...,"06 16, 2011",A2AWVROFGSZU4E
B00007LVCN,"[3, 8]",4.0,The Avery 8395 labels -- generically called name ...,"04 10, 2011",A2AWVROFGSZU4E
B005HFJFK4,"[4, 19]",2.0,I have owned and/or reviewed over 5 diffe ...,"03 21, 2012",APDPA11IZPYLN
B001YTK3XK,"[0, 1]",2.0,"UPDATE, 12/09/13: this machine is WAY ...","09 11, 2012",A2LEIANN1UZTHP

reviewerName,summary,unixReviewTime,word_count
,Little chepth in quality or price ...,1381449600,"{'all': 2L, 'managed': 1L, 'skip': 1L, 'four': ..."
"terpfan1980 ""Barry""","Nice, solid device",1332288000,"{'default': 1L, 'all': 3L, 'code': 1L, '(it': ..."
M. Allen Greenbaum,"3.5* ""It was the Best of Binders; It was the S ...",1404086400,"{'limited': 2L, 'all': 2L, 'better.': 1L, 'go': ..."
"Patricia ""A Reader""",H A N D S O M E ......C A L C U L A T O ...,1310256000,"{'all': 3L, 'showing,': 1L, 'quibble.this': 1L, ..."
"Coolkayaker ""ck1""","Used for one year, true tips here! ...",1383696000,"{'all': 3L, 'mp3': 1L, 'chair': 1L, '(which': ..."
morning fog,Great home office/small business aio ...,1300320000,"{'limited': 1L, 'all': 3L, 'consider': 1L, ..."
My Fake Name,"ACLU card yes, bowling ball no ...",1308182400,"{'all': 1L, 'four': 1L, 'dish': 1L, 'right.--': ..."
My Fake Name,"Useful features, but at a price ...",1302393600,"{'all': 4L, 'forget': 1L, 'forbidden': 1L, 'dog- ..."
"Sheila Chilcote-Collins ""Sheila Renee Chilcot... ...",I Cannot Stress This Enough - Stay Away From ...,1332288000,"{'all': 4L, '(it': 6L, 'updated:01/02/2012': ..."
brainout,"6 stars for printer, -3 for BAD manuals -1 for ...",1347321600,"{'limited': 1L, 'ever.': 1L, 'searchable': 3L, ..."

No_Stop_words,helpful_count,overall_count,helpful_percent,helpful_review,Predicted_Helpfulness
"{'(ma260mb)': 2L, 'titled': 1L, ...",0,2,0.0,0,4.44713951066e-42
"{'default': 1L, 'code': 1L, '(it': 1L, ...",3,6,0.5,0,1.15571599387e-42
"{'wall!': 1L, 'limited': 2L, 'denial,': 1L, ...",0,1,0.0,0,2.7233862376999997e-44
"{'precise': 1L, 'hs-8va': 2L, 'showing,': 1L, ...",2,7,0.285714285714,0,6.24942583528e-46
"{'mp3': 1L, 'chair': 1L, '(which': 1L, 'issues': ...",1,2,0.5,0,6.018625085129999e-46
"{'thecoolest': 1L, ""theprinter's"": 1L, ...",0,1,0.0,0,3.63454989453e-52
"{'standard-issue': 1L, '27.002': 1L, 'office': ...",1,10,0.1,0,1.87957123256e-52
"{'forget': 1L, 'forbidden': 1L, 'dog- ...",3,8,0.375,0,1.01749636076e-61
"{'to993,': 2L, '(it': 6L, 'updated:01/02/2012': ...",4,19,0.210526315789,0,2.42852681978e-93
"{'limited': 1L, 'ever.': 1L, 'searchable': 3L, ...",0,1,0.0,0,1.2690033193299998e-237
