In [1]:
import pandas as pd
import numpy as np


import matplotlib.pyplot as plt
plt.style.use ('ggplot')
import seaborn as sns
%matplotlib inline
sns.set_style(style="whitegrid")

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.decomposition import NMF
from sklearn.feature_extraction import text

from bs4 import BeautifulSoup

import nltk, string, contractions

from gensim.corpora.dictionary import Dictionary
from gensim.models.nmf import Nmf
from gensim.models.coherencemodel import CoherenceModel

from operator import itemgetter

In [2]:
df = pd.read_csv('data/modeling_ready_microwave1')

In [3]:
df.head()

Unnamed: 0,customer_id,review_id,product_id,star_rating,helpful_votes,total_votes,verified_purchase,review_headline,review_body,review_date,review_wordcount,clean_review
0,44300577,R2ZU11YALTJNZX,B0009KMYHI,1.0,0.0,0.0,Y,I have had this microwave for just over 3 year...,I have had this microwave for just over 3 year...,2015-08-31,91,I have had this microwave for just over 3 year...
1,50952586,RVBITZNBVJ8AI,B0009KMYHI,5.0,0.0,0.0,Y,It takes a good bit of room on the counter,Man this thing cooks evenly and fast! It takes...,2015-08-30,23,Man this thing cooks evenly and fast! It takes...
2,31144708,R44LZL0OR5EWP,B0009KMYDM,5.0,0.0,0.0,Y,Five Stars,Works great! Have had it over a year and no su...,2015-08-30,17,Works great! Have had it over a year and no su...
3,14667788,RPWWYNUD9X64U,B0009KMYHI,5.0,0.0,0.0,Y,... received this several years ago and it sti...,I ordered and received this several years ago ...,2015-08-29,13,I ordered and received this several years ago ...
4,25468208,RS76DCCGM685B,B0009KMYGY,1.0,3.0,3.0,Y,Avoid Panasonic - Doors Don't Close,I have had this microwave for just over 2 year...,2015-08-28,131,I have had this microwave for just over 2 year...


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1875 entries, 0 to 1874
Data columns (total 12 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   customer_id        1875 non-null   int64  
 1   review_id          1875 non-null   object 
 2   product_id         1875 non-null   object 
 3   star_rating        1875 non-null   float64
 4   helpful_votes      1875 non-null   float64
 5   total_votes        1875 non-null   float64
 6   verified_purchase  1875 non-null   object 
 7   review_headline    1875 non-null   object 
 8   review_body        1875 non-null   object 
 9   review_date        1875 non-null   object 
 10  review_wordcount   1875 non-null   int64  
 11  clean_review       1875 non-null   object 
dtypes: float64(3), int64(2), object(7)
memory usage: 175.9+ KB


In [5]:
df.review_date = pd.to_datetime(df.review_date)

In [6]:
df.describe()

Unnamed: 0,customer_id,star_rating,helpful_votes,total_votes,review_wordcount
count,1875.0,1875.0,1875.0,1875.0,1875.0
mean,32094010.0,3.850133,3.107733,3.628267,80.606933
std,14762230.0,1.496151,20.062047,20.926332,97.269705
min,87220.0,1.0,0.0,0.0,1.0
25%,17487800.0,3.0,0.0,0.0,28.0
50%,33032780.0,5.0,1.0,1.0,51.0
75%,46245250.0,5.0,2.0,2.0,94.0
max,53094260.0,5.0,589.0,599.0,1152.0


In [7]:
def remove_punctuations(text):
    punct =[]
    punct += list(string.punctuation)
    punct += '’'
    punct.remove("'")
    for punctuation in punct:
        text = text.replace(punctuation, ' ')
    return text

In [8]:
def nlp_prep(df):
    # lowercase everything
    # get rid of '\n' from whitespace
    # regex remove hyperlinks
    # remove punctuation
    # remove ' s ' from removing punctuation
    
    # lowercase everything
    df['model_ready'] = df['clean_review'].apply(lambda x: x.lower())
    # expand contractions
    df['model_ready'] = df['model_ready'].apply(lambda x:[contractions.fix(word) for word in x.split()])
    df['model_ready'] = [' '.join(map(str, l)) for l in df['model_ready']]
    # get rid of '\n' from whitespace 
    df['model_ready'] = df['model_ready'].apply(lambda x: x.replace('\n', ' '))
    # regex remove hyperlinks
    df['model_ready'] = df['model_ready'].str.replace('http\S+|www.\S+', '', case=False)
    # remove punctuations
    df['model_ready'] = df['model_ready'].apply(remove_punctuations)
    # remove ' s ' that was created after removing punctuations
    df['model_ready'] = df['model_ready'].apply(lambda x: str(x).replace(" s ", " "))
    return df

In [9]:
nlp_prep(df)

Unnamed: 0,customer_id,review_id,product_id,star_rating,helpful_votes,total_votes,verified_purchase,review_headline,review_body,review_date,review_wordcount,clean_review,model_ready
0,44300577,R2ZU11YALTJNZX,B0009KMYHI,1.0,0.0,0.0,Y,I have had this microwave for just over 3 year...,I have had this microwave for just over 3 year...,2015-08-31,91,I have had this microwave for just over 3 year...,i have had this microwave for just over 3 year...
1,50952586,RVBITZNBVJ8AI,B0009KMYHI,5.0,0.0,0.0,Y,It takes a good bit of room on the counter,Man this thing cooks evenly and fast! It takes...,2015-08-30,23,Man this thing cooks evenly and fast! It takes...,man this thing cooks evenly and fast it takes...
2,31144708,R44LZL0OR5EWP,B0009KMYDM,5.0,0.0,0.0,Y,Five Stars,Works great! Have had it over a year and no su...,2015-08-30,17,Works great! Have had it over a year and no su...,works great have had it over a year and no su...
3,14667788,RPWWYNUD9X64U,B0009KMYHI,5.0,0.0,0.0,Y,... received this several years ago and it sti...,I ordered and received this several years ago ...,2015-08-29,13,I ordered and received this several years ago ...,i ordered and received this several years ago ...
4,25468208,RS76DCCGM685B,B0009KMYGY,1.0,3.0,3.0,Y,Avoid Panasonic - Doors Don't Close,I have had this microwave for just over 2 year...,2015-08-28,131,I have had this microwave for just over 2 year...,i have had this microwave for just over 2 year...
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1870,33705501,R34GY91QSWWUE4,B0009KMYGY,4.0,60.0,64.0,N,Just right for me...,The Panasonic NN-H765WF was exactly what I was...,2006-03-23,74,The Panasonic NN-H765WF was exactly what I was...,the panasonic nn h765wf was exactly what i was...
1871,39268146,R7N1LRCV8GU6L,B0009KMYHI,5.0,5.0,10.0,Y,good looking-good cooking,this microwave cooks food very evenly-no need ...,2006-03-19,21,this microwave cooks food very evenly-no need ...,this microwave cooks food very evenly no need ...
1872,19314267,R15KWS9QSI7URC,B0009KMYDM,5.0,12.0,13.0,Y,Panasonic NN-H965WF Luxury full-size 2.2 cu.ft...,"its a great microwave , i use it every day , i...",2006-03-16,67,"its a great microwave , i use it every day , i...",its a great microwave i use it every day i...
1873,11400155,RK5HSUM1GNRH8,B0009KMYDM,5.0,4.0,9.0,N,Panasonic Microwave Oven NN-965WF,Have had it for only a short time but it has p...,2006-02-24,20,Have had it for only a short time but it has p...,have had it for only a short time but it has p...


In [10]:
w_tokenizer = nltk.tokenize.WhitespaceTokenizer()
lemmatizer = nltk.WordNetLemmatizer()

def lemmatize_text(text):
    return [lemmatizer.lemmatize(w) for w in w_tokenizer.tokenize(text)]

In [11]:
keep_words = ['not', 'ain', 'aren', "aren't", 'couldn', "couldn't", 
              'didn', "didn't", 'doesn', "doesn't", 'hadn', "hadn't", 
              'hasn', "hasn't", 'haven', "haven't", 'isn', "isn't", 
              'ma', 'mightn', "mightn't", 'mustn', "mustn't", 'needn',
              "needn't", 'shan', "shan't", 'shouldn', "shouldn't", 
              'wasn', "wasn't", 'weren', "weren't", 'won', "won't", 
              'wouldn', "wouldn't", 'fire', 'off']

In [12]:
test_stop_words = ['great', 'excellent', '1', '2', 'feature', 'nice', 'old']
review_stop_words = ['panasonic', 'really', 'husband', 'thanks', 'thank', 'ha', 
              'just', 'thing', 'did', 'nn', 'wa', 'yr', 'u', 'say', 'doe',
              'mom', 'christmas', 'gift', 'got', 'way', 'le', 'daughter',
              'e','not','love','good','bought','great microwave','micro',
              'great oven','microwave','product','work great','nice work',
              'work great use','work great love','feature work great',
              'unit work great','oven work great','easy use love','old oven',
              'old old','20 year old','unit', 'not', 'work', 'amazon', 'com',
              'old', 'wife', 'highly', 'recommend', 'like', 'charm', '20', 
              'easy', 'oven', 'use', 'year', 'lot', 'pleased', 'happy', 'hope']
for _ in text.ENGLISH_STOP_WORDS:
    if _ in keep_words:
        pass
    else:
        review_stop_words.append(_)
for _ in test_stop_words:
    review_stop_words.append(_)

In [13]:
def vectorize_this(max_features, min_df, max_df, ngram_max):
    vectorizer = TfidfVectorizer(tokenizer = lemmatize_text,
                             stop_words= review_stop_words,
                             max_features = max_features,
                             min_df = min_df,
                             max_df =  max_df,
                             ngram_range=(0, ngram_max)
                            )
    X = df['model_ready']
    X = vectorizer.fit_transform(X)
    return X, vectorizer

In [14]:
X, vectorizer = vectorize_this(10000, 4, .8, 2)

In [15]:
def make_nmf(n_components, alpha, X):
    nmf = NMF(
            n_components=n_components,
            init='nndsvd',
            random_state=12345,
            alpha = alpha
            ).fit(X)

    W = nmf.fit_transform(X)
    H = nmf.components_
    return nmf, W, H

In [16]:
nmf, W, H, = make_nmf(17, .1, X)

In [17]:
def topic_keywords(vectorizer=vectorizer, lda_model=nmf, n_words=20):
    keywords = np.array(vectorizer.get_feature_names())
    topic_keywords = []
    for topic_weights in lda_model.components_:
        top_keyword_locs = (-topic_weights).argsort()[:n_words]
        topic_keywords.append(keywords.take(top_keyword_locs))
    return topic_keywords

In [18]:
topic_keywords = topic_keywords(vectorizer=vectorizer, lda_model=nmf, n_words=10)        

In [19]:
topic_keywords

[array(['time', 'second', 'cooking', 'cooking time', 'long', 'long time',
        'item', 'arrived time', 'minute', 'start'], dtype='<U22'),
 array(['door', 'latch', 'close', 'door latch', 'problem', 'slam', 'shut',
        'door close', 'hard', 'slam door'], dtype='<U22'),
 array(['service', 'repair', 'day', 'warranty', 'customer',
        'customer service', 'center', 'shipping', 'called', 'purchase'],
       dtype='<U22'),
 array(['large', 'fit', 'space', 'size', 'counter', 'kitchen',
        'counter space', 'need', 'needed', 'dish'], dtype='<U22'),
 array(['sensor', 'reheat', 'sensor reheat', 'defrost', 'function', 'food',
        'reheat function', 'turbo', 'sensor cook', 'warm'], dtype='<U22'),
 array(['model', 'new', 'replace', 'new model', 'previous', 'died',
        'older', 'replaced', 'similar', 'older model'], dtype='<U22'),
 array(['power', 'level', 'power level', 'watt', 'inverter', '1250',
        '1250 watt', 'power setting', 'watt power', 'cooking'],
       dtype='<U2

In [20]:
def topic_featuring(n_components=17, n_words=10):
    
    nmf, W, H = make_nmf(n_components, .1, X)
    

    # Topic - Keywords Dataframe
    df_topic_keywords = pd.DataFrame(topic_keywords)
    df_topic_keywords.columns = ['Word '+str(i) for i in range(df_topic_keywords.shape[1])]
    df_topic_keywords.index = ['Topic '+str(i) for i in range(df_topic_keywords.shape[0])]

    Topics_theme = range(n_components)
    df_topic_keywords['topic_theme'] = Topics_theme
    df_topic_keywords.set_index('topic_theme', inplace=True)
    return df_topic_keywords.T

In [21]:
topic_featuring(17, 10)

topic_theme,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
Word 0,time,door,service,large,sensor,model,power,open,price,food,cook,lasted,used,working,popcorn,powerful,month
Word 1,second,latch,repair,fit,reheat,new,level,button,store,heat,fast,second,little,stopped,setting,big,buy
Word 2,cooking,close,day,space,sensor reheat,replace,power level,door,best,evenly,cook fast,died,far,stopped working,bag,big powerful,worked
Word 3,cooking time,door latch,warranty,size,defrost,new model,watt,push,size,heat food,cook evenly,lasted 10,long,quit working,popcorn setting,room,fine
Word 4,long,problem,customer,counter,function,previous,inverter,door open,right,quickly,evenly,10,look,quit,button,powerful large,6 month
Word 5,long time,slam,customer service,kitchen,food,died,1250,open door,delivery,cook food,sensor cook,owned,expected,far,pop,sure,6
Word 6,item,shut,center,counter space,reheat function,older,1250 watt,push button,needed,heating,cook food,previous lasted,cooking,month working,popcorn button,loud,died
Word 7,arrived time,door close,shipping,need,turbo,replaced,power setting,button open,arrived,food evenly,potato,long,think,working far,burn,spacious,warranty
Word 8,minute,hard,called,needed,sensor cook,similar,watt power,hand,expected,food quickly,larger,going,bit,completely stopped,size,inside,ago
Word 9,start,slam door,purchase,dish,warm,older model,cooking,pull,value,heat evenly,minute,previous,getting,color,make,second,month ago


In [129]:
def nmf_featurizer(max_features=10000, min_df=4, max_df=.8, ngram_max = 2, n_components=17, n_words=10, column_names_known = 'n'):
    X, vectorizer = vectorize_this(max_features, min_df, max_df, ngram_max)
    
    nmf, W, H = make_nmf(n_components, .1, X)
    
    # Topic - Keywords Dataframe
    df_topic_keywords = pd.DataFrame(topic_keywords)
    df_topic_keywords.columns = ['Word '+str(i) for i in range(df_topic_keywords.shape[1])]
    df_topic_keywords.index = ['Topic '+str(i) for i in range(df_topic_keywords.shape[0])]
    
    if column_names_known == 'n':
        Topics_theme = range(n_components)
    elif column_names_known == 'Y':
        Topics_theme = topic_labels
    df_topic_keywords['topic_theme'] = Topics_theme
    df_topic_keywords.set_index('topic_theme', inplace=True)
    return df_topic_keywords.T

In [138]:
nmf_featurizer(ngram_max=1)

topic_theme,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
Word 0,time,door,service,large,sensor,model,power,open,price,food,cook,lasted,used,working,popcorn,powerful,month
Word 1,second,latch,repair,fit,reheat,new,level,button,store,heat,fast,second,little,stopped,setting,big,buy
Word 2,cooking,close,day,space,sensor reheat,replace,power level,door,best,evenly,cook fast,died,far,stopped working,bag,big powerful,worked
Word 3,cooking time,door latch,warranty,size,defrost,new model,watt,push,size,heat food,cook evenly,lasted 10,long,quit working,popcorn setting,room,fine
Word 4,long,problem,customer,counter,function,previous,inverter,door open,right,quickly,evenly,10,look,quit,button,powerful large,6 month
Word 5,long time,slam,customer service,kitchen,food,died,1250,open door,delivery,cook food,sensor cook,owned,expected,far,pop,sure,6
Word 6,item,shut,center,counter space,reheat function,older,1250 watt,push button,needed,heating,cook food,previous lasted,cooking,month working,popcorn button,loud,died
Word 7,arrived time,door close,shipping,need,turbo,replaced,power setting,button open,arrived,food evenly,potato,long,think,working far,burn,spacious,warranty
Word 8,minute,hard,called,needed,sensor cook,similar,watt power,hand,expected,food quickly,larger,going,bit,completely stopped,size,inside,ago
Word 9,start,slam door,purchase,dish,warm,older model,cooking,pull,value,heat evenly,minute,previous,getting,color,make,second,month ago


In [24]:
vocabulary = np.array(vectorizer.get_feature_names())

In [25]:
def label_topics(H, vocabulary):
    '''
    Print the most influential words of each latent topic, and prompt the user
    to label each topic. The user should use their humanness to figure out what
    each latent topic is capturing.
    '''
    topic_labels = []
    for i, row in enumerate(H):
        top_five = np.argsort(row)[::-1][:12]
        print('topic', i)
        print('-->', ' '.join(vocabulary[top_five]))
        label = input('please label this topic: ')
        topic_labels.append(label)
        print()
    return topic_labels

In [135]:
nmf_featurizer()

topic_theme,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
Word 0,time,door,service,large,sensor,model,power,open,price,food,cook,lasted,used,working,popcorn,powerful,month
Word 1,second,latch,repair,fit,reheat,new,level,button,store,heat,fast,second,little,stopped,setting,big,buy
Word 2,cooking,close,day,space,sensor reheat,replace,power level,door,best,evenly,cook fast,died,far,stopped working,bag,big powerful,worked
Word 3,cooking time,door latch,warranty,size,defrost,new model,watt,push,size,heat food,cook evenly,lasted 10,long,quit working,popcorn setting,room,fine
Word 4,long,problem,customer,counter,function,previous,inverter,door open,right,quickly,evenly,10,look,quit,button,powerful large,6 month
Word 5,long time,slam,customer service,kitchen,food,died,1250,open door,delivery,cook food,sensor cook,owned,expected,far,pop,sure,6
Word 6,item,shut,center,counter space,reheat function,older,1250 watt,push button,needed,heating,cook food,previous lasted,cooking,month working,popcorn button,loud,died
Word 7,arrived time,door close,shipping,need,turbo,replaced,power setting,button open,arrived,food evenly,potato,long,think,working far,burn,spacious,warranty
Word 8,minute,hard,called,needed,sensor cook,similar,watt power,hand,expected,food quickly,larger,going,bit,completely stopped,size,inside,ago
Word 9,start,slam door,purchase,dish,warm,older model,cooking,pull,value,heat evenly,minute,previous,getting,color,make,second,month ago


In [136]:
topic_labels = label_topics(H,vocabulary)

topic 0
--> time second cooking cooking time long long time item arrived time minute start time used run
please label this topic: cooking time

topic 1
--> door latch close door latch problem slam shut door close hard slam door broke closing
please label this topic: door latch

topic 2
--> service repair day warranty customer customer service center shipping called purchase week cost
please label this topic: customer service and support

topic 3
--> large fit space size counter kitchen counter space need needed dish wanted perfect
please label this topic: external size

topic 4
--> sensor reheat sensor reheat defrost function food reheat function turbo sensor cook warm reheat sensor hot
please label this topic: pre-programmed functions

topic 5
--> model new replace new model previous died older replaced similar older model purchased 10
please label this topic: replacement

topic 6
--> power level power level watt inverter 1250 1250 watt power setting watt power cooking setting technol

In [27]:
def softmax(v, temperature=1.0):
    '''
    A heuristic to convert arbitrary positive values into probabilities.
    See: https://en.wikipedia.org/wiki/Softmax_function
    '''
    expv = np.exp(v / temperature)
    s = np.sum(expv)
    return expv / s

In [28]:
def analyze_reviews(W, topic_labels):
    '''
    Print an analysis of a single Amazon review, including the review ID
    and a summary of which topics it represents. The topics are identified
    via the hand-labels which were assigned by the user.
    '''
    topic_count = len(topic_labels)
    topic_percentages = [[] for i in range(0, topic_count)]

    for idx, i in enumerate(W):  
        probs = softmax(W[idx], temperature=0.01)
        topic_list_counter = 0
        for prob, label in zip(probs, topic_labels):
            topic_percentages[topic_list_counter].append(round(prob, 5))
            topic_list_counter += 1
        
    return pd.DataFrame(dict(zip(topic_labels, topic_percentages)))

In [112]:
top_df = analyze_reviews(W, topic_labels)

In [120]:
top_df

Unnamed: 0,cooking time,door latch / shutting,customer service and support,external size,programmed functions,SOUP,cooking power / wattage,door open button,perceived value,cooks food evenly,SOUP (cook time),product lifetime,popcorn feature,short product lifetime
0,0.00001,0.99999,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000
1,0.00000,0.00000,0.00000,0.00009,0.00000,0.00002,0.00000,0.00000,0.00000,0.00003,0.99983,0.00000,0.00000,0.00000
2,0.00000,0.00000,0.00000,0.99993,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000
3,0.00278,0.00326,0.91190,0.00286,0.00278,0.00604,0.00278,0.00278,0.00278,0.00282,0.00278,0.00278,0.00278,0.03392
4,0.00000,0.99998,0.00000,0.00000,0.00000,0.00000,0.00000,0.00002,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1870,0.00107,0.00000,0.00000,0.00002,0.00000,0.99859,0.00000,0.00000,0.00000,0.00029,0.00000,0.00000,0.00000,0.00000
1871,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.99832,0.00168,0.00000,0.00000,0.00000
1872,0.00000,0.00000,0.00000,0.00150,0.00000,0.00000,0.00000,0.00000,0.00001,0.04877,0.00005,0.00000,0.94964,0.00000
1873,0.99960,0.00001,0.00002,0.00001,0.00001,0.00001,0.00001,0.00001,0.00001,0.00007,0.00001,0.00002,0.00001,0.00003


In [113]:
top_df.iloc[0]

cooking time                    0.00001
door latch / shutting           0.99999
customer service and support    0.00000
external size                   0.00000
programmed functions            0.00000
SOUP                            0.00000
cooking power / wattage         0.00000
door open button                0.00000
perceived value                 0.00000
cooks food evenly               0.00000
SOUP (cook time)                0.00000
product lifetime                0.00000
popcorn feature                 0.00000
short product lifetime          0.00000
Name: 0, dtype: float64

In [116]:
feature_score = {}
for _ in range(len(top_df.columns)):
    key = top_df.columns[_]
    feature_score[key] = round(sum(top_df[key])/(len(top_df)),5)

In [117]:
feature_score

{'cooking time': 0.05757,
 'door latch / shutting': 0.0695,
 'customer service and support': 0.08676,
 'external size': 0.09258,
 'programmed functions': 0.06032,
 'SOUP': 0.04082,
 'cooking power / wattage': 0.068,
 'door open button': 0.0575,
 'perceived value': 0.05023,
 'cooks food evenly': 0.05713,
 'SOUP (cook time)': 0.05456,
 'product lifetime': 0.04173,
 'popcorn feature': 0.04421,
 'short product lifetime': 0.0565}

In [118]:
sorted(feature_score.items(), key= lambda x: x[1])

[('SOUP', 0.04082),
 ('product lifetime', 0.04173),
 ('popcorn feature', 0.04421),
 ('perceived value', 0.05023),
 ('SOUP (cook time)', 0.05456),
 ('short product lifetime', 0.0565),
 ('cooks food evenly', 0.05713),
 ('door open button', 0.0575),
 ('cooking time', 0.05757),
 ('programmed functions', 0.06032),
 ('cooking power / wattage', 0.068),
 ('door latch / shutting', 0.0695),
 ('customer service and support', 0.08676),
 ('external size', 0.09258)]

In [133]:
topic_words_df = nmf_featurizer(column_names_known ='Y')
topic_words_df

topic_theme,cooking time,door latch / shutting,customer service and support,external size,programmed functions,SOUP,cooking power / wattage,door open button,perceived value,cooks food evenly,SOUP (cook time),product lifetime,SOUP.1,product lifetime.1,popcorn feature,SOUP.2,short product lifetime
Word 0,time,door,service,large,sensor,model,power,open,price,food,cook,lasted,used,working,popcorn,powerful,month
Word 1,second,latch,repair,fit,reheat,new,level,button,store,heat,fast,second,little,stopped,setting,big,buy
Word 2,cooking,close,day,space,sensor reheat,replace,power level,door,best,evenly,cook fast,died,far,stopped working,bag,big powerful,worked
Word 3,cooking time,door latch,warranty,size,defrost,new model,watt,push,size,heat food,cook evenly,lasted 10,long,quit working,popcorn setting,room,fine
Word 4,long,problem,customer,counter,function,previous,inverter,door open,right,quickly,evenly,10,look,quit,button,powerful large,6 month
Word 5,long time,slam,customer service,kitchen,food,died,1250,open door,delivery,cook food,sensor cook,owned,expected,far,pop,sure,6
Word 6,item,shut,center,counter space,reheat function,older,1250 watt,push button,needed,heating,cook food,previous lasted,cooking,month working,popcorn button,loud,died
Word 7,arrived time,door close,shipping,need,turbo,replaced,power setting,button open,arrived,food evenly,potato,long,think,working far,burn,spacious,warranty
Word 8,minute,hard,called,needed,sensor cook,similar,watt power,hand,expected,food quickly,larger,going,bit,completely stopped,size,inside,ago
Word 9,start,slam door,purchase,dish,warm,older model,cooking,pull,value,heat evenly,minute,previous,getting,color,make,second,month ago


In [104]:
review_df = pd.concat([df, top_df.reindex(df.index)], axis=1)[['review_body', '1','2','3','4','5','6','7','8','9','10','11','12','13','14','15','16','17']]

In [110]:
print(review_df.review_body[0])
print(review_df.iloc[0])

I have had this microwave for just over 3 years and the door won't close, or the microwave doesn't know that the door is closed so it won't turn on. It is rather irritating to be having to keep slamming and pressing and shaking the door to make it start. Many a times we end up heating/cooking on the stove. I do not think it is worth the over $100 we have to pay for it and throw it after 2-3 years. I see that many others have mentioned this problem.
review_body    I have had this microwave for just over 3 year...
1                                                          1e-05
2                                                        0.99999
3                                                              0
4                                                              0
5                                                              0
6                                                              0
7                                                              0
8                          

TypeError: 'DataFrame' object is not callable