In [50]:
import pandas as pd
from matplotlib import pyplot as plt
from textwrap import wrap
    
df = pd.read_csv('../rotten_tomatoes_critic_reviews.csv')

# Keep rows that carry a review score, then a single row per distinct score value.
# NOTE(review): de-duplicating on the score leaves one review per score string --
# confirm this is intended for the later per-label text analysis.
df2 = df[df["review_score"].notna()].drop_duplicates(subset=["review_score"])

# Quality labels ordered worst -> best. Letter grades are listed in the same
# order, so position i in one list pairs with position i in the other.
score_labels = ["VERY_BAD", "BAD", "MID", "GOOD", "VERY_GOOD"]
score_letters = ["E", "D", "C", "B", "A"]

# Filled further down with one label per row of df2.
review_score_label = []

def is_it_number_score(score):
    """Return True when ``score`` contains no alphabetic character.

    Digits, punctuation, whitespace and the empty string all count as
    "number" scores; a single letter anywhere makes the result False.
    """
    return not any(symbol.isalpha() for symbol in score)

def get_label_from_score_ratio(score, labels=None):
    """Map a fractional score such as ``"3.5/5"`` to a quality label.

    The fraction is scaled onto ``0..len(labels)`` steps, rounded to the
    nearest whole step (halves round up) and shifted down by one so that it
    indexes ``labels``, whose first entry is the lowest rating.

    Args:
        score: Text of the form ``"<numerator>/<denominator>"``.
        labels: Optional sequence of labels ordered from worst to best.
            Defaults to the module-level ``score_labels``.

    Returns:
        The label for the score. A denominator of zero or less and any
        negative ratio yield the first label; ratios above 1 yield the last.

    Raises:
        ValueError: If ``score`` has no ``/`` or either side cannot be parsed
            by ``float``.
    """
    if labels is None:
        labels = score_labels
    step_count = len(labels)

    numerator, _, denominator = score.partition('/')
    divisor = float(denominator)
    if divisor <= 0:
        return labels[0]

    ratio_steps = (float(numerator) / divisor) * step_count
    # Clamp before indexing: a negative value would otherwise become a
    # negative list index and silently pick a label from the end of the list.
    ratio_steps = min(max(ratio_steps, 0.0), float(step_count))

    # Round half up. The built-in round() rounds halves to the nearest even
    # integer, which sent 2.5/5 and 4.5/5 down but 1.5/5 and 3.5/5 up.
    step = int(ratio_steps)
    if ratio_steps - step >= 0.5:
        step += 1

    # Steps 0 and 1 both map to the lowest label.
    return labels[max(step - 1, 0)]

def get_label_from_score_letter(score):
    """Map a letter-grade score to a quality label using its first character.

    The first character of ``score`` is looked up in ``score_letters``; the
    label at the same position in ``score_labels`` is returned. Any character
    that is not a listed grade falls back to the first label.
    """
    grade = score[0]
    position = next(
        (idx for idx, candidate in enumerate(score_letters) if candidate == grade),
        0,  # unknown grade -> lowest label
    )
    return score_labels[position]
    

# Classify every score in df2: letter grades go through the letter lookup,
# fractions are used as written, and a bare number is read as "out of 100".
for score in df2["review_score"]:
    if not is_it_number_score(score):
        label_for_score = get_label_from_score_letter(score)
    elif '/' in score:
        label_for_score = get_label_from_score_ratio(score)
    else:
        label_for_score = get_label_from_score_ratio(score + '/100')
    review_score_label.append(label_for_score)

df2["review_score_label"] = review_score_label

In [59]:
from sklearn.feature_extraction.text import TfidfVectorizer

def get_TFIDF_by_review_score_label(dataset):
    """Fit a TF-IDF model on ``dataset`` and return term weights as a DataFrame.

    Args:
        dataset: Iterable of text documents passed to
            ``TfidfVectorizer.fit_transform``.

    Returns:
        A DataFrame indexed by vocabulary term with a single ``"TF-IDF"``
        column, sorted from highest to lowest weight.

    NOTE(review): only row 0 of the fitted matrix (the first document in
    ``dataset``) is reported, not an aggregate over all documents -- confirm
    that this is the intended behaviour.
    """
    vectorizer = TfidfVectorizer(use_idf=True)
    tfidf_matrix = vectorizer.fit_transform(dataset)

    # get_feature_names() was deprecated in scikit-learn 1.0 and removed in
    # 1.2; prefer the replacement and fall back only on older installations.
    if hasattr(vectorizer, "get_feature_names_out"):
        terms = vectorizer.get_feature_names_out()
    else:
        terms = vectorizer.get_feature_names()

    # Column vector (n_terms x 1) holding the first document's weights.
    first_doc_weights = tfidf_matrix[0].T.toarray()
    weights = pd.DataFrame(first_doc_weights, index=terms, columns=["TF-IDF"])
    return weights.sort_values('TF-IDF', ascending=False)

# One entry per quality label; each entry collects the review texts that carry
# the label and the TF-IDF table computed from those texts.
review_by_score = {
    label: {} for label in ("VERY_BAD", "BAD", "MID", "GOOD", "VERY_GOOD")
}

for label, bucket in review_by_score.items():
    bucket["review_content"] = df2.loc[df2["review_score_label"] == label, "review_content"]
    # Cast to unicode strings before vectorizing (missing values become text too).
    bucket["tfidf"] = get_TFIDF_by_review_score_label(
        bucket["review_content"].values.astype('U')
    )

{'VERY_BAD': {'review_content': 6          Harry Potter knockoffs don't come more transpa...
51         Sadly worthy of its dumping into the cinematic...
417        Almost worth sitting through the first half fo...
445        A strong camp sensibility is just about the on...
476        Visually, historically and creatively, this fi...
                                 ...                        
1073517    If Get Out introduced a savvy culture vulture ...
1074852    Even by the thoroughly undemanding standards o...
1075458    All in all, I would caution ... (those) just t...
1076931    ...the shadow-puppet apocalypse exists outside...
1104287    A deeply reflective collection of moments, bot...
Name: review_content, Length: 137, dtype: object, 'tfidf': None}, 'BAD': {'review_content': 11         For what it is and for whom it is intended, it...
12         Chris Columbus returns to his comfort zone for...
45         The Lightning Thief is all crass ineptitude an...
57         Percy Jacks

In [56]:
from wordcloud import WordCloud

def makeImage(frequency):
    """Draw a word cloud for ``frequency`` and display it with matplotlib.

    ``frequency`` is passed unchanged to ``WordCloud.generate_from_frequencies``.
    The cloud uses a white background and at most 1000 words; no mask image
    is applied.
    """
    cloud = WordCloud(max_words=1000, background_color="white")
    cloud.generate_from_frequencies(frequency)

    # Render the cloud as an image with the axes hidden.
    plt.imshow(cloud, interpolation="bilinear")
    plt.axis("off")
    plt.show()
    


{'review_content': 6          Harry Potter knockoffs don't come more transpa...
51         Sadly worthy of its dumping into the cinematic...
417        Almost worth sitting through the first half fo...
445        A strong camp sensibility is just about the on...
476        Visually, historically and creatively, this fi...
                                 ...                        
1073517    If Get Out introduced a savvy culture vulture ...
1074852    Even by the thoroughly undemanding standards o...
1075458    All in all, I would caution ... (those) just t...
1076931    ...the shadow-puppet apocalypse exists outside...
1104287    A deeply reflective collection of moments, bot...
Name: review_content, Length: 137, dtype: object, 'tfidf': None}
