# Lesson: Scattertext

In [None]:
# install scattertext
!conda install scattertext

In [None]:
from IPython.display import IFrame
import scattertext as st
from scattertext import (
    SampleCorpora,
    produce_scattertext_explorer,
    produce_scattertext_html,
)
from scattertext.CorpusFromPandas import CorpusFromPandas

In [None]:
# Assemble the Scattertext corpus from the `reviews` DataFrame:
# `stars` supplies the category labels, `text` the documents, and `nlp` does the parsing.
corpus_factory = st.CorpusFromPandas(
    reviews,
    category_col="stars",
    text_col="text",
    nlp=nlp,
)
corpus = corpus_factory.build()

# Drop the pipeline's default stop words. ignore_absences=True is passed so that
# stop words missing from the corpus are tolerated (presumably instead of raising).
corpus = corpus.remove_terms(nlp.Defaults.stop_words, ignore_absences=True)

In [None]:
# Sanity check: show the type of the object bound to `corpus`
# (as the last expression in the cell, its value is echoed by the notebook).
type(corpus)

## Explore Corpus Object

In [None]:
# Terms that set these reviews apart from general English text:
# print the first ten index labels of the corpus-vs-background score table.
background_scores = corpus.get_scaled_f_scores_vs_background()
print(background_scores.index[:10].tolist())

In [None]:
# Next, fetch the corpus's term-frequency table so that a term's frequency in the
# 1-star reviews can be compared with its frequency in the 5-star reviews.
# NOTE(review): presumably one frequency column per category — confirm against the output.

term_freq_df = corpus.get_term_freq_df()
term_freq_df  # last expression in the cell: the notebook displays the table

In [None]:
# Attach each term's scaled F-score for both categories as new columns,
# keeping the original column order ('5-star' first, then '1-star').
for column_name, category in (('5-star', '5'), ('1-star', '1')):
    term_freq_df[column_name] = corpus.get_scaled_f_scores(category)

# Preview the first rows of the augmented table.
term_freq_df.head()

### Find terms characteristic of each category

In [None]:
# Ten terms with the highest '5-star' score, strongest first.
top_five_star = term_freq_df.sort_values(by='5-star', ascending=False).head(10)
print(top_five_star.index.tolist())

In [None]:
# Ten terms with the highest '1-star' score, strongest first.
top_one_star = term_freq_df.sort_values(by='1-star', ascending=False).head(10)
print(top_one_star.index.tolist())

## Create HTML Visualization i.e. Interactive Explorer

In [None]:
# Render the interactive Scattertext explorer and save it as a standalone HTML file.

# Path the generated page is written to.
scatter_fname = './scattertext_restaurant.html'

# Build the explorer HTML for category '5' (labelled "5-Star Reviews") against the
# remaining documents (labelled "1-Star Reviews"). The review text itself is passed
# as per-document metadata.
scatter_html = st.produce_scattertext_explorer(
    corpus,
    category='5',
    category_name="5-Star Reviews",
    not_category_name="1-Star Reviews",
    minimum_term_frequency=3,
    width_in_pixels=1000,
    metadata=reviews["text"],
)

# Write through a context manager so the file is flushed and closed deterministically
# instead of leaking the handle. Binary mode with an explicit UTF-8 encode keeps the
# bytes on disk identical on every platform (no newline translation).
with open(scatter_fname, 'wb') as html_file:
    html_file.write(scatter_html.encode('utf-8'))
print(f'Open {scatter_fname} in Chrome or Firefox.')



You can download the HTML file and open it in Chrome or Firefox.