# Import Libraries

In [6]:
import os

import pandas as pd
from IPython.core.interactiveshell import InteractiveShell
from tqdm import tqdm

from topicutils import get_topics_lda, sample_polarity

# Hook tqdm into pandas so the progress_* variants of apply/map are available
tqdm.pandas()

# Display the value of every expression statement in a cell, not only the last
InteractiveShell.ast_node_interactivity = "all"

# Directory the dataset CSV files are read from (relative to the notebook)
DATASET_DIR = '../dataset/'

# Load Dataset

In [12]:
# Read each city's preprocessed CSV and discard any row that has a missing value
london = pd.read_csv(os.path.join(DATASET_DIR, 'london_preprocessed.csv')).dropna()
nyc = pd.read_csv(os.path.join(DATASET_DIR, 'nyc_preprocessed.csv')).dropna()

# Stack both cities row-wise into one combined frame (London rows first);
# each frame's original index labels are carried over unchanged.
data = pd.concat([london, nyc], axis=0)

In [13]:
# Number of rows requested for every sample drawn below
sample_size = 10000

# Random per-city samples. No seed is passed, so the rows differ between runs.
sample_london, sample_nyc = [
    frame.sample(n=sample_size) for frame in (london, nyc)
]

# Polarity-specific samples, drawn in the order: combined data, London, NYC.
# Within each pair the 1.0 sample comes first, then the 0.0 sample.
# NOTE(review): 1.0 / 0.0 are presumably the positive / negative polarity
# labels expected by sample_polarity -- confirm against topicutils.
sample_positives, sample_negatives = [
    sample_polarity(data, polarity, sample_size) for polarity in (1.0, 0.0)
]
sample_london_positives, sample_london_negatives = [
    sample_polarity(london, polarity, sample_size) for polarity in (1.0, 0.0)
]
sample_nyc_positives, sample_nyc_negatives = [
    sample_polarity(nyc, polarity, sample_size) for polarity in (1.0, 0.0)
]

# Run LDA

In [16]:
# Number of topics requested from every LDA run
num_topics = 5

# (sample, label) pairs, listed in the exact order the runs are executed.
# The label is the third argument to get_topics_lda; judging by the recorded
# output it appears in the "Topics for <label>" heading of each listing.
lda_jobs = [
    (sample_london, 'London'),
    (sample_nyc, 'NYC'),
    (sample_positives, 'Positive Reviews'),
    (sample_negatives, 'Negative Reviews'),
    (sample_london_positives, 'London Positive Reviews'),
    (sample_london_negatives, 'London Negative Reviews'),
    (sample_nyc_positives, 'NYC Positive Reviews'),
    (sample_nyc_negatives, 'NYC Negative Reviews'),
]

# Run the jobs sequentially; each call yields a two-item result.
lda_results = [
    get_topics_lda(sample, num_topics, label) for sample, label in lda_jobs
]

# Unpack every result into its own pair of names so later cells can
# refer to each model / vectorizer individually.
(
    (lda_london, vect_london),
    (lda_nyc, vect_nyc),
    (lda_positives, vect_positives),
    (lda_negatives, vect_negatives),
    (lda_london_positives, vect_london_positives),
    (lda_london_negatives, vect_london_negatives),
    (lda_nyc_positives, vect_nyc_positives),
    (lda_nyc_negatives, vect_nyc_negatives),
) = lda_results

Topics for London
	Topic 0:
['host', 'needed', 'pleasant', 'nice', 'london', 'place', 'location', 'great', 'stay', 'perfect']

	Topic 1:
['location', 'perfect', 'loved', 'room', 'clean', 'host', 'great', 'stay', 'place', 'nice']

	Topic 2:
['comfortable', 'host', 'nice', 'apartment', 'perfect', 'location', 'place', 'london', 'great', 'stay']

	Topic 3:
['close', 'host', 'clean', 'location', 'room', 'lovely', 'stay', 'great', 'place', 'nice']

	Topic 4:
['good', 'excellent', 'lovely', 'clean', 'nice', 'host', 'location', 'place', 'stay', 'great']

Topics for NYC
	Topic 0:
['house', 'apartment', 'comfortable', 'home', 'host', 'clean', 'great', 'nice', 'stay', 'place']

	Topic 1:
['location', 'close', 'subway', 'apartment', 'nice', 'clean', 'stay', 'place', 'great', 'easy']

	Topic 2:
['comfortable', 'amazing', 'perfect', 'clean', 'nice', 'host', 'location', 'stay', 'place', 'great']

	Topic 3:
['host', 'perfect', 'nice', 'clean', 'apartment', 'stay', 'location', 'described', 'place', 'gr