In [1]:
import sys
sys.path.append('../')
import wrangle

import nlp

In [2]:
from sklearn.decomposition import LatentDirichletAllocation
from sklearn.feature_extraction.text import CountVectorizer
import pandas as pd
import numpy as np

In [3]:
# Display option: never truncate the column list when a DataFrame is rendered.
pd.options.display.max_columns = None

# wrangle.wrangle_data() returns two values, unpacked here as `df` and `data_dict`.
# NOTE(review): neither name is used in the cells visible in this section - confirm
# that this call is still needed.
df, data_dict = wrangle.wrangle_data()

In [4]:
# Load topics.csv (path is relative to the notebook's working directory); this is the
# table that is filtered by persona_id below.
big_df = pd.read_csv(
    'topics.csv',
    index_col=False,  # do not use any CSV column as the row index
)

In [5]:
# Remove the 'Unnamed: 0' column (presumably an index written out by an earlier to_csv).
# Rebinding with errors='ignore' instead of inplace=True keeps this cell safe to re-run:
# a second in-place drop raises KeyError because the column is already gone.
big_df = big_df.drop(columns='Unnamed: 0', errors='ignore')

In [8]:
# Keep only the rows whose persona_id equals 4.
# NOTE(review): presumably persona 4 is the "Moderately Experienced" group named in the
# heading below - confirm against the persona mapping.
mod_exp = big_df.loc[big_df['persona_id'] == 4]

In [33]:
# (rows, columns) of the persona subset.
mod_exp.shape

(181, 31)

# Moderately Experienced
## What topics would they be most attracted to at a conference about research?

Q21: ideal topics (`ideal_topics_text`)


In [18]:
# Custom words to exclude from keyword extraction; handed to nlp.set_stop_words
# in the next cell.
stop_words = [
    'like', 'plus', 'real', 'love', 'big', 'avoiding', 'mean',
    'content', 'people', 'problem', 'doing', 'using', 'research', 'work',
    'don', 'make', 'conference', 'good', 'best', 'self', 'report',
]

In [19]:
# Build the stop-word collection that the keyword helpers below receive.
# NOTE(review): presumably merges the custom list with a default English list -
# confirm in nlp.set_stop_words.
stopWords = nlp.set_stop_words(stop_words)

### keywords

In [23]:
# Keywords for the ideal-topics answers, n-gram sizes 1 through 3
# (the recorded output contains single words and two-word phrases).
nlp.show_column_keywords(
    mod_exp['ideal_topics_text'],
    max_df=0.5,
    stop_words=stopWords,
    ngram_range=(1, 3),
)

['mixed',
 'method',
 'approach',
 'artifact',
 'design',
 'presentation',
 'mixed method',
 'method approach',
 'question',
 'data',
 'influencing',
 'leadership',
 'management',
 'global',
 'leading',
 'team',
 'way',
 'plan',
 'support',
 'skill']

In [22]:
# Multi-word phrases for the ideal-topics answers, n-gram sizes 2 through 3
# (bigrams and trigrams, not bigrams only).
nlp.show_column_keywords(
    mod_exp['ideal_topics_text'],
    max_df=0.5,
    stop_words=stopWords,
    ngram_range=(2, 3),
)

['mixed method',
 'method approach',
 'leading team',
 'product development',
 'large organization',
 'ops case',
 'case study',
 'ops case study',
 'product team',
 'method analysis',
 'analysis method',
 'study practice',
 'case study practice',
 'business strategy',
 'new method',
 'career path',
 'design product',
 'structure team',
 'working product',
 'agile product']

### topics

In [24]:
# Number of rows in the persona subset per ideal-topics topic label.
mod_exp['ideal_topics_topic_id'].value_counts()

case_study                  65
new_method, mixed_method    48
quant/qual/data             40
ops/ai                      28
Name: ideal_topics_topic_id, dtype: int64

## Who would they expect to see at a conference about research?

Q22: ideal attendees (`ideal_attendees_text`)

In [11]:
# Custom stop words - kept identical to the list defined in the ideal-topics section.
# The execution counts show this cell ran as In [11], i.e. before the longer list was
# defined in In [18]/[19]; the recorded outputs from In [25] onward were therefore
# produced with the longer list. With the shorter list here, Restart & Run All would
# silently use different stop words and not reproduce those outputs.
stop_words = [
    'like', 'plus', 'real', 'love', 'big', 'avoiding', 'mean',
    'content', 'people', 'problem', 'doing', 'using', 'research', 'work',
    'don', 'make', 'conference', 'good', 'best', 'self', 'report',
]

In [12]:
# Rebuild stopWords from the stop_words list defined in the previous cell; every
# keyword call below this point receives this value.
stopWords = nlp.set_stop_words(stop_words)

In [25]:
# Keywords for the ideal-attendees answers; ngram_range is not passed, so the
# helper's own default applies.
nlp.show_column_keywords(
    mod_exp['ideal_attendees_text'],
    max_df=0.5,
    stop_words=stopWords,
)

['steve',
 'portigal',
 'steve portigal',
 'genevieve',
 'young',
 'graduate',
 'importantly',
 'client',
 'experience',
 'startup',
 'consumer',
 'manager',
 'researcher',
 'present',
 'case',
 'study',
 'case study',
 'jared',
 'spool',
 'team']

In [26]:
# Multi-word phrases for the ideal-attendees answers, n-gram sizes 2 through 3
# (bigrams and trigrams).
nlp.show_column_keywords(
    mod_exp['ideal_attendees_text'],
    max_df=0.5,
    stop_words=stopWords,
    ngram_range=(2, 3),
)

['steve portigal',
 'case study',
 'jared spool',
 'spool steve',
 'organisation google',
 'google facebook',
 'jared spool steve',
 'spool steve portigal',
 'small large',
 'public sector',
 'queer color',
 'present perspective',
 'design team',
 'leader field',
 'sam ladner',
 'leisa reichelt',
 'reichelt erika',
 'erika hall',
 'leisa reichelt erika',
 'reichelt erika hall']

In [27]:
# Number of rows in the persona subset per ideal-attendees topic label.
mod_exp['ideal_attendees_topic_id'].value_counts()

industry, team, product    95
indi young                 34
sam ladner, erika hall     27
experience, jared spool    25
Name: ideal_attendees_topic_id, dtype: int64

## What advice do they have for the Rosenfeld Media team in pursuing a conference?

Q23: recommendations (`recommendations_text`)

In [28]:
# Keywords for the recommendation answers, n-gram sizes 1 through 7, with a
# looser max_df (0.9) than the sections above.
nlp.show_column_keywords(
    mod_exp['recommendations_text'],
    max_df=0.9,
    stop_words=stopWords,
    ngram_range=(1, 7),
)

['read',
 'book',
 'learn',
 'lecture',
 'hand',
 'help',
 'run',
 'workshop',
 'career',
 'sure',
 'design',
 'difficult',
 'business',
 'approach',
 'try',
 'teach',
 'case',
 'away',
 'world',
 'fun']

In [29]:
# Multi-word phrases for the recommendation answers, n-gram sizes 2 through 7.
nlp.show_column_keywords(
    mod_exp['recommendations_text'],
    max_df=0.9,
    stop_words=stopWords,
    ngram_range=(2, 7),
)

['interesting way',
 'consider bringing',
 'ticket price',
 'price point',
 'product manager',
 'practical application',
 'industry background',
 'hand activity',
 'speaker new',
 'great idea',
 'thought leader',
 'talk workshop',
 'person hear',
 'informed consent',
 'participant time',
 'north america']

In [30]:
# Number of rows in the persona subset per recommendations topic label.
mod_exp['recommendations_topic_id'].value_counts()

speaker, industry      84
event, opportunity     29
good, know             26
group, career, city    25
field, survery         17
Name: recommendations_topic_id, dtype: int64

## Top Documents per Topic

In [31]:
# Build the term-count matrix for the recommendation answers. The helper returns the
# matrix plus a second object bound to `count_vect` (presumably the fitted
# CountVectorizer - confirm in nlp.create_wordcount_matrix).
doc_term_matrix, count_vect = nlp.create_wordcount_matrix(mod_exp.recommendations_text, max_df=.3, ngram=(1,3), stop_words=stopWords)

# Four-topic LDA model with a fixed random_state.
# NOTE(review): the recommendations_topic_id counts above list five topic labels, while
# n_components=4 here - confirm which topic count is intended.
LDA = LatentDirichletAllocation(n_components=4, random_state=42)

# Fit on the persona subset only; the fitted estimator's repr is the cell output.
LDA.fit(doc_term_matrix)

LatentDirichletAllocation(batch_size=128, doc_topic_prior=None,
                          evaluate_every=-1, learning_decay=0.7,
                          learning_method='batch', learning_offset=10.0,
                          max_doc_update_iter=100, max_iter=10,
                          mean_change_tol=0.001, n_components=4, n_jobs=None,
                          perp_tol=0.1, random_state=42, topic_word_prior=None,
                          total_samples=1000000.0, verbose=0)

In [None]:
# Topic index -> label for the recommendation topics (indices 0-4 in list order).
# The spelling 'survery' is deliberate here: it matches the label shown in the
# recommendations_topic_id counts above.
# NOTE(review): this cell has no execution count and the dict is not referenced in the
# cells visible in this section.
recommendations_dict = dict(enumerate([
    'speaker, industry',
    'event, opportunity',
    'good, know',
    'field, survery',
    'group, career, city',
]))

In [32]:
# Show the top 5 recommendation answers for each topic, using the document-topic
# matrix from the LDA model fitted above.
nlp.find_top_documents_per_topic(
    LDA.transform(doc_term_matrix),
    mod_exp['recommendations_text'],
    5,
)

Top 5 Documents for Topic 0: 

Document 1
I realize this is challenging but cost matters so try to keep it low. In some cases, I've paid for things myself because the company hasn't paid for it. I wish more event planners would consider regular everyday job blow who can't pay thousands to go to a conference, especially if they need to travel too. 

Document 2
Make it a mix of workshops and talks and try to avoid the tired "advocate for the user", "be empathetic" user research rallying cries of yore. That stuff is boring and meaningless. Explore research ethics and design justice, informed consent, and what it means to scale user research in organisations and why it's difficult.

Document 3
Have a well-written code of conduct.
Share materials (slides, talks) afterward as much as possible. Having access to these allows researchers to socialize what we learned within our orgs.
Cater to more experienced researchers - there are already so many places to learn methods and basics but very few