### Imported the Following Models

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import stop_words
import nltk
nltk.download('wordnet')
nltk.download('stopwords')

from nltk.tokenize import sent_tokenize, word_tokenize, RegexpTokenizer
from nltk.stem import WordNetLemmatizer
from nltk.stem.porter import PorterStemmer
from nltk.corpus import stopwords
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import re
import string
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, plot_confusion_matrix
from sklearn.metrics import accuracy_score, plot_roc_curve, roc_auc_score, recall_score, precision_score, f1_score


# Import CountVectorizer and TFIDFVectorizer from feature_extraction.text.
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

from nltk.sentiment.vader import SentimentIntensityAnalyzer

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, plot_confusion_matrix

pd.options.display.max_colwidth = 200

[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/edwardmendoza/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/edwardmendoza/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [2]:
pd.set_option("max_rows", 100)

In [3]:
mens_issues = pd.read_csv('men_df_eda.csv')

In [4]:
mens_issues.drop(columns = ['Unnamed: 0', 'title', 'selftext', 'subreddit', 'author', 'post_length', 'all_text2'], inplace=True)

In [5]:
mens_issues.head()

Unnamed: 0,all_text
0,masculin fear ordinari need superior chosen endeavour constantli strive good better insult describ averag ordinari mediocr insult caus masculin threaten discuss particular aspect masculin aspect m...
1,date resourc
2,vunrabl
3,walk away women abort
4,mainstream


In [6]:
mens_issues.dropna(inplace=True)

In [7]:
mens_issues.isnull().sum()

all_text    0
dtype: int64

_____

### Topic Modeling

In [8]:
from gensim import matutils, models
import scipy.sparse

import pickle

In [9]:
tdm = mens_issues.transpose()
tdm.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,26595,26596,26597,26598,26599,26600,26601,26602,26603,26604
all_text,masculin fear ordinari need superior chosen endeavour constantli strive good better insult describ averag ordinari mediocr insult caus masculin threaten discuss particular aspect masculin aspect m...,date resourc,vunrabl,walk away women abort,mainstream,think popular discours harm convers better,stop abort,stereotyp,anybodi check palgrav handbook male psycholog mental health thought read good sourc mental health hard shell euro imagin check thought,group better help woman wife girl licens profession counselor start youtub channel address mental health free laugh return read therapist help deepli hurt silent struggl love work strong pull rea...,...,grow multipl sister fightget ampnbsp look ampnbsp think effect ampnbsp effemin result ampnbsp present relationship,want help boyfriend learn respons boyfriend relationship deterior graduat univers finish spring lucki great program work coupl cours finish graduat summer work field work labour laid issu doesnt...,enjoy oral girl,male equival flower girl flower nice gestur girl,decis life turn unpleas success,romant intim andor sexual relationship honestli think chang,arrang date bisexu continu annoy girl simpli practic process arrang date imposs lose continu string girl repli text vagu unhelp answer moment girl date went text follow shed great want meet chat ...,favorit podcast listen,respond hand written letter express silli simpl straightforward edit decid good clariti thank honesti,grow mustach clip hair center leav grow contribut outer mustach look mustach advic sure vari depend style sure worthwhil topic


In [10]:
from sklearn.feature_extraction.text import CountVectorizer

cv = CountVectorizer(stop_words='english')
data_cv = cv.fit_transform(mens_issues.all_text)
data_dtm = pd.DataFrame(data_cv.toarray(), columns=cv.get_feature_names())
data_dtm.index = mens_issues.index
data_dtm.head()

Unnamed: 0,aaaaaaa,aaaaaaaaaahhhhhhhh,aaaaaaaaargh,aaaaaaaanyway,aardvark,aaron,aawww,ababa,aback,abandon,...,카카오스토리,카톡mad,키워드광고,페이스북,페이스북광고,홈페이지만드는법,홈페이지제작방법,홈페이지제작사이트,홈페이지제작업체,홈페이지형블로그
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [22]:
sparse_counts = scipy.sparse.csr_matrix(data_dtm)
corpus = matutils.Sparse2Corpus(sparse_counts)

In [23]:
corpus

<gensim.matutils.Sparse2Corpus at 0x1258525e0>

In [29]:
from sklearn.feature_extraction.text import CountVectorizer

# Recreate document-term matrix
cv = CountVectorizer(stop_words=stop_words.stop_words)
data_cv = cv.fit_transform(mens_issues.all_text)
data_stop = pd.DataFrame(data_cv.toarray(), columns=cv.get_feature_names())
data_stop.index = mens_issues.index

# Pickle it for later use
pickle.dump(cv, open("cv_stop.pkl", "wb"))

In [30]:
cv = pickle.load(open("cv_stop.pkl", "rb"))
id2word = dict((v, k) for k, v in cv.vocabulary_.items())

In [32]:
lda = models.LdaModel(corpus=corpus, id2word=id2word, num_topics=2, passes=10)
lda.print_topics()

[(0,
  '0.005*"autoerotic" + 0.004*"catastoph" + 0.004*"coattail" + 0.004*"ablat" + 0.004*"battlefront" + 0.003*"chequ" + 0.003*"coars" + 0.003*"child" + 0.003*"afghanistan" + 0.003*"bigfram"'),
 (1,
  '0.003*"gail" + 0.003*"bugaloo" + 0.002*"gona" + 0.002*"hatefil" + 0.002*"burr" + 0.002*"girl" + 0.001*"bezo" + 0.001*"garth" + 0.001*"electromagnet" + 0.001*"growingup"')]

In [36]:
### passing the model with lower passes, increased num_topics
lda2 = models.LdaModel(corpus=corpus, id2word=id2word, num_topics=10, passes=5)
lda2.print_topics()

[(0,
  '0.005*"dehuman" + 0.004*"burr" + 0.004*"defenc" + 0.004*"anarchosyndicalist" + 0.004*"datingand" + 0.003*"earthsea" + 0.003*"attende" + 0.003*"fatigu" + 0.003*"tommi" + 0.003*"dickless"'),
 (1,
  '0.005*"garth" + 0.005*"bezo" + 0.005*"girl" + 0.005*"hatefil" + 0.004*"cipralex" + 0.004*"girlsll" + 0.003*"grater" + 0.003*"gail" + 0.003*"girlfriendi" + 0.003*"exposur"'),
 (2,
  '0.010*"gail" + 0.009*"growingup" + 0.006*"gamedev" + 0.005*"goddamn" + 0.004*"crowi" + 0.004*"friendsformerli" + 0.004*"catchuphello" + 0.004*"automod" + 0.004*"greatgrandmoth" + 0.004*"friendso"'),
 (3,
  '0.010*"autoerotic" + 0.006*"child" + 0.005*"cismenmen" + 0.005*"dieseas" + 0.005*"deathb" + 0.004*"bigotrymisconcept" + 0.004*"centauri" + 0.004*"crazyi" + 0.003*"diddidnt" + 0.003*"abruptli"'),
 (4,
  '0.012*"battlefront" + 0.005*"haphazardli" + 0.004*"havе" + 0.004*"finsta" + 0.004*"flat" + 0.003*"hasten" + 0.003*"dwarfism" + 0.003*"nonconsentu" + 0.003*"finac" + 0.003*"envioment"'),
 (5,
  '0.021*"ab

In [37]:
### passing the model with lower passes, increased num_topics
lda3 = models.LdaModel(corpus=corpus, id2word=id2word, num_topics=5, passes=5)
lda3.print_topics()

[(0,
  '0.008*"coars" + 0.006*"battlefront" + 0.003*"gamedev" + 0.003*"haphazardli" + 0.003*"gail" + 0.003*"finsta" + 0.002*"gona" + 0.002*"excusedelus" + 0.002*"dwarfism" + 0.002*"garth"'),
 (1,
  '0.011*"fancentro" + 0.007*"banish" + 0.005*"burr" + 0.004*"chin" + 0.004*"argumentfight" + 0.003*"goddamn" + 0.003*"ccvictimsskycom" + 0.003*"monthsold" + 0.003*"betray" + 0.003*"beholden"'),
 (2,
  '0.005*"bugaloo" + 0.004*"gail" + 0.002*"hatefil" + 0.002*"finest" + 0.002*"electromagnet" + 0.002*"homework" + 0.002*"girl" + 0.002*"friendso" + 0.002*"foolhardi" + 0.002*"burr"'),
 (3,
  '0.011*"bezo" + 0.005*"comichttpimgurcomaxm" + 0.004*"betasi" + 0.004*"centauri" + 0.004*"cuddli" + 0.004*"bloodbath" + 0.003*"fatigu" + 0.003*"afsp" + 0.003*"growingup" + 0.003*"bathroom"'),
 (4,
  '0.006*"autoerotic" + 0.005*"catastoph" + 0.005*"coattail" + 0.005*"ablat" + 0.004*"chequ" + 0.004*"child" + 0.004*"afghanistan" + 0.003*"cismenmen" + 0.003*"bigfram" + 0.003*"blackpil"')]

Based on these passes of topic modeling via LDA, it's difficult to interpret the topics that we have here. Will adjust the words that will be passed through the model 

#### Topic Modeling - Nouns

In [34]:
#credits to Alice Zhao
# Let's create a function to pull out nouns from a string of text
from nltk import word_tokenize, pos_tag

def nouns(text):
    '''Given a string of text, tokenize the text and pull out only the nouns.'''
    is_noun = lambda pos: pos[:2] == 'NN'
    tokenized = word_tokenize(text)
    all_nouns = [word for (word, pos) in pos_tag(tokenized) if is_noun(pos)] 
    return ' '.join(all_nouns)

In [35]:
mens_issues['all_text'] = mens_issues.all_text.apply(nouns)

In [38]:
# Create a new document-term matrix using only nouns
from sklearn.feature_extraction.text import CountVectorizer

# Recreate a document-term matrix with only nouns
cvn = CountVectorizer(stop_words=stop_words.stop_words)
data_cvn = cvn.fit_transform(mens_issues.all_text)
data_dtmn = pd.DataFrame(data_cvn.toarray(), columns=cvn.get_feature_names())
data_dtmn.index = mens_issues.all_text.index
#data_dtmn

In [39]:
# Create the gensim corpus
corpusn = matutils.Sparse2Corpus(scipy.sparse.csr_matrix(data_dtmn.transpose()))

# Create the vocabulary dictionary
id2wordn = dict((v, k) for k, v in cvn.vocabulary_.items())

In [40]:
ldan = models.LdaModel(corpus=corpusn, num_topics=2, id2word=id2wordn, passes=10)
ldan.print_topics()

[(0,
  '0.021*"friend" + 0.020*"girl" + 0.020*"date" + 0.017*"work" + 0.016*"relationship" + 0.014*"life" + 0.011*"help" + 0.009*"start" + 0.008*"look" + 0.006*"school"'),
 (1,
  '0.039*"women" + 0.016*"woman" + 0.008*"delet" + 0.007*"male" + 0.007*"hair" + 0.006*"masculin" + 0.005*"look" + 0.005*"issu" + 0.005*"attract" + 0.005*"gener"')]

In [41]:
#Pass 2
ldan2 = models.LdaModel(corpus=corpusn, num_topics=2, id2word=id2wordn, passes=10)
ldan2.print_topics()

[(0,
  '0.023*"friend" + 0.022*"girl" + 0.021*"date" + 0.018*"relationship" + 0.014*"work" + 0.012*"life" + 0.010*"look" + 0.009*"start" + 0.007*"woman" + 0.006*"help"'),
 (1,
  '0.028*"women" + 0.012*"help" + 0.007*"issu" + 0.007*"work" + 0.007*"life" + 0.006*"male" + 0.005*"health" + 0.005*"masculin" + 0.005*"posit" + 0.004*"problem"')]

In [42]:
#Pass 3, increasing num_topics and passes
ldan2 = models.LdaModel(corpus=corpusn, num_topics=5, id2word=id2wordn, passes=15)
ldan2.print_topics()

[(0,
  '0.025*"hair" + 0.020*"watch" + 0.015*"favorit" + 0.014*"look" + 0.013*"bodi" + 0.012*"movi" + 0.010*"porn" + 0.010*"video" + 0.010*"face" + 0.007*"style"'),
 (1,
  '0.076*"girl" + 0.074*"date" + 0.043*"relationship" + 0.042*"women" + 0.025*"woman" + 0.019*"delet" + 0.018*"friend" + 0.014*"look" + 0.013*"attract" + 0.012*"partner"'),
 (2,
  '0.024*"work" + 0.007*"studi" + 0.007*"colleg" + 0.007*"univers" + 0.006*"success" + 0.006*"graduat" + 0.006*"money" + 0.006*"career" + 0.005*"field" + 0.005*"school"'),
 (3,
  '0.030*"women" + 0.015*"masculin" + 0.012*"male" + 0.012*"issu" + 0.010*"gender" + 0.009*"discuss" + 0.008*"posit" + 0.007*"commun" + 0.007*"help" + 0.007*"rape"'),
 (4,
  '0.023*"friend" + 0.020*"life" + 0.018*"work" + 0.014*"help" + 0.013*"start" + 0.008*"school" + 0.007*"relationship" + 0.007*"look" + 0.006*"love" + 0.006*"problem"')]

In [43]:
#Pass 4, increasing num_topics and passes
ldan3 = models.LdaModel(corpus=corpusn, num_topics=5, id2word=id2wordn, passes=15)
ldan3.print_topics()

[(0,
  '0.053*"girl" + 0.053*"friend" + 0.051*"date" + 0.025*"relationship" + 0.013*"look" + 0.012*"girlfriend" + 0.010*"start" + 0.009*"text" + 0.008*"work" + 0.007*"meet"'),
 (1,
  '0.078*"women" + 0.031*"woman" + 0.013*"masculin" + 0.010*"male" + 0.010*"gender" + 0.008*"gener" + 0.008*"issu" + 0.007*"discuss" + 0.007*"attract" + 0.006*"commun"'),
 (2,
  '0.022*"help" + 0.018*"parent" + 0.012*"hair" + 0.009*"bodi" + 0.007*"money" + 0.007*"father" + 0.007*"mother" + 0.007*"famili" + 0.006*"deal" + 0.006*"need"'),
 (3,
  '0.029*"life" + 0.026*"work" + 0.013*"relationship" + 0.012*"help" + 0.010*"love" + 0.009*"start" + 0.009*"school" + 0.007*"point" + 0.007*"chang" + 0.007*"care"'),
 (4,
  '0.053*"delet" + 0.014*"masturb" + 0.014*"health" + 0.012*"help" + 0.012*"porn" + 0.012*"watch" + 0.010*"studi" + 0.009*"peni" + 0.008*"video" + 0.007*"research"')]

In [44]:
# Create the gensim corpus
corpusn = matutils.Sparse2Corpus(scipy.sparse.csr_matrix(data_dtmn.transpose()))

# Create the vocabulary dictionary
id2wordn = dict((v, k) for k, v in cvn.vocabulary_.items())

In [45]:
#pass 5
ldan4 = models.LdaModel(corpus=corpusn, num_topics=2, id2word=id2wordn, passes=10)
ldan4.print_topics()

[(0,
  '0.039*"women" + 0.014*"woman" + 0.007*"male" + 0.007*"delet" + 0.007*"hair" + 0.006*"masculin" + 0.005*"look" + 0.005*"bodi" + 0.005*"issu" + 0.004*"gender"'),
 (1,
  '0.021*"friend" + 0.020*"girl" + 0.020*"date" + 0.017*"work" + 0.016*"relationship" + 0.014*"life" + 0.010*"help" + 0.009*"start" + 0.008*"look" + 0.006*"school"')]

In [46]:
#pass 6
ldan5 = models.LdaModel(corpus=corpusn, num_topics=5, id2word=id2wordn, passes=5)
ldan5.print_topics()

[(0,
  '0.085*"women" + 0.057*"date" + 0.048*"girl" + 0.042*"woman" + 0.030*"delet" + 0.020*"attract" + 0.017*"look" + 0.012*"partner" + 0.010*"bodi" + 0.010*"relationship"'),
 (1,
  '0.027*"hair" + 0.011*"movi" + 0.010*"product" + 0.009*"studi" + 0.007*"style" + 0.007*"film" + 0.007*"watch" + 0.007*"work" + 0.006*"health" + 0.006*"routin"'),
 (2,
  '0.029*"friend" + 0.020*"work" + 0.020*"relationship" + 0.017*"girl" + 0.017*"life" + 0.014*"date" + 0.012*"start" + 0.009*"help" + 0.008*"look" + 0.007*"school"'),
 (3,
  '0.021*"parent" + 0.021*"help" + 0.011*"favorit" + 0.010*"famili" + 0.009*"money" + 0.009*"life" + 0.009*"need" + 0.009*"mother" + 0.008*"father" + 0.008*"wife"'),
 (4,
  '0.011*"male" + 0.011*"masculin" + 0.010*"issu" + 0.009*"gender" + 0.008*"rule" + 0.008*"posit" + 0.008*"discuss" + 0.008*"commun" + 0.007*"gener" + 0.006*"help"')]

In [47]:
#pass 7
ldan6 = models.LdaModel(corpus=corpusn, num_topics=5, id2word=id2wordn, passes=10)
ldan6.print_topics()

[(0,
  '0.059*"women" + 0.012*"male" + 0.012*"masculin" + 0.009*"gender" + 0.007*"discuss" + 0.007*"issu" + 0.007*"gener" + 0.006*"view" + 0.005*"posit" + 0.005*"commun"'),
 (1,
  '0.043*"friend" + 0.031*"girl" + 0.024*"relationship" + 0.021*"work" + 0.019*"date" + 0.012*"start" + 0.010*"girlfriend" + 0.009*"school" + 0.008*"tell" + 0.007*"home"'),
 (2,
  '0.033*"life" + 0.031*"help" + 0.017*"work" + 0.009*"depress" + 0.009*"problem" + 0.007*"start" + 0.007*"need" + 0.007*"chang" + 0.007*"health" + 0.007*"thank"'),
 (3,
  '0.010*"bodi" + 0.008*"look" + 0.008*"compliment" + 0.007*"woman" + 0.007*"ball" + 0.006*"hand" + 0.006*"creepi" + 0.005*"cloth" + 0.005*"receiv" + 0.005*"food"'),
 (4,
  '0.051*"date" + 0.027*"woman" + 0.025*"girl" + 0.025*"delet" + 0.021*"look" + 0.020*"women" + 0.017*"attract" + 0.016*"relationship" + 0.014*"partner" + 0.014*"hair"')]

In [48]:
#pass 8
ldan7 = models.LdaModel(corpus=corpusn, num_topics=5, id2word=id2wordn, passes=10)
ldan7.print_topics()

[(0,
  '0.089*"women" + 0.020*"woman" + 0.013*"male" + 0.010*"gender" + 0.008*"discuss" + 0.008*"gener" + 0.007*"issu" + 0.006*"rape" + 0.006*"face" + 0.006*"view"'),
 (1,
  '0.060*"girl" + 0.058*"date" + 0.044*"relationship" + 0.019*"friend" + 0.017*"look" + 0.015*"delet" + 0.011*"woman" + 0.009*"text" + 0.009*"girlfriend" + 0.009*"attract"'),
 (2,
  '0.035*"friend" + 0.027*"work" + 0.016*"life" + 0.015*"school" + 0.011*"start" + 0.009*"parent" + 0.009*"colleg" + 0.009*"home" + 0.006*"tell" + 0.006*"doesnt"'),
 (3,
  '0.013*"posit" + 0.013*"watch" + 0.012*"favorit" + 0.011*"masculin" + 0.011*"love" + 0.010*"movi" + 0.007*"play" + 0.006*"video" + 0.006*"film" + 0.005*"style"'),
 (4,
  '0.032*"help" + 0.019*"life" + 0.015*"work" + 0.010*"depress" + 0.009*"problem" + 0.009*"look" + 0.008*"hair" + 0.008*"thank" + 0.007*"health" + 0.007*"need"')]

_____

#### Topic Modeling - Attempt#3 (Nouns and Adjectives)

In [49]:
# Function to pull out nuns and adjectives 
def nouns_adj(text):
    '''Given a string of text, tokenize the text and pull out only the nouns and adjectives.'''
    is_noun_adj = lambda pos: pos[:2] == 'NN' or pos[:2] == 'JJ'
    tokenized = word_tokenize(text)
    nouns_adj = [word for (word, pos) in pos_tag(tokenized) if is_noun_adj(pos)] 
    return ' '.join(nouns_adj)

In [50]:
data_nouns_adj = pd.DataFrame(mens_issues.all_text.apply(nouns_adj))
#data_nouns_adj.head()

Unnamed: 0,all_text
0,masculin need constantli insult averag mediocr insult caus masculin masculin masculin drive compet masculin masculin includ believ fear gender acknowledg respons forth motiv masculin desir foremos...
1,date resourc
2,vunrabl
3,women
4,mainstream


In [51]:
# Create a new document-term matrix using only nouns and adjectives, also remove common words with max_df

cvna = CountVectorizer(stop_words=stop_words.stop_words, max_df=.8)
data_cvna = cvna.fit_transform(data_nouns_adj.all_text)
data_dtmna = pd.DataFrame(data_cvna.toarray(), columns=cvna.get_feature_names())
data_dtmna.index = data_nouns_adj.index
#data_dtmna.head()



Unnamed: 0,aaaaaaa,aaaaaaaaaahhhhhhhh,aaaaaaaaargh,aardvark,aaron,aawww,aback,abandon,abandonedi,abat,...,카카오스토리,카톡mad,키워드광고,페이스북,페이스북광고,홈페이지만드는법,홈페이지제작방법,홈페이지제작사이트,홈페이지제작업체,홈페이지형블로그
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [52]:
# Create the gensim corpus
corpusna = matutils.Sparse2Corpus(scipy.sparse.csr_matrix(data_dtmna.transpose()))

# Create the vocabulary dictionary
id2wordna = dict((v, k) for k, v in cvna.vocabulary_.items())

In [53]:
# Let's start with 2 topics
ldana = models.LdaModel(corpus=corpusna, num_topics=2, id2word=id2wordna, passes=10)
ldana.print_topics()

[(0,
  '0.022*"date" + 0.022*"girl" + 0.019*"friend" + 0.018*"relationship" + 0.018*"work" + 0.016*"life" + 0.010*"help" + 0.008*"start" + 0.007*"look" + 0.007*"school"'),
 (1,
  '0.041*"women" + 0.015*"woman" + 0.013*"delet" + 0.008*"male" + 0.007*"hair" + 0.006*"masculin" + 0.005*"issu" + 0.005*"bodi" + 0.004*"gener" + 0.004*"gender"')]

In [55]:
#Pass2- Let's try 5 topics, 20 passes
ldana2 = models.LdaModel(corpus=corpusna, num_topics=5, id2word=id2wordna, passes=20)
ldana2.print_topics()

[(0,
  '0.037*"date" + 0.037*"girl" + 0.030*"work" + 0.026*"relationship" + 0.024*"friend" + 0.014*"life" + 0.014*"help" + 0.013*"school" + 0.010*"start" + 0.009*"look"'),
 (1,
  '0.019*"women" + 0.017*"masculin" + 0.017*"male" + 0.012*"gender" + 0.012*"issu" + 0.010*"health" + 0.009*"discuss" + 0.009*"help" + 0.008*"support" + 0.007*"commun"'),
 (2,
  '0.023*"life" + 0.014*"friend" + 0.012*"problem" + 0.012*"delet" + 0.011*"deal" + 0.009*"depress" + 0.008*"hate" + 0.008*"help" + 0.008*"relationship" + 0.007*"point"'),
 (3,
  '0.069*"women" + 0.035*"woman" + 0.014*"hair" + 0.013*"bodi" + 0.013*"partner" + 0.012*"look" + 0.012*"attract" + 0.010*"watch" + 0.008*"dick" + 0.007*"face"'),
 (4,
  '0.037*"parent" + 0.017*"wife" + 0.015*"favorit" + 0.014*"famili" + 0.014*"mother" + 0.013*"father" + 0.013*"money" + 0.011*"turn" + 0.011*"marri" + 0.011*"children"')]

In [56]:
#Pass 3 - Let's try 5 topics, 30 passes
ldana2 = models.LdaModel(corpus=corpusna, num_topics=5, id2word=id2wordna, passes=30)
ldana2.print_topics()

[(0,
  '0.017*"favorit" + 0.013*"movi" + 0.012*"watch" + 0.009*"wife" + 0.009*"think" + 0.009*"love" + 0.008*"babi" + 0.008*"play" + 0.008*"film" + 0.008*"compliment"'),
 (1,
  '0.037*"date" + 0.036*"girl" + 0.034*"friend" + 0.032*"relationship" + 0.009*"start" + 0.009*"look" + 0.008*"life" + 0.007*"kind" + 0.007*"work" + 0.007*"woman"'),
 (2,
  '0.073*"women" + 0.015*"male" + 0.014*"masculin" + 0.010*"gender" + 0.009*"issu" + 0.008*"woman" + 0.007*"discuss" + 0.007*"posit" + 0.006*"gener" + 0.006*"group"'),
 (3,
  '0.022*"hair" + 0.022*"bodi" + 0.017*"woman" + 0.016*"look" + 0.012*"dick" + 0.011*"masturb" + 0.009*"size" + 0.009*"face" + 0.009*"turn" + 0.009*"watch"'),
 (4,
  '0.031*"work" + 0.029*"life" + 0.025*"help" + 0.012*"school" + 0.011*"delet" + 0.010*"parent" + 0.010*"depress" + 0.008*"health" + 0.008*"home" + 0.007*"start"')]

1. Dating Advice
2. ?? 
3. ??

In [57]:
#Pass 3 - Let's try 5 topics, 40 passes
ldana2 = models.LdaModel(corpus=corpusna, num_topics=5, id2word=id2wordna, passes=40)
ldana2.print_topics()

[(0,
  '0.023*"delet" + 0.019*"hair" + 0.017*"bodi" + 0.014*"watch" + 0.011*"favorit" + 0.010*"dick" + 0.010*"look" + 0.008*"movi" + 0.007*"size" + 0.007*"porn"'),
 (1,
  '0.104*"relationship" + 0.043*"friend" + 0.024*"partner" + 0.018*"girlfriend" + 0.012*"cheat" + 0.012*"deal" + 0.011*"boyfriend" + 0.010*"marri" + 0.010*"woman" + 0.009*"masturb"'),
 (2,
  '0.025*"women" + 0.014*"male" + 0.014*"masculin" + 0.010*"issu" + 0.009*"gender" + 0.006*"posit" + 0.006*"discuss" + 0.006*"support" + 0.006*"gener" + 0.006*"help"'),
 (3,
  '0.116*"date" + 0.116*"girl" + 0.069*"women" + 0.033*"woman" + 0.019*"attract" + 0.018*"look" + 0.013*"number" + 0.011*"approach" + 0.010*"advic" + 0.009*"delet"'),
 (4,
  '0.024*"work" + 0.023*"life" + 0.016*"friend" + 0.014*"help" + 0.011*"start" + 0.010*"school" + 0.007*"problem" + 0.007*"point" + 0.006*"depress" + 0.006*"place"')]

In [58]:
#Pass 4 - Let's try 5 topics, 50 passes
ldana3 = models.LdaModel(corpus=corpusna, num_topics=5, id2word=id2wordna, passes=50)
ldana3.print_topics()

[(0,
  '0.027*"school" + 0.021*"work" + 0.019*"parent" + 0.016*"colleg" + 0.013*"friend" + 0.009*"class" + 0.009*"home" + 0.008*"weekend" + 0.008*"wife" + 0.008*"famili"'),
 (1,
  '0.033*"life" + 0.026*"work" + 0.023*"help" + 0.009*"depress" + 0.009*"problem" + 0.009*"start" + 0.007*"point" + 0.007*"hate" + 0.007*"chang" + 0.006*"care"'),
 (2,
  '0.053*"date" + 0.053*"girl" + 0.048*"relationship" + 0.042*"friend" + 0.010*"partner" + 0.009*"look" + 0.008*"text" + 0.008*"girlfriend" + 0.008*"convers" + 0.007*"advic"'),
 (3,
  '0.017*"women" + 0.016*"male" + 0.016*"masculin" + 0.011*"issu" + 0.011*"gender" + 0.007*"posit" + 0.007*"discuss" + 0.007*"group" + 0.006*"commun" + 0.006*"view"'),
 (4,
  '0.106*"women" + 0.055*"woman" + 0.035*"delet" + 0.020*"hair" + 0.014*"bodi" + 0.014*"look" + 0.011*"attract" + 0.010*"masturb" + 0.008*"face" + 0.007*"size"')]

In [60]:
#Pass 5 - Let's try 5 topics, 25 passes
ldana4 = models.LdaModel(corpus=corpusna, num_topics=5, id2word=id2wordna, passes=25)
ldana4.print_topics()

[(0,
  '0.020*"help" + 0.017*"work" + 0.013*"life" + 0.012*"health" + 0.007*"issu" + 0.006*"chang" + 0.005*"father" + 0.005*"support" + 0.005*"parent" + 0.005*"school"'),
 (1,
  '0.019*"hair" + 0.019*"bodi" + 0.016*"look" + 0.011*"dick" + 0.009*"masturb" + 0.009*"watch" + 0.008*"size" + 0.007*"face" + 0.007*"porn" + 0.006*"kind"'),
 (2,
  '0.070*"women" + 0.021*"masculin" + 0.021*"male" + 0.015*"gender" + 0.009*"discuss" + 0.009*"favorit" + 0.008*"group" + 0.008*"posit" + 0.008*"toxic" + 0.008*"role"'),
 (3,
  '0.070*"date" + 0.069*"girl" + 0.054*"relationship" + 0.027*"woman" + 0.025*"women" + 0.018*"delet" + 0.013*"partner" + 0.011*"attract" + 0.009*"number" + 0.009*"text"'),
 (4,
  '0.031*"friend" + 0.023*"work" + 0.023*"life" + 0.014*"start" + 0.012*"help" + 0.010*"school" + 0.008*"problem" + 0.008*"home" + 0.008*"point" + 0.007*"deal"')]

In [61]:
#Pass 6 - Let's try 6 topics, 50 passes
ldana5 = models.LdaModel(corpus=corpusna, num_topics=6, id2word=id2wordna, passes=50)
ldana5.print_topics()

[(0,
  '0.127*"women" + 0.053*"woman" + 0.045*"date" + 0.032*"relationship" + 0.023*"partner" + 0.020*"attract" + 0.010*"boyfriend" + 0.008*"curiou" + 0.008*"marri" + 0.008*"consid"'),
 (1,
  '0.041*"work" + 0.024*"help" + 0.015*"parent" + 0.014*"school" + 0.014*"life" + 0.013*"health" + 0.010*"home" + 0.009*"money" + 0.008*"colleg" + 0.007*"famili"'),
 (2,
  '0.053*"delet" + 0.031*"hair" + 0.029*"bodi" + 0.028*"look" + 0.017*"dick" + 0.015*"girl" + 0.014*"favorit" + 0.012*"face" + 0.011*"compliment" + 0.007*"want"'),
 (3,
  '0.022*"masturb" + 0.020*"virgin" + 0.020*"think" + 0.019*"rape" + 0.016*"peni" + 0.016*"porn" + 0.014*"watch" + 0.011*"consent" + 0.011*"size" + 0.010*"beer"'),
 (4,
  '0.019*"masculin" + 0.017*"male" + 0.013*"gender" + 0.012*"issu" + 0.009*"posit" + 0.008*"group" + 0.007*"role" + 0.007*"commun" + 0.007*"toxic" + 0.007*"discuss"'),
 (5,
  '0.026*"friend" + 0.025*"girl" + 0.020*"life" + 0.017*"relationship" + 0.017*"date" + 0.010*"start" + 0.009*"work" + 0.009*"hel

In [62]:
# Let's try 7 topics, 20 passes
ldana3 = models.LdaModel(corpus=corpusna, num_topics=7, id2word=id2wordna, passes=20)
ldana3.print_topics()

[(0,
  '0.036*"help" + 0.021*"issu" + 0.018*"problem" + 0.016*"emot" + 0.013*"health" + 0.013*"need" + 0.011*"relationship" + 0.009*"life" + 0.008*"think" + 0.007*"complet"'),
 (1,
  '0.041*"women" + 0.019*"masculin" + 0.016*"male" + 0.013*"gender" + 0.010*"posit" + 0.009*"gener" + 0.008*"group" + 0.008*"discuss" + 0.008*"role" + 0.007*"toxic"'),
 (2,
  '0.043*"work" + 0.014*"help" + 0.013*"watch" + 0.009*"favorit" + 0.008*"rape" + 0.008*"thank" + 0.008*"edit" + 0.007*"health" + 0.007*"product" + 0.007*"movi"'),
 (3,
  '0.045*"friend" + 0.015*"girl" + 0.014*"start" + 0.013*"look" + 0.010*"kind" + 0.008*"doesnt" + 0.008*"work" + 0.008*"situat" + 0.007*"hair" + 0.007*"convers"'),
 (4,
  '0.103*"delet" + 0.041*"bodi" + 0.032*"dick" + 0.023*"size" + 0.019*"peni" + 0.019*"curiou" + 0.018*"want" + 0.014*"face" + 0.013*"penis" + 0.012*"shower"'),
 (5,
  '0.043*"life" + 0.024*"work" + 0.018*"school" + 0.013*"parent" + 0.011*"help" + 0.010*"depress" + 0.010*"colleg" + 0.009*"home" + 0.009*"fami

In [63]:
# Let's try 7 topics, 30 passes
ldana4 = models.LdaModel(corpus=corpusna, num_topics=7, id2word=id2wordna, passes=30)
ldana4.print_topics()

[(0,
  '0.049*"help" + 0.039*"life" + 0.021*"depress" + 0.019*"health" + 0.016*"work" + 0.011*"struggl" + 0.010*"anxieti" + 0.008*"problem" + 0.008*"need" + 0.008*"emot"'),
 (1,
  '0.085*"delet" + 0.029*"boyfriend" + 0.024*"advic" + 0.017*"think" + 0.011*"thank" + 0.011*"particip" + 0.010*"studi" + 0.010*"research" + 0.010*"shower" + 0.008*"hung"'),
 (2,
  '0.078*"women" + 0.015*"masculin" + 0.014*"male" + 0.010*"gender" + 0.009*"issu" + 0.007*"posit" + 0.007*"gener" + 0.007*"group" + 0.007*"discuss" + 0.006*"role"'),
 (3,
  '0.022*"watch" + 0.015*"wife" + 0.013*"look" + 0.013*"favorit" + 0.012*"father" + 0.011*"mother" + 0.011*"video" + 0.010*"masturb" + 0.009*"marri" + 0.009*"play"'),
 (4,
  '0.074*"date" + 0.057*"work" + 0.022*"school" + 0.019*"girl" + 0.015*"life" + 0.015*"colleg" + 0.012*"parent" + 0.012*"start" + 0.011*"home" + 0.011*"number"'),
 (5,
  '0.037*"friend" + 0.035*"relationship" + 0.029*"girl" + 0.012*"life" + 0.010*"doesnt" + 0.010*"start" + 0.009*"deal" + 0.009*"kin

In [64]:
#5th Attempt, 20 passes, 4 topics
ldana5 = models.LdaModel(corpus=corpusna, num_topics=4, id2word=id2wordna, passes=20)
ldana5.print_topics()

[(0,
  '0.035*"relationship" + 0.030*"life" + 0.023*"work" + 0.022*"help" + 0.010*"school" + 0.009*"parent" + 0.008*"depress" + 0.007*"chang" + 0.007*"problem" + 0.006*"colleg"'),
 (1,
  '0.042*"delet" + 0.013*"favorit" + 0.010*"watch" + 0.008*"movi" + 0.008*"think" + 0.007*"valentin" + 0.007*"health" + 0.006*"film" + 0.006*"studi" + 0.006*"song"'),
 (2,
  '0.036*"date" + 0.036*"girl" + 0.033*"friend" + 0.012*"work" + 0.011*"look" + 0.010*"start" + 0.007*"kind" + 0.007*"girlfriend" + 0.006*"text" + 0.006*"doesnt"'),
 (3,
  '0.066*"women" + 0.029*"woman" + 0.012*"male" + 0.010*"masculin" + 0.008*"gender" + 0.007*"gener" + 0.007*"hair" + 0.007*"issu" + 0.007*"bodi" + 0.007*"partner"')]

In [65]:
#6th Attempt, 80 passes, 5 topics
ldana6 = models.LdaModel(corpus=corpusna, num_topics=5, id2word=id2wordna, passes=80)
ldana6.print_topics()

[(0,
  '0.023*"friend" + 0.023*"life" + 0.022*"girl" + 0.021*"relationship" + 0.017*"work" + 0.016*"date" + 0.010*"start" + 0.009*"school" + 0.009*"help" + 0.007*"look"'),
 (1,
  '0.029*"help" + 0.023*"work" + 0.020*"health" + 0.007*"support" + 0.006*"studi" + 0.006*"thank" + 0.006*"need" + 0.005*"watch" + 0.005*"money" + 0.005*"complet"'),
 (2,
  '0.034*"delet" + 0.022*"bodi" + 0.021*"hair" + 0.014*"look" + 0.011*"dick" + 0.010*"masturb" + 0.009*"size" + 0.008*"face" + 0.007*"peni" + 0.006*"sport"'),
 (3,
  '0.019*"women" + 0.019*"masculin" + 0.016*"male" + 0.012*"gender" + 0.011*"issu" + 0.009*"posit" + 0.008*"discuss" + 0.008*"group" + 0.007*"commun" + 0.007*"role"'),
 (4,
  '0.095*"women" + 0.051*"woman" + 0.032*"date" + 0.021*"partner" + 0.016*"attract" + 0.010*"boyfriend" + 0.010*"favorit" + 0.008*"curiou" + 0.007*"movi" + 0.006*"marri"')]

In [66]:
#7th Attempt, 90 passes, 5 topics
ldana7 = models.LdaModel(corpus=corpusna, num_topics=5, id2word=id2wordna, passes=90)
ldana7.print_topics()

[(0,
  '0.028*"delet" + 0.015*"watch" + 0.014*"wife" + 0.014*"parent" + 0.012*"mother" + 0.012*"father" + 0.008*"bodi" + 0.008*"masturb" + 0.008*"money" + 0.006*"marri"'),
 (1,
  '0.045*"women" + 0.015*"male" + 0.010*"gender" + 0.009*"issu" + 0.006*"posit" + 0.006*"group" + 0.006*"discuss" + 0.006*"commun" + 0.006*"gener" + 0.006*"role"'),
 (2,
  '0.045*"date" + 0.045*"girl" + 0.042*"relationship" + 0.028*"friend" + 0.018*"woman" + 0.014*"women" + 0.013*"look" + 0.008*"partner" + 0.008*"kind" + 0.008*"girlfriend"'),
 (3,
  '0.045*"masculin" + 0.021*"toxic" + 0.016*"favorit" + 0.010*"feminin" + 0.009*"song" + 0.008*"tran" + 0.008*"video" + 0.007*"product" + 0.006*"birthday" + 0.006*"kind"'),
 (4,
  '0.031*"life" + 0.031*"work" + 0.020*"help" + 0.013*"school" + 0.011*"friend" + 0.010*"start" + 0.009*"depress" + 0.008*"problem" + 0.007*"home" + 0.007*"point"')]

In [67]:
#8th Attempt, 100 passes, 5 topics
ldana8 = models.LdaModel(corpus=corpusna, num_topics=5, id2word=id2wordna, passes=100)
ldana8.print_topics()

[(0,
  '0.025*"bodi" + 0.021*"hair" + 0.016*"look" + 0.015*"watch" + 0.012*"favorit" + 0.011*"dick" + 0.011*"movi" + 0.010*"masturb" + 0.009*"size" + 0.008*"face"'),
 (1,
  '0.026*"life" + 0.025*"work" + 0.014*"help" + 0.011*"school" + 0.010*"friend" + 0.009*"start" + 0.008*"deal" + 0.007*"problem" + 0.007*"depress" + 0.007*"care"'),
 (2,
  '0.033*"delet" + 0.030*"help" + 0.017*"health" + 0.013*"rape" + 0.010*"victim" + 0.009*"assault" + 0.009*"need" + 0.009*"male" + 0.009*"studi" + 0.008*"support"'),
 (3,
  '0.062*"women" + 0.016*"masculin" + 0.012*"male" + 0.011*"gender" + 0.009*"issu" + 0.008*"posit" + 0.007*"woman" + 0.007*"group" + 0.007*"gener" + 0.007*"discuss"'),
 (4,
  '0.064*"date" + 0.063*"girl" + 0.048*"relationship" + 0.038*"friend" + 0.019*"woman" + 0.011*"partner" + 0.011*"attract" + 0.010*"girlfriend" + 0.010*"text" + 0.009*"look"')]

In [68]:
##Topic 1 - > Body Insecurity
##topic 2 - > General Issues
##Topic 3 - > Sexual Assault
##Topic 4 - > Gender roles/Male Masculinity
##Topic 5 - > Dating and Relationships

In [69]:
##Creating another model to add more stop words that don't help with honing down topics "song", "life" (too general),
##"look", "discuss", and "text"

In [71]:
add_stop_words = ['song', 'life', 'look', 'discuss', 'text']
stop_words2 = stop_words.stop_words + add_stop_words

cvna2 = CountVectorizer(stop_words=stop_words2, max_df=.8)
data_cvna2 = cvna2.fit_transform(data_nouns_adj.all_text)
data_dtmna2 = pd.DataFrame(data_cvna2.toarray(), columns=cvna2.get_feature_names())
data_dtmna2.index = data_nouns_adj.index
#data_dtmna2

In [73]:
# Create the gensim corpus
corpusna2 = matutils.Sparse2Corpus(scipy.sparse.csr_matrix(data_dtmna2.transpose()))

# Create the vocabulary dictionary
id2wordna2 = dict((v, k) for k, v in cvna2.vocabulary_.items())

In [74]:
#Take 1
ldana_1 = models.LdaModel(corpus=corpusna2, num_topics=5, id2word=id2wordna2, passes=20)
ldana_1.print_topics()

[(0,
  '0.057*"delet" + 0.032*"hair" + 0.021*"watch" + 0.020*"favorit" + 0.014*"movi" + 0.011*"video" + 0.009*"film" + 0.008*"want" + 0.007*"prefer" + 0.007*"product"'),
 (1,
  '0.015*"masculin" + 0.015*"issu" + 0.012*"health" + 0.009*"support" + 0.009*"posit" + 0.008*"help" + 0.008*"commun" + 0.007*"male" + 0.006*"gender" + 0.006*"toxic"'),
 (2,
  '0.095*"women" + 0.042*"woman" + 0.022*"date" + 0.016*"attract" + 0.015*"partner" + 0.011*"male" + 0.010*"gener" + 0.008*"kind" + 0.008*"dick" + 0.006*"curiou"'),
 (3,
  '0.039*"girl" + 0.035*"work" + 0.031*"date" + 0.031*"friend" + 0.014*"school" + 0.013*"help" + 0.010*"start" + 0.008*"home" + 0.008*"colleg" + 0.006*"parent"'),
 (4,
  '0.037*"relationship" + 0.010*"help" + 0.009*"deal" + 0.009*"problem" + 0.009*"depress" + 0.008*"doesnt" + 0.008*"care" + 0.007*"love" + 0.007*"hate" + 0.007*"start"')]

In [75]:
#Take 2
ldana_2 = models.LdaModel(corpus=corpusna2, num_topics=5, id2word=id2wordna2, passes=30)
ldana_2.print_topics()

[(0,
  '0.017*"help" + 0.012*"problem" + 0.010*"work" + 0.009*"school" + 0.008*"depress" + 0.008*"point" + 0.007*"hate" + 0.006*"issu" + 0.006*"care" + 0.006*"start"'),
 (1,
  '0.040*"friend" + 0.039*"relationship" + 0.030*"work" + 0.022*"date" + 0.018*"girl" + 0.012*"start" + 0.008*"home" + 0.008*"colleg" + 0.007*"girlfriend" + 0.007*"love"'),
 (2,
  '0.029*"masculin" + 0.021*"hair" + 0.019*"watch" + 0.014*"favorit" + 0.013*"posit" + 0.011*"movi" + 0.010*"role" + 0.009*"video" + 0.008*"porn" + 0.008*"male"'),
 (3,
  '0.013*"gender" + 0.012*"male" + 0.010*"health" + 0.010*"support" + 0.010*"issu" + 0.009*"group" + 0.009*"rape" + 0.007*"children" + 0.007*"topic" + 0.006*"commun"'),
 (4,
  '0.084*"women" + 0.053*"girl" + 0.046*"date" + 0.036*"woman" + 0.023*"delet" + 0.014*"attract" + 0.010*"bodi" + 0.009*"partner" + 0.008*"number" + 0.007*"approach"')]

In [76]:
#Take 3
ldana_3 = models.LdaModel(corpus=corpusna2, num_topics=5, id2word=id2wordna2, passes=40)
ldana_3.print_topics()

[(0,
  '0.061*"woman" + 0.015*"father" + 0.014*"partner" + 0.013*"rape" + 0.012*"mother" + 0.011*"abus" + 0.011*"wife" + 0.011*"marri" + 0.011*"children" + 0.009*"victim"'),
 (1,
  '0.056*"date" + 0.049*"relationship" + 0.047*"girl" + 0.018*"help" + 0.017*"friend" + 0.010*"girlfriend" + 0.009*"deal" + 0.009*"love" + 0.008*"depress" + 0.007*"start"'),
 (2,
  '0.058*"women" + 0.012*"masculin" + 0.011*"male" + 0.008*"gender" + 0.008*"issu" + 0.007*"help" + 0.007*"posit" + 0.006*"gener" + 0.006*"commun" + 0.005*"health"'),
 (3,
  '0.029*"work" + 0.022*"friend" + 0.013*"school" + 0.010*"start" + 0.008*"help" + 0.008*"point" + 0.008*"parent" + 0.008*"home" + 0.008*"place" + 0.007*"doesnt"'),
 (4,
  '0.042*"delet" + 0.024*"bodi" + 0.013*"dick" + 0.013*"favorit" + 0.012*"masturb" + 0.010*"size" + 0.008*"peni" + 0.007*"face" + 0.006*"valentin" + 0.006*"kind"')]

In [77]:
#Take 4
ldana_4 = models.LdaModel(corpus=corpusna2, num_topics=5, id2word=id2wordna2, passes=50)
ldana_4.print_topics()

[(0,
  '0.025*"work" + 0.019*"help" + 0.011*"school" + 0.010*"start" + 0.008*"depress" + 0.008*"parent" + 0.008*"home" + 0.007*"care" + 0.007*"problem" + 0.006*"point"'),
 (1,
  '0.060*"women" + 0.033*"delet" + 0.026*"woman" + 0.020*"hair" + 0.017*"partner" + 0.012*"bodi" + 0.011*"rape" + 0.009*"masturb" + 0.008*"face" + 0.007*"assault"'),
 (2,
  '0.042*"women" + 0.016*"masculin" + 0.013*"male" + 0.011*"issu" + 0.011*"gender" + 0.007*"view" + 0.007*"gener" + 0.007*"group" + 0.007*"posit" + 0.006*"role"'),
 (3,
  '0.057*"date" + 0.056*"girl" + 0.051*"relationship" + 0.046*"friend" + 0.010*"girlfriend" + 0.009*"woman" + 0.007*"start" + 0.007*"kind" + 0.007*"convers" + 0.007*"attract"'),
 (4,
  '0.012*"help" + 0.010*"health" + 0.009*"work" + 0.008*"thank" + 0.008*"studi" + 0.007*"product" + 0.007*"birthday" + 0.006*"research" + 0.006*"list" + 0.006*"particip"')]

In [78]:
#Take 5
#ldana_5 = models.LdaModel(corpus=corpusna2, num_topics=5, id2word=id2wordna2, passes=60)
#ldana_5.print_topics()

[(0,
  '0.015*"partner" + 0.012*"health" + 0.010*"help" + 0.010*"support" + 0.010*"issu" + 0.010*"male" + 0.009*"rape" + 0.007*"victim" + 0.006*"assault" + 0.006*"research"'),
 (1,
  '0.082*"women" + 0.034*"woman" + 0.018*"masculin" + 0.013*"date" + 0.013*"attract" + 0.012*"gender" + 0.012*"male" + 0.008*"gener" + 0.007*"toxic" + 0.006*"group"'),
 (2,
  '0.025*"delet" + 0.019*"girlfriend" + 0.012*"wife" + 0.010*"mother" + 0.010*"watch" + 0.010*"father" + 0.009*"marri" + 0.009*"parent" + 0.009*"brother" + 0.008*"cheat"'),
 (3,
  '0.024*"friend" + 0.023*"work" + 0.022*"girl" + 0.022*"relationship" + 0.020*"date" + 0.015*"help" + 0.011*"start" + 0.010*"school" + 0.007*"problem" + 0.007*"point"'),
 (4,
  '0.019*"bodi" + 0.018*"hair" + 0.010*"dick" + 0.008*"watch" + 0.008*"love" + 0.007*"favorit" + 0.007*"size" + 0.007*"face" + 0.007*"posit" + 0.006*"peni"')]

##Topic 1 --> Sexual Trauma\
##Topic 2--> Gender Identity/Roles\
##Topic 3--> Familial Issues\
##topic 4--> Dating Advice/Relationships\
##Topic 5--> Body Issues