In [13]:
import matplotlib.pyplot as plt
import gensim
import numpy as np
import pandas as pd
import spacy
import pickle

from gensim.models import CoherenceModel, LdaModel, LsiModel, HdpModel
from gensim.models.wrappers import LdaMallet
from gensim.models.wrappers.dtmmodel import DtmModel
from gensim.corpora import Dictionary, MmCorpus
import pyLDAvis.gensim

import os, re, operator, warnings
warnings.filterwarnings('ignore') 
%matplotlib inline

# Communities topic models

In [14]:
# Load the saved LDA topic models for three communities (MR, MGTOW, RP)
# from their model files under ./lda/.
MR_model = LdaModel.load('./lda/MR_lda_model.gensim')
MGTOW_model = LdaModel.load('./lda/MGTOW_lda_model.gensim')
RP_model = LdaModel.load('./lda/RP_lda_model.gensim')

In [18]:
# Load the saved LDA topic model for the Incel community from ./lda/.
Incel_model = LdaModel.load('./lda/Incel_lda_model.gensim')

In [15]:
# Display the MR model's topics. Each entry of the result pairs a topic id
# with a string listing that topic's top words and their weights.
MR_model.print_topics()

[(0,
  '0.054*"people" + 0.044*"white" + 0.030*"group" + 0.029*"black" + 0.022*"culture" + 0.021*"race" + 0.017*"class" + 0.012*"racist" + 0.012*"country" + 0.011*"american"'),
 (1,
  '0.025*"gender" + 0.016*"difference" + 0.015*"male" + 0.015*"female" + 0.013*"sex" + 0.011*"high" + 0.010*"human" + 0.010*"average" + 0.009*"likely" + 0.008*"effect"'),
 (2,
  '0.036*"child" + 0.017*"right" + 0.014*"parent" + 0.013*"law" + 0.013*"want" + 0.013*"mother" + 0.013*"father" + 0.012*"kid" + 0.009*"sex" + 0.009*"abortion"'),
 (3,
  '0.041*"work" + 0.017*"job" + 0.013*"time" + 0.011*"like" + 0.010*"pay" + 0.010*"go" + 0.009*"good" + 0.009*"need" + 0.008*"get" + 0.008*"money"'),
 (4,
  '0.045*"people" + 0.029*"think" + 0.020*"thing" + 0.018*"like" + 0.014*"bad" + 0.013*"feel" + 0.013*"know" + 0.012*"want" + 0.012*"good" + 0.012*"way"'),
 (5,
  '0.167*"woman" + 0.163*"man" + 0.019*"male" + 0.018*"gender" + 0.011*"sex" + 0.011*"female" + 0.011*"think" + 0.009*"society" + 0.007*"issue" + 0.007*"want"

In [16]:
# Display the MGTOW model's topics as (topic id, weighted top-words string) pairs.
# NOTE(review): in the saved output, topic 2 is dominated by URL pieces and
# contraction fragments ("s", "m", "https_www", "com") -- the preprocessing
# presumably lets these through; confirm whether they should be filtered.
MGTOW_model.print_topics()

[(0,
  '0.061*"chad" + 0.023*"porn" + 0.020*"disgusting" + 0.020*"archive" + 0.020*"drug" + 0.019*"feminism" + 0.015*"test" + 0.015*"patriarchy" + 0.015*"drink" + 0.013*"feminist"'),
 (1,
  '0.034*"people" + 0.015*"think" + 0.013*"thing" + 0.012*"like" + 0.010*"world" + 0.009*"know" + 0.009*"way" + 0.006*"mean" + 0.006*"life" + 0.005*"society"'),
 (2,
  '0.118*"s" + 0.067*"m" + 0.050*"https_www" + 0.040*"com" + 0.034*"d" + 0.027*"article" + 0.026*"u" + 0.024*"youtube_com" + 0.024*"archive_org" + 0.022*"watch_v"'),
 (3,
  '0.033*"like" + 0.030*"fuck" + 0.026*"guy" + 0.023*"s" + 0.019*"get" + 0.019*"know" + 0.018*"girl" + 0.018*"shit" + 0.015*"want" + 0.014*"think"'),
 (4,
  '0.110*"woman" + 0.084*"man" + 0.018*"want" + 0.012*"sex" + 0.011*"like" + 0.011*"life" + 0.010*"female" + 0.008*"think" + 0.008*"way" + 0.008*"need"'),
 (5,
  '0.070*"look" + 0.037*"white" + 0.037*"fat" + 0.028*"dick" + 0.026*"black" + 0.025*"like" + 0.018*"wear" + 0.017*"body" + 0.015*"lmao" + 0.015*"big"'),
 (6,
 

In [17]:
# Display the RP model's topics as (topic id, weighted top-words string) pairs.
# NOTE(review): in the saved output, topic 2 is mostly contraction and URL
# fragments ("s", "not", "don_t", "archive_org") -- presumably tokenization
# residue; confirm whether they should be filtered before modelling.
RP_model.print_topics()

[(0,
  '0.094*"man" + 0.084*"woman" + 0.018*"people" + 0.016*"male" + 0.015*"society" + 0.014*"female" + 0.012*"feminist" + 0.011*"world" + 0.010*"rape" + 0.009*"like"'),
 (1,
  '0.029*"work" + 0.020*"job" + 0.019*"money" + 0.017*"time" + 0.014*"year" + 0.013*"pay" + 0.011*"people" + 0.010*"life" + 0.010*"good" + 0.010*"go"'),
 (2,
  '0.222*"s" + 0.094*"not" + 0.073*"don_t" + 0.048*"m" + 0.029*"archive_org" + 0.028*"be" + 0.025*"d" + 0.024*"t" + 0.024*"com" + 0.022*"http_www"'),
 (3,
  '0.021*"thing" + 0.021*"\'" + 0.020*"people" + 0.019*"like" + 0.019*"life" + 0.017*"feel" + 0.017*"think" + 0.016*"want" + 0.015*"way" + 0.014*"need"'),
 (4,
  '0.122*"trp" + 0.024*"wear" + 0.021*"energy" + 0.021*"social_medium" + 0.019*"porn" + 0.013*"dress" + 0.011*"god" + 0.011*"anxiety" + 0.011*"pic" + 0.010*"professional"'),
 (5,
  '0.098*"woman" + 0.055*"man" + 0.027*"sex" + 0.024*"guy" + 0.022*"want" + 0.019*"girl" + 0.015*"relationship" + 0.014*"look" + 0.011*"alpha" + 0.010*"think"'),
 (6,
  '0.

In [19]:
# Display the Incel model's topics as (topic id, weighted top-words string) pairs.
# NOTE(review): in the saved output, topic 0 is led by contraction fragments
# and a bare quote character ("s", "'", "don_t", "m") -- presumably
# tokenization residue; confirm whether they should be filtered.
Incel_model.print_topics()

[(0,
  '0.227*"s" + 0.142*"\'" + 0.063*"bro" + 0.058*"don_t" + 0.056*"m" + 0.040*"lmao" + 0.026*"t" + 0.024*"cope" + 0.019*"ve" + 0.017*"d"'),
 (1,
  '0.050*"tbh" + 0.033*"damn" + 0.026*"loser" + 0.023*"win" + 0.020*"isn_t" + 0.017*"suffer" + 0.015*"nah" + 0.014*"son" + 0.014*"wear" + 0.013*"abuse"'),
 (2,
  '0.038*"people" + 0.036*"like" + 0.035*"feel" + 0.028*"life" + 0.020*"want" + 0.019*"think" + 0.019*"bad" + 0.016*"help" + 0.016*"thing" + 0.015*"good"'),
 (3,
  '0.107*"post" + 0.034*"sub" + 0.023*"reddit" + 0.023*"read" + 0.020*"ban" + 0.016*"thread" + 0.015*"rape" + 0.015*"fake" + 0.015*"op" + 0.014*"subreddit"'),
 (4,
  '0.087*"not" + 0.070*"white" + 0.037*"short" + 0.036*"black" + 0.034*"be" + 0.030*"height" + 0.030*"tall" + 0.027*"face" + 0.026*"asian" + 0.025*"guy"'),
 (5,
  '0.025*"child" + 0.023*"need" + 0.019*"boy" + 0.017*"money" + 0.016*"eat" + 0.014*"body" + 0.014*"kid" + 0.014*"beta" + 0.011*"big" + 0.011*"work"'),
 (6,
  '0.118*"look" + 0.061*"guy" + 0.056*"like" + 0

In [21]:
# Calendar years 2014 through 2018 inclusive (the range end, 2019, is exclusive).
years = list(range(2014, 2019))

In [22]:
# Inputs for a dynamic topic model over the MGTOW corpus: the full corpus,
# its dictionary, and the number of documents in each time slice.
MGTOW_corpus = MmCorpus('./data/MGTOW_hdp_corpus.mm')
MGTOW_dict = Dictionary.load('./data/MGTOW_hdp_dictionary.dict')

# One time slice per entry in `years`, sized by the document count of that
# year's own corpus file.
MGTOW_time_seq = []
for year in years:
    year_corpus = MmCorpus(f'./data/MGTOW_hdp_corpus_{year}.mm')
    MGTOW_time_seq.append(len(year_corpus))

# Documents in the full corpus that no per-year file accounts for become one
# final slice, so that the slice sizes add up to the corpus length.
# NOTE(review): this assumes the full corpus stores documents in year order
# with the unaccounted documents last -- TODO confirm against how the .mm
# files were written.
MGTOW_remainder = len(MGTOW_corpus) - sum(MGTOW_time_seq)
if MGTOW_remainder < 0:
    # A negative slice size can never be valid; fail here with a clear
    # message instead of handing it to the model.
    raise ValueError(
        f"per-year corpora hold {sum(MGTOW_time_seq)} documents but the full "
        f"MGTOW corpus holds only {len(MGTOW_corpus)}"
    )
if MGTOW_remainder > 0:
    # Skip the extra slice when nothing is left over: an empty time slice
    # contains no documents to model.
    MGTOW_time_seq.append(MGTOW_remainder)

In [23]:
# Path handed to DtmModel as its first argument when the dynamic topic model
# is built. Presumably the Linux 64-bit DTM executable, judging by the file
# name -- confirm it exists and is executable on the machine running this.
dtm_path = "./data/dtm/dtm-linux64"

In [None]:
# Fit a 10-topic dynamic topic model on the MGTOW corpus using the time
# slices computed above.
# NOTE(review): this rebinds MGTOW_model, which earlier in the notebook holds
# the LdaModel loaded from ./lda/MGTOW_lda_model.gensim. Any cell that uses
# MGTOW_model and is re-run after this one will see the DtmModel instead.
# initialize_lda=True presumably seeds the DTM from an LDA fit -- confirm
# against the gensim wrapper documentation.
MGTOW_model = DtmModel(dtm_path, MGTOW_corpus, MGTOW_time_seq, num_topics=10,
                 id2word=MGTOW_dict, initialize_lda=True)

In [None]:
# Persist the model currently bound to MGTOW_model under ./lda/ so it can be
# reloaded later without refitting.
MGTOW_model.save("./lda/MGTOW_dtm.gensim")