## Get topics from each subreddit

In [1]:
from sentence_transformers import SentenceTransformer
import pandas as pd
from sklearn.cluster import KMeans

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
dataset = pd.read_csv('data/final_dataset.csv')

In [3]:
dataset

Unnamed: 0,text,labels,id,author,subreddit,created_utc
0,My favourite food is anything I didn't have to...,[27],eebbqej,Rockzilla78,KitchenConfidential,2019-01-18
1,"Now if he does off himself, everyone will thin...",[27],ed00q6i,cocaineluna,morbidquestions,2019-01-01
2,WHY THE FUCK IS BAYLESS ISOING,[2],eezlygj,minnesotagoat_,timberwolves,2019-01-26
3,To make her feel threatened,[14],ed7ypvh,AlexNic1013,askwomenadvice,2019-01-04
4,Dirty Southern Wankers,[3],ed0bdzj,having_a_nap,london,2019-01-01
...,...,...,...,...,...,...
54258,Thanks. I was diagnosed with BP 1 after the ho...,[15],efeeasc,scaredyk4t,bipolar,2019-01-30
54259,Well that makes sense.,[4],ef9c7s3,LX_Emergency,ABoringDystopia,2019-01-29
54260,Daddy issues [NAME],[27],efbiugo,your_ex_girlfriend-,SquaredCircle,2019-01-30
54261,So glad I discovered that subreddit a couple m...,[0],efbvgp9,Paladin-Arda,AskALiberal,2019-01-30


In [3]:
dataset['subreddit'].value_counts()

subreddit
loveafterlockup     227
socialanxiety       218
cringe              218
AnimalsBeingBros    216
timberwolves        209
                   ... 
canes                35
SSBM                 34
Anticonsumption      33
shieldbro            30
farcry               24
Name: count, Length: 483, dtype: int64

**Steps**:
+ Use Hugging Face model `SentenceTransformer('all-MiniLM-L6-v2')`
+ Encode each text into an embedding with method `encode()`
+ Group by subreddit and compute the mean embedding per subreddit
+ Cluster the subreddit embeddings using `KMeans`
+ Explore clusters and manually assign topic labels

In [4]:
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-V2')

dataset['embedding'] = dataset['text'].apply(lambda x: model.encode(x))

In [5]:
dataset['embedding'][0].shape

(384,)

In [6]:
subreddit_embeddings = (
    dataset.groupby('subreddit')['embedding']
    .apply(lambda x: sum(x) / len(x))
    .reset_index()
)

In [7]:
subreddit_embeddings

Unnamed: 0,subreddit,embedding
0,2meirl4meirl,"[-0.015225524, -0.003186024, 0.03313151, 0.006..."
1,49ers,"[-0.018552188, -0.0028821093, -0.010455885, -0..."
2,4PanelCringe,"[-0.023192884, 0.0039603934, 0.010494498, -0.0..."
3,90DayFiance,"[-0.019507552, -0.00010089574, 0.018924331, -0..."
4,90dayfianceuncensored,"[-0.016648285, -0.0016126882, 0.009415893, -0...."
...,...,...
478,worldpolitics,"[0.0021841251, 0.0019621311, 0.011987819, -0.0..."
479,yesyesyesyesno,"[-0.0083622085, 0.011073343, 0.018445078, -0.0..."
480,youseeingthisshit,"[-0.014335996, 0.014376306, 0.0155483335, -0.0..."
481,youtube,"[-0.023860248, -0.022737052, 0.0056497906, -0...."


In [8]:
subreddit_embeddings[subreddit_embeddings['subreddit']=='2meirl4meirl']['embedding'][0].shape

(384,)

In [9]:
n_clusters = 10
kmeans = KMeans(n_clusters=n_clusters, random_state=42)
subreddit_embeddings['cluster'] = kmeans.fit_predict(list(subreddit_embeddings['embedding']))

In [10]:
def get_text_from_subreddit(df, subreddit):
    return df[df['subreddit']==subreddit]['text']

In [11]:
subreddit_embeddings

Unnamed: 0,subreddit,embedding,cluster
0,2meirl4meirl,"[-0.015225524, -0.003186024, 0.03313151, 0.006...",5
1,49ers,"[-0.018552188, -0.0028821093, -0.010455885, -0...",8
2,4PanelCringe,"[-0.023192884, 0.0039603934, 0.010494498, -0.0...",5
3,90DayFiance,"[-0.019507552, -0.00010089574, 0.018924331, -0...",9
4,90dayfianceuncensored,"[-0.016648285, -0.0016126882, 0.009415893, -0....",9
...,...,...,...
478,worldpolitics,"[0.0021841251, 0.0019621311, 0.011987819, -0.0...",3
479,yesyesyesyesno,"[-0.0083622085, 0.011073343, 0.018445078, -0.0...",5
480,youseeingthisshit,"[-0.014335996, 0.014376306, 0.0155483335, -0.0...",5
481,youtube,"[-0.023860248, -0.022737052, 0.0056497906, -0....",5


In [12]:
dataset

Unnamed: 0,text,labels,id,author,subreddit,created_utc,embedding
0,My favourite food is anything I didn't have to...,[27],eebbqej,Rockzilla78,KitchenConfidential,2019-01-18,"[0.01561645, 0.022650918, 0.031703655, 0.07407..."
1,"Now if he does off himself, everyone will thin...",[27],ed00q6i,cocaineluna,morbidquestions,2019-01-01,"[0.03288236, -0.05128799, -0.022228956, 0.0052..."
2,WHY THE FUCK IS BAYLESS ISOING,[2],eezlygj,minnesotagoat_,timberwolves,2019-01-26,"[-0.026340367, 0.019024402, 0.07136809, 0.0087..."
3,To make her feel threatened,[14],ed7ypvh,AlexNic1013,askwomenadvice,2019-01-04,"[0.001643833, 0.072182246, 0.06253768, 0.04545..."
4,Dirty Southern Wankers,[3],ed0bdzj,having_a_nap,london,2019-01-01,"[-0.049062364, -0.044063635, 0.01824677, -0.03..."
...,...,...,...,...,...,...,...
54258,Thanks. I was diagnosed with BP 1 after the ho...,[15],efeeasc,scaredyk4t,bipolar,2019-01-30,"[-0.05102062, -0.027362593, -0.0054752217, 0.0..."
54259,Well that makes sense.,[4],ef9c7s3,LX_Emergency,ABoringDystopia,2019-01-29,"[0.018713305, 0.017839583, 0.053923544, -0.051..."
54260,Daddy issues [NAME],[27],efbiugo,your_ex_girlfriend-,SquaredCircle,2019-01-30,"[-0.019132588, 0.012533258, -0.038491532, 0.02..."
54261,So glad I discovered that subreddit a couple m...,[0],efbvgp9,Paladin-Arda,AskALiberal,2019-01-30,"[-0.06342228, -0.09550535, -0.0056812265, 0.07..."


In [13]:
l = 0
for _, row in subreddit_embeddings.iterrows():
    l += len(get_text_from_subreddit(dataset, row['subreddit']))

print(l)

54263


In [14]:
pd.set_option('display.max_colwidth', None)

| **Cluster** | **Topic label** |
|---------|-------------|
| 0       | Humor       |
| 1       | Other       |
| 2       | Sport       |
| 3       | Politics       |
| 4       | Love and relationship       |
| 5       | Film and TV series       |
| 6       | Videogames       |

In [15]:
clusters = {
    'cluster': ['0', '1', '2', '3', '4', '5', '6'],
    'topic label': ['Humor', 'Other', 'Sport', 'Politics', 'Love and relationship', 'Film and Tv series', 'Videogames'],
    'Number of samples': [None] * 7
}

clusters = pd.DataFrame(clusters)

clusters

Unnamed: 0,cluster,topic label,Number of samples
0,0,Humor,
1,1,Other,
2,2,Sport,
3,3,Politics,
4,4,Love and relationship,
5,5,Film and Tv series,
6,6,Videogames,


In [16]:
subreddit_embeddings['cluster'].value_counts()

cluster
5    106
3     89
7     68
4     65
9     47
2     40
6     40
8     22
0      5
1      1
Name: count, dtype: int64

In [17]:
# lenghts
def get_lenght(df, n_cluster):
    cluster = df[df['cluster']==n_cluster]
    lis = list(cluster['subreddit'])
    l = 0
    for s in lis:
        l += len(get_text_from_subreddit(dataset, s))

    return l 

In [18]:
sum = 0
for i in range(10):
    l = get_lenght(subreddit_embeddings, i)
    print(f"number of samples cluster {i} = {l}")
    sum += l 

print(sum)

number of samples cluster 0 = 439
number of samples cluster 1 = 45
number of samples cluster 2 = 4238
number of samples cluster 3 = 10273
number of samples cluster 4 = 9026
number of samples cluster 5 = 13307
number of samples cluster 6 = 3305
number of samples cluster 7 = 5839
number of samples cluster 8 = 2804
number of samples cluster 9 = 4987
54263


### Cluster 0 (Humor)

In [19]:
cluster0 = subreddit_embeddings[subreddit_embeddings['cluster']==0]
cluster0

Unnamed: 0,subreddit,embedding,cluster
112,Instagramreality,"[-0.009423519, 0.009711213, 0.023142347, -0.0014001647, -0.00069928664, -0.029505251, 0.04081976, -0.006224382, 0.005340592, -0.014167477, 0.001566375, -0.030661065, 0.0005357002, -0.018926734, -0.011095735, 0.007919993, 0.025833583, -0.0097108735, -0.027370814, 0.028470313, -0.01881666, 0.01684928, 0.025764681, 0.0107351495, -0.022724222, -0.010464401, 0.0054485807, -0.010136576, -0.00010675106, -0.0045077247, -0.027396929, 0.03188434, -0.01828086, 0.022229662, -0.020517435, 0.0043704975, 0.016051406, 0.036535516, -0.009056165, -0.015504201, -0.016905624, -0.024633463, -0.016651351, 0.013134284, -0.019643199, -0.015283076, 0.006353502, -0.023172943, 0.007735321, -0.041633613, -0.038090676, -0.044942033, -0.0127984695, -0.018595299, -0.0063101333, 0.021226032, 0.0040508197, -0.007214204, 0.031695, 0.011421279, 0.0032759365, 0.0350561, -0.006504772, 0.021229139, 0.02518246, -0.008555883, -0.0051475563, -0.014045771, -0.00477137, 0.010211583, 0.019594137, 0.0066176862, -0.013123977, 0.015422522, -0.0094084535, 0.010456675, 0.027201286, -0.019576633, 0.022996688, 0.035629876, 0.0011654315, -0.005282985, 0.024590576, 0.01313587, -0.00478079, -0.0011819986, -0.021363905, -0.053993173, -0.034840237, -0.0018340464, -0.037083484, -0.0011762105, 0.0050810776, -0.0071819737, 0.0017145508, -0.016650502, -0.015250044, 0.0016886136, -0.054664478, 0.056107268, ...]",0
119,Justfuckmyshitup,"[-0.028445255, 0.020124825, 0.018159807, -0.01889363, -0.010291366, -0.0233598, 0.03641206, -0.01823581, -0.002011181, -0.009708881, 0.00024799362, -0.009370487, -0.0008624026, -0.0014329958, -0.009760851, 0.01365203, 0.0055586784, -0.0065413453, -0.012341523, 0.014389235, -0.00960591, 0.022511987, 0.004617863, 0.0009209087, -0.014126093, 0.006409618, 0.01884187, 0.0081469435, -0.0054947357, -0.024437815, -0.008792562, 0.022309685, -0.015386082, 0.0041684653, -0.020729525, -0.014526561, 0.014603802, 0.042460423, 0.0080575785, -0.010795561, -0.008466319, -0.015917428, 0.0039134645, 0.009618145, -0.023050718, -0.0052705673, -0.0046686, -0.022830255, 0.026570382, -0.0055171154, -0.033912342, -0.04057475, 0.0013432605, -0.022055067, -0.0068200827, 0.020352798, 0.012213376, -0.00833372, 0.016933803, 0.018706389, 0.0015252109, 0.014753811, -0.015900455, 0.013424457, 0.015771333, -0.0146015305, -0.01037954, -0.030117022, 0.007183799, 0.011017408, 0.014972039, 0.006341818, -0.016595116, 0.008250077, -0.023909668, 0.0003717664, 0.013784275, -0.015869256, 0.019485, 0.023573475, 0.00052213424, -0.023213906, 0.007764485, 0.014112251, -0.009533212, 0.015211253, -0.01846821, -0.05141514, -0.024833005, 0.0063401717, -0.03348364, 0.0039197127, 0.009587608, -0.0037082992, -0.00676802, -0.0037339549, -0.005018573, -0.0047548427, -0.06821519, 0.06171023, ...]",0
180,RoastMe,"[-0.023977008, 0.005704335, 0.023990734, 0.0104857385, 0.0035585645, -0.038461212, 0.05999834, -0.008822876, 0.0020962676, -0.02349698, 0.00030247797, -0.047003254, 0.0044621816, 0.0072826254, -0.00369462, 0.00888722, 0.015431181, -0.012570888, -0.020640526, 0.012485693, -0.015021567, 0.035980314, 0.01150588, -0.005438355, -0.022300335, 0.02050426, 0.0015517788, 0.012893387, -0.015725002, -0.0068547106, -0.0030706255, 0.0036309764, -0.008186939, -0.005778624, -0.020557113, -0.0064146384, 0.009320637, 0.014014887, -0.0014028552, -0.0124380225, 0.0023785026, -0.015463007, -2.7425674e-05, 0.0030913204, -0.0027531541, 0.00093339774, -0.0072944164, -0.0070089055, 0.016537212, -0.02822569, -0.033815358, -0.033758193, 0.011261586, 7.111044e-05, -0.0020422079, 0.0090969615, 0.0027951063, 0.0044118436, 0.01681232, 0.0313192, -0.019822754, 0.020721681, -0.013594822, 0.016773997, 0.014778312, 0.0026015248, -0.008211727, -0.0051552663, 0.003227567, 0.030924164, 0.005631908, -0.00046669453, -0.015756505, 0.0038585863, -0.017487302, 0.0042773234, 0.024494272, -0.036877684, 0.03976713, 0.044441212, -0.01910028, -0.008830195, -0.0015914576, 0.009190792, -0.024277445, -0.0020316597, 0.003677233, -0.03872049, -0.023206253, 0.008927997, -0.029644223, -0.0005878981, 0.021719614, 0.010830802, 0.0036234248, -0.00077351177, -0.023773156, 0.01314559, -0.07190466, 0.075961806, ...]",0
251,awfuleverything,"[-0.014851497, 0.003980163, 0.02440402, 0.007597368, -0.004130916, -0.020831967, 0.04476364, -0.015997676, -0.00038155593, -0.010867875, 0.0023741298, -0.025731755, -0.0072466005, 0.0010523457, -0.016597565, 0.0062399083, 0.016923005, -0.009436727, -0.018585622, 0.018569548, -0.015366091, 0.024924953, 0.023824407, 0.001439631, -0.017013993, 0.007952674, 0.0051411693, -0.0051366007, -0.005716529, -0.0080130035, -0.016772024, 0.029485011, -0.022111714, 0.00714267, -0.010896034, 0.0013815189, 0.018573137, 0.029531192, 0.0072879423, -0.0028245726, -0.010975607, -0.030446688, -0.0009520181, 0.009086274, -0.020326778, -0.00063690194, 0.0030495094, -0.011308441, 0.020171212, -0.031859305, -0.018977242, -0.037447073, -0.004457893, -0.015276055, 0.004629264, 0.019730922, 0.0033100834, -0.01878158, 0.023119844, 0.01237555, -0.0042907773, 0.028076963, -0.021199945, 0.020419149, 0.012785883, -0.009026764, -1.3679623e-05, -0.0152700795, 0.0071960804, 0.01594224, 0.021002688, 0.013978603, -0.018364532, 0.019901393, -0.00906351, 0.0046168836, 0.026527476, -0.028181696, 0.026500346, 0.034621615, -0.0076430077, -0.013719439, 0.022763815, 0.014138704, -0.0013998538, 0.009082903, -0.005374203, -0.047565192, -0.027357502, -0.0045271884, -0.041643605, -0.003401645, -0.00012890626, 0.0045040115, -0.014429147, -0.010624634, -0.019120017, -0.005304706, -0.060899932, 0.06286523, ...]",0
368,muacirclejerk,"[-0.03514809, 0.010228868, 0.023469847, 0.007602292, 0.027537297, -0.026749698, 0.051592957, -0.017084783, 0.012298574, -0.014595222, 0.0026908545, -0.03133375, 0.008596729, 0.0005508926, -0.01661545, 0.0034361994, 0.009344051, -0.013268987, -0.021546489, 0.0073980084, -0.01920936, 0.008909476, 0.00850171, 0.004078674, -0.012037902, 0.008529347, -0.001205787, -0.0020519844, -0.010989136, 0.005821623, -0.01715439, 0.028270783, -0.011304916, 0.00097201444, -0.014146244, -0.0133244665, 0.018337963, 0.010007783, 0.005598416, 0.009861457, -0.008543642, -0.0155580025, -0.0036388494, 0.009467059, -0.001545374, 0.0073387404, 0.0024514769, -0.009817866, 0.019674847, -0.033238534, -0.02681946, -0.041897975, -0.010075311, -0.024020959, -0.0036890171, 0.013928464, -0.01079485, -0.02023013, 0.020806434, 0.018938163, -0.012363544, 0.017446084, -0.008005911, 0.012256889, 0.02678762, -0.009145764, -0.008557783, -0.00893233, 0.005059847, 0.010988164, 0.0018409118, 0.008031756, -0.018020889, 0.024745151, -0.012465176, 0.021104453, 0.017883712, -0.037130177, 0.010731211, 0.015403116, 0.0030843269, -0.007674418, 0.022203662, 0.013848374, -0.012720754, -0.0037859518, -0.001993092, -0.034737885, -0.0056660003, -0.0040497407, -0.0099764215, 0.004375234, -0.007492531, 0.0047776503, -0.021146536, -0.01549038, -0.019403556, 0.003135769, -0.058079947, 0.051165663, ...]",0


In [20]:
# cluster 5 -> cluster 0
subreddit_embeddings.loc[subreddit_embeddings['cluster']==5, 'cluster'] = 0

# cluster 1 -> cluster 0 
subreddit_embeddings.loc[subreddit_embeddings['cluster']==1, 'cluster'] = 0

In [21]:
subreddit_embeddings[subreddit_embeddings['cluster']==0]

Unnamed: 0,subreddit,embedding,cluster
0,2meirl4meirl,"[-0.015225524, -0.003186024, 0.03313151, 0.006350386, 0.024410237, -0.025254453, 0.04390977, -0.0058279918, 0.022215879, -0.02113416, -0.008272042, -0.017277224, 0.0005259701, 0.0146532515, -0.00018366391, 0.0065619093, 0.015795756, -0.0196112, -0.03444035, 0.01683993, -0.022603214, 0.025492871, -0.0027485809, 0.0030058245, -0.017045945, 0.017293913, -0.0053343, -0.00092097145, -0.014344458, -0.004288831, -0.013948282, 0.027729714, -0.00624809, 0.0010195352, -0.0011348142, -0.006760229, -0.0039269924, -0.005572145, 0.004685422, 0.0019570033, 0.0055032135, 0.0016195015, 0.012987115, 0.0010843549, 0.004612709, -0.004861469, -0.0102725625, -0.015235719, 0.036755502, -0.012383166, -0.023019752, -0.016407376, -0.014417428, -0.0028915976, 0.017426072, 0.02310031, 0.005130199, 0.010803099, 0.024310114, 0.005927755, -0.013256294, 0.015327123, -0.0073854825, 0.0061507258, 0.02964968, 0.007495445, -0.009338666, 0.0022637343, -0.010387419, 0.028194552, -0.005761978, 0.008700418, -0.009146753, 0.020950273, -0.013040407, 0.007349688, 0.009512057, -0.04226785, 0.018402874, 0.032723792, -0.00872738, -0.008868415, -0.0090967035, 0.0004565285, -0.013747484, -0.009730945, 0.013441699, -0.014010522, 0.005549174, 0.005738937, -0.026287165, -0.0064369226, 0.0094893575, 0.013410227, -0.016965017, -0.022709789, -0.020837422, 0.01089403, -0.0650929, 0.069009155, ...]",0
2,4PanelCringe,"[-0.023192884, 0.0039603934, 0.010494498, -0.015115168, 0.006836462, -0.0257574, 0.062217627, -0.003969516, 0.019752953, -0.013393847, -0.009315065, -0.014945045, 0.0043270197, 0.007525347, -0.03317322, -0.0010646626, 0.020012263, -0.011337729, -0.012260013, -0.0053116763, -0.011579572, 0.03377392, 0.018386476, -0.008251519, -0.015098509, 0.007776889, 0.0002013597, 0.0042218273, -0.012523291, -0.0047490625, -0.011362211, 0.03457132, 0.005539218, 0.012535796, -0.020651136, -0.013078647, 0.01133792, 0.022744292, 0.004049748, -0.0032217214, 0.0057809227, -0.028601117, 0.0044739638, 0.0004291405, -0.0010671394, 0.012152756, -0.011778911, -0.013840539, 0.026533436, -0.01406982, -0.029432291, -0.030920757, -0.003413601, -0.00934403, 0.0029896398, 0.014994485, -0.0032579044, 0.0051108813, 0.024964232, 0.018867476, -0.005563573, 0.013376458, -0.010095878, 0.007834922, 0.023456518, -0.020910878, -0.0076802387, 0.000558006, -0.01619879, 0.022920784, 0.009963119, 0.015570447, -0.00830459, 0.027910661, -0.015695438, 0.0068163234, 0.005929091, -0.024902588, 0.01146984, 0.022460744, -0.010102586, -0.020960169, -0.0054418645, -0.0035604178, -0.004414795, -0.0075443047, 0.0075835157, -0.027635863, -0.00678159, -0.002211597, -0.029570624, -0.009576335, 0.026818966, 0.013858904, -0.006839696, -0.022065274, -0.017180715, 0.0027139594, -0.061458662, 0.07802418, ...]",0
5,ABCDesis,"[-0.007688805, 4.959004e-05, 0.0069934307, -0.0055983183, 0.0032650512, -0.011741787, 0.03038204, -0.014807505, 0.007947136, -0.012076037, 0.003230898, -0.01285105, 0.0019093927, 0.0009877848, 0.0014984016, -0.0013246085, 0.015998492, -0.029927028, -0.021650009, 0.0040712664, -0.023225667, 0.00967296, 0.013818662, 0.006238326, -0.0034466884, 0.0011465409, 0.006733888, 0.0024624155, -0.013791796, 0.006583616, -0.0018984022, 0.03268112, -0.007853035, 0.010652667, -0.015161123, -0.00362744, 0.008721226, 0.0186222, 0.011017967, -0.008408868, 0.012091029, -0.0031401874, 0.015855331, -0.004799152, -0.0063970066, -0.001979191, -0.018134931, -0.018342925, 0.016507896, -0.014161355, -0.009721644, -0.010209368, -0.0039509293, -0.005315116, 0.0035914986, 0.007120031, -0.0019474333, 0.009791052, 0.005880823, 0.013425245, -0.009770667, 0.013574024, -0.019821819, 0.017688211, 0.01876128, 0.00039993125, -0.014399096, 0.012133308, -0.00025717064, 0.010213571, 0.0066920137, 0.012365317, -0.0027927323, 0.026027655, -0.008657194, -0.0022087006, 0.012339492, -0.01857518, 0.016794272, 0.018440153, -0.013866387, 0.0075395554, -0.0011610696, 0.00012739225, -0.0007821403, -0.01607039, 0.0029952985, -0.017744888, 0.0048608202, 0.0019223391, -0.016739545, -0.0006333361, 0.034588225, 0.014100454, -0.008206557, -0.02138636, -0.015921546, 0.01225563, -0.054966502, 0.04753186, ...]",0
8,ATBGE,"[-0.031759586, 0.016073229, 0.012952226, -0.0059364685, -0.007115766, -0.028976666, 0.045852516, -0.011685191, 0.0067112367, 0.0011696491, -0.0009307842, -0.0037134315, -0.002714582, 0.015409734, -0.020670442, -0.0073766466, 0.019287081, -0.019024009, -0.0054080207, 0.011036226, -0.023079483, 0.021934414, 0.013421407, -0.0029324018, -0.024322832, 0.0090063, 0.006383552, 0.012492219, -0.011688638, -0.030145142, -0.014729366, 0.025341066, -0.0017222336, 0.0011394868, -0.0021659688, -0.011101958, 0.028034605, 0.004826693, 0.007245753, -0.0006814473, 0.0126229925, -0.013400828, 0.008640464, 0.0026947667, -0.014982094, 0.017907262, -0.008876124, -0.017748782, 0.028610634, -0.00932829, -0.0032872737, -0.022676365, -0.0006084841, -0.028205464, -0.0013790262, 0.004305208, -0.0061729806, -0.015712727, 0.008293316, 0.01588508, 0.011812952, 0.020090548, -0.0137733985, 0.011578329, 0.012502079, -0.015676895, -0.015510071, 0.005418412, -0.00342328, 0.016288519, 0.0005293114, 0.012649091, -0.0038250042, 0.023304025, -0.015049373, -0.0024454845, 0.01498468, -0.033161797, 0.007983033, 0.02769044, -0.01956916, -0.0045087165, 0.002988496, 0.00971088, -0.005990894, -0.0009802724, 0.008918693, -0.027104484, -0.0065758135, -0.019097252, -0.02257053, -0.00061108917, 0.016806405, 0.0060008555, -0.02453208, -0.008391273, -0.013392194, -5.1320665e-05, -0.06307875, 0.06487084, ...]",0
14,AnimalsBeingBros,"[-0.009656967, 0.007732864, 0.030942159, 0.0042703133, -0.0011498709, -0.018671587, 0.035496835, -0.025663245, 0.020155275, -0.0015953626, 0.0015812755, -0.017337969, 0.0050033024, 0.02919667, -0.021067103, 0.0031918888, -0.0033922466, -0.022868153, -0.020054244, 0.006103273, -0.026415301, 0.015883386, 0.018498592, 0.004010014, -0.03416394, 0.012626982, -0.0032642828, -0.0021548744, -0.0029155095, -0.0097029405, -0.025809733, 0.017220464, -0.002411811, 0.008881733, -0.011748037, -0.009806468, 0.017402427, 0.004129673, 0.019195799, 0.009535801, 0.008481538, -0.009244745, 0.017844228, -0.0033194216, -0.025815865, 0.008697231, -0.019510789, -0.034639575, 0.03907356, -0.009609266, -0.024963688, -0.013358994, 0.0011019106, -0.009289048, 0.003221676, 0.013107341, 0.008876481, -0.031123612, 0.014859779, -0.0032137986, -0.0042514843, 0.0431973, -0.0074117775, 0.018067295, 0.013539807, -0.010898899, -0.0186392, -0.0003669173, 0.0023592154, 0.026438914, 0.012834996, 0.029369082, 0.0041743736, 0.008879842, -0.021775555, 0.0042619067, 0.0113682505, -0.02471581, 0.037679624, 0.021014836, -0.018845871, -0.026979866, -0.00528149, 0.01454829, -0.006118106, -0.0036650756, 0.0012405493, -0.029725263, -0.026664607, 0.0008489036, -0.02697251, -0.013818244, 0.0011921523, 0.013835218, -0.028508779, -0.020463208, -0.012118404, -0.0076043596, -0.069147766, 0.054081563, ...]",0
...,...,...,...
476,woooosh,"[-0.0129443295, 0.0013837398, 0.022156728, 0.006545997, 0.014100412, -0.030983748, 0.059730142, -0.0035965436, 0.029076094, -0.014682758, -0.0044114254, -0.023629835, -0.009259306, -0.0026512, -0.018590296, 0.0063146153, 0.0094389785, -0.024563381, -0.035002023, -0.0063768704, 0.006983573, 0.04271767, 0.0006133397, -0.0021091306, -0.012785779, -0.0014078118, -0.01068187, 0.008756303, -0.0036222031, 0.009231203, -0.01694967, 0.03374699, 0.013272669, 0.02025821, -0.029444007, -0.014224739, 0.010494509, 0.016510934, 0.0064417035, 0.0040092454, -0.006676219, -0.028323503, 0.0031970146, 0.014455062, -0.0036676563, 0.014890516, -0.02477356, -0.0053931503, 0.03294348, -0.0059841103, -0.025382899, -0.03157319, -0.011571445, 0.003250818, 0.013763878, 0.010790413, 0.011418214, 0.0067928447, 0.01988885, 0.012756445, 0.015231614, 0.009809553, -0.021391554, 0.006399033, 0.019836433, -0.01377642, -0.016981626, -0.0002823081, -0.035105303, 0.03954031, 0.021754473, -0.0046791816, 0.010570521, 0.02423409, 0.0009232339, -0.008853686, 0.005426913, -0.0212765, 0.03831093, 0.023934402, -0.012454712, -0.01644637, -0.02770862, -0.005828808, -0.0026559958, -0.009923937, -0.0074966457, -0.0089661125, -0.01670717, -0.005350527, -0.02841515, -0.020044101, 0.012308354, 0.015879238, -0.004778804, -0.024709731, -0.017020103, 0.022792647, -0.06704604, 0.07467559, ...]",0
479,yesyesyesyesno,"[-0.0083622085, 0.011073343, 0.018445078, -0.003998922, 0.014793307, -0.021117568, 0.05252335, -0.011913042, 0.019400515, -0.012024384, 0.0024549381, -0.006599366, 0.0020649165, 0.014543544, -0.0255699, -0.0017111102, 0.008479045, -0.01408407, -0.023001758, 0.0106674535, -0.014053538, 0.030753447, 0.012457154, 0.0027908986, -0.017781103, 0.016602764, 0.0027288073, 0.0072212573, -0.007086704, -0.014541867, -0.0031740316, 0.012919709, -0.009250446, 0.004120989, -0.014303699, -0.007763932, 0.01797379, 0.0137826195, 0.012575211, -0.00043647928, 0.0037075735, -0.032460347, 0.008484618, 0.0022219967, -0.0026499436, 0.012095242, -0.007815583, -0.024678346, 0.04135658, -0.016729465, -0.022172786, -0.02061368, -0.0002475175, -0.009049874, 0.012084131, 0.007026227, 0.014259716, -0.011723708, 0.0211036, 0.0070128595, 0.00094823295, 0.019532697, -0.011291425, 0.017053412, 0.016956309, -0.00947581, -0.0036092438, -0.017910829, -0.00046537767, 0.040502857, 0.015275095, 0.0076141194, -0.0012790365, 0.015093308, -0.019042887, 0.0018480936, 0.009466989, -0.02500801, 0.026029471, 0.028044669, 0.0010464957, -0.03296583, -0.0029585545, 0.007369537, -0.007894893, -0.0033773582, 0.009061391, -0.023171145, -0.0048105856, 0.0021707648, -0.030004548, -0.008528774, 0.005330616, 0.017408133, -0.02074167, -0.02431473, -0.025190774, -0.0027861844, -0.060355585, 0.06268799, ...]",0
480,youseeingthisshit,"[-0.014335996, 0.014376306, 0.0155483335, -0.004478148, 0.0048909863, -0.02467696, 0.054850724, -0.0049939873, 0.009657817, -0.012517246, 0.0051483796, -0.01693221, -0.006197467, 0.01457819, -0.02272218, 0.003836879, 0.00046916434, -0.010009826, -0.022212423, 0.008787637, -0.014744695, 0.0315949, 0.017257366, 0.0039907196, -0.013444219, 0.012192348, 0.005986109, 0.004745931, -0.004493389, -0.009723091, -0.015265348, 0.016833752, -0.008834063, 0.0021351313, -0.023313535, -0.010370371, 0.005790748, 0.011841264, 0.0074268547, -0.00032327342, 0.0059947525, -0.01781201, 0.009331639, 0.0063738767, -0.011674794, 0.016203264, -0.0054117357, -0.023940844, 0.036298692, -0.009160119, -0.02397236, -0.023577796, 0.006926105, -0.017023928, 0.0044887285, 0.0074994955, 0.017616836, -0.008776411, 0.016358757, 0.017532641, -0.0007234236, 0.019681973, -0.018892916, 0.018690454, 0.014927712, -0.0129392, -0.009690455, -0.008470396, 0.006099041, 0.026479892, 0.01942384, 0.007798536, -0.0025844646, 0.013060509, -0.020168683, 0.0023006683, 0.013914035, -0.026543045, 0.022862708, 0.024108732, -0.0077491286, -0.029773664, -0.0046148864, 0.0075406884, -0.004399504, -0.0004631553, 0.0034152449, -0.022435032, -0.010099694, 0.005427206, -0.030581502, -0.012022739, 0.0055551413, 0.0089400625, -0.025775027, -0.017435824, -0.016238572, -0.008830844, -0.061916888, 0.062757105, ...]",0
481,youtube,"[-0.023860248, -0.022737052, 0.0056497906, -0.02203018, 0.025539566, -0.01015595, 0.013050148, -0.024837367, 0.010369463, -0.016271632, 0.0027394197, 0.017588852, -0.007084928, 0.021250762, -0.008509088, 0.0071599428, 0.0023678693, -0.022836454, -0.039534092, -0.0077692745, -0.012763517, -0.0052775047, 0.00024807145, 0.007160397, -0.0012998573, -0.000651506, -0.015044338, 0.018097132, -0.0025495482, -0.0085455505, -0.0093915695, 0.02879305, 0.010775709, 0.0017451529, -0.004916429, -0.020089326, -0.0038798687, 0.0054346872, -0.013535374, -0.015334826, 0.011285564, -0.011144704, 0.0061205234, 0.00072312506, -0.019059772, 0.012625619, -0.010344554, -0.031931948, 0.036705058, -0.004648227, -0.017923191, -0.03447158, -0.005220498, -0.007883278, -0.0077883867, 0.005263267, -0.0028756536, 0.019157976, 0.03925445, 0.008549662, 0.015895575, 0.0025575706, -0.025689904, 0.022480823, 0.027137939, -0.0018925758, -0.011726916, -0.0037741028, -0.0133304205, 0.009074782, 0.0015313888, 0.014069167, -0.0024052993, 0.022653732, -0.010310258, 0.00763165, 0.0027974579, -0.02105408, 0.0065332586, 0.004811504, 0.0032661874, -0.021022966, 0.00010446987, 0.0034728567, 0.016288057, 0.01196628, 0.010004775, -0.021540696, -0.0027630616, 0.00046249217, -0.019934217, 0.011554197, 0.030988337, 0.018899107, -0.023249585, -0.021628195, -0.0094218645, 0.000116256124, -0.05344411, 0.060387116, ...]",0


In [22]:
humor = list(subreddit_embeddings[subreddit_embeddings['cluster']==0]['subreddit'])
print(humor)

['2meirl4meirl', '4PanelCringe', 'ABCDesis', 'ATBGE', 'AnimalsBeingBros', 'AnimalsBeingJerks', 'BetterEveryLoop', 'Blackfellas', 'BreadTube', 'Cardinals', 'ComedyCemetery', 'DiWHY', 'Documentaries', 'ExpectationVsReality', 'FellowKids', 'FrankOcean', 'FunnyandSad', 'HadToHurt', 'HighQualityGifs', 'HumansBeingBros', 'IASIP', 'IDontWorkHereLady', 'IdiotsInCars', 'Instagramreality', 'Jokes', 'Justfuckmyshitup', 'KidsAreFuckingStupid', 'MaliciousCompliance', 'Military', 'My600lbLife', 'ProRevenge', 'Roadcam', 'RoastMe', 'SubredditDrama', 'SubredditSimulator', 'SweatyPalms', 'TIHI', 'TheSimpsons', 'Tinder', 'ToolBand', 'TrollXChromosomes', 'TwoSentenceHorror', 'Unexpected', 'WatchPeopleDieInside', 'WeWantPlates', 'Wellthatsucks', 'Whatcouldgowrong', 'WhitePeopleTwitter', 'WhyWereTheyFilming', 'WouldYouRather', 'antiMLM', 'awfuleverything', 'barstoolsports', 'bestoflegaladvice', 'bodybuilding', 'circlejerk', 'cringe', 'cringepics', 'dadjokes', 'delusionalartists', 'disneyvacation', 'dogswith

In [23]:
l = 0
for s in humor:
    l += len(get_text_from_subreddit(dataset, s))

print(l)

13791


In [24]:
clusters.loc[clusters['cluster']=='0', 'Number of samples'] = l

In [25]:
clusters

Unnamed: 0,cluster,topic label,Number of samples
0,0,Humor,13791.0
1,1,Other,
2,2,Sport,
3,3,Politics,
4,4,Love and relationship,
5,5,Film and Tv series,
6,6,Videogames,


In [26]:
subreddit_embeddings['cluster'].value_counts().sort_index()

cluster
0    112
2     40
3     89
4     65
6     40
7     68
8     22
9     47
Name: count, dtype: int64

### Cluster 1 (Other)

In [27]:
subreddit_embeddings[subreddit_embeddings['cluster']==1]

Unnamed: 0,subreddit,embedding,cluster


In [28]:
# cluster 7 -> cluster 1
subreddit_embeddings.loc[subreddit_embeddings['cluster']==7, 'cluster'] = 1

In [29]:
subreddit_embeddings[subreddit_embeddings['cluster']==1]

Unnamed: 0,subreddit,embedding,cluster
11,AirForce,"[-0.012958019, -0.0069180746, 0.018451672, 0.0009916542, 0.017646557, -0.021794278, 0.0010664454, -0.00628202, -0.0035156063, 0.002326863, 0.0066808467, -0.0015685555, -0.00061749044, 0.020783747, -0.010334581, 0.0019829178, 0.017770173, -0.0187666, -0.017817834, 0.009877738, -0.0061971126, 0.009666472, 2.6805386e-05, 0.0049631256, -0.0065777134, 0.008989308, -0.008255654, 0.020244556, -0.011506865, -0.011309002, -0.02181735, 0.025937589, 0.015929837, 0.004559538, 0.011831123, 0.010093267, 0.017419208, 0.004937216, 0.0066709733, -0.0073600477, 0.0012438388, -0.013577104, 0.014145654, 0.016368054, -0.00073677534, -0.002814681, -0.0025655474, -0.023928039, 0.02532541, 0.0024689927, 0.008815608, -0.03115194, -0.010804708, -0.0035275903, -0.00023483431, 0.012075771, 0.003675383, -0.0006788646, 0.0031562864, -0.0026064438, -0.010218615, 0.003150089, -0.027151247, 0.017197454, 0.011475104, 0.0028776454, -0.0061341533, 0.008061484, 0.006613307, 0.0020716474, -0.006109671, 0.0026271741, 0.00707731, 0.028696937, 0.0027800757, 0.023865, 0.0016895417, -0.014934791, 0.040216904, 0.0046215444, -0.0055862824, -0.006989507, -0.0061541214, -0.005099175, -0.0023583155, -0.024220387, 0.0042667408, -0.0058325212, 0.015358214, -0.0025558977, 0.0030779743, 0.00018593844, 0.025883332, 0.015202893, -0.03741629, -0.013019238, -0.023500266, 0.0124651315, -0.050032042, 0.04094905, ...]",1
16,Anticonsumption,"[-0.022477584, 0.0036739495, 0.012291948, -0.0072236676, 0.003764357, -0.025016276, 0.012985981, -0.006268739, -0.0045521585, -0.0010381355, -0.0043603056, 0.039055053, 0.0070561073, 0.023062753, -0.028608723, -0.005298428, 0.011578347, -0.024696179, -0.0175849, -0.0025409437, -0.01387831, 0.008160904, 0.018801572, -0.009282779, -0.018044349, 0.016798427, -0.0061958786, 0.006005861, -0.022681544, -0.01531192, -0.004703216, 0.027922247, -0.0034550352, -0.011018619, -0.0052073374, 0.0068552233, 0.025496772, 0.0021923322, 0.008330526, 0.0013591166, -0.0054605613, 0.002365284, 0.016168624, 0.0045748395, -0.019632215, 0.0057937154, -0.00096087076, -0.02171194, 0.029842554, -0.0051424857, -0.0074149696, -0.0129286535, -0.017604686, -0.034504913, -0.010748338, 0.015895817, -0.012692672, -0.007863778, 0.0069235004, -0.0083334, 0.020346304, 0.002337122, -0.008329173, 0.02011764, 0.026229225, -0.0042799083, -0.015473554, 0.01838112, -0.012235214, 0.005112015, -0.013204487, 0.024074899, -0.0027325628, 0.033518612, -0.011821434, 0.009654809, 0.02201902, -0.03761056, -0.00015438865, 0.019461904, -0.019313723, 0.0040772, -0.0009764564, -0.000840242, -0.0047618756, -0.014509122, -0.0010756779, -0.026974956, -0.004269645, -0.009806287, -0.011158184, -0.0021346598, 0.023027645, 0.014994702, -0.018984042, -0.005916492, -0.013762516, -0.0022816912, -0.05634011, 0.04268157, ...]",1
25,Austin,"[0.0060520344, 0.006788532, 0.025984237, 0.006586112, 0.0065978845, -0.01956996, 0.006377114, -0.02494813, -0.008559187, -0.0031299682, 0.017262133, 0.010938329, 0.013373616, 0.0051856874, -0.010141937, -0.0072025363, 0.014725178, -0.029835658, -0.011648055, 0.002375717, -0.010861316, 0.017871477, -0.0020556997, 0.014115174, -0.008388866, 0.018532101, -0.013822462, 0.0129696885, -0.018526742, -0.00498112, -0.011858051, 0.040165477, 0.0031268774, 0.009252624, 0.012718369, 0.00021709027, 0.021509167, -0.0071818843, 0.015974987, 0.00066311815, -0.0013249946, -0.016884804, 0.008115332, 0.009239311, -0.012531657, 0.0055770637, -0.012001957, -0.021469133, 0.035049874, -0.0141382795, 0.011365543, -0.010761786, 0.0026272328, -0.0086800605, -0.0076787826, 0.0076072263, -0.0074991807, -0.008125783, 0.010382127, 0.010737461, 0.012908555, 0.011166551, -0.029346874, 0.020506833, 0.012987025, -0.003683116, -0.02837827, -0.002021634, 0.0119922105, 0.0014771539, 0.013632246, 0.006393906, 0.010604181, 0.020190341, 0.005368974, 0.011721891, 0.017205596, -0.017075635, 0.021159438, 0.0015173693, -0.013958084, -0.019875485, -0.009511497, 0.0051114103, -0.004385428, -0.017188618, 0.0014511587, -0.012000091, 0.009410833, -0.00357612, -0.0146376435, 4.6807112e-05, 0.007994652, 0.005708124, -0.026468543, -0.014732648, -0.029378237, 0.0033598263, -0.05500799, 0.054564055, ...]",1
45,Calgary,"[0.0039175814, -0.0011089683, 0.012383794, -0.0016053244, 0.0048184725, -0.014537938, 0.0076694004, -0.011892564, -0.014268567, -0.007549343, 0.0070098545, 0.009392295, 0.0043126983, 0.012393009, 0.00029029648, -0.00040561683, 0.016553104, -0.027991267, -0.0028893906, 0.00038242075, -0.007571383, 0.0073836246, 0.012563666, 0.0025474357, -0.0045419596, 0.009510239, -0.010668381, 0.016328579, 0.0017010138, -0.011460289, -0.019580865, 0.0334169, 0.0019542535, 4.8017304e-05, 0.009105012, -0.00080827426, 0.010910238, 0.0027992649, 0.010225703, -0.009263323, 0.015114603, -0.0053404993, -0.0010219916, 0.004990325, -0.01323839, -0.009187842, -0.005965365, -0.014539016, 0.033909347, -0.009894983, -0.0061121536, -0.010684373, -0.004689749, -0.018587645, -0.009285596, 0.0045808456, -0.0100986445, -0.017677031, 0.008157092, 0.0066357907, 0.0026086725, 0.0077159475, -0.02746964, 0.011330083, 0.023649579, 0.0009545582, -0.01998771, 0.007307025, -0.00020363908, -0.0035640139, -0.0074482923, 0.011595325, 0.005387273, 0.0052413684, 0.0028213628, -0.0033424424, 0.01715979, -0.01694851, 0.019091085, 0.008396329, -0.010013246, -0.0090312315, -0.007867027, 0.016287949, -0.009159847, -0.016325755, -0.008076191, -0.0059232474, 0.012821651, 0.0014085028, -0.001974644, -0.011002961, 0.025490537, 0.0036528462, -0.00848993, -0.017701004, -0.015026466, 0.012999685, -0.04176495, 0.030820824, ...]",1
51,ChoosingBeggars,"[-0.00854468, 0.009017594, 0.015628416, -0.0014443231, 0.0017036148, -0.012618002, 0.02200273, -0.0061156484, 0.0030848347, -0.009453716, 0.0075990427, -0.0060886415, -0.0004908638, 0.0048521087, -0.01578211, 0.0030872899, 0.01906844, -0.032243345, -0.022294877, 0.012991467, -0.014633887, 0.0034558582, 0.012268178, 0.0065463036, 0.008084655, 0.0027246538, -0.0073699476, 0.0074524446, -0.014733541, 0.015762461, -0.0066767116, 0.03691161, 0.0017491237, 0.014425835, -0.005684846, -0.009983209, 0.01209594, 0.0077849515, -0.0015270901, -0.006581116, 0.00021729518, -0.00835564, 0.00047274033, 0.0016495192, -0.017224407, 0.0031129497, -0.0015520302, -0.0049503245, 0.0254968, -0.021367304, -0.013413188, -0.017233942, -0.014058713, -0.010701992, 0.004421716, 0.008449238, 0.007213225, 0.0033112247, 0.016576447, 0.012520637, -0.007552945, 0.012585261, -0.019580146, 0.02091511, 0.015198745, 0.009068569, -0.019342853, -0.003348143, -0.011372195, 0.007901952, 0.003303743, 0.010671471, 0.0041948403, 0.032822788, -0.008806115, 0.0017525406, 0.020931737, -0.013942515, 0.020595228, 0.015541048, -0.020993102, -0.016944775, -0.00083281245, 0.0036650794, 0.00087692, -0.020090418, 0.0013553825, -0.022198284, 0.014753862, -0.0041832817, -0.015461073, 0.004344428, 0.021376433, 0.0063154013, -0.03023618, -0.018105004, -0.02370081, 0.012268356, -0.06627255, 0.048099324, ...]",1
...,...,...,...
462,vancouver,"[0.0012016976, 0.0033216449, 0.033800922, 0.0030876254, 0.009841012, -0.008415565, 0.00045596113, -0.019122573, -0.018375108, -0.0067338827, 0.013034707, 0.024141116, -0.0032681131, 0.0154534485, -0.012911425, -0.0063603614, 0.029461177, -0.040751588, -0.0061508985, 0.007002774, -0.016453613, -0.001165167, -0.0011020623, 0.011581256, 0.0016755154, 0.013185632, -0.003991103, 0.02364366, -0.0045978725, -0.0037753037, -0.024370348, 0.026542086, -0.0035869677, 0.0037142262, 0.0049868915, -0.00028609906, 0.025541553, -0.011773793, -0.0099788485, -0.001070895, 0.012332676, -0.008960144, 0.0028732605, -0.010005134, -0.016636768, 0.004722366, 0.002890409, -0.0043323752, 0.032128192, -0.013904652, 0.010921481, -0.012561037, -0.0018613054, -0.018264372, 0.0027071482, 0.009984464, -0.007371828, -0.009725485, 0.01497405, 0.0010896804, 0.007999965, 0.0070493273, -0.026328487, 0.016983928, 0.013047343, 0.0036704422, -0.010889369, 0.0059916745, 0.0035046584, -0.0074381963, -0.0028284634, 0.012591747, 0.007712121, 0.014395437, -0.009365485, 0.00023653274, 0.01565088, -0.02289395, 0.013999442, 0.0050996384, 0.0028730708, -0.008731035, -0.0035060986, 0.012723039, -0.0020060786, -0.016662076, -0.0016084191, -0.005937409, 0.011571347, -0.0013079977, -0.008483567, 0.0038947784, 0.00838247, 0.006459404, -0.022586614, -0.01429994, -0.017612603, 0.01150308, -0.045423977, 0.034109175, ...]",1
465,vegan,"[-0.008826227, -0.0077433516, 0.007759572, 0.015259919, 0.0064690025, -0.014563111, -0.0010187018, -0.017187951, 0.00014100889, -0.019144848, 0.018007226, -0.0137337465, -0.01936817, 0.006307973, -0.015583236, -0.017577495, 0.03398315, -0.019714104, -0.005237851, 0.018548667, -0.021115048, 0.0054848953, 0.027439307, 0.008071061, -0.006822931, -0.0011625635, 0.008649574, 0.0023523641, -0.016347375, -0.012597248, -0.017241592, 0.029129677, 0.0019932408, -0.0012962857, -0.006065749, -0.0027464132, 0.02372443, -0.020901086, 0.015549342, -0.0054264185, 0.01821084, -0.015556407, 0.02325822, -0.012296316, -0.0045907167, 0.016059902, -0.01692029, -0.016590567, 0.033215903, -0.016533827, -0.024515152, -0.025142966, -0.011451312, -0.021812906, 0.004453156, 0.0042348495, -0.007027979, -0.01944054, 0.005108153, 0.007817123, 0.010129649, 0.009513789, -0.029465914, 0.02249874, 0.01245058, -0.010163717, -0.011701188, 0.008286508, -0.022134362, -0.0011868199, -0.009861025, 0.00891448, 0.01937579, 0.031891905, -0.021846892, 0.02044588, 0.016014453, -0.032224063, 0.019544853, 0.017347634, -0.009675782, -0.00031192467, -0.001104045, 0.010945015, -0.007821455, -0.005331035, -0.0036398098, -0.0030663884, 0.012895042, -0.0026776197, -0.0011294176, 0.00027083824, 0.022049796, 0.021285774, -0.023241373, -0.013306851, -0.012108968, 0.0023783466, -0.059724376, 0.025974954, ...]",1
468,walmart,"[-0.00983501, -0.00505963, 0.012081677, 0.012069675, 0.014651071, -0.026871752, 0.016531214, -0.018301697, 0.0009373442, -0.019578211, 0.008799486, 0.01303582, -0.0013530138, 0.018057566, -0.002129478, -0.019130062, 0.005503925, -0.028313046, -0.022348395, 0.0035738233, -0.02074424, 0.0022338666, -0.0040268376, 0.018445643, -0.016669145, 0.019231688, -0.016998896, 0.013369231, -0.020855943, -0.018948847, -0.01020863, 0.017059451, -0.0028195016, 0.012455167, 0.007549667, 0.0019133011, 0.012773856, -0.0054554525, 0.013959621, -0.009191189, 0.0075767403, -0.018782154, 0.008013369, 0.01789619, -0.01505394, 0.004063283, -0.002302436, -0.008453617, 0.032253046, -0.0025678098, -0.003906313, -0.014842303, -0.00790542, -0.0021530937, 0.006890772, 0.021802472, 0.0041587492, -0.0044110706, 0.013407679, 0.022538442, 0.0011493801, 0.008283485, -0.01930805, 0.024380174, 0.01134811, 0.0043842797, -0.013810206, 0.0071848636, -0.001890394, 0.005224576, -0.005587296, 0.0081338035, -0.00104457, 0.015104665, -0.0037974222, 0.016313426, 0.013976972, -0.036834218, 0.032664143, 0.012721184, -0.032520637, -0.022131119, -0.0021441882, 0.013622066, 0.0028997213, -0.00023499125, 0.004284537, -0.0026557462, 0.02171955, -0.009685198, -0.00043383084, -0.01734113, 0.016081056, 0.011459916, -0.032965243, -0.016637592, -0.02050315, 0.014203737, -0.052603327, 0.045308538, ...]",1
469,washingtondc,"[-0.0033738562, -0.0043716165, 0.018614773, 0.0059265117, 0.011939892, -0.012249142, -1.0484612e-05, -0.02857835, -0.010265917, -0.009916066, 0.005037734, 0.003588469, -0.0010638799, 0.00934707, -0.006481133, -0.0054219402, 0.013959235, -0.02694007, -0.010662752, 0.0086935675, -0.010525609, -0.0018612363, -0.0024787984, 0.004515039, -0.014119492, 0.0063565555, -0.014442339, 0.028285027, -0.01347996, -0.017194016, -0.010617998, 0.025255648, -0.0097777555, 0.011103527, 0.020503072, -0.0022371542, 0.013769981, -7.629665e-05, 0.007758738, -0.00077137566, 0.011774658, -0.0087014865, 0.007444388, 0.007912644, -0.014224894, 0.011756471, 0.0013284406, -0.02026353, 0.030461008, 0.00069812115, 0.005347634, -0.009963332, -0.004400783, -8.827923e-05, -0.001208062, 0.012229488, -0.008176931, -0.012172221, 0.0040198844, -0.0020279721, -0.0008009129, 0.010449425, -0.033925734, 0.025807211, 0.018443082, -0.006386557, -0.02177624, 0.0107666245, 0.007773668, -0.018270953, 0.010911579, 0.0125261815, 0.015635652, 0.012107569, 0.0038167099, 0.009679992, 0.008277197, -0.01515665, 0.01678469, 0.009029728, -0.010881081, -0.0193428, -0.005679638, 0.0064001917, -0.018678384, -0.010042458, -0.0020829458, -0.0026208705, 0.022905061, -0.0044399397, 3.48229e-05, 0.006433517, 0.019174572, 0.014749499, -0.03548337, -0.014591857, -0.013814963, 0.01205905, -0.05937377, 0.03930004, ...]",1


In [30]:
other = list(subreddit_embeddings[subreddit_embeddings['cluster']==1]['subreddit'])
print(other)

['AirForce', 'Anticonsumption', 'Austin', 'Calgary', 'ChoosingBeggars', 'Connecticut', 'Dallas', 'Denver', 'Detroit', 'Edmonton', 'Flipping', 'GifRecipes', 'InstacartShoppers', 'KitchenConfidential', 'LifeProTips', 'LosAngeles', 'NewOrleans', 'Portland', 'ProtectAndServe', 'QuadrigaCX', 'SaltLakeCity', 'SeattleWA', 'StLouis', 'TalesFromRetail', 'TalesFromYourServer', 'Target', 'Teachers', 'Truckers', 'askcarsales', 'australia', 'benzodiazepines', 'brisbane', 'britishproblems', 'btc', 'canadients', 'chicago', 'darknet', 'doordash', 'ethtrader', 'houston', 'ireland', 'jobs', 'juul', 'korea', 'legaladvice', 'london', 'melbourne', 'minnesota', 'nashville', 'newjersey', 'newzealand', 'nursing', 'nyc', 'perth', 'phoenix', 'pittsburgh', 'sanfrancisco', 'southafrica', 'starbucks', 'sydney', 'texas', 'toronto', 'uberdrivers', 'vancouver', 'vegan', 'walmart', 'washingtondc', 'weed']


In [31]:
l = 0
for s in other:
    l += len(get_text_from_subreddit(dataset, s))

print(l)

5839


In [32]:
clusters.loc[clusters['cluster']=='1', 'Number of samples'] = l

In [33]:
clusters

Unnamed: 0,cluster,topic label,Number of samples
0,0,Humor,13791.0
1,1,Other,5839.0
2,2,Sport,
3,3,Politics,
4,4,Love and relationship,
5,5,Film and Tv series,
6,6,Videogames,


### Cluster 2 (Sport)

In [34]:
subreddit_embeddings[subreddit_embeddings['cluster']==2] #sport

Unnamed: 0,subreddit,embedding,cluster
7,AFL,"[-0.012297828, -0.00060421904, -0.008414087, -0.018882267, 0.016993154, -0.0010358654, 0.04344437, -0.005131242, -0.008717113, 0.001504226, -0.033724684, -0.016960237, 0.00041312055, 0.020144595, -0.009974679, 0.009131653, 0.0016041128, -0.027630936, -0.0006431617, -0.010434832, -0.024380283, 0.02390933, -0.018400509, 0.0070949765, -0.014729497, 0.0056430106, -0.007975143, 0.027667882, -0.012850241, -0.017531455, -0.0062555773, 0.04199416, 0.018740445, 0.008254486, -0.018111434, -0.019052532, -0.010761204, 0.0039838892, -0.0077459337, -0.008689191, 0.02856639, -0.027199782, -0.00419254, 0.01986054, -0.01774702, -5.9711747e-05, -0.0018267494, -0.024870379, 0.009796092, 0.026341593, -0.02290974, -0.011651708, 0.0042538596, -0.016690113, 0.007830553, 0.034959678, -0.013920025, -0.0059651053, 0.0041742236, -0.007782833, 0.0018726853, 0.011532291, -0.05210236, -0.00023045059, 0.006417493, -0.020902317, -0.025449455, 0.022250658, 0.010743278, 0.013003652, 0.024687778, 0.0032347993, 0.011580168, -0.0060339463, 0.0012071735, 0.018033093, 0.004475032, -0.015810842, 0.03276503, 0.010764859, -0.0062289005, -0.023200057, -0.02276081, -0.00765531, -0.0023535984, 0.0047808033, -0.0076981834, -0.01919262, 0.008049161, -0.003773391, -0.019369952, -0.017437717, 0.031862304, 0.012674909, -0.0066905185, 0.010713278, -0.009491305, 0.007610279, -0.07093891, 0.07195311, ...]",2
24,AtlantaHawks,"[-0.0315351, 0.0014153741, -0.01976336, -0.038198095, 0.005658214, -0.019273447, 0.08157149, 0.00065174344, 0.017463373, -0.015803155, -0.041174695, -0.030677766, 0.007104755, 0.014713469, -0.0017132455, 0.014855582, 0.019448437, 0.013877335, -0.007706429, -0.012939681, -0.018769719, 0.02788166, -0.0033413805, -0.0032936125, -0.008951711, 0.004672571, 0.007185096, 0.017069176, -0.008688326, -0.031370047, -0.0009888352, 0.025514042, 0.014755453, 0.011058745, -0.027644863, -0.018373372, -0.03143744, 0.039328463, -0.00850512, -0.016942488, 0.03155596, -0.036294222, -0.0007055989, 0.024533655, -0.04051224, 8.5347034e-05, -0.014421494, -0.024255754, 0.03312818, 0.011838611, -0.047509953, -0.02074461, 0.013761496, 0.0064141206, 0.0066576516, 0.03934354, -0.0041241543, 0.0020466854, 0.01885187, 0.019672986, -0.0010653331, 0.009582471, -0.02474562, -0.016847542, 0.0061981296, -0.028462667, -0.007227445, 0.00048526478, 0.010221528, 0.030476172, 0.037682362, 0.025957564, -0.010543519, -0.022674538, -0.003867771, 0.022691103, -0.015963554, -0.031762287, 0.017123848, 0.02312578, -0.009901335, -0.016753137, -0.023263026, 0.004265947, -0.029990643, 0.03203627, -0.020615406, -0.0015990398, 0.009266502, -0.012815217, -0.013546166, -0.025056073, 0.033745747, 0.0015148405, -0.0016077937, 0.011543222, -0.010542994, -0.016444752, -0.08104232, 0.07143829, ...]",2
28,Barca,"[-0.025670474, 0.01977418, -0.01190819, -0.016351767, 0.0156090455, 0.0042018015, 0.069448605, 0.0008320357, 0.008617658, -0.017602244, -0.025469061, -0.01659686, 0.011655771, 0.020454764, -0.010726174, 0.010855266, -0.008453026, -0.009352836, -0.0042385855, -0.019094465, 0.003905233, 0.029229602, -0.010185215, 0.0015092741, -0.008361664, 0.015419983, -0.011307665, -0.00075201225, -0.0012750279, -0.043132674, 0.015588519, 0.034438457, 0.018698756, 0.01290723, -0.02843792, -0.015712414, -0.021037295, 0.020707333, -0.012740027, -0.019587459, 0.024416378, -0.03810207, -0.016648458, 0.022638189, -0.01706956, 0.0015465618, 0.009305166, -0.026018877, 0.019060668, 0.017649144, -0.04465892, -0.009309109, 0.0044891103, 0.005549996, -0.0046303235, 0.027086545, 0.0031662583, -0.009357456, 0.009259983, 0.037095785, -0.016005792, 0.022116352, -0.04561835, -0.0048254086, 0.0021618775, -0.046244983, -0.012419529, 0.0032831447, 0.007754066, 0.05484307, 0.022164145, 0.014832152, 0.0062472397, -0.018652376, -0.010860168, 0.004668108, -0.030769523, -0.024524374, 0.022709655, 0.012929849, -0.009054198, -0.028219758, -0.032289892, -0.008197346, 0.0027919444, 0.03463212, -0.012433803, -0.017071754, 0.007422681, -0.019346394, -0.018142432, -0.031083612, 0.04398463, 0.041468196, 0.004555261, 0.0065784627, 0.005463173, -0.00851715, -0.08467463, 0.075523786, ...]",2
54,CollegeBasketball,"[-0.018680967, 0.0050240406, 0.007676013, -0.024997827, 0.017052317, -0.013486994, 0.036636874, -0.013881697, 0.024971016, -0.005539044, -0.037447553, -0.010022927, 0.0033590393, 0.0075908964, -0.012459257, -0.00322582, 0.00976164, -0.022481767, -0.013329657, -0.006352337, -0.010901486, 0.028685391, -0.010027764, 0.002422173, 0.0041182633, 0.0030602915, 0.0015420662, 0.015421153, -0.009411643, -0.026801491, -0.014955542, 0.009305714, 0.018063068, -0.0061335266, -0.020974778, -0.013846225, -0.012998741, 0.010557299, -0.0053201546, -0.0009243839, 0.012896214, -0.026558608, 0.011249179, 0.026113516, -0.016143952, 0.007525232, -0.012441145, -0.020151116, 0.039784957, 0.0070035974, -0.016898634, 0.0035894671, 0.0027389135, -0.0009804486, 0.018618165, 0.039640505, 0.008732071, -0.008364339, 0.0048232386, 0.017313147, 0.0012993289, 0.008507857, -0.021332672, 0.005746133, 0.003081669, -0.016984643, -0.011843587, 0.013532731, -0.00377728, 0.028177736, 0.028148228, 0.003608714, 0.008188349, -0.0060091275, 0.010447035, 0.02457392, -0.016205367, -0.017056547, 0.03561046, 0.026797218, 0.00091892434, -0.03706311, -0.009895974, -0.01016575, -0.014835434, 0.008175764, -0.008549885, -0.004556548, 0.024641113, 0.0001872304, -0.024069564, -0.028158912, 0.03186371, 0.0128010735, 0.0022615013, 0.0012793092, -0.015172064, -0.013599458, -0.054782867, 0.070696265, ...]",2
68,DetroitPistons,"[-0.025840648, 0.0065118275, -0.0143436035, -0.032390594, 0.008790932, -0.012231713, 0.068754576, -0.00225043, 0.009761929, -0.025507014, -0.041825395, -0.013391923, 0.0029713707, 0.016249487, -0.00500844, 0.013140947, 0.018222664, 0.0042128875, -0.007245307, -0.012411947, -0.015471171, 0.037848502, -0.0044215475, -0.002925279, -0.006337614, 0.0006137614, -0.007708845, 0.012967926, -0.013038266, -0.038622476, 0.004831423, 0.023742832, 0.022219088, 0.011596796, -0.020323722, -0.019858751, -0.037580762, 0.03699109, -0.016860198, -0.020302186, 0.030614479, -0.040495537, -0.012535675, 0.019155236, -0.030100254, -0.001283363, -0.007745363, -0.026434349, 0.026387507, 0.0070077954, -0.045216873, -0.010863047, 0.01650679, 0.0040663253, 0.0064008576, 0.041301202, 0.0014692311, -0.0015604064, 0.01603685, 0.019667663, -0.00521267, 0.009221699, -0.03781341, -0.004431507, -0.0005177813, -0.030178657, -0.0040299413, 0.0021939501, 0.0029314314, 0.03411958, 0.04103335, 0.019695576, -0.0044710515, -0.027284885, -0.00011975534, 0.010701339, -0.01945743, -0.019406738, 0.029303128, 0.023749964, 0.006543688, -0.028969571, -0.0293735, -0.0043944116, -0.012924903, 0.04263489, -0.015847262, -0.010115497, 0.016994586, -0.0065090414, -0.020285899, -0.029983891, 0.041143324, 0.006058892, 0.012373875, 0.01370474, 0.00087988464, -0.014428469, -0.082390375, 0.07677034, ...]",2
81,EdmontonOilers,"[-0.020638283, 0.004433748, -0.0066108597, -0.02069466, 0.014647142, 0.002020198, 0.04584252, 0.0042161797, 0.018210156, -0.014348884, -0.048553277, -0.0063286554, -0.0025130634, 0.025089454, -0.0019537155, 0.0066398466, -0.0045672995, -0.004285278, -0.004614494, -0.0019209507, -0.016911663, 0.023351168, -0.01601038, -0.0034994695, 0.012767185, 0.0037239743, -0.0062742825, 0.016604505, -0.008625219, -0.025387576, -0.000113941525, 0.015614429, 0.015953524, 0.0044821333, -0.009435797, -0.0071955156, -0.027479354, 0.018408066, -0.0027050315, -0.0025056917, 0.030959675, -0.026151668, -0.0096919555, 0.019017795, -0.024591966, -0.0003398486, -0.015304601, -0.022306522, 0.02022604, 0.0062709833, -0.029214995, -0.0035491453, 0.011014784, -0.013678197, 0.022735853, 0.03915955, 0.00085911405, -0.01062171, 0.011569978, 0.01271693, -0.003980185, 0.007156391, -0.029048374, -0.004282899, -0.00804681, -0.021943638, -0.01562835, 0.00081591326, 0.0045524472, 0.027645333, 0.030531658, 0.012768078, -6.3764564e-05, -0.018611606, -0.0022749742, 0.009747898, -0.014608368, -0.01917855, 0.03173617, 0.01847302, 0.0037530595, -0.03136212, -0.01917748, 0.0006210979, -0.012423079, 0.019273477, -0.0018503293, -0.015110436, 0.017142372, -0.0010425788, -0.013060704, -0.02079388, 0.043504573, 0.009521007, -0.0022638114, 0.011449751, -0.0053135967, 0.0053341393, -0.070337266, 0.0691356, ...]",2
98,GoNets,"[-0.026044317, 0.0023744074, -0.009315581, -0.042990033, 0.013603565, -0.0042087804, 0.06998391, -0.0111228125, 0.021606375, -0.014405029, -0.049296193, -0.0191088, 0.009201086, 0.0113535, -0.01636492, 0.010639805, 0.02684836, -0.008739846, -0.019389285, -0.0054439357, -0.010955702, 0.02254499, -0.01327459, 0.0017213511, -0.0047770166, -0.0028068474, -0.0016746839, 0.014678269, -0.018379267, -0.038807664, -0.0025956822, 0.021419559, 0.022131268, 0.0073202336, -0.020294013, -0.009223073, -0.028954243, 0.02631628, -0.0033817359, -0.017254747, 0.024392644, -0.03911819, -0.0042671105, 0.029042214, -0.032943305, -0.0014538983, -0.019367753, -0.019912783, 0.02498949, 0.010525223, -0.03329277, -0.010695502, 0.01285219, -0.006241984, 0.009176051, 0.039848387, 0.0053874063, -0.005356286, 0.0050806943, 0.009401587, -0.0013156055, 0.013961479, -0.028383365, 0.00046944548, 0.0051505035, -0.02246885, -0.003130966, 0.017754212, 0.00014742005, 0.015536888, 0.038896844, 0.01793638, -0.002004517, -0.016637603, 0.0026111219, 0.029805677, -0.021514053, -0.025192982, 0.027392596, 0.018679697, 0.012483671, -0.028944029, -0.021565372, 0.0064252377, -0.024128707, 0.032874797, -0.031097729, 0.005596347, 0.019856565, -0.005863518, -0.01829903, -0.03387993, 0.026109215, 0.00090525055, -0.004941762, 0.010731741, -0.007256151, -0.019928135, -0.07489673, 0.074262194, ...]",2
100,Gunners,"[-0.0008371219, 0.012327413, -0.012331944, -0.027636074, 0.013076207, -0.0013755175, 0.05995351, 0.008867192, 0.0071127256, -0.01529069, -0.02650993, -0.026658325, 0.008260184, 0.022380203, -0.004601568, 0.014218153, 0.0012265084, -0.018890714, 0.004473829, -0.0061960164, -0.017997937, 0.025937803, -0.0031970246, -0.007229589, -0.008614224, -0.0011111012, 0.008337077, 0.015412476, -0.002338598, -0.024614654, 0.00976199, 0.02826274, 0.015917094, 0.008177385, -0.0260805, 0.0036615382, -0.012534314, 0.016104273, -0.010760415, -0.019210543, 0.016494237, -0.030322429, -0.008558897, 0.019625898, -0.014513463, 0.0060878387, 0.01001775, -0.019758748, 0.008431796, 0.010714767, -0.033592556, -0.005299739, 0.018421646, -0.010326102, 0.0017912213, 0.032971866, -0.0020891675, 0.0013426918, 0.012644883, 0.015090036, -0.0026767463, 0.019711368, -0.036141377, -0.004378719, -0.002647097, -0.034010895, -0.007154265, 0.0124226445, 0.0017428569, 0.04001981, 0.03059025, 0.004536058, 0.004984355, -0.012007103, -0.005693616, 0.010983267, -0.016467873, -0.012862711, 0.02739064, 0.006338315, -0.005785048, -0.030131197, -0.017344, -0.011469719, -0.006741785, 0.01791924, -0.008847877, -0.03151181, 0.006966476, -0.004505358, -0.0066578966, -0.021774696, 0.026176682, 0.024026757, 0.00028981201, 0.010467063, 0.003400119, 0.011303666, -0.083982006, 0.06660475, ...]",2
101,Habs,"[-0.03385072, 0.015808677, -0.011913222, -0.018269548, 0.019863658, -0.0026662764, 0.060549274, 0.00027133638, 0.025186334, -0.012300042, -0.040158473, -0.0075150086, -0.0040116995, 0.028393187, -0.010120122, 0.007532525, 0.0036390147, -0.0074641933, -0.013581182, -0.0051170844, -0.021840246, 0.02413032, -0.0059279096, 0.0018216184, -0.0048675067, 0.013628336, 0.006997444, 0.00977985, -0.015067218, -0.02277832, 0.0033514828, 0.02258833, 0.013662289, -0.005079316, -0.023411812, -0.01230144, -0.020286081, 0.015928956, -0.002353754, 0.0008736821, 0.018257815, -0.027293406, -0.01761391, 0.01741835, -0.021417568, 0.011901755, -0.009842293, -0.019923698, 0.029680923, 0.017439498, -0.04109692, -0.008387237, 0.022446273, -0.010378141, 0.018049547, 0.039158814, -0.0010654493, -0.014233048, 0.02683484, 0.008464225, 0.007776034, 0.020678947, -0.024230788, 0.0004982081, 0.0048909565, -0.033550493, -0.015084317, 0.008945714, 0.005063146, 0.031584214, 0.036126625, 0.010332564, 0.00036130604, -0.01933797, -0.008544689, 0.015847404, -0.010165826, -0.026448794, 0.021520097, 0.014995703, 0.0010321799, -0.034599803, -0.007950789, 0.005016368, -0.007757536, 0.020404004, -0.0047566537, -0.0074493974, 0.00072986796, -0.0066607078, -0.008724556, -0.0027164666, 0.02715605, 0.019284744, 0.0018491517, -0.0024680288, 0.00012084659, -0.017618183, -0.06749384, 0.08314256, ...]",2
130,LigaMX,"[-0.0035414454, 0.010510485, -0.014112104, -0.020898214, 0.0218455, -0.0028295966, 0.0456192, 0.017286364, 0.02048335, -0.00070943433, -0.014533941, -0.022326523, 0.0067074713, 0.034944076, -0.005816949, -0.0034123738, -0.022615338, -0.015986068, -0.00845088, -0.0041169645, -0.011411264, 0.021789396, -0.009992926, -0.001766207, 0.0033134534, -0.017967287, 0.0015030525, 0.011219023, -0.02088181, -0.015305308, 0.004143439, 0.029124523, 0.017118733, -0.0052506514, -0.030732077, -0.012176376, -0.009859576, 0.010935987, -0.0055640577, -0.0057810573, 0.0144312205, -0.0341319, -0.0086763045, 0.022363843, -0.011159669, -0.013852527, -0.0023965677, -0.019644551, 0.013019602, 0.012076731, -0.026112763, -0.0048208553, 0.0074168793, -0.00986836, 0.0080959285, 0.0383164, 0.0004611541, -0.001380672, 0.018361893, 0.019489625, 0.0037564058, 0.011438809, -0.03383267, -0.004455199, -0.010815421, -0.034191616, -0.013381499, 0.005023172, -0.008855438, 0.04439558, 0.035141908, 0.0037694022, 0.0062395744, -0.0068307007, 0.0044244714, 0.024892846, -0.01890117, -0.013094817, 0.023386285, 0.0005611897, 0.0026774893, -0.037654024, -0.014838167, -0.015616983, 0.0049582357, 0.008241502, -0.004423734, -0.020451507, 0.016929664, -0.0025167959, -0.0036032288, 0.00076460885, 0.02653332, 0.03294475, 0.0013727702, -0.008411508, 0.005661755, 0.017732881, -0.06666969, 0.070778206, ...]",2


In [35]:
# cluster 8 -> cluster 2
subreddit_embeddings.loc[subreddit_embeddings['cluster']==8, 'cluster'] = 2

In [36]:
subreddit_embeddings[subreddit_embeddings['cluster']==2] #sport

Unnamed: 0,subreddit,embedding,cluster
1,49ers,"[-0.018552188, -0.0028821093, -0.010455885, -0.031844795, 0.010581031, -0.005172225, 0.046305984, 0.011533315, 0.01129894, -0.013584505, -0.04259195, 0.0033355802, 0.009804972, 0.010599564, -0.0006873135, 0.008815442, 0.008406114, 0.00026762337, -0.0016090338, -0.014573603, -0.01664769, 0.032211084, -0.016571626, -0.004707403, 0.0015726627, -0.012411625, -0.005187421, 0.02579788, -0.02105614, -0.017291166, 0.0055347923, 0.031248076, 0.021179467, 0.000675637, -0.023365647, -0.010130943, -0.024683941, 0.027590336, 0.00096075144, -0.00077199616, 0.014498017, -0.031289928, -0.0017865194, 0.020937946, -0.040670794, -0.011148991, -0.014191534, -0.022238376, 0.016479896, 0.005604676, -0.026643623, -0.007814203, 0.012288588, 0.0015518678, 0.016842334, 0.04356018, -0.018324012, -0.004788586, -0.005676649, 0.021088494, -0.0073766876, 0.014238733, -0.026548555, -0.009252305, -0.0009091181, -0.0142886685, -0.00739615, 0.0075662225, -0.004795422, 0.020544996, 0.031024555, 0.0199509, -0.0011819891, -0.006068955, 0.010829666, 0.011118542, -0.00401216, -0.009677233, 0.031046646, 0.00692493, -0.012846708, -0.030788206, -0.02258014, -0.0055785608, -0.021892007, 0.031249726, -0.013291126, -0.020846916, 0.013081853, 0.00636993, -0.010408722, -0.040105782, 0.030874673, -0.0016292842, 0.0044602375, 0.00819574, 0.0022628978, -0.008834058, -0.074583605, 0.0638592, ...]",2
7,AFL,"[-0.012297828, -0.00060421904, -0.008414087, -0.018882267, 0.016993154, -0.0010358654, 0.04344437, -0.005131242, -0.008717113, 0.001504226, -0.033724684, -0.016960237, 0.00041312055, 0.020144595, -0.009974679, 0.009131653, 0.0016041128, -0.027630936, -0.0006431617, -0.010434832, -0.024380283, 0.02390933, -0.018400509, 0.0070949765, -0.014729497, 0.0056430106, -0.007975143, 0.027667882, -0.012850241, -0.017531455, -0.0062555773, 0.04199416, 0.018740445, 0.008254486, -0.018111434, -0.019052532, -0.010761204, 0.0039838892, -0.0077459337, -0.008689191, 0.02856639, -0.027199782, -0.00419254, 0.01986054, -0.01774702, -5.9711747e-05, -0.0018267494, -0.024870379, 0.009796092, 0.026341593, -0.02290974, -0.011651708, 0.0042538596, -0.016690113, 0.007830553, 0.034959678, -0.013920025, -0.0059651053, 0.0041742236, -0.007782833, 0.0018726853, 0.011532291, -0.05210236, -0.00023045059, 0.006417493, -0.020902317, -0.025449455, 0.022250658, 0.010743278, 0.013003652, 0.024687778, 0.0032347993, 0.011580168, -0.0060339463, 0.0012071735, 0.018033093, 0.004475032, -0.015810842, 0.03276503, 0.010764859, -0.0062289005, -0.023200057, -0.02276081, -0.00765531, -0.0023535984, 0.0047808033, -0.0076981834, -0.01919262, 0.008049161, -0.003773391, -0.019369952, -0.017437717, 0.031862304, 0.012674909, -0.0066905185, 0.010713278, -0.009491305, 0.007610279, -0.07093891, 0.07195311, ...]",2
24,AtlantaHawks,"[-0.0315351, 0.0014153741, -0.01976336, -0.038198095, 0.005658214, -0.019273447, 0.08157149, 0.00065174344, 0.017463373, -0.015803155, -0.041174695, -0.030677766, 0.007104755, 0.014713469, -0.0017132455, 0.014855582, 0.019448437, 0.013877335, -0.007706429, -0.012939681, -0.018769719, 0.02788166, -0.0033413805, -0.0032936125, -0.008951711, 0.004672571, 0.007185096, 0.017069176, -0.008688326, -0.031370047, -0.0009888352, 0.025514042, 0.014755453, 0.011058745, -0.027644863, -0.018373372, -0.03143744, 0.039328463, -0.00850512, -0.016942488, 0.03155596, -0.036294222, -0.0007055989, 0.024533655, -0.04051224, 8.5347034e-05, -0.014421494, -0.024255754, 0.03312818, 0.011838611, -0.047509953, -0.02074461, 0.013761496, 0.0064141206, 0.0066576516, 0.03934354, -0.0041241543, 0.0020466854, 0.01885187, 0.019672986, -0.0010653331, 0.009582471, -0.02474562, -0.016847542, 0.0061981296, -0.028462667, -0.007227445, 0.00048526478, 0.010221528, 0.030476172, 0.037682362, 0.025957564, -0.010543519, -0.022674538, -0.003867771, 0.022691103, -0.015963554, -0.031762287, 0.017123848, 0.02312578, -0.009901335, -0.016753137, -0.023263026, 0.004265947, -0.029990643, 0.03203627, -0.020615406, -0.0015990398, 0.009266502, -0.012815217, -0.013546166, -0.025056073, 0.033745747, 0.0015148405, -0.0016077937, 0.011543222, -0.010542994, -0.016444752, -0.08104232, 0.07143829, ...]",2
28,Barca,"[-0.025670474, 0.01977418, -0.01190819, -0.016351767, 0.0156090455, 0.0042018015, 0.069448605, 0.0008320357, 0.008617658, -0.017602244, -0.025469061, -0.01659686, 0.011655771, 0.020454764, -0.010726174, 0.010855266, -0.008453026, -0.009352836, -0.0042385855, -0.019094465, 0.003905233, 0.029229602, -0.010185215, 0.0015092741, -0.008361664, 0.015419983, -0.011307665, -0.00075201225, -0.0012750279, -0.043132674, 0.015588519, 0.034438457, 0.018698756, 0.01290723, -0.02843792, -0.015712414, -0.021037295, 0.020707333, -0.012740027, -0.019587459, 0.024416378, -0.03810207, -0.016648458, 0.022638189, -0.01706956, 0.0015465618, 0.009305166, -0.026018877, 0.019060668, 0.017649144, -0.04465892, -0.009309109, 0.0044891103, 0.005549996, -0.0046303235, 0.027086545, 0.0031662583, -0.009357456, 0.009259983, 0.037095785, -0.016005792, 0.022116352, -0.04561835, -0.0048254086, 0.0021618775, -0.046244983, -0.012419529, 0.0032831447, 0.007754066, 0.05484307, 0.022164145, 0.014832152, 0.0062472397, -0.018652376, -0.010860168, 0.004668108, -0.030769523, -0.024524374, 0.022709655, 0.012929849, -0.009054198, -0.028219758, -0.032289892, -0.008197346, 0.0027919444, 0.03463212, -0.012433803, -0.017071754, 0.007422681, -0.019346394, -0.018142432, -0.031083612, 0.04398463, 0.041468196, 0.004555261, 0.0065784627, 0.005463173, -0.00851715, -0.08467463, 0.075523786, ...]",2
36,Boxing,"[-0.016453667, 0.024034603, -0.012349807, -0.01811127, 0.010011467, 0.0067405724, 0.061220676, 0.0098949885, -0.00093790994, -0.012825096, -0.019498415, -0.027648546, 0.013723448, 0.020639708, -0.005030506, 0.016505033, 0.017510643, -0.004745305, -0.008042011, -0.0028181965, -0.0069949217, 0.03874862, 0.014211958, 0.0038811564, -0.004206443, 0.0030187054, -0.002934389, 0.012460631, 0.0031424412, -0.031237002, 0.018939903, 0.023809787, 0.015560185, 0.019366277, -0.043839354, -0.023558613, -0.013476645, 0.04575331, -0.015457641, -0.01759991, 0.0079095075, -0.024191178, 0.00020668072, 0.007247034, -0.004867449, -0.009188482, -0.0032514557, -0.026427858, 0.015240621, -0.011592402, -0.02866397, -0.026891485, 0.017529383, -0.006111289, 0.00602622, 0.015248967, 0.011851297, -0.007665477, 0.00883409, 0.017492658, -0.008040987, 0.020585742, -0.036998343, 0.0063018394, 0.010843659, -0.015907934, -0.001636974, 0.0032885787, 0.001196225, 0.047943946, 0.0373559, 0.0014175706, 0.0077665667, -0.020848313, -0.0070451815, 0.005743269, -0.015286518, -0.01250948, 0.034268774, 0.028501553, 0.0048776143, -0.020804005, -0.03787799, -0.009965939, 0.0022103444, 0.01809469, -0.008715981, -0.021932919, 0.005114549, -0.0067290124, -0.03505924, -0.008848756, 0.035429224, 0.026937382, -0.006076051, 0.0106513705, -0.0017725074, 0.0102474885, -0.084782295, 0.06329804, ...]",2
...,...,...,...
445,tennis,"[-0.024372179, 0.0062713884, -0.001255515, -0.025525928, 0.004810365, -0.0018543728, 0.059027977, -0.009292617, -0.00060632447, -0.020435894, -0.04270647, -0.026706655, -0.00079158304, 0.014611321, -0.010133948, 0.012513802, 0.024283974, -0.00066954776, -0.00045277167, 0.004515205, -0.003839591, 0.015666109, 0.00214833, 0.0013297108, -0.0042390185, 0.012380072, -0.0067139687, 0.0070547224, -0.0075504305, -0.042952448, -0.0017949366, 0.033230197, 0.009673275, 0.015590619, -0.02290203, -0.014858525, -0.033929445, 0.015260747, -0.0113073345, -0.0139502715, 0.013881207, -0.040799398, 1.9725114e-05, 0.016339997, -0.025045047, 0.007183481, 0.0042616753, -0.015316532, 0.017699411, -0.0035856783, -0.040686533, -0.01953022, 0.01213984, -0.004365769, -0.0022297832, 0.022260727, 0.008528306, -0.011940894, 0.011954939, 0.016666837, -0.007555673, 0.017764414, -0.0438973, 0.014315516, 0.0064267265, -0.032546032, 0.010067954, 0.010694994, 0.008489603, 0.04080997, 0.024259048, 0.026250921, 0.012440357, -0.02114489, 0.0016467036, 0.023235366, -0.018178333, -0.033701327, 0.025058711, 0.029699884, -0.00285006, -0.031483233, -0.033561792, 0.0049318294, 0.0011164899, 0.016855825, -0.017037902, -0.017443074, 0.004149442, -0.018147899, -0.029713348, -0.018365365, 0.03249344, 0.03308988, -0.006595588, 0.015774494, -0.011956704, -0.0026772686, -0.06358255, 0.06725147, ...]",2
451,timberwolves,"[-0.025223648, 0.008534533, -0.0053338236, -0.03216148, 0.0062842504, -0.0079666935, 0.06376154, -0.0061174165, 0.008614808, -0.015440084, -0.030756462, -0.013775907, 0.01070252, 0.0149492705, -0.0008446843, 0.010492005, 0.012118318, 0.0069995383, -0.013277718, -0.010899733, -0.021062352, 0.027524987, -0.007701341, -0.0032083662, -0.0059814556, 0.0030333155, 0.004714159, 0.00915393, -0.009936404, -0.03397375, -0.0017384407, 0.022670707, 0.016290177, 0.0085054245, -0.027882792, -0.017355733, -0.023645483, 0.026530886, -0.010424447, -0.0118588405, 0.02213132, -0.036656957, -0.006219085, 0.022957643, -0.030274592, 0.0063820435, -0.007449204, -0.025085716, 0.029265799, 0.012372667, -0.043334432, -0.008443104, 0.012179823, -0.0030171263, 0.0064036297, 0.036673117, 0.008296623, 0.0047851796, 0.01579871, 0.013383803, -0.005764827, 0.01410198, -0.03429464, -0.00030581263, 0.004555683, -0.022816142, -0.008416496, -0.00061417, 0.006886143, 0.034057006, 0.03148559, 0.022787549, 9.5017574e-05, -0.01985145, -0.0051818797, 0.012140484, -0.01521095, -0.024175962, 0.028460277, 0.02646624, -0.00096675334, -0.027340066, -0.026682273, 0.0048755617, -0.013876583, 0.022168007, -0.018442776, -0.010962997, 0.009752105, -0.004071168, -0.0110159675, -0.022116434, 0.033911657, 0.010285706, -0.004813693, 0.0035959813, -0.0060048252, -0.01218506, -0.0744977, 0.078197345, ...]",2
454,torontoraptors,"[-0.021062331, 0.00039484794, -0.0027505693, -0.033393804, 0.001685844, -0.011070273, 0.059785295, -0.0011132321, 0.011732092, -0.00919254, -0.03199759, -0.011199359, -0.00047335448, 0.021752749, -0.0016608217, 0.0077858376, 0.007282823, 0.0020900678, -0.01778307, -0.008837416, -0.017514806, 0.026987797, -0.00045571895, -0.0053382595, -0.0084301075, -0.0042044697, 0.0046929056, 0.014839552, -0.0071267835, -0.02410539, 0.004463151, 0.019591853, 0.019009637, 0.009052632, -0.02193298, -0.008116461, -0.02581522, 0.030682558, -0.008053501, -0.013522742, 0.02340096, -0.03621172, -0.0048863096, 0.017252909, -0.022958752, 0.0074323225, -0.01574126, -0.029353019, 0.032231998, 0.015502083, -0.03382825, -0.009637226, 0.020625906, -0.011641521, 0.011457269, 0.038345244, -0.002973955, 0.005858257, 0.017529348, 0.011302512, 0.0019156884, 0.015481558, -0.030513363, -0.0030153007, 0.003927538, -0.026691847, -0.0024206182, 0.010466979, -0.0019179675, 0.03493968, 0.029719232, 0.017573781, -0.0012895653, -0.009244277, -0.0007665908, 0.015161546, -0.013877744, -0.020100478, 0.034037087, 0.020244787, 0.004438662, -0.029398534, -0.026048584, -0.0007144673, -0.022239424, 0.017579528, -0.013604993, -0.012178799, 0.0055500674, -0.0008243558, -0.011435638, -0.015288618, 0.031847343, 0.006106518, -0.005582262, 0.0056646476, -0.0086600445, -0.009199227, -0.07355235, 0.07329941, ...]",2
474,wildhockey,"[-0.035899404, 0.008537683, 0.002723122, -0.01354465, 0.015206787, -0.01739036, 0.041413765, -0.006705522, 0.0128187025, -0.0169222, -0.03223024, -0.0007925781, 0.0064984146, 0.008261347, -0.012778828, 0.0029440718, -0.00064815755, -0.011490142, -0.009482643, -0.005342592, -0.018420527, 0.020211294, -0.0118703935, 0.01123543, 0.0035628295, 0.009326491, -0.012522174, 0.021424737, -0.015072015, -0.022126557, -0.013816358, 0.02310413, 0.011433779, 0.008570674, -0.002295053, -0.010576408, -0.010844547, 0.009312072, -0.0056997095, 0.002789678, 0.019957973, -0.026355004, -0.004764272, 0.024984552, -0.02773182, 0.005070208, -0.0048605185, -0.01929536, 0.021779025, 0.013345055, -0.02185693, -0.021280648, 0.0031330574, -0.003410939, 0.008944187, 0.032138318, -0.008624724, -0.021563478, 0.012221255, -0.0015292908, 0.012808924, 0.01909714, -0.028381236, 0.009525667, 0.0022637688, -0.031009283, -0.012659025, 0.009040735, 0.0023431957, 0.0149478, 0.031422917, 0.01992395, -0.0038266936, -0.002482245, -0.008181413, 0.017824125, -0.0060474533, -0.028581169, 0.028021647, 0.022172252, -0.0036501896, -0.028238876, -0.009523366, 0.012886267, -0.011317124, 0.023009172, -0.0070414427, -0.00830086, 0.0034618448, -0.016836481, -0.023179818, -0.013425093, 0.025539227, 0.010882771, -0.016232245, 0.0047058575, 0.000118122014, 0.00041388648, -0.06841274, 0.06966597, ...]",2


In [37]:
sport = list(subreddit_embeddings[subreddit_embeddings['cluster']==2]['subreddit'])
print(sport)

['49ers', 'AFL', 'AtlantaHawks', 'Barca', 'Boxing', 'Braves', 'Browns', 'CFB', 'CollegeBasketball', 'DenverBroncos', 'DetroitPistons', 'Dodgers', 'EdmontonOilers', 'GoNets', 'GreenBayPackers', 'Gunners', 'Habs', 'LigaMX', 'MLS', 'MMA', 'Mavericks', 'MkeBucks', 'NYGiants', 'NYKnicks', 'NYYankees', 'NewYorkIslanders', 'NewYorkMets', 'OrlandoMagic', 'OttawaSenators', 'Padres', 'ScottishFootball', 'TeamSolomid', 'canes', 'canucks', 'chelseafc', 'chicagobulls', 'denvernuggets', 'detroitlions', 'devils', 'fantasyfootball', 'goldenknights', 'hawks', 'hockey', 'leafs', 'minnesotavikings', 'nba', 'nrl', 'nyjets', 'penguins', 'realmadrid', 'ripcity', 'rugbyunion', 'sabres', 'soccer', 'sports', 'sportsbook', 'steelers', 'tennis', 'timberwolves', 'torontoraptors', 'wildhockey', 'winnipegjets']


In [38]:
l = 0
for s in sport:
    l += len(get_text_from_subreddit(dataset, s))

print(l)

7042


In [39]:
clusters.loc[clusters['cluster']=='2', 'Number of samples'] = l

In [40]:
clusters

Unnamed: 0,cluster,topic label,Number of samples
0,0,Humor,13791.0
1,1,Other,5839.0
2,2,Sport,7042.0
3,3,Politics,
4,4,Love and relationship,
5,5,Film and Tv series,
6,6,Videogames,


### Cluster 3 (Politics)

In [41]:
subreddit_embeddings[subreddit_embeddings['cluster']==3] #politics

Unnamed: 0,subreddit,embedding,cluster
6,ABoringDystopia,"[0.0072658556, 0.015259034, 0.014032003, -0.0026088099, 0.019907791, -0.008377345, 0.02459876, -0.01804409, 0.0062713344, 0.0061978973, 0.013386116, 0.022665164, 0.010316749, -0.0015940014, 0.0025742946, -3.6364767e-05, 0.013361589, -0.034753103, -0.020867964, 0.016733149, -0.016676554, 0.020626614, 0.015496618, 0.013178944, -0.005369803, 0.006652038, -0.010248697, 0.008933416, -0.007739651, 0.015044638, -0.005429787, 0.017159726, 0.0055317404, 0.012057486, -0.0071740556, 0.018146131, 0.042175956, 0.005336225, 0.017161693, -0.0073676393, 0.010539883, -0.026278099, 0.01450582, 0.0039401283, 0.0010339229, 0.0020493097, -0.010051464, -0.017349267, 0.018506305, -0.039857473, 0.022366753, -0.02585911, -0.0061835772, -0.016552188, 0.0049012583, -0.0038619016, 0.0050200922, 0.0028201798, 0.0029595052, 0.018895317, -0.024579369, 0.012161625, -0.014825955, 0.013697609, 0.0310916, 0.009155423, 0.0064553893, 0.010493841, -0.0008182162, 0.018587219, -0.0008696722, 0.0076991287, 0.01170394, 0.021246895, -0.016088368, -0.006009507, 0.005809683, -0.0033061919, 0.02356801, 0.008883669, 0.0048602843, -0.017486397, -0.010509205, 0.013380062, 0.0037136371, -0.015719835, 0.00037063443, -0.025682347, 0.024979686, 0.01049575, -0.027280547, -0.00583275, 0.018100834, 0.001029627, -0.010074375, -0.015081249, -0.016657013, 0.014293071, -0.05206539, 0.044596367, ...]",3
10,AdviceAnimals,"[-0.0051695043, 0.006849251, 0.013661254, -0.0010641126, 0.015339205, -0.015010577, 0.034139685, -0.0055909953, 0.007780489, -0.014037821, 0.0055802613, 0.0060521825, 0.0063170874, 0.0055033355, 0.0002718995, 0.001892615, 0.013668114, -0.030221704, -0.01733655, 0.015732726, -0.014114725, 0.011135993, 0.013905521, 0.0036650526, -0.008508468, 0.0032762887, -0.013016727, -0.00039120743, -0.016061142, 0.013217792, -0.0054501104, 0.03323616, 0.0003281428, 0.013338425, -0.008042652, -0.0048774695, -5.864447e-05, 0.013929911, 0.0057244855, -0.012236214, 0.011825076, -0.014381618, 0.007467672, 0.0048342803, -0.0060412977, -0.0010126518, -0.0030128935, -0.003756019, 0.022555742, -0.014385543, -0.014289804, -0.011750639, -0.0034092658, 0.0010214138, 0.0025244977, -0.0037446509, -0.005667478, 0.010908436, 0.010459744, 0.01610493, -0.011091395, 0.010760288, -0.0124276215, 0.017864978, 0.018195393, 0.019413866, -0.009572754, 0.0045994017, -0.01776303, 0.029920887, -0.0007182735, 0.012463987, 0.0042761043, 0.031219553, -0.007880627, -0.0045077195, 0.010293487, -0.012239668, 0.024942437, 0.024614526, -0.01164216, -0.0008971367, -0.002635425, 0.0069949366, 0.004741714, -0.014646423, -0.004620522, -0.018490307, 0.011394632, 0.0069173994, -0.02089605, -0.017878914, 0.036224656, 0.006270936, -0.0076630735, -0.014028824, -0.013569522, 0.02009956, -0.056344427, 0.048553184, ...]",3
12,Anarchism,"[0.0038602331, -0.003896529, 0.003364831, -0.0018519738, 0.012476095, -0.007551637, 0.020387134, -0.019857721, -0.008352634, 0.0012901434, 0.019419797, 0.013532631, 0.009490641, 0.008527075, -0.015732242, 0.012130169, 0.007298019, -0.037656333, -0.022869932, 0.019773219, -0.012847048, 0.015796239, 0.021516085, 0.0148632135, -0.013224804, 0.0023302343, -0.0030405107, 0.0065966006, -0.008375858, 0.010978446, -0.010173895, 0.020827103, -0.006934311, 0.018017512, 0.001334022, -0.0062628067, 0.03593125, 0.015379562, 0.01354877, -0.015654838, 0.0126664275, -0.016770411, 0.009698516, 0.0023081747, -0.010528195, 0.014360323, -0.017750887, -0.008833526, 0.014734573, -0.036007695, -0.012384122, -0.015032934, -0.00567076, -0.00013682712, 1.2298597e-06, -0.0026430006, 0.0012919146, 0.006642919, 0.0127333235, 0.0017261421, 0.0010407649, 0.0038328734, -0.020758914, 0.017906863, 0.028916392, -0.0012309994, -0.013489099, 0.014971932, -0.021593647, 0.012858097, 0.008065887, 0.0067839916, 0.019561654, 0.03090938, -0.015500818, -0.012622542, 0.012600643, -0.013873089, 0.017056387, 0.007850643, 0.0005133495, 0.006534274, -0.0016424981, 0.0007394012, 0.0075867306, -0.0109555945, -0.006301789, -0.019967789, 0.01651085, 0.0028509481, -0.021324696, 0.0032935338, 0.03602575, 0.0032237351, -0.010513151, -0.016065339, -0.009153506, 0.014399887, -0.0554729, 0.04879651, ...]",3
13,Anarcho_Capitalism,"[0.0031262855, -0.004614774, -0.00058333896, 0.0059705195, 0.017975207, -0.0036779416, 0.03387641, -0.025736619, -0.000571943, 0.006652936, 0.017076802, 0.021028358, 0.011115848, -0.004632664, -0.0051654964, 0.012707379, 0.0075667957, -0.025629869, -0.013795694, 0.03098369, -0.017846461, 0.014585175, 0.0071799946, 0.013429734, -0.0013411891, 0.0019544105, -0.0058528837, 0.004150207, -0.010631725, 0.018579507, -0.010452526, 0.024010751, -0.00028402664, 0.017749023, 0.009367169, 0.0026771794, 0.03045263, 0.014443399, 0.008862392, -0.022691306, 0.01809309, -0.027297048, -0.008580155, 0.00018736288, -0.015441864, 0.019587176, -0.011957546, -0.0029988668, 0.005547177, -0.033443544, -0.013877755, -0.0118788285, -0.010323109, 0.007028895, -0.0054489244, -0.014744843, -0.002183788, 0.012396353, 0.006087241, 0.013172598, -0.0045842896, 0.005998542, -0.022074647, 0.0070817918, 0.04105914, 0.013558832, -0.013170624, 0.0065786294, -0.030872585, 0.017155439, 0.011854983, 0.0066666417, 0.011389571, 0.02686612, -0.02546166, -0.025346396, 0.017037878, 0.004189245, 0.02918784, 0.013219493, -0.0038980185, -0.0019126213, -0.015789893, 0.0022878724, 0.0056264442, -0.009800743, -0.016352698, -0.01859237, 0.021631189, 0.003604021, -0.019887742, -0.0025462967, 0.04603788, -0.004825502, 0.0052866717, -0.026375396, 0.000436833, 0.016018288, -0.05824056, 0.05182156, ...]",3
18,AskALiberal,"[0.0037337383, -0.0040126056, 0.008046444, -0.010940855, -0.0010555433, -0.003999347, 0.0075162295, -0.022420568, -0.013833207, -0.007668791, 0.013222883, 0.02586911, 0.004136976, 0.0039828303, 0.004117776, 0.017573409, 0.017960466, -0.031947065, -0.0057066064, 0.02973024, -0.0062919124, 0.01168033, 0.026256008, 0.010648655, -0.00021289436, -0.015538319, 0.0050066933, 0.0037478341, -0.013441034, 0.0070476527, -0.0051506576, 0.029301023, 0.0016462697, 0.017505832, 0.002241419, -0.00058943074, 0.021395521, 0.015540612, 0.007448116, -0.012117651, 0.019376943, -0.010218315, 0.0016996936, -0.0013807369, -0.016227767, 0.003569716, -0.0144815985, -0.01526426, -0.0031801492, -0.035300445, -0.012928024, -0.00504205, -0.0076287407, -0.00035890512, 0.004553941, -0.0056478465, -0.005383984, 0.005464461, 0.010822757, 0.007214923, 0.0045901514, 0.007769839, -0.02528585, 0.020600185, 0.019236859, 0.012131588, -0.010535019, -0.00097457616, -0.017546853, 0.004307948, 0.005541482, 0.021089958, 0.0133809075, 0.027742362, -0.0107581625, -0.017088534, 0.009316709, 0.013311225, 0.031322483, 0.004484619, 0.0007586235, 0.00030467106, -0.0059167524, -0.007024961, 0.006901674, -0.018235246, -0.010961309, -0.032155022, 0.0077063874, 0.009030441, -0.02561, -0.003847844, 0.050002005, -0.0011338069, -0.017636856, -0.008914904, -0.010376541, 0.02178462, -0.048856575, 0.039278053, ...]",3
...,...,...,...
459,ukpolitics,"[0.0099683525, 0.00048910844, 0.009516726, -0.015607951, 0.023063296, 0.008558997, 0.0114249, -0.02696909, 0.0029028254, -0.006629918, -0.0029426613, 0.016187536, -0.0010856157, 0.00018918068, -0.0043181926, 0.015022427, 0.00032093597, -0.041615285, -0.018521428, 0.028440319, -0.005695339, 0.016077338, 0.0076147807, 0.021187622, 0.00483705, 0.0019976532, 0.0022060084, 0.0014818076, -0.0049823495, -0.0023454882, -0.0017047358, 0.031203479, -0.0075030616, 0.016735384, 0.00770315, -0.0043735895, 0.019296644, 0.0080043925, 0.004597128, -0.027735202, 0.020447472, -0.029636104, -0.0038303607, 0.0010556569, -0.00304594, 0.021508487, 0.0024294315, -0.01592463, -0.01282632, -0.015923709, -0.006394473, -0.0067336746, 0.0002981341, -0.026050849, 0.004722377, -0.008700255, -0.0053177243, 0.0041402406, 0.00848992, 0.011717353, -0.006531218, 0.021178981, -0.031972155, 0.012808774, 0.024394177, 0.0031354206, -0.009295441, 0.005804179, -0.020804416, 0.023006096, 0.00014003667, 0.003697358, 0.019681912, 0.011702791, -0.01021093, -0.0072089834, 0.005885755, 0.012403791, 0.030924143, 0.0017940506, -0.009925444, 0.0024235372, -0.008361903, -0.008778969, 0.0067476886, -0.026392555, -0.009646993, -0.030455217, 0.013995936, 0.0049752, -0.029235544, -0.012775388, 0.05847103, 0.024515439, 0.00072528113, 0.0005954729, -0.008426925, 0.03477879, -0.05394682, 0.048975166, ...]",3
460,unitedkingdom,"[0.018888898, 0.0034703703, 0.012171218, -0.00855787, 0.013627709, -0.004192025, 0.027509896, -0.02726709, -0.011426188, -0.00603472, 0.0091151055, 0.0053744386, 0.0046213605, 0.0020078488, -0.0118270945, 0.006702366, 0.009466922, -0.039531074, -0.011251049, 0.01751185, -0.017585725, 0.017835455, 0.012719786, 0.015801702, 0.0062567885, 0.005858391, -0.004004299, 0.0065602344, -0.0069853496, 0.007035093, 0.0023571833, 0.032668162, -0.0007108901, 0.009238115, 0.00051635725, -0.006184053, 0.021712244, 0.008122252, -0.0016731315, -0.023594392, 0.018181363, -0.022014445, -0.0056284848, 0.0005983131, -0.0052724234, 0.02133152, -0.004245388, -0.015502465, 0.0006718759, -0.014399261, -0.012141784, -0.013542695, 0.002331249, -0.018968195, 0.0011363783, -0.008267745, -0.0071621095, 0.007913043, 0.0043165786, 0.008951771, -0.012933791, 0.010098037, -0.025753448, 0.01267087, 0.027696915, 0.00082828035, -0.017751625, -0.00059580343, -0.0242083, 0.017989293, -0.0073402934, -0.00026566518, 0.02077974, 0.019447604, -0.016624061, -0.019013809, 0.00819216, 4.182144e-05, 0.020121137, 0.014628191, -0.0109367715, -0.00088678225, -0.01792674, -0.0058821267, 0.0076158666, -0.022701403, -0.0044599473, -0.03391892, -0.0024419092, -0.0025104114, -0.026270198, -0.01080121, 0.041168306, 0.024687773, -0.0028018316, -0.012763145, -0.005363714, 0.03008662, -0.060337402, 0.049081933, ...]",3
461,unpopularopinion,"[0.0033812623, -0.0018909172, 0.013070856, 0.0072176377, 0.012200242, -0.010173195, 0.027560854, -0.005651277, 0.006936245, -0.0036354684, 0.00852254, -0.0044718743, 0.0036881198, 0.002313965, -0.0034686278, 0.0033399384, 0.019216832, -0.0345097, -0.020729015, 0.02147851, -0.014711314, 0.021607963, 0.01969446, 0.0010230247, -0.01725933, -0.0015985952, -0.010877483, 0.000832219, -0.01873806, 0.02179656, -0.0061029154, 0.03580643, 0.0005397481, 0.009617479, -0.012918605, -0.008902501, 0.012226878, 0.02371707, 0.0086385645, -0.01471738, 0.0125507815, -0.0040009613, 0.0067833075, -0.0019891197, -0.007863913, -0.0015789932, -0.011190317, -0.016126119, 0.0048490684, -0.032296825, -0.013595774, -0.0133335665, -0.007156072, 0.006911658, -0.0014988953, -0.008187408, -0.016577944, 0.008449608, 0.011746815, 0.0129557, -0.012198706, 0.01852191, -0.01529274, 0.01367326, 0.029133767, 0.0077277375, -0.0088636875, 0.016405702, -0.012087968, 0.01992327, 0.003663726, 0.017760724, 0.011128042, 0.038595695, -0.011319459, 7.983695e-05, 0.009148585, -0.014403387, 0.029739633, 0.026195928, -0.011454602, 0.0037372347, -0.0060874615, -0.0008985455, -0.0019964704, -0.02153231, -0.0039917873, -0.024896964, -0.0047866837, 0.010476093, -0.03207285, -0.005923924, 0.042526927, 0.0012738701, 0.0018892982, -0.02353846, -0.022861045, 0.017779008, -0.054920197, 0.048535585, ...]",3
477,worldnews,"[0.00798439, 0.005969569, 0.011120817, 0.0013971196, 0.011880738, -0.011792764, 0.01829925, -0.024765268, -0.0049920087, -0.004868887, 0.0054699928, 0.014398517, 0.008084991, 0.016626589, -0.013016189, 0.00086075295, 0.0016322578, -0.030403588, -0.008254285, 0.020786935, -0.0063809245, 0.021200253, 0.022923095, 0.006836161, -0.0038171348, 0.006160202, -0.0031184498, 0.0026440735, -0.009918606, 0.009544407, -0.0041774972, 0.026724273, -0.004850121, 0.01235415, 0.0035846133, 0.0020910227, 0.020223029, 0.012686228, 0.012739672, -0.013773324, 0.02217768, -0.028016584, 0.0042110356, 0.0028012982, -0.008311321, 0.011986777, -0.013680204, -0.013119849, 0.0063493685, -0.015647203, -0.002882019, -0.0046379967, -0.004874819, -0.0065325527, 0.0072478163, -0.002207137, -0.0052262666, -0.002330991, 0.008127201, 0.004450262, -0.0011422591, 0.013410062, -0.027302012, 0.011361359, 0.018882878, 0.005916202, -0.006100593, 0.0043795174, -0.023676116, 0.019287335, 0.012127622, 0.009386712, 0.01609062, 0.023074813, -0.014065492, -0.007405852, 0.009306944, 0.004289242, 0.022769868, 0.00199099, -0.0023088122, -0.010625741, -0.01073851, 0.00076410646, 0.011681085, -0.014839091, -0.0051052347, -0.026549205, 0.01587674, 0.008362431, -0.024867492, -0.012512596, 0.042619992, 0.017625961, -0.008995265, -0.008463443, -0.0098785665, 0.024655135, -0.059250396, 0.03959739, ...]",3


In [42]:
politics = list(subreddit_embeddings[subreddit_embeddings['cluster']==3]['subreddit'])
print(politics)

['ABoringDystopia', 'AdviceAnimals', 'Anarchism', 'Anarcho_Capitalism', 'AskALiberal', 'AskFeminists', 'AskThe_Donald', 'COMPLETEANARCHY', 'CanadaPolitics', 'CapitalismVSocialism', 'Catholicism', 'China', 'Christianity', 'Conservative', 'CryptoCurrency', 'DebateAnAtheist', 'ENLIGHTENEDCENTRISM', 'Enough_Sanders_Spam', 'Futurology', 'GCdebatesQT', 'GenderCritical', 'IncelTears', 'JordanPeterson', 'Judaism', 'KotakuInAction', 'LabourUK', 'LateStageCapitalism', 'Libertarian', 'MakingaMurderer', 'MensRights', 'MorbidReality', 'MurderedByWords', 'OutOfTheLoop', 'PoliticalDiscussion', 'PoliticalHumor', 'PurplePillDebate', 'SandersForPresident', 'Scotland', 'SelfAwarewolves', 'ShitPoliticsSays', 'Shitstatistssay', 'The_Donald', 'The_Mueller', 'TopMindsOfReddit', 'TrueReddit', 'UpliftingNews', 'VoteBlue', 'WayOfTheBern', 'alberta', 'antinatalism', 'atheism', 'aznidentity', 'badunitedkingdom', 'belgium', 'bestof', 'brexit', 'canada', 'changemyview', 'collapse', 'conspiracy', 'environment', 'exc

In [43]:
l = 0
for s in politics:
    l += len(get_text_from_subreddit(dataset, s))

print(l)

10273


In [44]:
clusters.loc[clusters['cluster']=='3', 'Number of samples'] = l

In [45]:
clusters

Unnamed: 0,cluster,topic label,Number of samples
0,0,Humor,13791.0
1,1,Other,5839.0
2,2,Sport,7042.0
3,3,Politics,10273.0
4,4,Love and relationship,
5,5,Film and Tv series,
6,6,Videogames,


### Cluster 4 (Love and relationship)

In [46]:
subreddit_embeddings[subreddit_embeddings['cluster']==4] # love and relationship

Unnamed: 0,subreddit,embedding,cluster
9,Advice,"[-0.007617102, -0.006089481, 0.023801913, -0.0049976697, 0.006722086, -0.009879444, 0.0027762912, -0.013150741, -0.0022804427, -0.025250545, 0.003895956, -0.008447441, -0.014658925, 0.017414179, 0.00083230727, 0.011905843, 0.020765819, -0.017536398, -0.025998984, 0.0098259365, 0.006029329, -0.000421811, 0.0010565891, -0.0071402416, -0.011854249, 0.01179078, -0.0069694854, -0.002212876, -0.009973843, 0.005469132, -0.013630164, 0.023226213, -0.0039365073, -0.0019264787, 0.0012191379, -0.0013369834, 0.00090062193, 0.008878565, 0.00906951, -0.0061895913, 0.011221791, 0.01499977, 0.016571833, -0.002558864, -0.014552178, 0.011065244, -0.0067599528, -0.017204944, 0.02439022, -0.013878089, -0.013594297, -0.01876017, -0.016748287, 0.004117605, 0.016464056, 0.010404959, -0.001767, 0.0034639859, 0.013612827, 0.0014033313, -0.014266689, 0.018927049, -0.019044824, 0.01787677, 0.011834687, 0.030601718, -0.015444895, 0.008260437, 0.014992618, 0.015457385, -0.008774464, 0.012171178, 0.0015264204, 0.025315216, -0.0047199186, 0.010815794, 0.004645248, -0.03240198, 0.033621814, 0.032036293, -0.023822723, 0.018345598, -0.0005302871, 0.0067950827, -0.015412926, -0.014910379, 0.005240889, -0.020096406, 0.0032101183, 0.013405534, -0.005293882, 0.0044164644, 0.029262709, 0.0056819436, -0.013889478, -0.0058947117, -0.039159242, 0.015333993, -0.05724625, 0.03954862, ...]",4
20,AskMen,"[-0.00092732353, 0.0030645514, 0.024757257, 0.0054917857, 0.0075081964, -0.01874744, 0.037123837, -0.013795566, 0.015251031, -0.010160403, 0.006198824, -0.0060085594, -0.0030103636, 0.015348448, 0.010909678, -0.0019617914, 0.025535518, -0.023679353, -0.021577079, 0.01950273, -0.013287892, 0.008497651, 0.014970958, -0.0028421902, -0.015570185, 0.0035307496, -0.0065879193, -0.0057931393, -0.015539492, 0.008235079, -0.02162828, 0.026916932, -0.0029244304, 0.0028267393, -0.016963553, 0.0018820948, -0.0013078743, 0.0048784516, 0.012449013, -0.0063571176, 0.0014017506, 5.8582086e-06, 0.01757279, 0.003813128, -0.010587379, 0.0003509287, 0.012031651, -0.018160192, 0.016769474, -0.017428432, -0.0061667217, -0.014912412, -0.007042869, 0.010195926, 0.014117682, 0.00586648, -0.00913574, 0.0015374006, 0.006566269, 0.006254823, -0.019042924, 0.02007505, -0.01459621, 0.013621057, 0.018432954, 0.008460761, -0.0092553245, 0.02425202, 0.0041757952, 0.032659035, -0.014282995, 0.017026361, -0.0135740535, 0.03275813, -0.015610154, 0.006618379, 0.018586727, -0.037159007, 0.025565425, 0.030684859, -0.016784167, -0.001025576, 0.0064343805, 0.0093185315, -0.0157774, -0.026558947, 0.011401212, -0.025011433, 0.0005015349, 0.0036333464, -0.03268356, -0.009433834, 0.004585114, 0.0006160571, -0.018123312, -0.010936314, -0.03277145, 0.02018474, -0.05254829, 0.048579216, ...]",4
21,AskMenOver30,"[-0.00033440115, 7.5315074e-05, 0.027577708, 0.011378074, 0.0034606406, -0.012567688, 0.017438041, -0.013919916, 0.0004571832, -0.01065009, 0.00040058937, -0.009770518, 8.872736e-05, 0.020293813, 0.006087495, 0.0075203744, 0.010946129, -0.024279198, -0.012105199, 0.00800622, -0.019480243, 0.0051570083, 0.0086863665, -0.006083225, -0.008715783, 0.016366713, -0.014496227, 0.003371099, -0.019047488, 0.010346551, -0.005875625, 0.025884904, 0.0026535438, 0.004448075, -0.005808102, 0.0015150654, -0.008312576, 0.016085135, 0.013355713, 0.0015727425, 0.009431619, 0.008936796, 0.017463312, -0.0060688998, -0.015025388, -0.005578454, 0.007826518, -0.023369178, 0.028953718, -0.009787881, -0.009488783, -0.016229961, -0.0102968495, -0.004507904, 0.0069656023, 0.019979853, 0.0004618887, 0.009591891, 0.00475586, 0.0031820904, -0.019205453, 0.016147042, -0.016962137, 0.015317831, 0.0110892225, 0.014319615, -0.0131004695, 0.014709971, -0.00078331504, 0.022105176, -0.009690974, 0.016572885, -0.00024340085, 0.030919185, -0.012652453, 0.01001007, 0.010019614, -0.029640565, 0.027822558, 0.018612135, -0.023047684, 0.004246509, -0.012080462, 0.00825575, -0.014061381, -0.024257503, 0.009768016, -0.016966224, 0.007014548, 0.0134993605, -0.020894527, 0.000395546, 0.017389785, 0.005809298, -0.019622885, -0.00983553, -0.031499207, 0.022463001, -0.0672704, 0.039057195, ...]",4
23,AskWomen,"[-0.010680591, -0.009031852, 0.02447443, 0.013938489, 0.0022670627, -0.030222174, 0.023930864, -0.008892975, 0.010742938, -0.012595517, -0.0054341187, -0.005320456, -0.0077671288, 0.01072722, 0.0052782833, 0.0015939435, 0.02027696, -0.021724392, -0.007931836, 0.01555383, -0.018607387, 0.009084609, 0.0073041003, -0.0041590235, -0.010438552, 0.008613132, -0.0056092734, 1.5890237e-05, -0.015919445, -0.0010206061, -0.015192144, 0.023802234, -0.002357872, -0.0062270784, -0.010116307, -0.0018884932, -0.0040196064, 0.008744927, 0.008250255, -0.0074104825, 0.0050142533, 0.011049613, 0.02023862, 0.0031058153, -0.0065170494, 0.002616495, 0.014575168, -0.015260944, 0.013606705, -0.006482852, -0.010178081, -0.022759026, -0.0038080558, 0.0008257336, 0.0066278665, 0.0063525075, -0.004630999, 0.00023018068, 0.0056173885, 0.011989555, -0.0263495, 0.021053161, -0.012699368, 0.021711309, 0.016597506, 0.017536245, -0.0104294615, 0.013329776, 0.0014868755, 0.022281943, -0.0054712184, 0.017819475, -0.0008760682, 0.03622499, -0.01092219, 0.007381529, 0.013421853, -0.040672377, 0.016013565, 0.039362714, -0.021942537, 0.003007043, -0.0052556423, 0.007973056, -0.013521267, -0.024346696, 0.012026415, -0.021731347, -0.002250999, 0.004870498, -0.027351907, -0.0020173916, 0.009196772, 0.0072862906, -0.018472314, -0.011874939, -0.027073473, 0.007117987, -0.06463437, 0.037910312, ...]",4
26,BPD,"[-0.015339054, -0.011907564, 0.04142772, 0.0121148005, 0.02944151, -0.0096824905, 0.0052583544, -0.006487029, 0.01594426, -0.0310886, -0.013226495, 0.009777942, 0.0029796874, 0.020049894, 0.007819281, 0.010001958, 0.0057917004, -0.026442494, -0.030668952, 0.02503421, -0.024270939, 0.0004437296, -0.00434633, -0.001235154, -0.018600333, 0.015989196, -0.017010419, 0.0009919805, -0.0063465973, -0.012685689, -0.022733781, 0.019173814, -0.0013499116, 0.0012994336, 0.00029173266, 0.0054311557, -0.0120848585, 0.002945385, 0.009381524, -0.008620467, 0.0058530965, 0.021877896, 0.024656003, -0.00435591, -0.010058458, 0.0068459404, 0.0020933582, -0.02375388, 0.024416689, -0.023670897, -0.016885115, -0.007314563, -0.008955607, 0.007509672, 0.017483374, 0.034182668, 0.018445503, 0.016200636, 0.0026806828, 0.0038784486, -0.017591905, 0.019186497, -0.0044720806, 0.017485263, 0.030119805, 0.03622793, -0.012456148, 0.00716247, 0.0022415968, 0.018263876, -0.03309561, 0.012588291, -0.006903059, 0.016202651, -0.009406415, 0.01826373, -0.00016962575, -0.045716252, 0.026944084, 0.041263577, -0.006480386, 0.043636393, -0.008738537, 0.0038750058, -0.023861524, -0.034812, 0.010119885, -0.012279596, 0.019040873, 0.007174067, -0.014981192, 0.009437095, 0.018214615, 0.013765733, -0.013703491, -0.019910293, -0.037141588, 0.017848786, -0.06368823, 0.036530014, ...]",4
...,...,...,...
431,socialanxiety,"[-0.0064503993, -0.0150818685, 0.039892826, 0.0056231236, 0.02169309, -0.022850452, 0.020783227, -0.007247369, 0.011685687, -0.0328022, -0.007310983, -0.010873986, 0.0029060987, 0.0061805877, 0.006617345, 0.0090445215, 0.0036626267, -0.022680702, -0.030743487, 0.020124715, -0.029051643, 0.001976285, -0.0029904977, 0.0035461076, -0.024437845, 0.010271249, -0.017117592, -0.012424234, -0.009285329, 0.0047096056, -0.01407469, 0.02220988, 0.0005613469, 0.002911608, -0.0009820989, 0.00035884205, -0.007870463, -0.0019807199, 0.012162386, 0.001233351, 0.0056557, 0.01927898, 0.02637114, 0.00097583345, -0.0059427638, -0.0054343245, 0.007065324, -0.017308377, 0.028312912, -0.022206543, -0.017275875, -0.016555615, -0.016461613, -0.0027865663, 0.015266498, 0.031968065, -0.0019513358, 0.016405717, 0.007488157, 0.0035591952, -0.01400368, 0.00722218, -0.01521868, 0.019828746, 0.027159097, 0.035851218, -0.008123229, 0.02344483, 0.0012869617, 0.023849819, -0.014829455, 0.0056227217, -0.0025140208, 0.03323632, 0.001131146, 0.013985601, -6.0886417e-05, -0.04713586, 0.032249026, 0.046333045, -0.010889876, 0.023635402, -0.009331929, 0.0019839988, -0.018081246, -0.0363378, 0.013605418, -0.012009505, 0.009008439, 0.012883802, -0.017732136, 0.0040682643, 0.00979036, 0.012107525, -0.008050107, -0.009186619, -0.037364338, 0.023430046, -0.06121468, 0.044253096, ...]",4
439,sugarlifestyleforum,"[-0.0052152355, -0.011912172, 0.028349165, 0.007998947, 0.007333812, -0.015411564, 0.028172826, -0.0149853295, 0.009125861, -0.0146970805, 0.0025032745, -0.008897096, 0.0028711306, 0.013066432, 0.009774466, 0.0056980355, 0.01927081, -0.019856166, -0.013609611, 0.028630508, -0.006209206, -0.0031614024, 0.0062080016, -0.010435392, -0.0033551583, -0.006416085, -0.01010713, -0.002764569, -0.02389953, -0.0009486792, -0.0064620804, 0.05152832, 0.002767894, 0.0050904704, 0.006712928, -0.0060721077, -0.0028895822, -0.007983055, -0.00010180393, -0.0047166664, -0.003895566, 0.0018609443, 0.014679791, 0.010903457, -0.02184457, 0.009992715, 0.0046227016, -0.00773205, 0.013473183, -0.0056243353, -0.016129604, -0.010243172, -0.006102036, 0.003855238, -0.0060502007, 0.016676389, -0.001452123, -0.0062898365, 0.0026966778, 0.009967764, -0.021270253, 0.026579048, -0.020457314, 0.02004503, 0.01499854, 0.0066756136, -0.027074303, 0.019586846, -0.006659468, 0.016444584, 0.0049735475, 0.009941531, -0.003596416, 0.030520149, -0.0037631374, 0.0048484625, 0.020346891, -0.030759364, 0.028366085, 0.026641337, -0.01143163, -0.007581375, 0.0012137004, 0.0074024163, -0.014867893, -0.022752913, -0.014300571, -0.021457806, 0.0034020043, -0.003176772, -0.0091788545, -0.009677251, 0.013015933, 0.012045369, -0.03039808, -0.0058954633, -0.027252315, 0.019772386, -0.06724491, 0.03468105, ...]",4
440,survivinginfidelity,"[-0.01299571, -0.0017147186, 0.03949354, -0.014167567, 0.00397227, -0.005079095, 0.008644628, -0.015262618, 0.014797643, -0.029446326, -0.013199992, 0.0078040976, 0.0047393665, 0.016922912, 0.0070352093, 0.03181779, 0.010291606, -0.022587785, -0.040739637, 0.00627224, -0.021579003, 0.009393559, 0.008433926, -0.006882084, -0.016546775, 0.01050719, -0.02038175, 0.0008532787, -0.01300713, 0.004081967, -0.013435149, 0.015138554, -0.0040795123, 0.017787244, 0.003223288, -0.004544448, -0.019547509, 0.010625704, 0.015692508, -0.011641103, 0.007835628, 0.011146477, 0.026196793, 0.0011237708, -0.024279475, 0.016587617, -0.010509173, -0.03308289, 0.019762712, -0.032661017, -0.039403167, -0.0038327463, -0.02047785, -0.0016770303, 0.020212494, 0.027943976, 0.022637771, 0.013020194, 0.0053755553, -0.010861344, -0.013545546, 0.04217488, -0.009775263, 0.019162612, 0.007723496, 0.03176857, -0.0073279045, 0.009724348, 0.009513742, 0.046004873, -0.011080266, 0.021433912, -0.0068371124, 0.028549865, -0.01673666, 0.019903844, -0.0005871278, -0.027794763, 0.038940717, 0.019613419, -0.025772689, 0.026556524, -0.009079053, 0.006799868, -0.025271596, -0.04113019, -0.00486449, -0.03549181, 0.024767071, 0.0029519957, -0.014700435, 0.010015076, 0.040374346, 0.0055347197, -0.019294178, -0.004542937, -0.041597974, 0.027842427, -0.07635615, 0.03880461, ...]",4
455,traaaaaaannnnnnnnnns,"[-0.020570245, -0.002882197, 0.027145175, -0.003802262, 0.012324058, -0.022656322, 0.042599775, -0.0075460654, -0.0015186319, -0.009812989, -0.007691715, -0.0137383165, 0.0092629185, 0.00734785, -0.0037418846, 0.012972971, 0.0097107915, -0.011039929, -0.024748188, 0.010945226, -0.020114949, 0.014151855, 0.0068364693, 0.0021730377, -0.019818857, 0.0066902875, -0.007628128, 0.003050298, -0.011465041, -0.011133402, -0.02498082, 0.032432113, -0.0018257346, 0.012136074, -0.01201795, -0.0063102986, 0.0012855015, -0.0016830029, 0.0059459656, -0.0052132327, -0.00036846867, -0.015368997, 0.008318985, 0.010461221, -0.0063330247, 0.0052339938, 0.006217679, -0.014294006, 0.017925823, -0.007103963, -0.020400643, -0.021254377, -0.010137493, 0.008496684, 0.0030911416, 0.021096619, 0.0024370563, -0.008544418, 0.01568433, 0.010606878, -0.017041316, 0.016659105, -0.012431962, 0.013130763, 0.02024906, 0.002519307, -0.010709322, 0.00602304, -0.0039083324, 0.022602558, -0.0018080354, 0.013872588, -0.0036744466, 0.02633068, -0.008058596, 0.010373666, 0.010720207, -0.03288942, 0.0273878, 0.030456824, -0.01445082, -0.0017476532, -0.0064829686, 0.006352351, -0.011121747, -0.00900439, 0.0016626883, -0.024256214, -0.009984439, 0.004289848, -0.02059556, -0.0028910823, 0.021831928, 0.021788692, -0.013399453, -0.025245855, -0.019894175, 0.0016988028, -0.06660484, 0.056814384, ...]",4


In [47]:
love =  list(subreddit_embeddings[subreddit_embeddings['cluster']==4]['subreddit'])
print(love)

['Advice', 'AskMen', 'AskMenOver30', 'AskWomen', 'BPD', 'BPDlovedones', 'DeadBedrooms', 'Divorce', 'DoesAnybodyElse', 'Drugs', 'EDAnonymous', 'ForeverAlone', 'GetMotivated', 'INTP', 'IncelsWithoutHate', 'JUSTNOFAMILY', 'JUSTNOMIL', 'JustNoSO', 'LGBTeens', 'LetsNotMeet', 'Marriage', 'NarcissisticAbuse', 'NoFap', 'OkCupid', 'Parenting', 'SuicideWatch', 'TooAfraidToAsk', 'TrueOffMyChest', 'Trufemcels', 'TwoXChromosomes', 'adultery', 'askwomenadvice', 'aspergers', 'bipolar', 'breakingmom', 'childfree', 'confession', 'confessions', 'dating', 'dating_advice', 'datingoverthirty', 'depression', 'drunk', 'dxm', 'entp', 'fatlogic', 'gay', 'gaybros', 'intj', 'lgbt', 'mentalhealth', 'offmychest', 'polyamory', 'raisedbynarcissists', 'rant', 'relationship_advice', 'relationships', 'seduction', 'self', 'sex', 'socialanxiety', 'sugarlifestyleforum', 'survivinginfidelity', 'traaaaaaannnnnnnnnns', 'trees']


In [48]:
l = 0
for s in love:
    l += len(get_text_from_subreddit(dataset, s))

print(l)

9026


In [49]:
clusters.loc[clusters['cluster']=='4', 'Number of samples'] = l

In [50]:
clusters

Unnamed: 0,cluster,topic label,Number of samples
0,0,Humor,13791.0
1,1,Other,5839.0
2,2,Sport,7042.0
3,3,Politics,10273.0
4,4,Love and relationship,9026.0
5,5,Film and Tv series,
6,6,Videogames,


### Cluster 5 (Film and TV series)

In [51]:
subreddit_embeddings[subreddit_embeddings['cluster']==5] #flim and TV series

Unnamed: 0,subreddit,embedding,cluster


In [52]:
# cluster 9 -> cluster 5
subreddit_embeddings.loc[subreddit_embeddings['cluster']==9, 'cluster'] = 5

In [53]:
subreddit_embeddings[subreddit_embeddings['cluster']==5] #flim and TV series

Unnamed: 0,subreddit,embedding,cluster
3,90DayFiance,"[-0.019507552, -0.00010089574, 0.018924331, -0.0038835085, 0.0060501234, -0.01569419, 0.049998507, -0.01904653, 0.0032230413, -0.019662304, -0.008358756, -0.013460865, 0.0065116957, 0.0034610345, -0.018973177, 0.004109488, 0.024398454, -0.018270606, -0.013174686, 0.020341944, -0.012314899, 0.016231775, 0.019261865, 0.015190753, -0.0032424924, 0.012099743, -0.0048506665, -0.0030736676, -0.013995789, -0.005119908, -0.0049036867, 0.04076871, -0.006272048, 0.01964927, -0.010928114, -0.010002585, 0.0069559994, 0.029052923, 0.0034749992, -0.009018267, -0.0030517401, -0.028956687, -0.0023353826, 0.0062700445, -0.02901863, -0.008297744, 0.006232326, -0.015814122, 0.022042062, -0.016543563, -0.03301168, -0.019316193, -0.0030435629, -0.014972599, -0.010391119, 0.014357637, 0.0107155405, -0.008648491, 0.017338803, 0.01906286, -0.0055567026, 0.028424773, -0.022645047, 0.022397403, 0.014496168, -0.007771397, -0.0040291203, 0.008417062, 0.0025153407, 0.020618653, 0.01665875, 0.018763488, 0.001761911, 0.013061582, -0.014513414, 0.012087444, 0.015032094, -0.029182835, 0.0187016, 0.024862831, -0.016964097, -0.01950248, 0.008608155, 0.0012633805, -0.0019364496, -0.00072301785, -0.017652422, -0.034722503, -0.0079342555, -0.009784057, -0.03788417, -0.007694937, 0.020204091, 0.0076382705, -0.015932024, -0.010579802, -0.020447962, 0.0033945518, -0.07285643, 0.052548166, ...]",5
4,90dayfianceuncensored,"[-0.016648285, -0.0016126882, 0.009415893, -0.0016100111, 0.0019142216, -0.017117219, 0.04244234, -0.015882403, 0.0043267184, -0.018114535, -0.0011434428, -0.0148694515, 0.0066956542, 0.006140645, -0.01705578, 0.011066317, 0.017125696, -0.018565735, -0.01967116, 0.015638137, -0.015192008, 0.01662443, 0.014647336, 0.00776881, -0.0048861303, 0.009418479, -0.004536365, 0.0001366113, -0.017532269, 0.00096075976, -0.007773897, 0.03767317, -0.0058949995, 0.022984963, -0.005164784, -0.0073938337, 0.011315836, 0.03042451, 0.0017337339, -0.008336772, -0.004055907, -0.029913757, 0.0015786457, 4.0773466e-05, -0.024261171, -0.0093112765, -0.0025688244, -0.017866634, 0.013873043, -0.024502411, -0.02897177, -0.026675196, 0.0012092976, -0.01912418, -0.008045133, 0.013183872, 0.013678813, -0.008394562, 0.020386884, 0.024118226, -0.007886545, 0.029661028, -0.016760131, 0.019187475, 0.014510825, -0.012754731, -0.005630248, -0.0030878584, 0.0013569504, 0.019244732, 0.020337986, 0.016748028, -0.0024019107, 0.010417762, -0.011856027, 0.008205711, 0.02631944, -0.016911784, 0.021633776, 0.018766472, -0.0137266405, -0.012217221, 0.0104871625, 0.0026082199, -0.0041602687, 0.0005365952, -0.01724035, -0.038934346, -0.010131961, -0.005289653, -0.030049814, -0.011947373, 0.017801994, 0.0123516265, -0.015830277, -0.010853867, -0.023839882, 0.008481613, -0.0698053, 0.049212627, ...]",5
31,BeautyGuruChatter,"[-0.031506933, -0.017475396, 0.02932358, -0.007933318, 0.019502014, -0.009912258, 0.037796333, -0.009632719, -0.0037243392, -0.0151805235, -0.00055297307, -0.016167087, 0.00073085056, 0.0027748083, -0.005627279, 0.022947486, 0.017047688, -0.013421294, -0.0093181515, 0.004802724, -0.013654997, 0.001289274, 0.025785988, 0.015829025, -0.016120538, 0.0085367495, -0.010298732, -0.0012481312, -0.0065265424, -0.022858, -0.0133903, 0.04980634, -0.012284523, 0.014145563, -0.01510977, -0.006150022, 0.00053092174, 0.02521634, -0.012368418, 0.00043628292, 0.0017960422, -0.017185103, -0.004913633, -0.0048562586, -0.018417783, -0.011444453, 0.008734713, -0.020612212, 0.010279107, -0.013984489, -0.030954555, -0.03723637, -0.0023211616, -0.018536828, -0.01704366, 0.012705516, 0.000648483, -0.01305845, 0.022856418, 0.011517749, -0.011837544, 0.02129442, -0.013239893, 0.013464556, 0.023640225, -0.0027025933, -0.006117381, -0.010307735, 0.007708812, 0.011014635, 0.00088818313, 0.018464983, 0.00058482913, 0.019966284, -0.009917966, 0.019429645, 0.016383266, -0.028508224, 0.006543615, 0.035645768, 0.0059404443, -0.0037266451, 0.015718576, 0.011348711, -0.002287037, -0.0032208532, -0.012713223, -0.039040618, -0.0040824357, -0.019740919, -0.023227416, 0.01437084, 0.009875879, 0.008854812, -0.013529192, -0.015989067, -0.014478639, -0.003176922, -0.06279164, 0.040757895, ...]",5
33,BigBrother,"[-0.039386638, -0.0022022894, 0.004130546, -0.027260087, 0.007823714, -0.012827334, 0.07508562, -0.017934768, -0.00023544653, -0.028855324, -0.030521916, -0.023157666, 0.0050607035, 0.008021022, -0.016616432, 0.01680742, 0.026600786, -0.0029236008, -0.012774441, -0.0005331188, -0.017639853, 0.020004913, -0.002267007, 0.0075449804, -0.0036200506, 0.018698715, -0.0099354945, -0.005328426, -0.017433977, -0.025545018, -1.0290297e-05, 0.05020462, 0.0054886495, 0.030598512, -0.018316248, -0.010835674, -0.023545163, 0.023678921, -0.010788168, -0.021959044, 0.018995615, -0.033937257, -0.0077387467, 0.014595378, -0.026974652, -0.008056656, 0.007992021, -0.026417682, 0.013374946, 0.004885729, -0.039449126, -0.0387167, 0.007819932, -0.0064675584, -0.0035285114, 0.020746926, 0.0079959445, -0.014827199, 0.021007333, 0.022835812, -0.011768278, 0.022015918, -0.029669253, 0.018352672, 0.019996807, -0.018422952, -0.0074106804, 0.012400689, 0.0041399123, 0.02610262, 0.011802911, 0.023655536, 0.011444679, -0.019502075, -0.014405707, 0.01431227, -0.0036059036, -0.02451113, 0.02985875, 0.023103788, -0.009592078, -0.024789821, -0.025475763, -0.0038610653, 0.015342035, 0.03273565, -0.026934816, -0.027463056, -0.006575561, -0.018806243, -0.030445281, -0.023771157, 0.04574351, 0.023646493, -0.014809326, 0.0064492095, -0.0044386205, -0.017361913, -0.08766016, 0.06695848, ...]",5
38,BravoRealHousewives,"[-0.024962267, -0.005821488, 0.013831327, -0.012060196, 0.010278688, -0.007907242, 0.04785975, -0.013464538, 0.0042286706, -0.019342478, -0.0100135645, -0.024829373, 0.0052012824, -0.003887295, -0.028885685, 0.012563317, 0.04221264, -0.018131182, -0.007446288, 0.012878913, -0.021152008, 0.012541364, 0.019018894, 0.023033826, -0.0052519348, 0.016193965, 0.0020883207, 0.007914995, -0.018624367, -0.008757187, -0.011774431, 0.041799724, -0.005419882, 0.027218686, -0.012069762, -0.0072167767, 0.005202633, 0.019532446, -0.003623205, -0.0063372124, 0.005381483, -0.02574801, -0.002922522, -0.000250021, -0.024495378, -0.01513145, 0.0016087135, -0.026500339, 0.013133083, -0.021304604, -0.03677963, -0.01929556, -0.0057807826, -0.009517633, 0.002499576, 0.025655948, 0.009877984, -0.012175971, 0.027373973, 0.017074754, -0.016196327, 0.029323114, -0.01595174, 0.014222343, 0.010123936, -0.014461884, -0.0075663803, 0.009955366, -0.00012009338, 0.014478147, 0.008967159, 0.018650662, 0.011930045, 0.016827969, -0.005696523, 0.008945889, 0.02380341, -0.036299255, 0.018829392, 0.027993822, -0.020246008, -0.02047356, 3.7756534e-05, 0.002592285, -0.012392491, 0.00068599003, -0.014608772, -0.023267256, 0.0004292034, -0.017701844, -0.045331027, -0.00643224, 0.020348402, 0.006509259, -0.01589414, -0.008500677, -0.025108429, -0.00892773, -0.06290793, 0.05339235, ...]",5
62,DanLeBatardShow,"[-0.024644725, 0.016624544, 0.0071620494, -0.018387776, 0.007938515, -0.013697733, 0.07392647, -0.019048646, -0.0012683293, -0.033989962, -0.029603029, -0.01434052, 0.004388511, -0.0014056778, -0.022511143, 0.008383692, 0.016544731, -0.0064030318, -0.0043325517, -0.01190119, -0.015158352, 0.038392473, 0.0036132906, 0.015463124, 0.0046914844, 0.014159608, -0.007238133, 0.011428926, -0.01260015, -0.02412694, -0.003443782, 0.03622663, 0.015070256, 0.02255471, -0.013402803, -0.01909032, -0.007446236, 0.02654008, -0.008421251, -0.0011246646, 0.016656656, -0.027954042, -0.0033806753, 0.012434512, -0.024816275, -0.0007945735, -0.002866991, -0.028632615, 0.029079795, 0.01712726, -0.03332053, -0.019947756, 0.01065742, -0.012722236, 0.0075164675, 0.022742972, 0.009090326, 0.005038594, 0.016363697, 0.021395275, -0.013850842, 0.008149288, -0.027329836, 0.019123374, 0.015244345, -0.013921817, -0.017339904, 0.011028363, -0.008619606, 0.029935233, 0.014836715, 0.024051342, 0.014747934, -0.009692663, -0.009924316, 0.0075179436, 0.0039106635, -0.027334193, 0.022070052, 0.022348491, -0.015709622, -0.03347406, -0.022100285, -0.007334757, 0.008481312, 0.01313126, -0.005940399, -0.029817991, -0.0001993939, -0.01389412, -0.042308398, -0.024237383, 0.02846343, 0.01657541, -0.0053311796, 0.003221117, -0.012372341, -0.012539143, -0.08142082, 0.067525715, ...]",5
69,DevilMayCry,"[-0.05473357, -0.0018193095, 0.014696905, -0.014810628, -0.0017559415, -0.0222328, 0.049730163, -0.014094915, -0.0036887298, -0.01747119, -0.012888959, -0.014083738, 0.010026533, 0.022068324, 0.0035155553, 0.005879135, 0.024095977, 0.0080384845, -0.003665263, 0.008060939, -0.0018046617, 0.019954806, 0.016519109, -0.006804961, -0.021281896, 0.016134411, 0.003537847, 0.01581604, -0.005354834, -0.026389727, -0.003876826, 0.038723942, 0.005135664, 0.014555737, -0.01887499, -0.0032281845, -0.008262352, 0.018175105, -0.009476425, -0.02025619, 0.0039668498, -0.009665491, 0.017444465, 0.019776471, -0.015147808, 0.0039604683, -0.017833272, -0.03363508, 0.021265991, 0.0071231797, -0.033719458, -0.0390858, 0.0029013832, -0.0036234593, 0.0026899453, 0.022335412, 0.009674581, -0.010636608, 0.019908182, 0.011646502, -0.02148457, 0.009172339, -0.022714714, 0.019001909, 0.029355459, -0.018649647, -0.00058517384, -0.0004265464, 0.0015918787, 0.032556504, 0.009037948, 0.014288624, 0.02163827, -0.0053027645, -0.012809582, 0.0043539247, -0.010381256, -0.03762151, 0.005261674, 0.01580613, -0.0067897937, -0.0064249802, -0.022173656, 0.017848711, -0.014609487, 0.02065827, -0.0090616215, -0.025503933, -0.0057162596, -0.009393516, -0.026031079, -0.008651237, 0.014304287, 0.019581659, -0.029905876, -0.014885803, -0.0056135915, 0.0016019163, -0.05372014, 0.057251804, ...]",5
75,DomesticGirlfriend,"[-0.03271937, -0.008582452, 0.011744334, -0.011263273, 0.0048128036, -0.004421768, 0.028398534, -0.01146791, 0.024505243, -0.02345394, -0.012065339, -0.012517237, -0.010726841, -2.2024173e-05, -0.0069860485, 0.0015527636, 0.0018572394, -0.019994484, -0.027431738, -0.0043635396, -0.0029071223, 0.011051788, -0.00082000054, -0.0048262887, 1.6632137e-06, 0.0008348204, 0.000976321, -0.008138559, -0.016691482, -0.014881034, 0.007856619, 0.05082826, 0.011658828, 0.016635057, -0.007885061, -0.001979162, -0.017013859, 0.017511591, -0.0030455983, -0.018897098, 0.003202525, -0.018305382, 0.0016774116, 0.020458642, -0.0089842575, -0.005229362, -0.012602217, -0.043316048, 0.02448247, -0.0007472611, -0.036527287, -0.023697708, -0.0049824333, 0.010702989, 0.015057585, 0.015918588, 0.0086446665, -0.011413872, 0.02899319, 0.018569697, -0.0053390632, 0.01171033, -0.037436303, 0.00915787, 0.023688868, -0.022563156, -0.004451825, 0.0033068955, 0.0022961537, 0.031660136, 0.025348043, 0.010830432, 0.010535725, -0.0140130585, -0.027191743, 0.010219992, -0.004581041, -0.029855924, 0.01007672, 0.02015835, -0.013762913, -0.0153549425, -0.004098084, 0.00010885676, -0.0020191749, 0.025662249, -0.00039095824, -0.04512354, -0.0030154444, -0.005724227, -0.036850255, -0.01686307, 0.044755727, 0.025406387, -0.026749339, 0.018039223, -0.009175876, 0.0021511049, -0.06556878, 0.057111025, ...]",5
77,DunderMifflin,"[-0.021631986, 0.008189149, 0.005288804, -0.011899001, 0.0012847489, -0.008608081, 0.053504977, -0.01656004, 0.0044132974, -0.02863719, -0.008142437, -0.0041963584, 0.0061127213, 0.012791764, -0.016440358, 0.0059229266, 0.020759849, -0.007789271, -0.006360152, 0.008428657, -0.00451806, 0.02563167, 0.013826852, 0.004708274, 0.0015186951, 0.0033565594, -0.00058448134, -0.001782157, -0.012450359, -0.007347665, -0.00065549766, 0.035874985, 0.0035111909, 0.017020954, -0.018202163, -0.012668717, 3.3438984e-05, 0.03302527, -0.0015348423, -0.010556825, 0.007208723, -0.027458094, 0.0022161778, -0.00183883, -0.017880343, -0.0051189805, 0.0011819539, -0.022543944, 0.033745743, -0.008000209, -0.030863717, -0.020486722, -0.0016608014, -0.005471396, -0.0033559068, 0.008166662, 0.01153725, 0.0011249817, 0.014886751, 0.011778028, -0.008707268, 0.015695602, -0.022175055, 0.015147556, 0.02588505, -0.0101643205, -0.010492996, -0.0064211804, -0.0003155955, 0.027147338, 0.0069941706, 0.024690509, 0.0065717553, -0.0006288954, -0.018314255, 0.007951017, 0.0050842334, -0.025708009, 0.024151448, 0.025685245, -0.007891511, -0.020491978, -0.02157536, 0.003557357, 0.00518955, -0.005151947, -0.002968861, -0.03546167, -0.015109668, -0.0072283004, -0.032790367, -0.01032198, 0.03303245, 0.016572211, -0.015786935, -0.009263009, -0.013718648, -0.0017141892, -0.06940426, 0.059185192, ...]",5
85,FlashTV,"[-0.030517412, 0.0039381734, 0.006851924, -0.004037545, 0.0134370215, -0.0071960296, 0.040070467, -0.005754203, 0.00666619, -0.023854233, 0.0039007093, -0.0053809616, 0.0026883166, 0.014196151, -0.014226468, 0.0009729982, 0.011121612, -0.023168307, -0.017406799, 0.00026180127, 0.0061599747, 0.025754597, 0.01414655, 0.0074891546, -0.012614709, 0.015005155, -0.009809633, -0.0008913868, -0.012102764, -0.014484229, 0.006603284, 0.043052796, -0.0014934174, 0.011168865, -0.01313602, -0.009859693, 0.0039494913, 0.028817842, -0.012623014, -0.0144766085, 0.010003787, -0.013656695, -0.0059101693, 0.0038194747, -0.019583812, -0.003697681, -0.0038890499, -0.029401395, 0.026599498, -0.0024937517, -0.033986967, -0.015867792, -0.0046819267, -0.012283916, 0.014028493, 0.00070868206, 0.0124746645, -0.012604944, 0.035991304, 0.0046181222, 0.00096264697, 0.010655785, -0.021764982, 0.015948452, 0.023889424, -0.013624974, -0.0056242137, -0.012046416, 0.0021754089, 0.025699576, 0.019287447, 0.016379513, 0.008759469, -0.013180042, -0.01407014, 0.009650744, 0.012707517, -0.02385013, 0.018099843, 0.023604536, 0.0034742686, -0.028740317, -0.019907322, 0.0088673495, 0.009271584, 0.012788129, -0.0037163182, -0.042124897, -0.0020315005, -0.00818962, -0.030244997, -0.020314017, 0.045290016, 0.032277532, -0.011873908, -0.0052369814, -0.017099492, -0.0019500264, -0.06312906, 0.060563046, ...]",5


In [54]:
films = list(subreddit_embeddings[subreddit_embeddings['cluster']==5]['subreddit'])
print(films)

['90DayFiance', '90dayfianceuncensored', 'BeautyGuruChatter', 'BigBrother', 'BravoRealHousewives', 'DanLeBatardShow', 'DevilMayCry', 'DomesticGirlfriend', 'DunderMifflin', 'FlashTV', 'Kanye', 'MortalKombat', 'Music', 'NetflixBestOf', 'PandR', 'Persona5', 'RWBY', 'RedLetterMedia', 'SpoiledDragRace', 'SquaredCircle', 'TeenMomOGandTeenMom2', 'TheWalkingDeadGame', 'TwoBestFriendsPlay', 'WWE', 'arrow', 'asoiaf', 'blogsnark', 'danganronpa', 'fireemblem', 'freefolk', 'gamegrumps', 'greysanatomy', 'loveafterlockup', 'movies', 'moviescirclejerk', 'netflix', 'popheads', 'raimimemes', 'riverdale', 'rpdrcringe', 'rupaulsdragrace', 'saltierthancrait', 'shieldbro', 'starwarsspeculation', 'television', 'vanderpumprules', 'weezer']


In [55]:
l = 0
for s in films:
    l += len(get_text_from_subreddit(dataset, s))

print(l)

4987


In [56]:
clusters.loc[clusters['cluster']=='5', 'Number of samples'] = l

In [57]:
clusters

Unnamed: 0,cluster,topic label,Number of samples
0,0,Humor,13791.0
1,1,Other,5839.0
2,2,Sport,7042.0
3,3,Politics,10273.0
4,4,Love and relationship,9026.0
5,5,Film and Tv series,4987.0
6,6,Videogames,


### Cluster 6 (Videogames)

In [58]:
subreddit_embeddings[subreddit_embeddings['cluster']==6] #videogames

Unnamed: 0,subreddit,embedding,cluster
17,Artifact,"[-0.0198666, -0.0022379616, 0.01152883, -0.026738325, 0.005456137, -0.005533063, 0.023683058, 0.00043812994, -0.00043767173, -0.0010603125, -0.020378483, -0.0069580837, -0.009220059, 0.008687565, -0.00043128064, 0.0043468373, 0.008497436, -0.018500222, -0.012610156, 0.0071502654, -0.019249985, 0.0015196075, -0.010674201, 0.0003010751, -0.011782145, -0.007023567, -0.014429361, 0.01882879, -0.011352981, -0.023777101, -0.015643151, 0.04511947, 0.012563895, 0.0011402726, -0.023952907, -0.0052734097, 0.0003162527, 0.0063878484, -0.023627277, -0.027180495, 0.0047785584, -0.0045185187, -0.011329987, 0.00949867, -0.0061824103, -0.0069062393, -0.0062262923, -0.015641527, 0.022837948, 0.0055600507, -0.017523108, -0.017168706, -0.0074256505, -0.011856682, 0.016687088, 0.019579798, -0.004839783, 0.00402621, 0.018300753, -0.0041286643, 0.00042334522, 0.001622398, -0.021779804, 0.017641734, 0.01950334, -0.006285275, 0.00802107, 0.0073556704, -0.014402541, 0.014871675, -0.0015451121, 0.01219746, 0.004047034, 0.009251943, 0.0049465587, 0.028852612, -0.015688555, -0.037930146, 0.014948681, 0.018141171, -0.004803227, -0.011899422, -0.011642317, 0.004481455, -0.00248636, -0.004511024, 0.002469585, -0.01131168, 0.018749828, -0.009774004, 0.0060710986, 0.006806218, 0.043620955, 0.02470648, -0.008841595, 0.008400932, -0.01670872, 0.0074748537, -0.0522001, 0.06458523, ...]",6
29,Battlefield,"[-0.008459435, -0.008373664, 0.009555666, -0.027318653, 0.028762814, -0.012159255, 0.005491361, -0.008138666, -0.005928842, 0.0018658652, -0.00087916333, 0.007357918, -0.005129148, 0.024429787, 0.0017641392, -0.00063874247, 0.020211862, -0.02683749, -0.006309793, 0.005574658, -0.023989705, 0.0014539086, 0.0084316395, 0.0050032884, -0.013733774, -0.004566612, -0.017649723, 0.016956417, -0.010952816, -0.020760901, -0.0129849035, 0.040322024, 0.007989358, -0.0031029263, -0.022467729, -0.008053687, 0.014389284, -0.001960128, -0.01893303, -0.0130294645, -0.0051310565, 0.0038728197, 0.010808338, 0.014488329, 0.0017486267, 0.013083702, -0.0036897303, -0.025263274, 0.034902476, 0.0006308794, -0.0037242214, -0.03390495, -0.009565788, -0.006147524, 0.006707942, 0.010005893, -0.003848868, 0.011299738, 0.023572354, 0.010350032, -0.015689617, 0.0070620324, -0.00808776, 0.013124596, 0.035617404, 0.0027559113, 0.0039101806, -0.0074982485, -0.005544108, 0.014356727, -0.0077827484, 0.01625516, -0.0018770546, 0.01062895, -0.024942901, 0.026817972, -0.011948443, -0.03539549, 0.01000765, 0.014104233, 0.00060653343, -0.00039038027, -0.014728146, 0.017210145, -0.0014929187, 0.00054171693, -0.0048868177, -0.029010588, 0.0053327144, 0.011414711, -0.01347288, 0.0018688523, 0.04237341, 0.03372249, -0.022538722, -0.009322068, -0.0048873816, 0.0062561166, -0.045820244, 0.039958064, ...]",6
30,BattlefieldV,"[-0.0061130486, 0.010125351, 0.0058964132, -0.023217155, 0.013659836, -0.013347101, 0.017119024, 0.0011825864, -0.022232337, 0.0033588442, -0.0019847502, 0.011290497, -0.005039201, 0.01191994, 0.012397225, 0.005491904, 0.015032367, -0.021340707, -0.019091075, 0.01668959, -0.027686909, 0.00384833, 0.018080864, 0.006467695, -0.020751422, -0.0074413884, -0.0056996783, 0.016180135, -0.010784246, -0.036339965, -0.020095624, 0.039388567, -0.0032537198, -0.0046402537, -0.020054022, -0.0074571036, 0.011756467, 0.010540151, -0.022032091, -0.01644989, -0.005688778, 0.0054461607, 0.018899284, 0.0138603905, -0.0050335983, 0.0033955358, -0.010369216, -0.024839828, 0.031216033, -0.005430619, -0.0062975194, -0.03162753, -0.00868917, -0.007776179, 0.022001287, 0.0054543796, -0.0063106497, 0.006995522, 0.02306939, -0.0069311815, -0.009387305, 0.0075244308, -0.01430389, 0.008510666, 0.028579148, -0.0041149817, 0.0009843493, -0.016313182, -0.015446288, 0.019161083, -0.012199738, 0.026740428, -0.0037593502, 0.016634481, -0.012083638, 0.033606462, -0.006583714, -0.03135926, 0.0131659405, 0.01436251, -0.0008920922, -0.0048139635, -0.012051235, 0.018491508, 0.005413608, 0.00480757, -0.0058244094, -0.026047194, 0.014148233, 0.0055755964, 0.003992149, 0.0017394791, 0.033584766, 0.03224846, -0.01499525, -0.0094067715, -4.3071144e-05, 0.009151345, -0.05193326, 0.042332016, ...]",6
35,Blackops4,"[-0.009802968, -0.011861717, 0.007658825, -0.026142433, 0.01676337, -0.007826069, 0.030844754, -0.0014009068, 0.0014028667, 0.0042038267, 0.0032640498, -0.0015615737, 0.0013611892, 0.012812284, 0.0046405564, 0.0053463057, 0.017728407, -0.017595328, -0.01426059, 0.005467236, -0.025902119, -0.008958851, -0.005650463, 0.0032261137, -0.015370174, -0.0067123813, -0.017338749, 0.023531085, -0.014354919, -0.024319142, -0.015410031, 0.024103202, 0.00561813, 0.0038818787, -0.018632736, -0.010484418, 0.00976737, 0.004071047, -0.022091564, -0.016917232, -0.0018186554, 0.006778048, 0.005512291, 0.010719523, -0.003063602, 0.00048755476, -0.0073115835, -0.026115078, 0.033741347, 0.0045881732, -0.005277076, -0.023184683, -0.013096198, 0.00036780315, 0.01852674, 0.01841811, -0.0058145206, 0.017487291, 0.022611111, 0.0066916524, -0.014938613, 0.0024659338, -0.01811551, 0.008218752, 0.037999183, 0.0024757148, 0.009959373, -0.011721594, -0.008532282, 0.01669485, -0.015905492, 0.017803924, 0.0026889746, 0.0064063403, -0.0139308665, 0.027094994, -0.0095288325, -0.041373994, 0.010770705, 0.02449378, -0.0017656713, -0.004577181, -0.018702824, 0.010046683, 0.0033612603, -0.0039285845, -0.0044619413, -0.014885657, 0.00993474, -0.0009505182, -0.004378926, 0.006835866, 0.033325817, 0.02909825, -0.017306019, -0.0033742825, -0.002645716, 0.00994745, -0.051760487, 0.053296506, ...]",6
39,Brawlhalla,"[-0.02738111, -0.006866331, 0.0103861885, -0.006473094, 0.0117184315, -0.013415456, 0.04722197, -0.0066208746, -0.0052441936, -0.00448332, 0.0009569981, -0.02616216, 0.0024749849, 0.0059732557, -0.0065099085, 0.027745062, 0.012290924, -0.013456011, -0.025971849, -0.005480849, -0.023170423, 0.011343773, -0.008331753, 0.00785581, -0.01944133, -0.0133070955, -0.01384378, 0.022683667, -0.0049547586, -0.031606838, -0.0148697505, 0.030511234, 0.0062323464, 0.009150268, -0.029675031, -0.0058914647, -0.00096603186, 0.007417612, -0.016920023, -0.017838588, -0.0027202307, -0.011969498, -0.0013421855, -0.0034892885, -0.014998139, 0.007917421, -0.0105486885, -0.026818883, 0.008031446, -0.007970049, -0.03227224, -0.028753519, -0.0053215884, 0.00430329, 0.004526671, 0.027913889, -0.003991679, -0.004516987, 0.0346222, 0.0032822834, 0.0030958646, 0.02002207, -0.013896158, 0.005400258, 0.02854386, -0.020139573, 0.003995407, -0.0085704345, -0.008973756, 0.02651815, 0.006343206, 0.0060048443, -0.0014442418, 0.012355691, -0.0035712149, 0.017262938, -0.008035444, -0.032106806, 0.015114127, 0.021776045, -0.010267444, -0.0085971225, -0.021304395, 0.010774966, 0.0065181786, 0.011582034, -0.0034817462, -0.021085225, 0.0031403513, -0.0068050693, 0.003208465, 0.01692498, 0.02577051, 0.025909359, -0.014195662, -0.0131253535, 0.003338713, 0.014158909, -0.06560442, 0.06781247, ...]",6
43,CODZombies,"[-0.023187857, -0.0032886714, 0.0027531262, -0.019181198, 0.01765662, -0.008303642, 0.030593975, -0.011947616, 0.010038286, 0.0020707431, -0.010327971, -0.007995106, -0.0034789543, 0.009621273, -0.0055559254, -0.0040309406, 0.010640062, -0.022306126, -0.013457037, 0.01480791, -0.019172117, -0.0036804467, -0.009296382, -0.0002655603, -0.01130825, 0.0027340464, -0.00978568, 0.021477388, -0.01299447, -0.016588466, -0.030156165, 0.046030287, -0.0021729658, -0.0020543113, -0.017249377, -0.008847612, 0.00993292, 0.008550809, -0.014589037, -0.0151009755, 0.010309413, -0.005554736, -0.0011394531, 0.015271738, -0.0070410836, 0.0087446775, -0.013571687, -0.022302287, 0.02568354, -0.0010286074, -0.00017348253, -0.020623425, -0.009555308, 0.007390254, 0.006877905, 0.028303476, -0.008140721, -0.0012038187, 0.017499164, 0.0033270584, -0.012496394, 0.0026208186, -0.0124038495, 0.025975494, 0.0370334, -0.0062580463, -0.0039672256, -0.0072552213, -0.0064885467, 0.00930021, -0.0031881954, 0.019769013, 0.0009578855, 0.01244133, -0.00081368856, 0.01925882, -0.011347433, -0.050290324, 0.008784629, 0.020430896, -0.0038473445, 0.0027895973, -0.011182153, 0.0069776517, -0.005780864, -0.0077519994, -0.008702152, -0.020247135, 0.0033365951, -0.008057131, -0.0030352427, 0.005807346, 0.02896384, 0.017207915, -0.021113627, -0.0033009634, -0.008130208, 0.011410624, -0.06776411, 0.04321134, ...]",6
53,CoDCompetitive,"[-0.020115595, -0.0038411978, 0.0016473253, -0.026451219, 0.015503287, -0.0052842246, 0.04127751, 0.007894038, 0.0067099044, 0.0014540185, -0.025420154, -0.010983652, -0.0030027917, 0.017633185, -0.001706594, -0.0022884544, 0.003956241, -0.022739703, -0.01345621, -0.012712002, -0.022742359, 0.0046939515, -0.013553585, 0.006038446, -0.001972093, -0.0051833494, -0.018186929, 0.02326968, -0.0057741897, -0.019081762, -0.01720496, 0.029105045, 0.008215324, 0.006729963, -0.02198698, -0.002998425, -0.006681409, 0.008077405, -0.017402016, -0.014019368, 0.020167897, -0.020188808, -0.0070590633, 0.008673867, -0.009743232, 0.00082870893, -0.0063715773, -0.018526744, 0.0017838186, 0.013500887, -0.012081965, -0.021584041, 0.00042037695, -0.012878391, 0.015490021, 0.039453596, -0.014478309, 0.009816822, 0.024267148, 0.0009880405, -0.001149716, -0.0043781525, -0.032937102, 0.0031651221, 0.015991043, -0.010926391, 0.0043169176, 0.01866002, -0.00940252, 0.023949679, 0.008878273, 0.00047443729, -0.002208111, 0.007703726, 0.011722717, 0.03259691, -0.014154032, -0.027825898, 0.022554524, 0.013497379, 0.0014674608, -0.017582407, -0.012328071, -0.0010060143, -0.0013760899, 0.0022431652, -0.0059434106, -0.020373203, 0.01661449, -0.0018410193, -0.0068090064, -4.5087883e-05, 0.03595487, 0.023911359, -0.005437068, 0.008078398, -0.0047927597, 0.0026899597, -0.051037684, 0.06565022, ...]",6
56,CompetitiveForHonor,"[-0.017153082, 0.0017101533, -0.0037984957, -0.012151514, 0.010197565, -0.011848882, 0.019154793, 0.0030538132, -0.011069204, 0.0016269935, 0.010400352, 0.009241128, -0.0037021413, 0.0040158355, 0.0064801043, 0.00755243, 0.012428191, 0.0011216414, -0.0160318, 0.0041304803, -0.013576961, -0.003423948, 0.00075309054, 0.0061503663, -0.021756755, -0.0050872318, -0.019374665, 0.012256599, -0.0015399449, -0.03475999, -0.017545054, 0.02895892, 0.009227421, 0.0092013, -0.046298698, 0.013292595, -0.003575444, 0.028434463, -0.021735521, -0.020386327, 0.0051723393, -0.00030003104, -0.0043883575, 0.0076177237, 0.0028320902, 0.0060332124, -0.01078487, -0.027910447, 0.018631117, -0.009333452, -0.017403053, -0.02484656, -0.0056288205, 0.007055588, 0.014214535, 0.015098477, -0.011060541, 0.015226417, 0.031017125, 0.010798853, 0.00080921804, 0.004502229, -0.011883009, 0.011574976, 0.0316458, -0.014456202, -0.00095716066, 0.0026815112, -0.00387985, 0.017142234, 0.0018106421, 0.0023354974, -0.00579317, -0.00045336143, -3.009583e-06, 0.011704251, -0.01986979, -0.023253467, 0.012325049, 0.010878925, -0.014951521, -0.010689745, -0.018557638, 0.016279982, 0.017028004, 0.010859187, -0.001388093, -0.02553486, 0.0147203505, 0.0026695118, 0.007853172, -0.0041763242, 0.04152234, 0.02631031, -0.0038391468, -0.0042843386, -0.0141690625, 0.0077962163, -0.053120848, 0.041654043, ...]",6
57,Competitiveoverwatch,"[-0.02064558, -0.004080696, 0.004760527, -0.026410272, 0.020588644, -0.0052617094, 0.04520529, 0.0064670653, -0.0019386404, -0.009949252, -0.0064480207, -0.006180372, 0.0013983835, 0.01728612, -0.0005145227, -0.0016655634, 0.0299436, -0.01214974, -0.023815885, -0.008868801, -0.0129304705, -0.011532821, 0.0041304566, 0.0073334686, -0.015004637, -0.008928091, -0.027312625, 0.014828117, -0.00037637804, -0.021938955, -0.0066256523, 0.035412718, 0.00435507, 0.008860916, -0.04053427, 0.007770235, -0.0033021155, 0.030385308, -0.020713815, -0.019875892, 0.01392172, -0.009565125, -0.0016767812, -0.005359768, -0.021884322, -0.008217367, -0.006578099, -0.022873174, 0.0010743553, -0.00096875255, -0.023744216, -0.027630031, 0.021119451, -0.0029877305, 0.0005640818, 0.01934855, -0.014920024, -0.005775884, 0.016073128, -0.0010945548, -0.004629696, 0.015649498, -0.009863925, -0.0012541562, 0.011258765, -0.0025746361, 0.0019024267, 0.018357946, -0.00022985571, 0.018078577, 0.008707196, 0.0056405356, 0.0032585273, 0.0020257728, 0.0018195639, 0.030292002, -0.0047065136, -0.031371657, 0.018763544, 0.01586541, 0.009021561, -0.02123382, -0.012737036, 0.0074685602, 0.011389551, 0.008922984, -0.016528685, -0.03459831, 0.013052194, -0.0012913826, -0.0005973012, 0.004923424, 0.038843468, 0.027623307, -0.002784143, 0.011870342, -0.0149805825, 0.012481695, -0.05509111, 0.05262093, ...]",6
88,FortniteCompetitive,"[-0.017992705, -0.0034039188, 0.003557393, -0.020852802, 0.016991643, -0.00536534, 0.036985435, 0.0021116333, -0.006055182, 0.0076867114, -0.011790549, -0.01446488, 0.0075184456, 0.021666037, 0.0010579977, -0.0027008473, 0.01415349, -0.01395531, -0.018926647, -0.0036990973, -0.0212012, -0.0008177757, -0.00614119, 0.005767203, -0.008923416, -0.007136501, -0.020194553, 0.021377278, -0.014927938, -0.029373264, -0.012964782, 0.012638265, -0.0034143862, 0.002982661, -0.027340813, -0.005343306, -0.0022212795, 0.007815385, -0.02920082, -0.01626777, 0.0013272444, -0.0018771775, 0.007884456, 0.0022832407, 0.009209781, 0.0026389367, -0.011983838, -0.023774391, 0.031772584, 0.0015587464, -0.013964626, -0.023033038, 0.0072493036, -0.012958533, 0.024419097, 0.016360652, -0.00940821, 0.01802269, 0.022783391, -0.0016788634, 0.005889833, -0.007622551, -0.01877913, 0.00061875297, 0.025609337, -0.012760451, 0.008555804, 0.0054534515, -0.0028724317, 0.02744904, -0.0009919768, 0.008996244, -0.005594071, 0.015694344, 0.0002425087, 0.029873969, -0.019900303, -0.026672004, 0.015413291, 0.029287994, 0.011654867, -0.004839324, -0.005098133, 0.004809706, 0.012495228, -0.0036010856, 0.0009587591, -0.0115977265, 0.015489219, 0.0036277298, 0.00047606087, 0.005803863, 0.03563154, 0.03133896, -0.0032595417, -0.0038257833, -0.009866262, -0.008956818, -0.05640261, 0.054706186, ...]",6


In [59]:
videogames = list(subreddit_embeddings[subreddit_embeddings['cluster']==6]['subreddit'])
print(videogames)

['Artifact', 'Battlefield', 'BattlefieldV', 'Blackops4', 'Brawlhalla', 'CODZombies', 'CoDCompetitive', 'CompetitiveForHonor', 'Competitiveoverwatch', 'FortniteCompetitive', 'Games', 'GlobalOffensive', 'MLBTheShow', 'MMORPG', 'Overwatch', 'OverwatchUniversity', 'PUBGXboxOne', 'Paladins', 'RedDeadOnline', 'SSBM', 'Seaofthieves', 'Smite', 'SoulCalibur', 'StarWarsBattlefront', 'StreetFighter', 'Tekken', 'TownofSalemgame', 'assassinscreed', 'dayz', 'deadbydaylight', 'farcry', 'forhonor', 'heroesofthestorm', 'leagueoflegends', 'masseffect', 'pcgaming', 'poker', 'reddeadredemption', 'reddeadredemption2', 'truegaming']


In [60]:
l = 0
for s in videogames:
    l += len(get_text_from_subreddit(dataset, s))

print(l)

3305


In [61]:
clusters.loc[clusters['cluster']=='6', 'Number of samples'] = l

In [62]:
clusters

Unnamed: 0,cluster,topic label,Number of samples
0,0,Humor,13791
1,1,Other,5839
2,2,Sport,7042
3,3,Politics,10273
4,4,Love and relationship,9026
5,5,Film and Tv series,4987
6,6,Videogames,3305


### Results

In [63]:
l = 0
for _, row in clusters.iterrows():
    l += row['Number of samples']

print(l)

54263


In [64]:
sum = 0
for i in range(10):
    l = get_lenght(subreddit_embeddings, i)
    print(f"number of samples cluster {i} = {l}")
    sum += l 

print(sum)

number of samples cluster 0 = 13791
number of samples cluster 1 = 5839
number of samples cluster 2 = 7042
number of samples cluster 3 = 10273
number of samples cluster 4 = 9026
number of samples cluster 5 = 4987
number of samples cluster 6 = 3305
number of samples cluster 7 = 0
number of samples cluster 8 = 0
number of samples cluster 9 = 0
54263


In [67]:
subreddit_embeddings['cluster'].value_counts().sort_index()

cluster
0    112
1     68
2     62
3     89
4     65
5     47
6     40
Name: count, dtype: int64

In [None]:
clusters.to_csv('data/clusters.csv', index=False)

In [69]:
subreddit_embeddings.to_csv('data/subreddit_embeddings.csv', index=False)