In [37]:
import pandas as pd

# ---- Load the five source tables (tab-separated) ----
# Every file is read with dtype=str so identifier columns (notably user_id)
# stay exact text in all frames. Two of the five files used to be loaded with
# inferred dtypes, which turned their user_id values into floats; a float id can
# never compare equal to the string ids held by the other three frames, so the
# cross-dataset unique-user count could not match users between them.
# Consequence: numeric-looking columns such as timestamp are strings in every
# frame and must be converted explicitly (e.g. pd.to_numeric) before arithmetic.
# NOTE(review): on_bad_lines="skip" drops malformed rows without any report, so
# the row counts printed below may be lower than the raw line counts.
READ_OPTS = dict(sep="\t", encoding="utf-8", on_bad_lines="skip", dtype=str)

incels_is_comments = pd.read_csv("data/incels.is_AllComments.anon", **READ_OPTS)
reddit_incel_posts = pd.read_csv("data/reddit-incel-posts.anon.txt", **READ_OPTS)
# quoting=3 is csv.QUOTE_NONE: quote characters in this file are treated as
# ordinary text. That option is used together with the python parser engine.
reddit_incelexit = pd.read_csv(
    "data/reddit-IncelExit-posts.anon.txt", engine="python", quoting=3, **READ_OPTS
)
saidit_incel_posts = pd.read_csv("data/saidit-incel-posts.anon.txt", **READ_OPTS)
braincels_incel_posts = pd.read_csv("data/reddit-braincels-posts.anon.txt", **READ_OPTS)

# The first parsed row of the Braincels table is discarded.
# NOTE(review): presumably that row is not a real post (e.g. a repeated header);
# confirm against the raw file.
braincels_incel_posts = braincels_incel_posts.iloc[1:].reset_index(drop=True)

# All frames keyed by dataset name for the loops below
dfs = {
    "incels_is_comments": incels_is_comments,
    "reddit_incel_posts": reddit_incel_posts,
    "reddit_incelexit": reddit_incelexit,
    "saidit_incel_posts": saidit_incel_posts,
    "braincels_incel_posts": braincels_incel_posts
}

# Give every frame the same column names (assigned by position, in place)
colnames = ["link", "comment_id", "user_id", "parent", "timestamp", "title", "text"]
for name, df in dfs.items():
    if df.shape[1] != len(colnames):
        # Fail with the dataset name instead of pandas' generic length-mismatch error
        raise ValueError(
            f"{name}: expected {len(colnames)} columns, found {df.shape[1]}"
        )
    df.columns = colnames

# Quick peek so you can see they loaded
for name, df in dfs.items():
    print(f"{name}: {df.shape}")
    display(df.head(10))

# nunique() ignores missing user_id values; len(unique()) would count NaN as
# one additional "user" in any frame that contains a missing id.
print("Unique users in each dataset:")
for name, df in dfs.items():
    print(f"{name}: {df['user_id'].nunique()}")

# Total unique users across all datasets (missing ids excluded)
all_users = (
    pd.concat([df["user_id"] for df in dfs.values()], ignore_index=True)
    .dropna()
    .unique()
)
print(f"Total unique users across all datasets: {len(all_users)}")

# Total number of rows across all datasets
total_length = sum(df.shape[0] for df in dfs.values())
print(f"Total length of all datasets combined: {total_length}")

incels_is_comments: (6411970, 7)


Unnamed: 0,link,comment_id,user_id,parent,timestamp,title,text
0,0-10-deformed-curry-with-micropenis-fuck-ultim...,post-11015920,924979200.0,,1681672657,NSFW 0/10 deformed curry with micropenis fuck ...,View: https://youtu.be/U65-R5UojX8
1,0-10-deformed-curry-with-micropenis-fuck-ultim...,post-11015929,924979800.0,,1681672779,,Why are you making clickbait titles?
2,0-10-deformed-curry-with-micropenis-fuck-ultim...,post-11015932,924980200.0,,1681672800,,"i heard about that video before, he's just a ..."
3,0-10-deformed-curry-with-micropenis-fuck-ultim...,post-11015939,924980200.0,,1681672859,,
4,0-10-deformed-curry-with-micropenis-fuck-ultim...,post-11015941,924980400.0,,1681672862,,ok
5,0-10-deformed-curry-with-micropenis-fuck-ultim...,post-11015943,924981000.0,post-11015920,1681672904,,
6,0-10-deformed-curry-with-micropenis-fuck-ultim...,post-11015944,924981500.0,,1681672908,,Mogs me
7,0-10-deformed-curry-with-micropenis-fuck-ultim...,post-11016010,924982200.0,post-11015939,1681673768,,
8,0-10-deformed-curry-with-micropenis-fuck-ultim...,post-11016078,924982200.0,,1681674948,,A literal cuck mogs me. Was the bull a curry?
9,0-10-deformed-curry-with-micropenis-fuck-ultim...,post-11016138,924983000.0,post-11015943,1681675953,,Penguin from Wish


reddit_incel_posts: (988060, 7)


Unnamed: 0,link,comment_id,user_id,parent,timestamp,title,text
0,/r/Incels/comments/4k4c9k,4k4c9k,924979784.370224,,1463687543,The cure for your disease,
1,/r/Incels/comments/5VNscmKPVjD9HfBtIpGvtO0X-fX...,5VNscmKPVjD9HfBtIpGvtO0X-fXQRg-ccgQCcLJTaBQ,924980486.069293,,1463454980,Why you're incel,
2,/r/Incels/comments/t5_2y2u1,t5_2y2u1,924980047.255993,,1463453936,Hello,
3,/r/Incels/comments/t5_2y2u1,t5_2y2u1,924980789.8589,,1455499334,the internet's first asocial network,
4,/r/Incels/comments/2716it,2716it,924981346.025006,,1401634505,Incels - People who are alone - People who fee...,
5,/r/Incels/comments/t5_2y2u1,t5_2y2u1,924982222.493196,,1395136982,Why you’re (probably) a feminist or their usef...,
6,/r/Incels/comments/t5_2y2u1,t5_2y2u1,924982222.493196,,1395043047,A 48 year-old virgin's blog,
7,/r/Incels/comments/20m6pu,20m6pu,924982222.493196,,1395042944,A blogger TarnishedSophia has questions for th...,
8,/r/Incels/comments/1ue0yu,1ue0yu,924982552.262455,,1388842629,Have you ever been so far gone that you can't ...,
9,/r/Incels/comments/d3folfo,d3folfo,924983147.481448,d3fo82o,1463957409,,"I hate this psuedoscience of that girls want, ..."


reddit_incelexit: (111083, 7)


Unnamed: 0,link,comment_id,user_id,parent,timestamp,title,text
0,/r/IncelExit/comments/djb2a2/welcome_to_the_va...,f4590y6,924979382.138533,djb2a2,1571369186,,I will... nicely shove mental health and well-...
1,/r/IncelExit/comments/djb2a2/welcome_to_the_va...,f49ec70,924979824.167305,djb2a2,1571463603,,Black pill points and counter points are fine....
2,/r/IncelExit/comments/djb2a2/welcome_to_the_va...,f47q9i0,924980486.069293,djb2a2,1571426361,924980486.069293,I'm already having a hard time understanding w...
3,/r/IncelExit/comments/djb2a2/welcome_to_the_va...,f4586f1,924979382.138533,djb2a2,1571368533,,TIME TO SHOVE MENTAL HEALTH AND WELL-BEING INT...
4,/r/IncelExit/comments/djax7x/hello_brocels_is_...,f438fm6,924979824.167305,djax7x,1571339512,,Wrong Subreddit friend. Thats a permaban.
5,/r/IncelExit/comments/djb2a2/welcome_to_the_va...,f458otg,924979824.167305,djb2a2,1571368920,,Just remember the rules. This is not IncelTear...
6,/r/IncelExit/comments/djb93n/not_an_incel_recr...,f44v1mc,924980763.514056,djb93n,1571359416,,"Good luck with this, and thanks for making the..."
7,/r/IncelExit/comments/djb93n/not_an_incel_recr...,f47kpwj,924979824.167305,djb93n,1571423698,,"If you aren't looking for answers, you won't f..."
8,/r/IncelExit/comments/djbk5o/what_kind_of_advi...,f43m2yg,924981586.192561,djbk5o,1571343047,,I was never what one would consider an outgoin...
9,/r/IncelExit/comments/djb93n/not_an_incel_recr...,f47j736,924980486.069293,djb93n,1571423095,924980486.069293,[deleted]


saidit_incel_posts: (13082, 7)


Unnamed: 0,link,comment_id,user_id,parent,timestamp,title,text
0,110_white_incel_with_poor_eyesight_found_a_way...,ao7j,924979700.0,amjw,1582916198,,"<p>LMAO saw some your videos, hilarious stuff ..."
1,110_white_incel_with_poor_eyesight_found_a_way...,aqbn,924979700.0,amjw,1582916198,,<p>Thanks hahahaha subscribe to my channel and...
2,110_white_incel_with_poor_eyesight_found_a_way...,amjw,924979700.0,amjw,1582916198,,<p>I don&#39;t know how but he hacked the blac...
3,2009_it_redditor.html,wryk,924980000.0,wryk,1560473540,,<p>How much did channel pay them?</p>
4,2019_was_the_golden_age_of_blackpill_on_reeeee...,nzvo,924980200.0,o1yb,1597781415,,<p>Need Platinum Age.</p>
5,2019_was_the_golden_age_of_blackpill_on_reeeee...,o1yb,924980700.0,o1yb,1597781415,,<p>Yep</p>
6,2019s_most_shocking_halloween_costume_award_go...,2u2x,924981400.0,2us8,1572711822,,<p>I bet she was so wet while doing the make u...
7,2019s_most_shocking_halloween_costume_award_go...,2u20,924981600.0,2us8,1572711822,,<p>Roasties be like &quot;that costume is SCAR...
8,2019s_most_shocking_halloween_costume_award_go...,2tqr,924981900.0,2us8,1572711822,,<p>Typical. That&#39;s what they truly want. B...
9,2019s_most_shocking_halloween_costume_award_go...,2uk2,924982200.0,2us8,1572711822,,<p>Girls are not all the same sweetie. They al...


braincels_incel_posts: (2216968, 7)


Unnamed: 0,link,comment_id,user_id,parent,timestamp,title,text
0,https://www.reddit.com/r/Braincels/comments/77...,77ru0b,924979030.686585,,1508566505,Getting Settled in,General discussion and Q&amp;A thread since we...
1,/r/Braincels/comments/doqectl/comment/doqgmkg,doqgmkg,931484113.951227,doqectl,1508701504,,Thanks. It just seemed that it would be anothe...
2,/r/Braincels/comments/doqe6l5/comment/doqectl,doqectl,924979030.686585,doqe6l5,1508699066,,"To your last question, of course your safe her..."
3,/r/Braincels/comments/77ru0b/comment/doq6k7h,doq6k7h,931484113.951227,77ru0b,1508690481,,Is FHO also used here or is this like FA / IWH
4,/r/Braincels/comments/doplfy9/comment/dopnsx0,dopnsx0,924979030.686585,doplfy9,1508652013,,I think the main thing about r/IncelDiscussion...
5,/r/Braincels/comments/dopk1cs/comment/dopm17l,dopm17l,924979139.192799,dopk1cs,1508648408,,Great! I pmed the guy who wrote the treatise a...
6,/r/Braincels/comments/77ru0b/comment/doplfy9,doplfy9,925078641.54442,77ru0b,1508647327,,This has been tried.\n\n/r/IncelDiscussions\n\...
7,/r/Braincels/comments/dopj9dk/comment/dopk1cs,dopk1cs,924979030.686585,dopj9dk,1508644922,,That's a very good idea!\n\nThis sub's all abo...
8,/r/Braincels/comments/77ru0b/comment/dopj9dk,dopj9dk,924979139.192799,77ru0b,1508643701,,Would it be possible to repost very intellectu...
9,/r/Braincels/comments/dopbxoz/comment/dopcpxa,dopcpxa,924979030.686585,dopbxoz,1508635090,,Of course!


Unique users in each dataset:
incels_is_comments: 13241
reddit_incel_posts: 29877
reddit_incelexit: 4005
saidit_incel_posts: 590
braincels_incel_posts: 69304
Total unique users across all datasets: 115031
Total length of all datasets combined: 9741163


In [38]:
# Reddit-only subset of the frames loaded earlier, keyed by dataset name
dfs_reddit = {
    "reddit_incel_posts": reddit_incel_posts,
    "reddit_incelexit": reddit_incelexit,
    "braincels_incel_posts": braincels_incel_posts
}

# Shape and first ten rows of each Reddit frame
for name, df in dfs_reddit.items():
    print(f"{name}: {df.shape}")
    display(df.head(10))

# Row counts per subreddit frame
print("Number of posts r/incel: ", len(reddit_incel_posts))
print("Number of posts r/incelexit: ", len(reddit_incelexit))
print("Number of posts r/braincels: ", len(braincels_incel_posts))
# The total is derived from the dict so it cannot drift from the frames listed in it
print("Number of total posts: ", sum(len(df) for df in dfs_reddit.values()))

# nunique() ignores missing user_id values; len(unique()) would count NaN as
# one additional "user" in any frame that contains a missing id.
print("Unique users in each Reddit dataset:")
for name, df in dfs_reddit.items():
    print(f"{name}: {df['user_id'].nunique()}")


reddit_incel_posts: (988060, 7)


Unnamed: 0,link,comment_id,user_id,parent,timestamp,title,text
0,/r/Incels/comments/4k4c9k,4k4c9k,924979784.370224,,1463687543,The cure for your disease,
1,/r/Incels/comments/5VNscmKPVjD9HfBtIpGvtO0X-fX...,5VNscmKPVjD9HfBtIpGvtO0X-fXQRg-ccgQCcLJTaBQ,924980486.069293,,1463454980,Why you're incel,
2,/r/Incels/comments/t5_2y2u1,t5_2y2u1,924980047.255993,,1463453936,Hello,
3,/r/Incels/comments/t5_2y2u1,t5_2y2u1,924980789.8589,,1455499334,the internet's first asocial network,
4,/r/Incels/comments/2716it,2716it,924981346.025006,,1401634505,Incels - People who are alone - People who fee...,
5,/r/Incels/comments/t5_2y2u1,t5_2y2u1,924982222.493196,,1395136982,Why you’re (probably) a feminist or their usef...,
6,/r/Incels/comments/t5_2y2u1,t5_2y2u1,924982222.493196,,1395043047,A 48 year-old virgin's blog,
7,/r/Incels/comments/20m6pu,20m6pu,924982222.493196,,1395042944,A blogger TarnishedSophia has questions for th...,
8,/r/Incels/comments/1ue0yu,1ue0yu,924982552.262455,,1388842629,Have you ever been so far gone that you can't ...,
9,/r/Incels/comments/d3folfo,d3folfo,924983147.481448,d3fo82o,1463957409,,"I hate this psuedoscience of that girls want, ..."


reddit_incelexit: (111083, 7)


Unnamed: 0,link,comment_id,user_id,parent,timestamp,title,text
0,/r/IncelExit/comments/djb2a2/welcome_to_the_va...,f4590y6,924979382.138533,djb2a2,1571369186,,I will... nicely shove mental health and well-...
1,/r/IncelExit/comments/djb2a2/welcome_to_the_va...,f49ec70,924979824.167305,djb2a2,1571463603,,Black pill points and counter points are fine....
2,/r/IncelExit/comments/djb2a2/welcome_to_the_va...,f47q9i0,924980486.069293,djb2a2,1571426361,924980486.069293,I'm already having a hard time understanding w...
3,/r/IncelExit/comments/djb2a2/welcome_to_the_va...,f4586f1,924979382.138533,djb2a2,1571368533,,TIME TO SHOVE MENTAL HEALTH AND WELL-BEING INT...
4,/r/IncelExit/comments/djax7x/hello_brocels_is_...,f438fm6,924979824.167305,djax7x,1571339512,,Wrong Subreddit friend. Thats a permaban.
5,/r/IncelExit/comments/djb2a2/welcome_to_the_va...,f458otg,924979824.167305,djb2a2,1571368920,,Just remember the rules. This is not IncelTear...
6,/r/IncelExit/comments/djb93n/not_an_incel_recr...,f44v1mc,924980763.514056,djb93n,1571359416,,"Good luck with this, and thanks for making the..."
7,/r/IncelExit/comments/djb93n/not_an_incel_recr...,f47kpwj,924979824.167305,djb93n,1571423698,,"If you aren't looking for answers, you won't f..."
8,/r/IncelExit/comments/djbk5o/what_kind_of_advi...,f43m2yg,924981586.192561,djbk5o,1571343047,,I was never what one would consider an outgoin...
9,/r/IncelExit/comments/djb93n/not_an_incel_recr...,f47j736,924980486.069293,djb93n,1571423095,924980486.069293,[deleted]


braincels_incel_posts: (2216968, 7)


Unnamed: 0,link,comment_id,user_id,parent,timestamp,title,text
0,https://www.reddit.com/r/Braincels/comments/77...,77ru0b,924979030.686585,,1508566505,Getting Settled in,General discussion and Q&amp;A thread since we...
1,/r/Braincels/comments/doqectl/comment/doqgmkg,doqgmkg,931484113.951227,doqectl,1508701504,,Thanks. It just seemed that it would be anothe...
2,/r/Braincels/comments/doqe6l5/comment/doqectl,doqectl,924979030.686585,doqe6l5,1508699066,,"To your last question, of course your safe her..."
3,/r/Braincels/comments/77ru0b/comment/doq6k7h,doq6k7h,931484113.951227,77ru0b,1508690481,,Is FHO also used here or is this like FA / IWH
4,/r/Braincels/comments/doplfy9/comment/dopnsx0,dopnsx0,924979030.686585,doplfy9,1508652013,,I think the main thing about r/IncelDiscussion...
5,/r/Braincels/comments/dopk1cs/comment/dopm17l,dopm17l,924979139.192799,dopk1cs,1508648408,,Great! I pmed the guy who wrote the treatise a...
6,/r/Braincels/comments/77ru0b/comment/doplfy9,doplfy9,925078641.54442,77ru0b,1508647327,,This has been tried.\n\n/r/IncelDiscussions\n\...
7,/r/Braincels/comments/dopj9dk/comment/dopk1cs,dopk1cs,924979030.686585,dopj9dk,1508644922,,That's a very good idea!\n\nThis sub's all abo...
8,/r/Braincels/comments/77ru0b/comment/dopj9dk,dopj9dk,924979139.192799,77ru0b,1508643701,,Would it be possible to repost very intellectu...
9,/r/Braincels/comments/dopbxoz/comment/dopcpxa,dopcpxa,924979030.686585,dopbxoz,1508635090,,Of course!


Number of posts r/incel:  988060
Number of posts r/incelexit:  111083
Number of posts r/braincels:  2216968
Number of total posts:  3316111
Unique users in each Reddit dataset:
reddit_incel_posts: 29877
reddit_incelexit: 4005
braincels_incel_posts: 69304


In [39]:
# Preview the first five rows of the Braincels frame (5 is head()'s default)
braincels_incel_posts.head(n=5)

Unnamed: 0,link,comment_id,user_id,parent,timestamp,title,text
0,https://www.reddit.com/r/Braincels/comments/77...,77ru0b,924979030.686585,,1508566505,Getting Settled in,General discussion and Q&amp;A thread since we...
1,/r/Braincels/comments/doqectl/comment/doqgmkg,doqgmkg,931484113.951227,doqectl,1508701504,,Thanks. It just seemed that it would be anothe...
2,/r/Braincels/comments/doqe6l5/comment/doqectl,doqectl,924979030.686585,doqe6l5,1508699066,,"To your last question, of course your safe her..."
3,/r/Braincels/comments/77ru0b/comment/doq6k7h,doq6k7h,931484113.951227,77ru0b,1508690481,,Is FHO also used here or is this like FA / IWH
4,/r/Braincels/comments/doplfy9/comment/dopnsx0,dopnsx0,924979030.686585,doplfy9,1508652013,,I think the main thing about r/IncelDiscussion...
