In [1]:
import numpy as np
import pandas as pd
import random
from neeka_lib import *
from reddit_helper import *
from sklearn.cluster import KMeans

In [2]:
# Parameters - every tunable value used by the cells below lives here.

# Vote-count thresholds handed to filter_to_multiple_votes
# (as min_usr_votes and min_post_votes respectively).
MIN_VOTES_PER_USER = 4
MIN_VOTES_PER_POST = 4

# How many iterate_graph / resolve_graph rounds to run.
N_GRAPH_ITERATIONS = 19

# length of user/post embeddings
VECTOR_LENGTH = 300

# Subreddits whose submissions and votes are kept.
SUBREDDIT_NAMES = [
    'politics',
    'news',
    'worldnews',
]

In [3]:
# Load the complete vote log (a tab-separated text file) into all_votes_df.
# Nothing is filtered here; later cells narrow it down.
all_votes_df = pd.read_csv(
    'reddit_votes_data/44_million_reddit_votes/44_million_votes.txt',
    sep='\t',
)

In [4]:
# Load the metadata of every submission (tab-separated) and key the table by
# SUBMISSION_ID so later cells can match votes against its index.
all_submissions_df = pd.read_csv(
    'reddit_votes_data/submission_info/submission_info.txt',
    sep='\t',
).set_index('SUBMISSION_ID')

In [5]:
# Submissions posted in one of the subreddits listed in SUBREDDIT_NAMES.
submissions_df = all_submissions_df[all_submissions_df['SUBREDDIT'].isin(SUBREDDIT_NAMES)]

# Votes cast in those subreddits on those submissions.
# The votes table is matched against the 'r/<name>' form of each subreddit name.
# NOTE(review): assumes all_votes_df['SUBREDDIT'] really uses the 'r/' prefix - confirm
# against the data file, otherwise the first filter removes every row.
r_subreddit_names = [f'r/{NAME}' for NAME in SUBREDDIT_NAMES]
votes_df = all_votes_df[all_votes_df['SUBREDDIT'].isin(r_subreddit_names)]
# Narrow the already subreddit-filtered votes_df (not all_votes_df) so that the
# subreddit filter above is kept rather than overwritten.
votes_df = votes_df[votes_df['SUBMISSION_ID'].isin(submissions_df.index)]

In [6]:
# Re-bind votes_df to the result of filter_to_multiple_votes, using the two
# thresholds from the parameters cell.
# NOTE(review): presumably keeps only users/posts with at least that many votes -
# confirm in the helper's definition (it comes from one of the star imports).
votes_df = filter_to_multiple_votes(
    votes_df,
    min_usr_votes=MIN_VOTES_PER_USER,
    min_post_votes=MIN_VOTES_PER_POST,
)

In [7]:
# Distinct usernames present in votes_df.
pol_users = votes_df['USERNAME'].unique()

# Seed NumPy's global RNG so the random initial vectors drawn below are the same
# on every run (same seed value as the KMeans random_state used in this notebook).
np.random.seed(0)

# Per-user state, one entry per username and in the same order as pol_users:
#   vectors              - a random +1/-1 array of length VECTOR_LENGTH
#   intermediary_vectors - an all-zero array of length VECTOR_LENGTH
vectors = pd.Series([np.random.choice([1, -1], VECTOR_LENGTH) for _ in range(len(pol_users))])
intermediary_vectors = pd.Series([np.zeros(VECTOR_LENGTH) for _ in range(len(pol_users))])

# Assemble the users table and index it by username.
users_df = pd.DataFrame({
    'USERS': pol_users,
    'VECTOR': vectors,
    'INTERMEDIARY': intermediary_vectors,
}).set_index('USERS')

In [8]:
# Call the reset_users helper on users_df (defined outside this notebook and brought
# in by one of the star imports); any return value is discarded.
# NOTE(review): presumably re-initialises the VECTOR / INTERMEDIARY columns in place -
# confirm in the helper's source.
reset_users(users_df)

In [9]:
# Iterate graph to form user embeddings
# Runs N_GRAPH_ITERATIONS rounds; each round calls iterate_graph on the votes and
# users tables and then resolve_graph on the users table. Return values are discarded.
# NOTE(review): presumably iterate_graph accumulates into INTERMEDIARY and
# resolve_graph folds that back into VECTOR - confirm in the helpers' source.
for _ in range(N_GRAPH_ITERATIONS):
    iterate_graph(votes_df, users_df)
    resolve_graph(users_df)

          71000 / 71048         

In [10]:
# Split the users into two clusters with k-means over their stacked VECTOR arrays -
# this is needed for testing. fit() returns the estimator itself, so `kmeans` is
# the fitted model.
kmeans = KMeans(n_clusters=2, random_state=0, n_init='auto').fit(
    np.stack(users_df['VECTOR'].to_numpy())
)
group_labels = kmeans.labels_

# Report how many users landed in each cluster.
group_ids, group_counts = np.unique(group_labels, return_counts=True)
for grpid, grpcnt in zip(group_ids, group_counts):
    print(f'{grpid}: {grpcnt}')

0: 6737
1: 10499


In [11]:
# Label every user with the name of its k-means cluster - this is needed for testing.
# Start from a placeholder so any row matched by neither mask stays "NO_GROUP".
users_df['GROUP'] = "NO_GROUP"
# The two masks are disjoint, so the order of these assignments does not matter.
users_df.loc[group_labels == 1, 'GROUP'] = 'GROUP_1'
users_df.loc[group_labels == 0, 'GROUP'] = 'GROUP_0'

In [12]:
# initialize the posts_df dataframe: one row per distinct SUBMISSION_ID found in
# votes_df, indexed by POST_ID and with no other columns yet.
# Series.unique() returns the ids in order of first appearance, so the row order is
# the same on every run; iterating a Python set gives no such guarantee.
posts_df = pd.DataFrame({'POST_ID': votes_df['SUBMISSION_ID'].unique()}).set_index('POST_ID')

# Hand the empty table to reset_post_stats (helper from the star imports).
# NOTE(review): presumably adds/zeros the per-post statistic columns in place - confirm.
reset_post_stats(posts_df)

In [13]:
# calculate some base statistics for each post
# The helper receives the users, votes and posts tables; its return value is discarded.
# NOTE(review): presumably writes its results into posts_df in place - confirm.
calculate_post_stats(users_df, votes_df, posts_df)

In [14]:
# calculate the Neeka score for each post
# Only posts_df is passed and the return value is discarded.
# NOTE(review): presumably stores the score in posts_df in place - confirm.
neeka_score_calculation(posts_df)

In [15]:
# calculate percentile rank
# Keep only the posts where neither GROUP1_X nor GROUP0_X is missing ...
x_index = ~(posts_df['GROUP1_X'].isna() | posts_df['GROUP0_X'].isna())
# ... and work on an independent copy so posts_df itself is left untouched.
posts_x_df = posts_df.loc[x_index].copy()
calculate_percentile_rank(posts_x_df)

# Results

### Statistics

In [16]:
# Compute the test metrics from posts_x_df (the posts kept by the percentile-rank
# cell) and print them; `results` stays available for later inspection.
results = calculate_test_results(posts_x_df)
print_test_results(results)


    simple_agreement: 50.45 %
    neeka_agreement: 65.77 %
    agreement_change: 15.33 %

    simple_apolar: 33.88 %
    neeka_apolar: 49.21 %
    apolar_change: 15.33 %

    simple_neutrality: 70.66 %
    neeka_neutrality: 91.46 %
    neutrality_change: 20.8 %

    Overall:
    simple_quality: 51.66 %
    neeka_quality: 68.81 %
    quality_change: 17.15 %

    #######################################

    simple_group0_bias: 30.32 %
    simple_group1_bias: 59.66 %
    
    neeka_group0_bias: 46.31 %
    neeka_group1_bias: 54.85 %
    


### Most Promoted/Demoted ALL POSTS 

In [17]:
# Use every post: `sample` is just another name for posts_df here (no copy, no subsampling).
sample = posts_df
# submissions_df is passed alongside the sample.
# NOTE(review): presumably used to look up the post titles that get printed - confirm.
print_most_changed(sample, submissions_df)

Most Promoted by **Neeka Consensus** over simple-consensus (most demoted by simple-consensus):
1. "India to buy US$1 billion of US crude for the first time ever"
2. "Michigan Dam Fails; Flooding in Kentucky and Ohio Also Forces Evacuations"
3. "US school 'sorry' for foster care threat over lunch debt"
4. "DEA announces intent to schedule kratom"
5. "Four rockets hit Iraqi military base near Baghdad airport"
6. "Hungary Withdraws From European Singing Competition Because 'It's Too Gay'"
7. "Students called a suicide hotline listed on their ID cards. It was sex hotline instead."
8. "Obama formally joins US into climate pact"
9. "Nearly 175 Saudi military aviation students grounded in U.S. after base shooting"
10. "Oliver Stone: Don't praise President Bush just because you hate Donald Trump"
-
Most Promoted by **simple-consensus** over Neeka (most demoted by Neeka):
1. "Experts say Trump firing of 3 officials including Sondland and Vindman is a ‘criminal’ offense"
2. "Europeans fear relat

### Most Promoted/Demoted SAMPLE A

In [18]:
# Random subset of up to 1000 posts.
# random_state pins the draw so this cell prints the same listing on every run for a
# given posts_df; min() avoids the ValueError DataFrame.sample raises when asked for
# more rows than the frame contains.
sample = posts_df.sample(min(1000, len(posts_df)), random_state=0)
print_most_changed(sample, submissions_df)

Most Promoted by **Neeka Consensus** over simple-consensus (most demoted by simple-consensus):
1. "40 years ago we stopped the practice of separating American Indian families. Let’s not reverse course."
2. "Rod Rosenstein resigned and thanked Trump for his ‘courtesy’. Is this the saddest ever case of Stockholm syndrome?"
3. "5 rockets land near american embassy"
4. "Black hole found 1,000 light years from Earth: Object found in HR 6819 system is the closest to Earth yet known – and is unusually dark."
5. "Prime Minister Boris Johnson released from intensive care"
6. "Connecticut Police Accidentally Record Themselves Conspiring to Fabricate Criminal Charges Against Protester"
7. "Beto O'Rourke raises more funds in 3rd quarter than Jeb Bush raised for entire 2016 campaign"
8. "Rohingya leaders condemn 'liar' Aung San Suu Kyi after she denies Myanmar genocide"
9. "Steyer aide offered money for endorsements"
10. "Trump, Bill Barr, and the arrival of the worst-case scenario"
-
Most Promoted

### Most Promoted/Demoted SAMPLE B

In [19]:
# Random subset of up to 1000 posts.
# random_state pins the draw so this cell prints the same listing on every run for a
# given posts_df; min() avoids the ValueError DataFrame.sample raises when asked for
# more rows than the frame contains.
sample = posts_df.sample(min(1000, len(posts_df)), random_state=1)
print_most_changed(sample, submissions_df)

Most Promoted by **Neeka Consensus** over simple-consensus (most demoted by simple-consensus):
1. "US-linked Australian church fined US$98,000 for selling bleach as coronavirus ‘miracle cure’: ‘There is no clinical, scientifically-accepted evidence showing that Miracle Mineral Solution can cure or alleviate any disease,’ says Australia’s drug regulator"
2. "Black snow falling in Siberia due to air loaded with coal dust prompted the regional governor on Tuesday to temporarily close a coal processing plant he accused of killing residents"
3. "Doctors and nurses are getting hit with pay cuts, layoffs, and furloughs even as they fight the coronavirus pandemic"
4. "Donald Trump tweets "Impeach the Pres," Twitter responds accordingly: "I bet Melania typed that""
5. "'He hates white men': Prosecutors accuse Iowa man, convicted in separate murder, of racially-charged killing"
6. "U.S. envoy Sondland did not link Biden probe to aid: Ukraine minister"
7. "Obama tells anxious Democrats to 'chill 

### Most Promoted/Demoted SAMPLE C

In [20]:
# Random subset of up to 1000 posts.
# random_state pins the draw so this cell prints the same listing on every run for a
# given posts_df; min() avoids the ValueError DataFrame.sample raises when asked for
# more rows than the frame contains.
sample = posts_df.sample(min(1000, len(posts_df)), random_state=2)
print_most_changed(sample, submissions_df)

Most Promoted by **Neeka Consensus** over simple-consensus (most demoted by simple-consensus):
1. "India: J&amp;K cop posts a video of bustling Kashmir streets ahead of Eid"
2. "The NSA has obtained direct access to the systems of Google, Facebook, Apple and other US internet giants allowing them to freely obtain the content of emails, video and voice chat, videos, photos, Skype chats, file transfers, social networking details, and more."
3. "Google tracked his bike ride past a burglarized home. That made him a suspect"
4. "40 years ago we stopped the practice of separating American Indian families. Let’s not reverse course."
5. "Trump Trust Revised So He Can Take Profits From His Businesses At Any Time"
6. "‘Just go ahead. Let’s do this.’ Democratic presidential contender Pete Buttigieg takes on protester at Chicago fundraiser."
7. "A California college student has accused popular video-sharing app TikTok in a class-action lawsuit of transferring private user data to servers in China,