In [1]:
import numpy as np
import pandas as pd
import random
from neeka_lib import *
from reddit_helper import *
from sklearn.cluster import KMeans

In [2]:
# Parameters
# Everything a reader might want to tune lives in this cell.

# Seed for the global NumPy / stdlib random generators. The user vectors are
# drawn with np.random and pandas' unseeded .sample() also pulls from NumPy's
# global state, so seeding here makes a Restart & Run All reproducible.
RANDOM_SEED = 0

# Thresholds handed to filter_to_multiple_votes (min_usr_votes / min_post_votes).
MIN_VOTES_PER_USER = 8
MIN_VOTES_PER_POST = 8

# Number of iterate_graph / resolve_graph passes used to build user embeddings.
N_GRAPH_ITERATIONS = 19

# Weights forwarded to neeka_score_calculation (div_weight, cent_weight, pol_weight).
DIV_WEIGHT = 0
CENT_WEIGHT = 1
POL_WEIGHT = 1

VECTOR_LENGTH = 300  # length of user/post embeddings

# Subreddits to analyse, written without the 'r/' prefix.
SUBREDDIT_NAMES = ['politics', 'news', 'worldnews']

np.random.seed(RANDOM_SEED)
random.seed(RANDOM_SEED)

In [3]:
# Load the complete, tab-separated vote log (every subreddit) into memory.
all_votes_df = pd.read_csv(
    'reddit_votes_data/44_million_reddit_votes/44_million_votes.txt',
    sep='\t',
)

In [4]:
# Load the tab-separated submission metadata for every subreddit and key it by
# SUBMISSION_ID so later cells can look submissions up through the index.
all_submissions_df = pd.read_csv(
    'reddit_votes_data/submission_info/submission_info.txt',
    sep='\t',
).set_index('SUBMISSION_ID')

In [5]:
# Submissions posted in the subreddits of interest.
submissions_df = all_submissions_df[all_submissions_df['SUBREDDIT'].isin(SUBREDDIT_NAMES)]

# Votes cast in the subreddits of interest, on the submissions selected above.
# Vote rows are matched against the 'r/<name>' form of each subreddit name.
# Both conditions are applied to the same frame and combined; previously the
# second filter restarted from all_votes_df and silently discarded the first.
r_subreddit_names = [f'r/{name}' for name in SUBREDDIT_NAMES]
in_target_subreddit = all_votes_df['SUBREDDIT'].isin(r_subreddit_names)
on_selected_submission = all_votes_df['SUBMISSION_ID'].isin(submissions_df.index)
votes_df = all_votes_df[in_target_subreddit & on_selected_submission]

In [6]:
# Thin out sparse data using the thresholds from the parameters cell.
# NOTE(review): filter_to_multiple_votes arrives via a star import; presumably
# it keeps only users/posts with at least the given number of votes — confirm.
# votes_df is rebound here, so later cells see the filtered frame.
votes_df = filter_to_multiple_votes(votes_df, min_usr_votes=MIN_VOTES_PER_USER, min_post_votes=MIN_VOTES_PER_POST)

In [7]:
# Every distinct user that still has votes after filtering.
pol_users = votes_df['USERNAME'].unique()
n_users = len(pol_users)

# One random +1/-1 vector of length VECTOR_LENGTH per user, plus a zeroed
# scratch vector of the same length.
vectors = pd.Series([np.random.choice([1, -1], VECTOR_LENGTH) for _ in range(n_users)])
intermediary_vectors = pd.Series([np.zeros(VECTOR_LENGTH) for _ in range(n_users)])

# Assemble the per-user table, indexed by username.
users_df = pd.DataFrame(
    {
        'USERS': pol_users,
        'VECTOR': vectors,
        'INTERMEDIARY': intermediary_vectors,
    }
).set_index('USERS')

In [8]:
# NOTE(review): reset_users arrives via a star import; presumably it
# (re)initialises the per-user columns of users_df in place — TODO confirm.
reset_users(users_df)

In [9]:
# Iterate graph to form user embeddings.
# Runs N_GRAPH_ITERATIONS passes; each pass calls iterate_graph on the votes
# and users, then resolve_graph on the users.
# NOTE(review): presumably iterate_graph accumulates into INTERMEDIARY and
# resolve_graph folds that back into VECTOR — confirm against the library.
for _ in range(N_GRAPH_ITERATIONS):
    iterate_graph(votes_df, users_df)
    resolve_graph(users_df)

          35600 / 35608         

In [10]:
# Split users into two clusters by their embedding vectors (needed for testing).
# Stack the per-user vectors into a single 2-D array, one row per user.
user_matrix = np.stack(users_df['VECTOR'].to_numpy())
kmeans = KMeans(n_clusters=2, random_state=0, n_init='auto').fit(user_matrix)
group_labels = kmeans.labels_

# Report how many users landed in each cluster.
group_ids, group_counts = np.unique(group_labels, return_counts=True)
for grpid, grpcnt in zip(group_ids, group_counts):
    print(f'{grpid}: {grpcnt}')

0: 4632
1: 8024


In [11]:
# Translate each user's cluster label into a named group (needed for testing).
# Any label other than 0 or 1 falls back to "NO_GROUP".
users_df['GROUP'] = np.select(
    [group_labels == 0, group_labels == 1],
    ['GROUP_0', 'GROUP_1'],
    default="NO_GROUP",
)

In [12]:
# Initialize posts_df: one row per distinct submission that received votes,
# indexed by POST_ID.
# Series.unique() keeps first-appearance order, so the row order is the same on
# every run. The previous list(set(...)) order could change between kernel
# sessions, which in turn changed what the positional .sample() calls return.
posts_df = pd.DataFrame({'POST_ID': votes_df['SUBMISSION_ID'].unique()}).set_index('POST_ID')
reset_post_stats(posts_df)

In [13]:
# Calculate some base statistics for each post from the users and their votes.
# NOTE(review): presumably fills posts_df in place (nothing is returned or
# rebound here) — confirm against calculate_post_stats.
calculate_post_stats(users_df, votes_df, posts_df)

In [14]:
# Calculate the Neeka score for each post, using the weights from the
# parameters cell.
neeka_score_calculation(posts_df, div_weight=DIV_WEIGHT, cent_weight=CENT_WEIGHT, pol_weight=POL_WEIGHT)

In [15]:
# Percentile ranks are computed only for posts where both GROUP0_X and
# GROUP1_X are populated; work on a copy so posts_df itself is left untouched.
x_index = posts_df['GROUP0_X'].notna() & posts_df['GROUP1_X'].notna()
posts_x_df = posts_df.loc[x_index].copy()
calculate_percentile_rank(posts_x_df)

# Results

### Statistics

In [16]:
# Compute the evaluation metrics on the posts that have values for both groups,
# then print them.
results = calculate_test_results(posts_x_df)
print_test_results(results)


    simple_agreement: 46.68 %
    neeka_agreement: 54.49 %
    agreement_change: 7.81 %

    simple_apolar: 34.12 %
    neeka_apolar: 49.05 %
    apolar_change: 14.93 %

    simple_neutrality: 66.46 %
    neeka_neutrality: 98.05 %
    neutrality_change: 31.58 %

    Overall:
    simple_quality: 49.09 %
    neeka_quality: 67.2 %
    quality_change: 18.11 %

    #######################################

    simple_group0_bias: 24.95 %
    simple_group1_bias: 58.48 %
    
    neeka_group0_bias: 44.06 %
    neeka_group1_bias: 42.11 %
    


### Most Promoted/Demoted ALL POSTS 

In [17]:
# Use every post (no sub-sampling) for the most promoted/demoted listing.
# submissions_df is passed alongside; presumably used to look up titles — confirm.
sample = posts_df
print_most_changed(sample, submissions_df)

Most Promoted by **Neeka Consensus** over simple-consensus (most demoted by simple-consensus):
1. "NBA owner Mark Cuban to Senate: 'Do your f---ing job'"
2. "Watch: Where Are All the Flag Lovers Now? Trump Plays and Points During the National Anthem"
3. "Trump approval ratings nosedive amid coronavirus outbreak"
4. "Mark Cuban wants the US government to give every household a $1,000 stimulus check every 2 weeks that expires if it's not spent within 10 days"
5. "It’s a MAGA Microbe Meltdown | Trump utterly fails to rise to his first real crisis."
6. "Warren Would Take Billionaires Down a Few Billion Pegs - Elizabeth Warren’s tax proposals would significantly curb the gigantic fortunes of America’s richest families over time."
7. "Tom Ridge: Selfish protests against stay-at-home orders dishonor America's veterans"
8. "Biden rolls out new policies on Medicare and student debt in effort to court Sanders supporters"
9. "Wetherspoons denies ‘abandoning’ staff in coronavirus crisis - The UK p

### Most Promoted/Demoted SAMPLE A

In [18]:
# SAMPLE A: up to 1000 randomly chosen posts.
# A fixed random_state makes the listing reproducible on re-run, and capping n
# avoids a ValueError when fewer than 1000 posts survive filtering.
sample = posts_df.sample(n=min(1000, len(posts_df)), random_state=1)
print_most_changed(sample, submissions_df)

Most Promoted by **Neeka Consensus** over simple-consensus (most demoted by simple-consensus):
1. "Manufacturing Success: CNN’s Premature Parade for Buttigieg in Iowa"
2. "Another mayor in the Philippines shot dead inside his own office at the town hall on Wednesday, September 5 making him the 11th mayor shot dead under President Duterte's Administration"
3. "Nancy Pelosi: Trump’s Slow Response To Coronavirus Pandemic Was ‘Deadly’ - “As the president fiddles, people are dying,” the House speaker said."
4. "China shuts down more cities in bid to contain deadly virus"
5. "Centrist Pundits Assume Voters Agree with Them. Polling Tells a Different Story."
6. "Japanese Fisheries Collapsed Due to Pesticides, New Research Says"
7. "‘Inexcusable’: Dr. Sanjay Gupta Goes Off on Georgia Governor’s ‘Stunning’ Coronavirus Admission"
8. "New Poll Shows Every Democratic Frontrunner Beating Donald Trump in 2020 Election"
9. "Bald eagle rescued in Missouri after being shot in wing; shooter could face $1

### Most Promoted/Demoted SAMPLE B

In [19]:
# SAMPLE B: up to 1000 randomly chosen posts, with its own seed so the draw
# differs from the other samples.
# A fixed random_state makes the listing reproducible on re-run, and capping n
# avoids a ValueError when fewer than 1000 posts survive filtering.
sample = posts_df.sample(n=min(1000, len(posts_df)), random_state=2)
print_most_changed(sample, submissions_df)

Most Promoted by **Neeka Consensus** over simple-consensus (most demoted by simple-consensus):
1. "Nunes threatens to take CNN, Daily Beast to court over story about meeting with Ukrainian prosecutor | TheHill"
2. "Giuliani pal Lev Parnas wants to testify about Nunes’ scrapped Ukraine trip: lawyer"
3. "Public Enemy, Sarah Silverman to join Sanders at rally in Los Angeles"
4. "Sanders becomes first to qualify for Maine primary as largest progressive organization in Iowa endorses him"
5. "Multiple studies show Medicare for All would be cheaper than public option pushed by moderates"
6. "Biden, not Sanders, gains in popularity after Warren drops out: Reuters/Ipsos poll"
7. "Opinion: Are Americans ready for a female president? Yes. In fact, they might prefer one"
8. "Newly revealed emails show why Trump should fear a real Senate trial"
9. "Brazilians call for boycotts of major companies that support Bolsonaro | World news"
10. "Kamala Harris aide says in resignation letter: 'I've never see

### Most Promoted/Demoted SAMPLE C

In [20]:
# SAMPLE C: up to 1000 randomly chosen posts, with its own seed so the draw
# differs from the other samples.
# A fixed random_state makes the listing reproducible on re-run, and capping n
# avoids a ValueError when fewer than 1000 posts survive filtering.
sample = posts_df.sample(n=min(1000, len(posts_df)), random_state=3)
print_most_changed(sample, submissions_df)

Most Promoted by **Neeka Consensus** over simple-consensus (most demoted by simple-consensus):
1. "Pope cancels event due to 'illness' after touching hands and kissing heads at Vatican"
2. "Trump’s Budget: Making America God-Awful Again | It would destroy what’s left of the social safety net and finish the hollowing out of America’s working and middle classes."
3. "Virus-hit Chinese city to shut public transport"
4. "FBI: Nation-state actors have breached two US municipalities"
5. "Queen Elizabeth gives a coronavirus speech and the contrast with Trump is stunning"
6. "Joe Biden proposes expanding Medicare eligibility and student debt relief"
7. "George Conway Says Republicans Would Be 'Out For Blood' If Obama Had Done What Trump Did With Ukraine"
8. "In Year-End Address, Dying Healthcare Activist Ady Barkan Reminds Public Joe Biden Only 2020 Democrat Not to Meet With Him"
9. "Trump Is the Founders’ Worst Nightmare | The very conduct that necessitates presidential impeachment also suppl