In [2]:
import tensorflow as tf
physical_devices = tf.config.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(physical_devices[0], enable=True)
print("Num GPUs Available: ", len(physical_devices))

Num GPUs Available:  1


# Import data

In [3]:
import pickle
import gdown

In [3]:
data_cleaned_url = "https://drive.google.com/uc?id=1-I5k-1NlFozfdbRb5JVy7nEfQEXqYP9c"
output = './Download/data_cleaned_gdown.zip'
gdown.download(data_cleaned_url, output, quiet=False)

Downloading...
From: https://drive.google.com/uc?id=1-I5k-1NlFozfdbRb5JVy7nEfQEXqYP9c
To: /home/anthony/Documents/Research-Mapping-Uncanny-Valley/Code/Download/data_cleaned_gdown.zip
786MB [00:16, 49.1MB/s] 


'./Download/data_cleaned_gdown.zip'

module zipfile does not support the compression format (I don't know why). 
You can use
```bash
sudo apt-get update
sudo apt-get install unzip
unzip ./Download/data_cleaned_gdown.zip
```
to unzip the file to `/Code/Download` directory

In [4]:
with open('./Download/data_cleaned.pickle', 'rb') as handle:
    data_cleaned = pickle.load(handle)

# 1. Building a Set of Microframes

In [5]:
import nltk
from nltk.corpus import wordnet as wn
nltk.download('wordnet')

[nltk_data] Downloading package wordnet to /home/anthony/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

### Show all antonyms in WordNet:

In [6]:
from nltk.corpus import wordnet as wn

def antonyms_for(word):
    antonyms = set()
    for ss in wn.synsets(word):
        for lemma in ss.lemmas():
            any_pos_antonyms = [ antonym.name() for antonym in lemma.antonyms() ]
            for antonym in any_pos_antonyms:
                antonym_synsets = wn.synsets(antonym)
                if wn.ADJ not in [ ss.pos() for ss in antonym_synsets ]:
                    continue
                antonyms.add(antonym)
    return antonyms

In [7]:
antonyms_for("terrifying")

set()

In [13]:
import torch
import torchtext
import numpy as np

In [14]:
glove = torchtext.vocab.GloVe(name="840B",dim=300)

### Example text

In [15]:
v_p_1 = glove['creepy']
v_p_2 = glove['non-creepy']
v_a_1 = v_p_2 - v_p_1

In [16]:
v_p_1 = glove['man']
v_p_2 = glove['woman']
v_a_2 = v_p_2 - v_p_1

In [17]:
v_a = [np.array(v_a_1.tolist()),np.array(v_a_2.tolist())]

# Framing Bias and Intensity

In [18]:
import pandas as pd
import numpy as np

In [167]:
topn = 100

iterables = [data_cleaned['RS_2020_nosleep']['id'].head(topn).to_list(), ['creepy - non-creepy', 'man - woman']]

index = pd.MultiIndex.from_product(iterables, names=['id', 'microframe'])
import swifter
df = pd.DataFrame(np.repeat(data_cleaned['RS_2020_nosleep'][['title', 'selftext', 'score']].head(topn).values, 2, axis = 0),index=index, columns=['title', 'selftext', 'score']).head(topn).copy()
df

Unnamed: 0_level_0,Unnamed: 1_level_0,title,selftext,score
id,microframe,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
gxsa0i,creepy - non-creepy,Do NOT Open Your Eyes... (Pt. 1),This is the only rule of our household. If you...,1
gxsa0i,man - woman,Do NOT Open Your Eyes... (Pt. 1),This is the only rule of our household. If you...,1
gxs6jf,creepy - non-creepy,Do NOT open your eyes. (The Beginning),This is the only rule of our household. If you...,1
gxs6jf,man - woman,Do NOT open your eyes. (The Beginning),This is the only rule of our household. If you...,1
gxrytp,creepy - non-creepy,My Best Friend Saw Bugs Under His Skin,It is hard for me to talk about my old friend ...,1
...,...,...,...,...
gxjuvy,man - woman,I own a boutique that offers full body transfo...,I realize it has been quite some time since my...,1
gxj3ed,creepy - non-creepy,I thought it was just a stomachache.,I never thought I'd experience something like ...,1
gxj3ed,man - woman,I thought it was just a stomachache.,I never thought I'd experience something like ...,1
gxiojd,creepy - non-creepy,HE. WON'T. LEAVE,"its been 6 days, I've been running for 6 DAYS....",1


In [168]:
def v_a_apply(row):
    if row.name[1] == "creepy - non-creepy":
        return np.array(v_a_1)
    else: 
        return np.array(v_a_2)

In [169]:
df['v_a'] = df.apply(v_a_apply, axis=1)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,title,selftext,score,v_a
id,microframe,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
gxsa0i,creepy - non-creepy,Do NOT Open Your Eyes... (Pt. 1),This is the only rule of our household. If you...,1,"[-0.31623998, -0.18520999, 0.06655002, 0.31497..."
gxsa0i,man - woman,Do NOT Open Your Eyes... (Pt. 1),This is the only rule of our household. If you...,1,"[0.19866699, 0.07221998, -0.186463, 0.58374, -..."
gxs6jf,creepy - non-creepy,Do NOT open your eyes. (The Beginning),This is the only rule of our household. If you...,1,"[-0.31623998, -0.18520999, 0.06655002, 0.31497..."
gxs6jf,man - woman,Do NOT open your eyes. (The Beginning),This is the only rule of our household. If you...,1,"[0.19866699, 0.07221998, -0.186463, 0.58374, -..."
gxrytp,creepy - non-creepy,My Best Friend Saw Bugs Under His Skin,It is hard for me to talk about my old friend ...,1,"[-0.31623998, -0.18520999, 0.06655002, 0.31497..."
...,...,...,...,...,...
gxjuvy,man - woman,I own a boutique that offers full body transfo...,I realize it has been quite some time since my...,1,"[0.19866699, 0.07221998, -0.186463, 0.58374, -..."
gxj3ed,creepy - non-creepy,I thought it was just a stomachache.,I never thought I'd experience something like ...,1,"[-0.31623998, -0.18520999, 0.06655002, 0.31497..."
gxj3ed,man - woman,I thought it was just a stomachache.,I never thought I'd experience something like ...,1,"[0.19866699, 0.07221998, -0.186463, 0.58374, -..."
gxiojd,creepy - non-creepy,HE. WON'T. LEAVE,"its been 6 days, I've been running for 6 DAYS....",1,"[-0.31623998, -0.18520999, 0.06655002, 0.31497..."


In [171]:
import spacy
from sklearn.metrics.pairwise import cosine_similarity
nlp = spacy.load("en_core_web_lg")
    
def bias(row):
    # --- Calculate GloVe embeddings
    doc = nlp(row['selftext'])

    # doc_li is a list of tuples, where each tuple is (word, GloVe embedding)
    doc_li = []
    for token in doc:
        doc_li.append((token.text, glove[token.text]))
        
    # --- Calculate B
    denom_B = 0
    for word, emb in dict(doc_li).items():
        c = cosine_similarity(emb.reshape(1,-1), row['v_a'].reshape(1,-1)).item()
        f = [i[0] for i in doc_li].count(word)
        denom_B += f*c

    B = denom_B / len(doc_li)
    return B

def Bias_T(row):
    # --- Calculate GloVe embeddings
    doc = nlp(' '.join(df['selftext'].tolist()))

    # doc_li is a list of tuples, where each tuple is (word, GloVe embedding)
    doc_li = []
    for token in doc:
        doc_li.append((token.text, glove[token.text]))
        
    # --- Calculate B
    denom_B = 0
    for word, emb in dict(doc_li).items():
        c = cosine_similarity(emb.reshape(1,-1), row['v_a'].reshape(1,-1)).item()
        f = [i[0] for i in doc_li].count(word)
        denom_B += f*c

    B = denom_B / len(doc_li)
    return B


def intensity(row):
    doc = nlp(row['selftext'])

    # doc_li is a list of tuples, where each tuple is (word, GloVe embedding)
    doc_li = []
    for token in doc:
        doc_li.append((token.text, glove[token.text]))

    # --- Calculate I
    denom_I = 0
    for word, emb in dict(doc_li).items():
        f = [i[0] for i in doc_li].count(word)
        c = cosine_similarity(emb.reshape(1,-1), row['v_a'].reshape(1,-1)).item()
        denom_I += f*(c - row['Bias_T'])**2

    I = denom_I / len(doc_li)
    return I

In [30]:
from tqdm import tqdm
tqdm.pandas()

In [183]:
Bias_T = df.iloc[1:3,:].progress_apply(Bias_T, axis = 1)

100%|██████████| 2/2 [02:57<00:00, 88.95s/it]


In [201]:
df['Bias_T'] = np.tile(Bias_T.values, int(len(df)/2))
df

Unnamed: 0_level_0,Unnamed: 1_level_0,title,selftext,score,v_a,B,Bias_T,I
id,microframe,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
gxsa0i,creepy - non-creepy,Do NOT Open Your Eyes... (Pt. 1),This is the only rule of our household. If you...,1,"[-0.31623998, -0.18520999, 0.06655002, 0.31497...",-0.191825,-0.018385,
gxsa0i,man - woman,Do NOT Open Your Eyes... (Pt. 1),This is the only rule of our household. If you...,1,"[0.19866699, 0.07221998, -0.186463, 0.58374, -...",-0.019042,-0.180420,
gxs6jf,creepy - non-creepy,Do NOT open your eyes. (The Beginning),This is the only rule of our household. If you...,1,"[-0.31623998, -0.18520999, 0.06655002, 0.31497...",-0.191825,-0.018385,
gxs6jf,man - woman,Do NOT open your eyes. (The Beginning),This is the only rule of our household. If you...,1,"[0.19866699, 0.07221998, -0.186463, 0.58374, -...",-0.019042,-0.180420,
gxrytp,creepy - non-creepy,My Best Friend Saw Bugs Under His Skin,It is hard for me to talk about my old friend ...,1,"[-0.31623998, -0.18520999, 0.06655002, 0.31497...",-0.196773,-0.018385,
...,...,...,...,...,...,...,...,...
gxjuvy,man - woman,I own a boutique that offers full body transfo...,I realize it has been quite some time since my...,1,"[0.19866699, 0.07221998, -0.186463, 0.58374, -...",-0.003641,-0.180420,
gxj3ed,creepy - non-creepy,I thought it was just a stomachache.,I never thought I'd experience something like ...,1,"[-0.31623998, -0.18520999, 0.06655002, 0.31497...",-0.185393,-0.018385,
gxj3ed,man - woman,I thought it was just a stomachache.,I never thought I'd experience something like ...,1,"[0.19866699, 0.07221998, -0.186463, 0.58374, -...",-0.015623,-0.180420,
gxiojd,creepy - non-creepy,HE. WON'T. LEAVE,"its been 6 days, I've been running for 6 DAYS....",1,"[-0.31623998, -0.18520999, 0.06655002, 0.31497...",-0.187449,-0.018385,


In [202]:
df['B'] = df.progress_apply(bias, axis = 1)

100%|██████████| 100/100 [00:25<00:00,  3.92it/s]


In [203]:
df['I'] = df.progress_apply(intensity, axis = 1)

100%|██████████| 100/100 [00:26<00:00,  3.83it/s]


In [204]:
df['B'].max()

0.01351740594255536

In [205]:
df.groupby(level=1).mean(['B','I'])

Unnamed: 0_level_0,B,Bias_T,I
microframe,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
creepy - non-creepy,-0.181523,-0.018385,0.031311
man - woman,-0.018442,-0.18042,0.030405


In [206]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,title,selftext,score,v_a,B,Bias_T,I
id,microframe,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
gxsa0i,creepy - non-creepy,Do NOT Open Your Eyes... (Pt. 1),This is the only rule of our household. If you...,1,"[-0.31623998, -0.18520999, 0.06655002, 0.31497...",-0.191825,-0.018385,0.033087
gxsa0i,man - woman,Do NOT Open Your Eyes... (Pt. 1),This is the only rule of our household. If you...,1,"[0.19866699, 0.07221998, -0.186463, 0.58374, -...",-0.019042,-0.180420,0.028752
gxs6jf,creepy - non-creepy,Do NOT open your eyes. (The Beginning),This is the only rule of our household. If you...,1,"[-0.31623998, -0.18520999, 0.06655002, 0.31497...",-0.191825,-0.018385,0.033087
gxs6jf,man - woman,Do NOT open your eyes. (The Beginning),This is the only rule of our household. If you...,1,"[0.19866699, 0.07221998, -0.186463, 0.58374, -...",-0.019042,-0.180420,0.028752
gxrytp,creepy - non-creepy,My Best Friend Saw Bugs Under His Skin,It is hard for me to talk about my old friend ...,1,"[-0.31623998, -0.18520999, 0.06655002, 0.31497...",-0.196773,-0.018385,0.034942
...,...,...,...,...,...,...,...,...
gxjuvy,man - woman,I own a boutique that offers full body transfo...,I realize it has been quite some time since my...,1,"[0.19866699, 0.07221998, -0.186463, 0.58374, -...",-0.003641,-0.180420,0.037692
gxj3ed,creepy - non-creepy,I thought it was just a stomachache.,I never thought I'd experience something like ...,1,"[-0.31623998, -0.18520999, 0.06655002, 0.31497...",-0.185393,-0.018385,0.032288
gxj3ed,man - woman,I thought it was just a stomachache.,I never thought I'd experience something like ...,1,"[0.19866699, 0.07221998, -0.186463, 0.58374, -...",-0.015623,-0.180420,0.031346
gxiojd,creepy - non-creepy,HE. WON'T. LEAVE,"its been 6 days, I've been running for 6 DAYS....",1,"[-0.31623998, -0.18520999, 0.06655002, 0.31497...",-0.187449,-0.018385,0.032858


In [84]:
# data sample
data = df['selftext'].iloc[1].split()
# prepare bootstrap sample
boot = np.random.choice(data, 10, replace=True)
print('Bootstrap Sample: %s' % boot)

Bootstrap Sample: ['incidents' 'dreaming' 'kind' 'quiet' 'animals' 'but' 'crossing' 'rule'
 'the' 'it']


In [159]:
from statistics import mean
def eta_B_apply(row):
    doc = nlp(row['selftext'])

    # doc_li is a list of tuples, where each tuple is (word, GloVe embedding)
    doc_li = []
    for token in doc:
        doc_li.append((token.text, glove[token.text]))
        
    # for 1000 bootstrap samples
    B_s = []
    for i in range(1000):
        doc_li_boot = np.array(doc_li, dtype="object")[np.random.choice(len(doc_li),10,replace=True)]

        # --- Calculate B
        denom_B = 0
        for word, emb in dict(doc_li_boot).items():
            c = cosine_similarity(emb.reshape(1,-1), row['v_a'].reshape(1,-1)).item()
            f = [i[0] for i in doc_li_boot].count(word)
            denom_B += f*c

        B = denom_B / len(doc_li_boot)
        B_s.append(B)
        
    return abs(row['B'] - mean(B_s))

def eta_I_apply(row):
    doc = nlp(row['selftext'])

    # doc_li is a list of tuples, where each tuple is (word, GloVe embedding)
    doc_li = []
    for token in doc:
        doc_li.append((token.text, glove[token.text]))
        
    # for 1000 bootstrap samples
    I_s = []
    for i in range(1000):
        doc_li_boot = np.array(doc_li, dtype="object")[np.random.choice(len(doc_li),10,replace=True)]
        
        # --- Calculate B
        denom_B = 0
        for word, emb in dict(doc_li_boot).items():
            c = cosine_similarity(emb.reshape(1,-1), row['v_a'].reshape(1,-1)).item()
            f = [i[0] for i in doc_li_boot].count(word)
            denom_B += f*c

        B = denom_B / len(doc_li_boot)

        # --- Calculate I
        denom_I = 0
        for word, emb in dict(doc_li_boot).items():
            f = [i[0] for i in doc_li_boot].count(word)
            c = cosine_similarity(emb.reshape(1,-1), row['v_a'].reshape(1,-1)).item()
            denom_I += f*(c - B)**2

        I = denom_I / len(doc_li_boot)
        I_s.append(I)
        
    return abs(row['I'] - mean(I_s))

In [153]:
df['eta_B'] = df.progress_apply(eta_B_apply, axis = 1)

100%|██████████| 500/500 [24:27<00:00,  2.93s/it]


In [162]:
df['eta_I'] = df.progress_apply(eta_I_apply, axis = 1)

 82%|████████▏ | 408/500 [31:24<07:04,  4.62s/it]


KeyboardInterrupt: 

In [161]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,title,selftext,score,v_a,B,I,eta_B
id,microframe,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
gxsa0i,creepy - non-creepy,Do NOT Open Your Eyes... (Pt. 1),This is the only rule of our household. If you...,1,"[-0.31623998, -0.18520999, 0.06655002, 0.31497...",-0.191825,0.003005,0.000112
gxsa0i,man - woman,Do NOT Open Your Eyes... (Pt. 1),This is the only rule of our household. If you...,1,"[0.19866699, 0.07221998, -0.186463, 0.58374, -...",-0.019042,0.002709,0.000135
gxs6jf,creepy - non-creepy,Do NOT open your eyes. (The Beginning),This is the only rule of our household. If you...,1,"[-0.31623998, -0.18520999, 0.06655002, 0.31497...",-0.191825,0.003005,0.000334
gxs6jf,man - woman,Do NOT open your eyes. (The Beginning),This is the only rule of our household. If you...,1,"[0.19866699, 0.07221998, -0.186463, 0.58374, -...",-0.019042,0.002709,0.000388
gxrytp,creepy - non-creepy,My Best Friend Saw Bugs Under His Skin,It is hard for me to talk about my old friend ...,1,"[-0.31623998, -0.18520999, 0.06655002, 0.31497...",-0.196773,0.003120,0.000298
...,...,...,...,...,...,...,...,...
gw2mqd,man - woman,The old woman on my street. Video included.,[Video evidence.]\n\n know this subreddit is u...,1,"[0.19866699, 0.07221998, -0.186463, 0.58374, -...",-0.021059,0.004641,0.000716
gw2ds6,creepy - non-creepy,Rotten,I guess I should preface this by saying that I...,5,"[-0.31623998, -0.18520999, 0.06655002, 0.31497...",-0.190415,0.003764,0.000665
gw2ds6,man - woman,Rotten,I guess I should preface this by saying that I...,5,"[0.19866699, 0.07221998, -0.186463, 0.58374, -...",-0.012000,0.004192,0.000917
gw2b2i,creepy - non-creepy,Three paths appeared in the woods behind my ho...,[PART TWO]\n\nOn the third evening when I walk...,3,"[-0.31623998, -0.18520999, 0.06655002, 0.31497...",-0.188346,0.003483,0.000469
