In [1]:
import tensorflow as tf
# Enable on-demand GPU memory allocation for every visible GPU.
# Looping over the device list (instead of indexing physical_devices[0])
# keeps this cell from raising IndexError on a CPU-only machine and applies
# the same memory-growth setting to all GPUs on a multi-GPU machine.
physical_devices = tf.config.list_physical_devices('GPU')
for gpu_device in physical_devices:
    tf.config.experimental.set_memory_growth(gpu_device, enable=True)
print("Num GPUs Available: ", len(physical_devices))

Num GPUs Available:  1


# Import data

In [2]:
import pickle
import gdown

In [3]:
import os

# Google Drive location of the zipped dataset and the local path it is saved to.
data_cleaned_url = "https://drive.google.com/uc?id=1-I5k-1NlFozfdbRb5JVy7nEfQEXqYP9c"
output = './Download/data_cleaned_gdown.zip'

# Make sure the target directory exists (a fresh checkout has no ./Download),
# and skip the large download when the archive is already on disk.
# Delete the file manually to force a fresh download.
os.makedirs(os.path.dirname(output), exist_ok=True)
if not os.path.exists(output):
    gdown.download(data_cleaned_url, output, quiet=False)

Downloading...
From: https://drive.google.com/uc?id=1-I5k-1NlFozfdbRb5JVy7nEfQEXqYP9c
To: /home/anthony/Documents/Research-Mapping-Uncanny-Valley/Code/Download/data_cleaned_gdown.zip
786MB [00:16, 49.1MB/s] 


'./Download/data_cleaned_gdown.zip'

Python's `zipfile` module does not support the compression format used by this archive (the reason is unknown).
Instead, you can extract it with the system `unzip` tool:
```bash
sudo apt-get update
sudo apt-get install unzip
unzip ./Download/data_cleaned_gdown.zip
```
to unzip the file into the `/Code/Download` directory.

In [3]:
# Load the cleaned dataset from the local pickle file into `data_cleaned`.
# NOTE(security): pickle.load can execute arbitrary code embedded in the file;
# only load this pickle if the downloaded archive comes from a trusted source.
with open('./Download/data_cleaned.pickle', 'rb') as handle:
    data_cleaned = pickle.load(handle)

# 1. Building a Set of Microframes

In [4]:
import nltk
from nltk.corpus import wordnet as wn
# Fetch the WordNet corpus that the `wn` lookups in this notebook rely on.
nltk.download('wordnet')

[nltk_data] Downloading package wordnet to /home/anthony/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

### Show all antonyms in WordNet:

In [5]:
from nltk.corpus import wordnet as wn

def antonyms_for(word):
    """Collect the WordNet antonyms of ``word`` that can be used as adjectives.

    Every lemma of every synset of ``word`` is inspected.  An antonym name is
    kept only when at least one synset registered under that name has the
    part of speech ``wn.ADJ``.

    Args:
        word: The word to look up in WordNet.

    Returns:
        A set of antonym names (strings); empty when nothing qualifies.
    """
    found = set()
    for synset in wn.synsets(word):
        for lemma in synset.lemmas():
            for opposite in lemma.antonyms():
                candidate = opposite.name()
                # Keep the candidate only if it has an adjective reading.
                if any(sense.pos() == wn.ADJ for sense in wn.synsets(candidate)):
                    found.add(candidate)
    return found

In [6]:
# Example lookup; the recorded output for this word is an empty set.
antonyms_for("terrifying")

set()

In [7]:
import torch
import torchtext

In [8]:
# Load the 300-dimensional "840B" GloVe vectors through torchtext.
# NOTE(review): presumably this downloads and caches a large file on first use — confirm.
glove = torchtext.vocab.GloVe(name="840B",dim=300)

### Example text

In [106]:
# Axis vector for the creepy / non-creepy pair: difference of the two GloVe vectors.
# NOTE(review): the axis is glove['non-creepy'] - glove['creepy'], whereas the row
# label used later in the notebook reads 'creepy - non-creepy' — confirm the intended sign.
# NOTE(review): verify that 'non-creepy' is in the GloVe vocabulary and how a
# missing token would be handled.
v_p_1 = glove['creepy']
v_p_2 = glove['non-creepy']
v_a_1 = v_p_2 - v_p_1

In [107]:
# Axis vector for the man / woman pair: glove['woman'] - glove['man'].
# v_p_1 and v_p_2 are reused here and overwrite the values from the previous pair.
# NOTE(review): the row label used later reads 'man - woman' — confirm the intended sign.
v_p_1 = glove['man']
v_p_2 = glove['woman']
v_a_2 = v_p_2 - v_p_1

In [78]:
# numpy is imported in this cell because, in top-to-bottom order, it runs
# before the notebook's later `import numpy as np` cell; without it a fresh
# kernel raises NameError here.
import numpy as np

# Both axis vectors as NumPy arrays, converted via plain Python lists.
v_a = [np.array(axis_vector.tolist()) for axis_vector in (v_a_1, v_a_2)]

# Framing Bias and Intensity

In [191]:
import pandas as pd
import numpy as np

In [247]:
# Number of posts taken from the head of the 'RS_2020_self' table.
topn = 1000

# Index levels: the first `topn` post ids crossed with the two microframe labels.
iterables = [data_cleaned['RS_2020_self']['id'].head(topn).to_list(), ['creepy - non-creepy', 'man - woman']]

index = pd.MultiIndex.from_product(iterables, names=['id', 'microframe'])
# NOTE(review): swifter is imported here but not referenced in this cell.
import swifter
# Each post row is repeated twice (once per microframe) so the values line up with the
# (id, microframe) index built above.
# NOTE(review): the trailing .head(topn) keeps only the first `topn` rows of the
# repeated frame, i.e. fewer than `topn` distinct posts — confirm this is intended.
df = pd.DataFrame(np.repeat(data_cleaned['RS_2020_self'][['title', 'selftext', 'score']].head(topn).values, 2, axis = 0),index=index, columns=['title', 'selftext', 'score']).head(topn).copy()
df

Unnamed: 0_level_0,Unnamed: 1_level_0,title,selftext,score
id,microframe,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
gxsfpz,creepy - non-creepy,My older sister is moving out of country soon,We hung out a lot and I was closest to her out...,1
gxsfpz,man - woman,My older sister is moving out of country soon,We hung out a lot and I was closest to her out...,1
gxs6xf,creepy - non-creepy,Started Drawabox after like 5 months of puttin...,"(still not sure what this sub is used for, so ...",1
gxs6xf,man - woman,Started Drawabox after like 5 months of puttin...,"(still not sure what this sub is used for, so ...",1
gxs658,creepy - non-creepy,It's my birthday today,As the title says. It's my 19th birthday and I...,1
...,...,...,...,...
gvcpoy,man - woman,What are some really good ways to cope with an...,I ask this mostly towards people who have seve...,1
gvchgd,creepy - non-creepy,Any ideas for what to write on a poster for pr...,I live in the US and am going protesting in a ...,1
gvchgd,man - woman,Any ideas for what to write on a poster for pr...,I live in the US and am going protesting in a ...,1
gvc9rm,creepy - non-creepy,I got over myself and started sharing my art o...,"I picked up painting over two years ago, and d...",2


In [248]:
def v_a_apply(row):
    """Return the axis vector matching a row's microframe label.

    The label is read from the second level of the row's index
    (``row.name[1]``).  ``"creepy - non-creepy"`` selects ``v_a_1``; any
    other label selects ``v_a_2``.

    Args:
        row: A DataFrame row whose ``name`` is an ``(id, microframe)`` tuple.

    Returns:
        The selected axis vector wrapped with ``np.array``.
    """
    microframe = row.name[1]
    chosen_axis = v_a_1 if microframe == "creepy - non-creepy" else v_a_2
    return np.array(chosen_axis)

In [249]:
# Attach to every row the axis vector that matches its microframe label, then display the frame.
df['v_a'] = df.apply(v_a_apply, axis=1)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,title,selftext,score,v_a
id,microframe,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
gxsfpz,creepy - non-creepy,My older sister is moving out of country soon,We hung out a lot and I was closest to her out...,1,"[-0.31623998, -0.18520999, 0.06655002, 0.31497..."
gxsfpz,man - woman,My older sister is moving out of country soon,We hung out a lot and I was closest to her out...,1,"[0.19866699, 0.07221998, -0.186463, 0.58374, -..."
gxs6xf,creepy - non-creepy,Started Drawabox after like 5 months of puttin...,"(still not sure what this sub is used for, so ...",1,"[-0.31623998, -0.18520999, 0.06655002, 0.31497..."
gxs6xf,man - woman,Started Drawabox after like 5 months of puttin...,"(still not sure what this sub is used for, so ...",1,"[0.19866699, 0.07221998, -0.186463, 0.58374, -..."
gxs658,creepy - non-creepy,It's my birthday today,As the title says. It's my 19th birthday and I...,1,"[-0.31623998, -0.18520999, 0.06655002, 0.31497..."
...,...,...,...,...,...
gvcpoy,man - woman,What are some really good ways to cope with an...,I ask this mostly towards people who have seve...,1,"[0.19866699, 0.07221998, -0.186463, 0.58374, -..."
gvchgd,creepy - non-creepy,Any ideas for what to write on a poster for pr...,I live in the US and am going protesting in a ...,1,"[-0.31623998, -0.18520999, 0.06655002, 0.31497..."
gvchgd,man - woman,Any ideas for what to write on a poster for pr...,I live in the US and am going protesting in a ...,1,"[0.19866699, 0.07221998, -0.186463, 0.58374, -..."
gvc9rm,creepy - non-creepy,I got over myself and started sharing my art o...,"I picked up painting over two years ago, and d...",2,"[-0.31623998, -0.18520999, 0.06655002, 0.31497..."


In [250]:
import spacy
from sklearn.metrics.pairwise import cosine_similarity
nlp = spacy.load("en_core_web_lg")
    
def _microframe_similarities(row):
    """Tokenise ``row['selftext']`` and score each distinct token against ``row['v_a']``.

    Args:
        row: Mapping/Series with a ``'selftext'`` string and a ``'v_a'`` axis vector.

    Returns:
        A ``(scores, total)`` tuple.  ``scores`` is a list of
        ``(frequency, cosine)`` pairs, one per distinct token text in
        first-occurrence order, where ``cosine`` is the cosine similarity
        between the token's GloVe embedding and ``row['v_a']``.  ``total`` is
        the number of tokens in the document.
    """
    doc = nlp(row['selftext'])

    # Count token frequencies in a single pass instead of re-scanning the
    # whole token list for every distinct word.
    counts = {}
    for token in doc:
        counts[token.text] = counts.get(token.text, 0) + 1

    axis = row['v_a'].reshape(1, -1)
    scores = []
    for word, freq in counts.items():
        c = cosine_similarity(glove[word].reshape(1, -1), axis).item()
        scores.append((freq, c))
    return scores, sum(counts.values())


def bias(row):
    """Compute the framing bias B of a document along the row's microframe axis.

    B is the frequency-weighted sum of token/axis cosine similarities divided
    by the number of tokens: ``B = sum_w f_w * c_w / N``.

    Args:
        row: Mapping/Series with ``'selftext'`` and ``'v_a'``.

    Returns:
        The bias as a float, or NaN when the document has no tokens
        (previously this raised ZeroDivisionError).
    """
    scores, total = _microframe_similarities(row)
    if total == 0:
        return float('nan')
    return sum(freq * c for freq, c in scores) / total


def intensity(row):
    """Compute the framing intensity I of a document along the row's microframe axis.

    I is the frequency-weighted sum of squared deviations of the token/axis
    cosine similarities from ``row['B']``, divided by the number of tokens:
    ``I = sum_w f_w * (c_w - B)**2 / N``.

    Args:
        row: Mapping/Series with ``'selftext'``, ``'v_a'`` and a previously
            computed ``'B'`` value.

    Returns:
        The intensity as a float, or NaN when the document has no tokens
        (previously this raised ZeroDivisionError).
    """
    scores, total = _microframe_similarities(row)
    if total == 0:
        return float('nan')
    reference = row['B']
    return sum(freq * (c - reference) ** 2 for freq, c in scores) / total

In [251]:
from tqdm import tqdm
tqdm.pandas()

In [252]:
# Compute the bias B for every (post, microframe) row, with a progress bar.
df['B'] = df.progress_apply(bias, axis = 1)

100%|██████████| 1000/1000 [00:47<00:00, 20.84it/s]


In [253]:
# Compute the intensity I for every row; intensity() reads row['B'], so the 'B'
# column must already have been filled in.
df['I'] = df.progress_apply(intensity, axis = 1)

100%|██████████| 1000/1000 [00:47<00:00, 20.93it/s]


In [261]:
# Smallest bias value over all rows.
df['B'].min()

-0.2677257756392161

In [258]:
# Mean bias (B) and intensity (I) per microframe (index level 1).
# The columns are selected explicitly: a list passed positionally to
# GroupBy.mean() lands in the `numeric_only` parameter rather than choosing
# columns.
df.groupby(level=1)[['B', 'I']].mean()

Unnamed: 0_level_0,B,I
microframe,Unnamed: 1_level_1,Unnamed: 2_level_1
creepy - non-creepy,-0.18474,0.004282
man - woman,-0.017204,0.003647


In [259]:
# Display the frame, now including the computed B and I columns.
df

Unnamed: 0_level_0,Unnamed: 1_level_0,title,selftext,score,v_a,B,I
id,microframe,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
gxsfpz,creepy - non-creepy,My older sister is moving out of country soon,We hung out a lot and I was closest to her out...,1,"[-0.31623998, -0.18520999, 0.06655002, 0.31497...",-0.209010,0.002355
gxsfpz,man - woman,My older sister is moving out of country soon,We hung out a lot and I was closest to her out...,1,"[0.19866699, 0.07221998, -0.186463, 0.58374, -...",0.005989,0.008664
gxs6xf,creepy - non-creepy,Started Drawabox after like 5 months of puttin...,"(still not sure what this sub is used for, so ...",1,"[-0.31623998, -0.18520999, 0.06655002, 0.31497...",-0.170364,0.005717
gxs6xf,man - woman,Started Drawabox after like 5 months of puttin...,"(still not sure what this sub is used for, so ...",1,"[0.19866699, 0.07221998, -0.186463, 0.58374, -...",-0.024467,0.001907
gxs658,creepy - non-creepy,It's my birthday today,As the title says. It's my 19th birthday and I...,1,"[-0.31623998, -0.18520999, 0.06655002, 0.31497...",-0.195810,0.003534
...,...,...,...,...,...,...,...
gvcpoy,man - woman,What are some really good ways to cope with an...,I ask this mostly towards people who have seve...,1,"[0.19866699, 0.07221998, -0.186463, 0.58374, -...",0.007790,0.007326
gvchgd,creepy - non-creepy,Any ideas for what to write on a poster for pr...,I live in the US and am going protesting in a ...,1,"[-0.31623998, -0.18520999, 0.06655002, 0.31497...",-0.196011,0.001836
gvchgd,man - woman,Any ideas for what to write on a poster for pr...,I live in the US and am going protesting in a ...,1,"[0.19866699, 0.07221998, -0.186463, 0.58374, -...",-0.021439,0.002024
gvc9rm,creepy - non-creepy,I got over myself and started sharing my art o...,"I picked up painting over two years ago, and d...",2,"[-0.31623998, -0.18520999, 0.06655002, 0.31497...",-0.187010,0.004767
