
## antiwork

_March 21 Revision_

Criteria
1.  We want the largest possible data set that is manageable.
2.  We are interested in the posts, not necessarily the comments.
3.  We are interested in the text, not necessarily the memes, images, etc.

As a way to reduce the number of posts from approximately 80k, we could select the top 5000 posts on the basis of popularity scores, in order to focus on the posts with the highest levels of engagement.



In [157]:
import pandas as pd
import re

pd.set_option('display.max_colwidth', None)
from nltk.sentiment.vader import SentimentIntensityAnalyzer

In [158]:
#load full dataset thus far
antiwork_full = pd.read_csv("antiwork_full_text.csv")

#only posts with text in them (ie, not just links or images)
antiwork_full = antiwork_full[antiwork_full["selftext"].notnull()]

#sort by score first, then upvote ratio as the tie-breaker (both descending),
#and keep the top 5000 rows
antiwork_full = antiwork_full.sort_values(
    by=["score", "upvote_ratio"], ascending=False
).head(5000)

#reset index
#reset_index() returns a new DataFrame, so the result has to be assigned back;
#drop=True discards the old row labels instead of adding them as an "index" column
antiwork_full = antiwork_full.reset_index(drop=True)


#does the post contain the word
#Each entry maps an output flag column to the literal, case-sensitive substring
#searched for in the post's selftext. The flag is 1 when the substring occurs
#anywhere in the text and 0 otherwise.
keyword_flags = {
    "has_strike": "strike",
    "has_union": "union",
    "has_capitalism": "capitalism",
    "has_socialism": "socialism",
    "has_anarchism": "anarchism",
    "has_resignation": "resignation",
    "has_quit": "quit",
    #- abolition of work/wage labour
    "has_abolition": "abolition",
    #- asshole boss / job
    "has_asshole": "asshole",
    "has_labor": "labor",
    #the column is named "scabs" but the search term is "scab", which matches both
    "has_scabs": "scab",
    "has_contract": "contract",
    "has_temporary": "temporary",
    "has_wage": "wage",
    "has_fired": "fired",
}

for flag_column, keyword in keyword_flags.items():
    #regex=False: the keywords are plain substrings, not patterns
    #na=False: a missing selftext counts as "does not contain"
    #astype(int): produce 0/1 directly instead of relying on replace() to
    #downcast a boolean column
    antiwork_full[flag_column] = (
        antiwork_full["selftext"]
        .str.contains(keyword, regex=False, na=False)
        .astype(int)
    )




#Clear up weird HTML \n\n entries on those couple of entries
#For each listed post id, every newline in its selftext is replaced by a space.
for post_id in ("qvne64", "r61fv8", "rmegty"):
    is_target = antiwork_full["id"] == post_id
    flattened = antiwork_full[is_target]["selftext"].str.replace("\n", " ")
    antiwork_full.loc[is_target, ["selftext"]] = flattened


In [159]:
#Calculate and add VADER Scores
#Takes about 20 sec to run

sid = SentimentIntensityAnalyzer()

#Score every post once, in row order; each result is a dict holding the
#"pos", "neg", "neu" and "compound" values read below.
polarity = [sid.polarity_scores(text) for text in antiwork_full["selftext"]]

#Add one float column per VADER component: vscore_pos, vscore_neg,
#vscore_neu, vscore_compound (in that order).
for component in ("pos", "neg", "neu", "compound"):
    antiwork_full["vscore_" + component] = [
        float(scores[component]) for scores in polarity
    ]

In [160]:
#Write the selected posts, with their keyword flags and VADER scores, to disk.
#index=False keeps the DataFrame's row labels out of the file.
output_path = "antiwork_march_21.csv"
antiwork_full.to_csv(path_or_buf=output_path, index=False)