## Evaluation with Standard Benchmarks: Coherence
### Using evaluation tool for word embeddings

Here, we apply standard benchmarks on coherence on w2v and debiased w2v.

In [1]:
# Subset of GoogleNews-vectors:
# https://drive.google.com/file/d/1NH6jcrg8SXbnhpIXRIXF_-KUE7wGxGaG/view?usp=sharing

# For full embeddings:
# Download embeddings at https://github.com/tolga-b/debiaswe and put them on the following directory
# embeddings/GoogleNews-vectors-negative300-hard-debiased.bin
# embeddings/GoogleNews-vectors-negative300.bin

In [2]:
from __future__ import print_function, division
%matplotlib inline
from matplotlib import pyplot as plt
import json
import random
import numpy as np

import debiaswe as dwe
import debiaswe.we as we
from debiaswe.we import WordEmbedding
from debiaswe.data import load_professions

import benchmarks as benchmarks
from benchmarks.wordsim.wordsim import Wordsim

# Small w2vNEWS set

## 1: original word embeddings on RG & WS

Sources:

#### RG: H. Rubenstein and J. B. Goodenough. Contextual correlates of synonymy. Communications of the ACM, 8(10):627–633, 1965.

####  WS: L. Finkelstein, E. Gabrilovich, Y. Matias, E. Rivlin, Z. Solan, G. Wolfman, and E. Ruppin. Placing search in context: The concept  revisited. In WWW. ACM, 2001.


In [3]:
# Load google news word2vec
E = WordEmbedding('./embeddings/w2v_gnews_small.txt')

*** Reading data from ./embeddings/w2v_gnews_small.txt
(26423, 300)
26423 words of dimension 300 : in, for, that, is, ..., Jay, Leroy, Brad, Jermaine


### Wordsim benchmarks
Code adapted from source 

#### embedding-evaluation: https://github.com/k-kawakami/embedding-evaluation

In [4]:
wordsim = Wordsim("en")
word2vec = wordsim.load_vector('./embeddings/w2v_gnews_small.txt')
result_original = wordsim.evaluate(word2vec)
wordsim.pprint(result_original)

load_vector
loading vector...
b'in'
b'for'
b'that'
b'is'
b'on'
b'with'
b'said'
b'was'
b'the'
b'at'
b'not'
b'as'
b'it'
b'be'
b'from'
b'by'
b'are'
b'have'
b'he'
b'will'
b'has'
b'his'
b'an'
b'this'
b'or'
b'their'
b'who'
b'they'
b'but'
b'had'
b'year'
b'were'
b'we'
b'more'
b'up'
b'been'
b'you'
b'its'
b'one'
b'about'
b'would'
b'which'
b'out'
b'can'
b'all'
b'also'
b'two'
b'after'
b'first'
b'do'
b'time'
b'than'
b'when'
b'over'
b'last'
b'new'
b'other'
b'her'
b'people'
b'into'
b'our'
b'there'
b'she'
b'could'
b'just'
b'years'
b'some'
b'three'
b'million'
b'them'
b'what'
b'so'
b'no'
b'like'
b'if'
b'only'
b'percent'
b'get'
b'did'
b'him'
b'game'
b'back'
b'because'
b'now'
b'before'
b'company'
b'any'
b'team'
b'against'
b'off'
b'most'
b'made'
b'through'
b'make'
b'second'
b'state'
b'well'
b'day'
b'season'
b'says'
b'week'
b'where'
b'while'
b'down'
b'being'
b'government'
b'your'
b'home'
b'going'
b'my'
b'good'
b'should'
b'many'
b'way'
b'those'
b'four'
b'during'
b'such'
b'may'
b'very'
b'how'
b'since'
b'work'

b'television'
b'rules'
b'rights'
b'review'
b'quite'
b'regular'
b'served'
b'kept'
b'marketing'
b'created'
b'heart'
b'wrote'
b'prison'
b'instead'
b'concerns'
b'offering'
b'returned'
b'benefit'
b'designed'
b'approved'
b'foot'
b'search'
b'miles'
b'planned'
b'driving'
b'entire'
b'providing'
b'host'
b'ask'
b'design'
b'hearing'
b'save'
b'looked'
b'travel'
b'successful'
b'beginning'
b'movie'
b'adding'
b'forced'
b'toward'
b'ensure'
b'passed'
b'unit'
b'happen'
b'mean'
b'shooting'
b'gone'
b'winner'
b'hopes'
b'understand'
b'planning'
b'trip'
b'raise'
b'owner'
b'popular'
b'completed'
b'ways'
b'attention'
b'feet'
b'assets'
b'traffic'
b'environment'
b'interview'
b'consumers'
b'hands'
b'contact'
b'cars'
b'dead'
b'features'
b'challenge'
b'particularly'
b'basis'
b'violence'
b'stand'
b'consumer'
b'sense'
b'attorney'
b'driver'
b'highest'
b'posted'
b'finish'
b'limited'
b'source'
b'serve'
b'spot'
b'advantage'
b'scoring'
b'words'
b'applications'
b'reduce'
b'white'
b'appeared'
b'itself'
b'fuel'
b'simply'
b'fi

b'volume'
b'effects'
b'tests'
b'interests'
b'occurred'
b'bond'
b'introduced'
b'sport'
b'spokeswoman'
b'behavior'
b'defeat'
b'treated'
b'direction'
b'bigger'
b'crew'
b'allegedly'
b'uses'
b'mortgage'
b'miss'
b'catch'
b'appearance'
b'welcome'
b'experienced'
b'ideas'
b'negotiations'
b'authority'
b'slightly'
b'urged'
b'hole'
b'reform'
b'achieve'
b'assistance'
b'delivered'
b'compete'
b'link'
b'choose'
b'possibility'
b'ruled'
b'interesting'
b'seats'
b'threw'
b'drove'
b'immediate'
b'investments'
b'payments'
b'northern'
b'relations'
b'license'
b'discovered'
b'prove'
b'registered'
b'perform'
b'governments'
b'debut'
b'researchers'
b'accounts'
b'limit'
b'eyes'
b'deaths'
b'neighborhood'
b'handed'
b'veteran'
b'alternative'
b'basic'
b'manage'
b'tools'
b'negative'
b'secretary'
b'kill'
b'receiving'
b'collection'
b'coaches'
b'returns'
b'surgery'
b'gun'
b'replace'
b'fan'
b'institutions'
b'personnel'
b'golf'
b'happens'
b'surprise'
b'changing'
b'kick'
b'ceremony'
b'west'
b'resolution'
b'broken'
b'plenty'
b

b'poverty'
b'properly'
b'somebody'
b'canceled'
b'sounds'
b'sick'
b'saves'
b'lucky'
b'folks'
b'defend'
b'lights'
b'minority'
b'posting'
b'drilling'
b'differences'
b'regularly'
b'roster'
b'bankruptcy'
b'participation'
b'interim'
b'controls'
b'competitors'
b'stuck'
b'delivering'
b'viewers'
b'agreements'
b'subscribers'
b'smoke'
b'compliance'
b'manner'
b'trucks'
b'yes'
b'mistakes'
b'formal'
b'investor'
b'speculation'
b'qualifying'
b'attorneys'
b'finds'
b'quiet'
b'cleared'
b'bag'
b'explosion'
b'taxpayers'
b'guide'
b'occur'
b'encouraging'
b'highway'
b'sleep'
b'recover'
b'interviews'
b'satellite'
b'entirely'
b'dry'
b'papers'
b'soldier'
b'covers'
b'exhibition'
b'blast'
b'capture'
b'function'
b'availability'
b'lending'
b'peak'
b'wild'
b'facts'
b'procedures'
b'lap'
b'awareness'
b'pursue'
b'edition'
b'studio'
b'correct'
b'inch'
b'switch'
b'participating'
b'tend'
b'champions'
b'lay'
b'purposes'
b'departments'
b'ranking'
b'addressed'
b'perspective'
b'components'
b'seized'
b'clothing'
b'drawing'
b're

b'affecting'
b'rugby'
b'forest'
b'cloud'
b'contractor'
b'chest'
b'signature'
b'uniform'
b'accidents'
b'intervention'
b'tougher'
b'teenager'
b'gambling'
b'mortgages'
b'standings'
b'evaluation'
b'planet'
b'dressed'
b'underground'
b'exact'
b'conservation'
b'midfielder'
b'depend'
b'replacing'
b'headline'
b'questioning'
b'athlete'
b'wide_range'
b'comparable'
b'accomplished'
b'ordinance'
b'feared'
b'discussing'
b'riders'
b'copper'
b'unexpected'
b'administrators'
b'favorable'
b'painting'
b'comply'
b'headquartered'
b'gross'
b'picks'
b'diplomatic'
b'lighting'
b'tobacco'
b'feelings'
b'pays'
b'documentary'
b'addresses'
b'senators'
b'linebacker'
b'cleaning'
b'withdrawal'
b'brilliant'
b'custom'
b'priorities'
b'economist'
b'waited'
b'furniture'
b'sub'
b'turnover'
b'costly'
b'height'
b'volumes'
b'till'
b'grab'
b'completing'
b'sample'
b'equipped'
b'delegates'
b'favorites'
b'shortage'
b'publisher'
b'estate'
b'hitter'
b'globe'
b'evaluate'
b'sits'
b'venture'
b'shoppers'
b'designated'
b'tying'
b'signals'


b'freed'
b'gonna'
b'layup'
b'bridges'
b'endorsement'
b'examination'
b'educated'
b'tensions'
b'explosive'
b'exceeded'
b'imported'
b'fantasy'
b'suits'
b'recognizes'
b'm'
b'minimal'
b'medal'
b'reads'
b'locals'
b'trailing'
b'prayers'
b'v'
b'suggestion'
b'workplace'
b'simultaneously'
b'universal'
b'lifting'
b'financially'
b'recruitment'
b'quest'
b'grave'
b'lobbying'
b'argues'
b'bold'
b'brutal'
b'reply'
b'renowned'
b'reconciliation'
b'sleeping'
b'computing'
b'sooner'
b'switched'
b'laptop'
b'surveyed'
b'reservations'
b'woes'
b'swim'
b'marking'
b'breathing'
b'blind'
b'immune'
b'founding'
b'opted'
b'unrest'
b'curriculum'
b'shifts'
b'troubles'
b'midway_through'
b'distributor'
b'damaging'
b'coup'
b'representation'
b'notify'
b'manufacture'
b'random'
b'institutional'
b'transported'
b'protein'
b'mounted'
b'bottles'
b'presenting'
b'pleasure'
b'surveys'
b'accomplish'
b'declaration'
b'dumped'
b'weigh'
b'evolution'
b'accusing'
b'upgraded'
b'teeth'
b'lately'
b'loses'
b'browser'
b'politically'
b'researche

b'semiconductor'
b'consistency'
b'cake'
b'worrying'
b'amounted'
b'helmet'
b'marry'
b'obtaining'
b'uranium'
b'dental'
b'homered'
b'tracked'
b'wheels'
b'altogether'
b'exercises'
b'favors'
b'quicker'
b'smashed'
b'fixing'
b'dairy'
b'afterward'
b'scorer'
b'photography'
b'instructor'
b'yelling'
b'complement'
b'affidavit'
b'disturbing'
b'murders'
b'compact'
b'examining'
b'threatens'
b'reflection'
b'touching'
b'shelters'
b'proves'
b'pizza'
b'prisoner'
b'diamond'
b'instrumental'
b'copyright'
b'lakh'
b'uncomfortable'
b'anonymous'
b'crushed'
b'brave'
b'obstacles'
b'juvenile'
b'contingent'
b'merit'
b'voiced'
b'clicking'
b'economic_downturn'
b'piano'
b'stripped'
b'adapt'
b'enhancing'
b'romantic'
b'interact'
b'blacks'
b'webcast'
b'speeches'
b'billing'
b'ear'
b'composed'
b'convictions'
b'cops'
b'd'
b'interpretation'
b'sang'
b'listened'
b'inmate'
b'credentials'
b'domain'
b'imaging'
b'disc'
b'poses'
b'loved_ones'
b'healing'
b'steroids'
b'appoint'
b'permanently'
b'agreeing'
b'wide_receiver'
b'outscored'

b'gamers'
b'protesting'
b'tumor'
b'exile'
b'preservation'
b'poorest'
b'charitable'
b'improper'
b'rains'
b'riot'
b'intentionally'
b'screened'
b'configuration'
b'prohibit'
b'quantity'
b'experiments'
b'trap'
b'unavailable'
b'lied'
b'woke'
b'punish'
b'balances'
b'utilized'
b'criticize'
b'furious'
b'harbor'
b'hinted'
b'playground'
b'qualification'
b'confined'
b'wonders'
b'mask'
b'gig'
b'sponsoring'
b'oath'
b'renewable'
b'rap'
b'phases'
b'digging'
b'ambition'
b'losers'
b'shrinking'
b'shouting'
b'reversal'
b'billed'
b'consequence'
b'pinned'
b'pulls'
b'preserving'
b'proposes'
b'price_tag'
b'installment'
b'blanket'
b'pointer'
b'designation'
b'exhausted'
b'posing'
b'mentality'
b'federation'
b'accordingly'
b'rigorous'
b'identities'
b'hint'
b'selections'
b'assessing'
b'upward'
b'baseline'
b'pond'
b'patented'
b'expands'
b'verification'
b'academics'
b'campuses'
b'foremost'
b'handsets'
b'plight'
b'heal'
b'thieves'
b'queen'
b'solidarity'
b'aided'
b'overlooked'
b'tolerated'
b'coat'
b'fertilizer'
b'gran

b'discovering'
b'microphone'
b'delaying'
b'withdrawing'
b'morale'
b'sidewalks'
b'balloon'
b'warranty'
b'bucks'
b'seriously_injured'
b'dilemma'
b'servicing'
b'troopers'
b'eaten'
b'cubic_feet'
b'ruin'
b'inducted'
b'govern'
b'prevailing'
b'offs'
b'analog'
b'commanding'
b'mutual_funds'
b'escorted'
b'worthwhile'
b'chooses'
b'concession'
b'sounding'
b'insiders'
b'exceptionally'
b'explored'
b'genuinely'
b'dreamed'
b'paced'
b'quarterfinal'
b'sail'
b'supervised'
b'situated'
b'skies'
b'maritime'
b'wellness'
b'exercised'
b'probable'
b'powder'
b'penetration'
b'frankly'
b'comprise'
b'unwilling'
b'steering'
b'def'
b'rallying'
b'sanctioned'
b'auditorium'
b'earmarked'
b'extract'
b'applauded'
b'stride'
b'trusts'
b'skating'
b'tackled'
b'relocate'
b'bypass'
b'deflected'
b'smiled'
b'impending'
b'bath'
b'auditor'
b'heating_oil'
b'aka'
b'commenced'
b'statistical'
b'packaged'
b'gradual'
b'prescriptions'
b'mansion'
b'accidental'
b'pointers'
b'collegiate'
b'enhancement'
b'inclined'
b'clashed'
b'restitution'
b'

b'disciplines'
b'tender_offer'
b'norms'
b'weary'
b'sneak'
b'skid'
b'thunderstorms'
b'composite'
b'distances'
b'informing'
b'concluding'
b'circulated'
b'systematic'
b'distinguish'
b'cyclists'
b'simpler'
b'theories'
b'wildfire'
b'media_outlets'
b'staple'
b'thereafter'
b'anchored'
b'motivate'
b'stunt'
b'racked'
b'rash'
b'greed'
b'professionalism'
b'onset'
b'roaming'
b'passive'
b'perpetrators'
b'rang'
b'awhile'
b'homestand'
b'deportation'
b'clout'
b'relegated'
b'bored'
b'relevance'
b'apartment_complex'
b'picnic'
b'routed'
b'snapping'
b'hackers'
b'alien'
b'owing'
b'advocating'
b'figuring'
b'rebellion'
b'inflated'
b'indices'
b'chanting'
b'capacities'
b'surpass'
b'revoked'
b'bred'
b'anywhere_else'
b'fiery'
b'transcript'
b'religions'
b'refunds'
b'grader'
b'unknown_risks'
b'worsened'
b'whip'
b'coincide'
b'stressful'
b'underwear'
b'delete_comments'
b'rooted'
b'modes'
b'blue_chip'
b'vows'
b'obstruction'
b'hood'
b'faction'
b'belts'
b'raw_material'
b'adamant'
b'apples'
b'astonishing'
b'spoiled'
b's

b'hunter'
b'capping'
b'defunct'
b'historians'
b'ambulances'
b'tattoo'
b'scream'
b'assert'
b'firsthand'
b'divers'
b'scenic'
b'inputs'
b'somewhere_else'
b'raft'
b'carbon_emissions'
b'billboards'
b'shells'
b'outburst'
b'sacrificed'
b'reconcile'
b'unlucky'
b'unrealistic'
b'extortion'
b'biodiesel'
b'unfavorable'
b'wildly'
b'raging'
b'positives'
b'nursing_homes'
b'repeat_violators'
b'responders'
b'intensify'
b'lbs'
b'profanity'
b'longevity'
b'citations'
b'forestry'
b'downward'
b'upload'
b'noble'
b'seamlessly'
b'endeavors'
b'malware'
b'bush'
b'reflective'
b'ubiquitous'
b'defects'
b'specials'
b'ecological'
b'kidnappers'
b'accolades'
b'intentional'
b'portraits'
b'lopsided'
b'bulls'
b'angles'
b'atrocities'
b'crossbar'
b'gossip'
b'telecast'
b'outer'
b'invests'
b'bagged'
b'curator'
b'somebody_else'
b'showcases'
b'enjoyment'
b'avail'
b'contemplating'
b'flaw'
b'shadows'
b'user_click'
b'vaccination'
b'exits'
b'exaggerated'
b'swearing'
b'tunnels'
b'trucking'
b'weed'
b'dinners'
b'guidelines_click'
b'pr

b'pounder'
b'reassuring'
b'disrupting'
b'guardian'
b'unsustainable'
b'cottage'
b'wedge'
b'waterways'
b'dipping'
b'knocks'
b'resuming'
b'warehouses'
b'haunt'
b'diarrhea'
b'doorstep'
b'untouched'
b'hiatus'
b'sharks'
b'mobilized'
b'informant'
b'wards'
b'arrogance'
b'tax_evasion'
b'scanner'
b'space_shuttle'
b'pathway'
b'fusion'
b'lifeline'
b'screw'
b'quiz'
b'entrenched'
b'feasibility'
b'greeting'
b'conceding'
b'surcharge'
b'lunches'
b'climbs'
b'unfolded'
b'exiting'
b'annoyed'
b'unpleasant'
b'progresses'
b'curling'
b'likened'
b'textiles'
b'roadways'
b'limelight'
b'stubborn'
b'mankind'
b'endanger'
b'faults'
b'darker'
b'roared'
b'stagnant'
b'freeing'
b'littered'
b'plugged'
b'disclosing'
b'most_populous'
b'comparative'
b'essays'
b'finalizing'
b'gadget'
b'tame'
b'bookstore'
b'mediator'
b'turbulence'
b'articulate'
b'alma_mater'
b'sucked'
b'sitcom'
b'decorations'
b'sketch'
b'replies'
b'hotel_rooms'
b'simulated'
b'taps'
b'timeframe'
b'entertainer'
b'fur'
b'singer_songwriter'
b'decidedly'
b'unilate

b'bites'
b'reprinted'
b'tones'
b'tabs'
b'stumps'
b'vinyl'
b'attire'
b'file_sharing'
b'peacekeeping'
b'faiths'
b'bells'
b'wrath'
b'hunted'
b'chopped'
b'fruition'
b'red_tape'
b'microwave'
b'con'
b'optimum'
b'assuring'
b'retro'
b'portals'
b'nurture'
b'ironically'
b'pops'
b'geothermal'
b'concedes'
b'unsolicited'
b'exchanging'
b'draining'
b'withdrawals'
b'braking'
b'blizzard'
b'hyped'
b'cafes'
b'snatch'
b'voted_unanimously'
b'assertions'
b'extradited'
b'violin'
b'vampire'
b'cinematic'
b'docked'
b'delinquent'
b'internship'
b'spectator'
b'dependency'
b'unaudited'
b'entourage'
b'schizophrenia'
b'squash'
b'disrespect'
b'indulge'
b'intervals'
b'typing'
b'inferior'
b'orphanage'
b'scrapping'
b'text_messaging'
b'overlap'
b'victimized'
b'relinquish'
b'adolescent'
b'humane'
b'tallest'
b'bordering'
b'defensemen'
b'genetics'
b'understandably'
b'hurled'
b'heralded'
b'wiring'
b'interacting'
b'testosterone'
b'firepower'
b'hallmark'
b'insurance_premiums'
b'blatant'
b'extinguished'
b'lowers'
b'carmaker'
b's

b'hovered'
b'fried'
b'guest_speaker'
b'exec'
b'burner'
b'ensued'
b'gram'
b'sequential'
b'violators'
b'tack'
b'overcrowded'
b'charisma'
b'addictive'
b'offend'
b'plow'
b'summons'
b'intrigue'
b'occupational'
b'massively'
b'cyclical'
b'kidneys'
b'slightest'
b'civil_unions'
b'exposes'
b'facade'
b'innovate'
b'haunting'
b'stretcher'
b'linemen'
b'hotter'
b'footballers'
b'autistic'
b'perpetual'
b'recurrence'
b'waitress'
b'peanuts'
b'rosters'
b'scanners'
b'pretext'
b'indifferent'
b'conveniently'
b'donned'
b'crept'
b'nanny'
b'venerable'
b'appoints'
b'cured'
b'humiliated'
b'wherein'
b'coordinates'
b'simulate'
b'food_stamps'
b'walkout'
b'lawns'
b'forceful'
b'refiners'
b'squeezing'
b'towels'
b'bedding'
b'foot_birdie_putt'
b'iceberg'
b'fingerprint'
b'chairmen'
b'peek'
b'teller'
b'defamatory'
b'fruitful'
b'cord'
b'fairways'
b'deadlocked'
b'scuffle'
b'dentists'
b'mirrored'
b'discriminate'
b'organizes'
b'rowing'
b'customizable'
b'policyholders'
b'tenths'
b'longest_serving'
b'timeless'
b'swapping'
b'brew

b'peg'
b'formulas'
b'plotted'
b'crutches'
b'caste'
b'swamp'
b'fossil_fuel'
b'leaflets'
b'repealed'
b'ecosystems'
b'redirected'
b'commemoration'
b'expletive'
b'resorting'
b'brainchild'
b'replays'
b'ornaments'
b'competency'
b'coordinators'
b'bogeyed'
b'deterred'
b't_shirt'
b'public_servants'
b'cameraman'
b'penultimate'
b'retribution'
b'dice'
b'disparities'
b'fashioned'
b'convenience_stores'
b'supper'
b'voices_engaged'
b'stat'
b'glossy'
b'liberated'
b'presidential_palace'
b'bankruptcies'
b'berries'
b'resurgent'
b'resounding'
b'fumes'
b'bark'
b'jar'
b'ing'
b'assailant'
b'excesses'
b'statistically'
b'ill_fated'
b'cricketer'
b'beads'
b'fridge'
b'quilt'
b'retrial'
b'temples'
b'cannon'
b'militancy'
b'rivalries'
b'legislatures'
b'appellant'
b'recapture'
b'fiddle'
b'advancements'
b'disdain'
b'animal_cruelty'
b'contaminants'
b'drawer'
b'perfume'
b'fries'
b'colossal'
b'gross_margins'
b'demoted'
b'transforms'
b'embankment'
b'biking'
b'blur'
b'drown'
b'deficiency'
b'haircut'
b'unethical'
b'equates'


b'reply_actions'
b'anesthesia'
b'immature'
b'forex'
b'unify'
b'marshal'
b'simmering'
b'mistrust'
b'crawled'
b'cusp'
b'yr'
b'disks'
b'gangster'
b'unleashing'
b'hampering'
b'stain'
b'ballooned'
b'hone'
b'originate'
b'flight_attendant'
b'liberties'
b'chew'
b'dress_code'
b'deem'
b'socialize'
b'pummeled'
b'bows'
b'equestrian'
b'flu_vaccine'
b'midsize'
b'frees'
b'mustard'
b'flourishing'
b'changeup'
b'inaccessible'
b'fetched'
b'nurtured'
b'trump'
b'enlightened'
b'expedited'
b'movers'
b'temperament'
b'undeveloped'
b'milling'
b'pregnancies'
b'pies'
b'wayward'
b'waiter'
b'skater'
b'coconut'
b'implants'
b'garnering'
b'shrapnel'
b'communists'
b'contrasting'
b'inn'
b'dented'
b'overtook'
b'stupidity'
b'yachts'
b'intersected'
b'autographed'
b'deducted'
b'conquered'
b'hides'
b'incursion'
b'tides'
b'maverick'
b'discredited'
b'clueless'
b'enactment'
b'antibiotic'
b'untapped'
b'procure'
b'drones'
b'differed'
b'compromising'
b'foursome'
b'inclination'
b'interleague'
b'checkered_flag'
b'crystal_clear'
b'pe

b'heroism'
b'redshirt'
b'numb'
b'feather'
b'capitol'
b'regionally'
b'self_titled'
b'tentative_agreement'
b'spoof'
b'harshly'
b'aspiration'
b'polling_places'
b'camper'
b'childhood_obesity'
b'hobbies'
b'suffice'
b'barking'
b'fret'
b'sporty'
b'prerequisite'
b'spoiler'
b'inappropriately'
b'legions'
b'repressive'
b'tax_deductible'
b'backfire'
b'wont'
b'manning'
b'deli'
b'wield'
b'single_handedly'
b'slower_pace'
b'invaders'
b'anecdotes'
b'liberal_arts'
b'bottomed'
b'resell'
b'tricked'
b'philosophies'
b'confrontational'
b'inexplicably'
b'forfeiture'
b'giddy'
b'gag'
b'astonished'
b'sprinkled'
b'ants'
b'glaciers'
b'timid'
b'telco'
b'molestation'
b'pantry'
b'overpowered'
b'divest'
b'malaise'
b'subordinate'
b'persistently'
b'infidelity'
b'mercenaries'
b'drumming'
b'wrongful_death'
b'pose_threat'
b'deduct'
b'negativity'
b'narrated'
b'spokesmen'
b'angler'
b'jockeys'
b'wager'
b'blackouts'
b'glanced'
b'implicit'
b'cleanliness'
b'orchestrating'
b'pursuits'
b'breastfeeding'
b'graphical'
b'fucking'
b'su

b'vinegar'
b'stipend'
b'revocation'
b'saluted'
b'recorders'
b'stink'
b'chore'
b'miscarriage'
b'dryer'
b'proprietor'
b'pirated'
b'adjusts'
b'hepatitis'
b'equine'
b'personalization'
b'environmentalist'
b'detachment'
b'moviegoers'
b'bathing'
b'justifying'
b'defrauded'
b'downward_spiral'
b'commenters'
b'tennis_courts'
b'remedial'
b'rave_reviews'
b'semblance'
b'rites'
b'nutshell'
b'elbows'
b'scorching'
b'cabbage'
b'sharp_contrast'
b'perils'
b'earrings'
b'obscured'
b'inflicting'
b'examiner'
b'bean'
b'emperor'
b'hydrocarbon'
b'donning'
b'unmet'
b'scoured'
b'handwriting'
b'progressives'
b'abiding'
b'coalitions'
b'cruisers'
b'awkwardly'
b'premarket_trading'
b'adhering'
b'cronies'
b'abstained'
b'beachfront'
b'collaborator'
b'formulating'
b'perk'
b'paradox'
b'midpoint'
b'readying'
b'fuck'
b'sanitary'
b'firmed'
b'charms'
b'expiry'
b'paraphernalia'
b'feuding'
b'microbes'
b'mile_radius'
b'irate'
b'shred'
b'sprinters'
b'paperback'
b'formulations'
b'alienate'
b'subtly'
b'shortening'
b'nonviolent'
b'ex

b'procured'
b'biker'
b'pigeons'
b'softness'
b'hut'
b'fosters'
b'discord'
b'restarting'
b'closets'
b'precedence'
b'spaghetti'
b'rejoice'
b'huh'
b'drier'
b'schoolers'
b'zoos'
b'erroneously'
b'stiffer'
b'guilty_verdict'
b'glitzy'
b'chewed'
b'disbursed'
b'commenter'
b'dole'
b'archrival'
b'chart_profile'
b'gratified'
b'triangular'
b'buffs'
b'shaking_hands'
b'skateboarding'
b'stent'
b'receptors'
b'coffins'
b'physiological'
b'swarming'
b'tribunals'
b'locomotive'
b'distortions'
b'complicit'
b'benchmarking'
b'forgettable'
b'acupuncture'
b'dialogues'
b'unlocking'
b'volcanic'
b'panicking'
b'periphery'
b'inconceivable'
b'renegade'
b'faking'
b'graphic_design'
b'birth_defects'
b'uncharacteristic'
b'sprinklers'
b'shied_away'
b'wobbly'
b'sender'
b'drywall'
b'dazed'
b'heighten'
b'breakfasts'
b'add_ons'
b'unbalanced'
b'racetracks'
b'trey'
b'reds'
b'shrinks'
b'lashing'
b'ss'
b'blockbusters'
b'suppressing'
b'sampling_error'
b'skeletons'
b'slot_machine'
b'dined'
b'fatigued'
b'wholeheartedly'
b'metal_detect

b'ruck'
b'gosh'
b'brightly'
b'concierge'
b'steaming'
b'trafficked'
b'unlisted'
b'pit_stops'
b'dart'
b'refrigerated'
b'grads'
b'innocuous'
b'shampoo'
b'relies_heavily'
b'undertakings'
b'righteous'
b'grumbling'
b'polishing'
b'relic'
b'unaffordable'
b'apprehend'
b'mixer'
b'oblivion'
b'decentralized'
b'destitute'
b'goings'
b'patriots'
b'slander'
b'pastries'
b'skate_park'
b'bilateral_ties'
b'deductibles'
b'fastballs'
b'onside_kick'
b'archival'
b'commuted'
b'religiously'
b'perpetually'
b'wildly_popular'
b'cricketing'
b'wine_tasting'
b'tubing'
b'broad_array'
b'cab_driver'
b'orchestras'
b'reprehensible'
b'intrinsic'
b'screenshots'
b'worked_tirelessly'
b'triple_digit'
b'eras'
b'unattractive'
b'testy'
b'impassioned'
b'locker_rooms'
b'hemisphere'
b'bagging'
b'redefining'
b'auctioning'
b'reinvigorate'
b'cosmopolitan'
b'dueling'
b'geologists'
b'intensifies'
b'www'
b'unplanned'
b'misunderstandings'
b'rediscovered'
b'radiation_therapy'
b'criminality'
b'unbroken'
b'absentia'
b'impair'
b'constitutional

b'pinpointed'
b'bio_email'
b'helpline'
b'whips'
b'hiccup'
b'scuttle'
b'moody'
b'shooting_spree'
b'substantiated'
b'motherboard'
b'sew'
b'relished'
b'vocation'
b'hairy'
b'impeding'
b'enemy_combatants'
b'rudimentary'
b'll'
b'radioed'
b'bliss'
b'exaggerate'
b'statins'
b'distinguishes'
b'wheeling'
b'lavender'
b'jihadists'
b'zipped'
b'bloodbath'
b'expeditions'
b'humankind'
b'strategic_alliances'
b'pods'
b'sculpted'
b'multitasking'
b'electrocuted'
b'ratchet'
b'illiteracy'
b'austere'
b'preachers'
b'directional'
b'co_ordinate'
b'potentials'
b'hooded'
b'bargaining_table'
b'rejoining'
b'arid'
b'vilified'
b'smacks'
b'kilowatt_hours'
b'eighty'
b'cinematography'
b'axle'
b'hilltop'
b'semifinalists'
b'interconnection'
b'shutters'
b'bowel'
b'kayaking'
b'estranged_husband'
b'recessionary'
b'reappear'
b'mummy'
b'nontraditional'
b'polymers'
b'deferral'
b'goalscorer'
b'vector'
b'summarize'
b'tuneup'
b'flailing'
b'avoidable'
b'whine'
b'purposeful'
b'coaxed'
b'puberty'
b'scrawled'
b'reappointed'
b'unsavory'

b'cougar'
b'steelhead'
b'confiscation'
b'deviate'
b'net_neutrality'
b'straddles'
b'avocado'
b'delisted'
b'bacterium'
b'subcontinent'
b'uninformed'
b'loudspeaker'
b'defibrillator'
b'pardons'
b'supply_disruptions'
b'recollections'
b'jewelers'
b'demoralized'
b'carbon_capture'
b'nerve_cells'
b'spurts'
b'aptitude'
b'brokering'
b'spear'
b'midget'
b'bereaved'
b'leadoff_homer'
b'uber'
b'spoilers'
b'contours'
b'reciting'
b'ferried'
b'decreed'
b'zip_code'
b'legitimize'
b'summarizes'
b'colds'
b'massed'
b'penitentiary'
b'flanks'
b'impediments'
b'wink'
b'elastic'
b'absenteeism'
b'conclave'
b'mascots'
b'showering'
b'environs'
b'firming'
b'dress_rehearsal'
b'downsized'
b'swine_flu_vaccine'
b'remain_vigilant'
b'precincts_reporting'
b'reaffirming'
b'worldview'
b'replicating'
b'bunches'
b'aunts'
b'brooding'
b'splendor'
b'skits'
b'sought_refuge'
b'dud'
b'steelmakers'
b'slugging'
b'blank_check'
b'brokerage_firms'
b'amenity'
b'electrodes'
b'awaken'
b'prejudiced'
b'freaks'
b'bobbled'
b'remorseful'
b'enforce

b'flat_tire'
b'enchanting'
b'choppers'
b'cobalt'
b'humiliate'
b'infuriating'
b'octopus'
b'realtor'
b'smelly'
b'unending'
b'biographer'
b'mental_toughness'
b'purged'
b'machinations'
b'purporting'
b'ensuing_possession'
b'trash_bin'
b'coastal_waters'
b'languish'
b'cylinder_engine'
b'von'
b'bonnet'
b'misdemeanor_counts'
b'existential'
b'vagina'
b'dermatologist'
b'dilemmas'
b'sports_memorabilia'
b'mains'
b'glazed'
b'hormonal'
b'checkup'
b'transparently'
b'nationalize'
b'prejudicial'
b'saturated_fat'
b'disintegrated'
b'zooming'
b'jacked'
b'brainstorming'
b'entitles'
b'negatively_affected'
b'finalization'
b'ascertained'
b'camcorders'
b'pasted'
b'gaping'
b'considerate'
b'confessional'
b'teen_pregnancy'
b'anecdotal_evidence'
b'quizzes'
b'domino_effect'
b'shudder'
b'headsets'
b'soloists'
b'reinvestment'
b'cryptic'
b'airman'
b'selector'
b'dermatology'
b'flirt'
b'downwards'
b'corrects'
b'adversely'
b'biometrics'
b'bullshit'
b'constrain'
b'uncanny'
b'seep'
b'sweetest'
b'straddling'
b'mono'
b'heartl

b'slugged'
b'profess'
b'encircled'
b'decommissioned'
b'doves'
b'registries'
b'viewpoints_race'
b'fiddling'
b'silliness'
b'mistaken_identity'
b'commonsense'
b'heavily_populated'
b'cachet'
b'costliest'
b'toying'
b'embedding'
b'ghostly'
b'artisan'
b'distilled'
b'intestine'
b'accrual'
b'harsh_criticism'
b'unaffiliated'
b'dyes'
b'meted'
b'sediments'
b'navigator'
b'incurable'
b'eloquently'
b'ethereal'
b'blockades'
b'watercolors'
b'reelected'
b'motherland'
b'creditable'
b'risk_averse'
b'mechanically'
b'energizing'
b'fifties'
b'unassailable'
b'abandons'
b'virginity'
b'axing'
b'rollercoaster'
b'pricier'
b'asterisk'
b'corals'
b'contemplates'
b'senatorial'
b'parred'
b'geo'
b'intensively'
b'fazed'
b'wearable'
b'wronged'
b'anomalous'
b'opulent'
b'armistice'
b'facebook'
b'policymaking'
b'assimilate'
b'unremarkable'
b'interstates'
b'coastlines'
b'link_tags'
b'kick_returner'
b'inherits'
b'abhorrent'
b'polyester'
b'passerby'
b'aptly_named'
b'tellers'
b'consecutively'
b'airway'
b'counterpoint'
b'nomadic

b'parsley'
b'panchayat'
b'insecticide'
b'taco'
b'chuck'
b'burgundy'
b'wobble'
b'reinvention'
b'bouncers'
b'racist_vulgar'
b'deteriorates'
b'closers'
b'delves'
b'mar'
b'implausible'
b'earthly'
b'thawing'
b'crescendo'
b'co_ordinating'
b'bane'
b'fiesta'
b'whipped_cream'
b'sawmill'
b'muffins'
b'tractor_trailers'
b'multi_disciplinary'
b'modems'
b'feverish'
b'frontrunners'
b'riser'
b'scuba_diving'
b'adoration'
b'mainstays'
b'lodged_complaint'
b'fighter_pilot'
b'junctions'
b'stabilizes'
b'puffing'
b'recycled_materials'
b'moan'
b'water_heaters'
b'liturgy'
b'undrafted'
b'functionaries'
b'winningest_coach'
b'taut'
b'knelt'
b'org'
b'spray_paint'
b'heckled'
b'retelling'
b'flinch'
b'mouthed'
b'glancing'
b'gain_traction'
b'celery'
b'steelmaker'
b'cacophony'
b'thereby_reducing'
b'concentration_camps'
b'obscene_offensive'
b'dungeon'
b'tackler'
b'leveraged_buyout'
b'poop'
b'hookup'
b'inquisitive'
b'spokespeople'
b'safeguarded'
b'stab_wound'
b'beheadings'
b'bodily_injury'
b'mod'
b'dispersal'
b'marriage_

## 2: Debiased word embeddings on RG & WS


### Step 2a: Hard debiased

In [5]:
from debiaswe.debias import debias

In [6]:
# Lets load some gender related word lists to help us with debiasing
with open('./data/definitional_pairs.json', "r") as f:
    defs = json.load(f)
print("definitional", defs)

with open('./data/equalize_pairs.json', "r") as f:
    equalize_pairs = json.load(f)

with open('./data/gender_specific_seed.json', "r") as f:
    gender_specific_words = json.load(f)
# print("gender specific", len(gender_specific_words), gender_specific_words[:10])

definitional [['woman', 'man'], ['girl', 'boy'], ['she', 'he'], ['mother', 'father'], ['daughter', 'son'], ['gal', 'guy'], ['female', 'male'], ['her', 'his'], ['herself', 'himself'], ['Mary', 'John']]


In [7]:
debias(E, gender_specific_words, defs, equalize_pairs)
print("Saving to file...")
E.save('./embeddings/w2v_gnews_debiased_small.txt')
print("\n\nDone!\n")

26423 words of dimension 300 : in, for, that, is, ..., Jay, Leroy, Brad, Jermaine
{('gentlemen', 'ladies'), ('males', 'females'), ('FATHERHOOD', 'MOTHERHOOD'), ('FRATERNITY', 'SORORITY'), ('Father', 'Mother'), ('CONGRESSMAN', 'CONGRESSWOMAN'), ('dudes', 'gals'), ('KINGS', 'QUEENS'), ('dad', 'mom'), ('catholic_priest', 'nun'), ('boy', 'girl'), ('twin_brother', 'twin_sister'), ('CATHOLIC_PRIEST', 'NUN'), ('colt', 'filly'), ('COUNCILMAN', 'COUNCILWOMAN'), ('boys', 'girls'), ('King', 'Queen'), ('His', 'Her'), ('FATHERS', 'MOTHERS'), ('dads', 'moms'), ('prince', 'princess'), ('GELDING', 'MARE'), ('Spokesman', 'Spokeswoman'), ('Male', 'Female'), ('Wives', 'Husbands'), ('COLT', 'FILLY'), ('Brothers', 'Sisters'), ('Councilman', 'Councilwoman'), ('Fathers', 'Mothers'), ('SCHOOLBOY', 'SCHOOLGIRL'), ('his', 'her'), ('grandson', 'granddaughter'), ('HE', 'SHE'), ('KING', 'QUEEN'), ('Businessman', 'Businesswoman'), ('Fella', 'Granny'), ('Testosterone', 'Estrogen'), ('HIS', 'HER'), ('Chairman', 'Chai

In [8]:
# Do benchmark for hard-debiased
wordsim = Wordsim("en")
word2vec = wordsim.load_vector('./embeddings/w2v_gnews_debiased_small.txt')
result_debiased = wordsim.evaluate(word2vec)
print("ORIGINAL")
wordsim.pprint(result_original)
print("DEBIASED")
wordsim.pprint(result_debiased)

load_vector
loading vector...
b'in'
b'for'
b'that'
b'is'
b'on'
b'with'
b'said'
b'was'
b'the'
b'at'
b'not'
b'as'
b'it'
b'be'
b'from'
b'by'
b'are'
b'have'
b'he'
b'will'
b'has'
b'his'
b'an'
b'this'
b'or'
b'their'
b'who'
b'they'
b'but'
b'had'
b'year'
b'were'
b'we'
b'more'
b'up'
b'been'
b'you'
b'its'
b'one'
b'about'
b'would'
b'which'
b'out'
b'can'
b'all'
b'also'
b'two'
b'after'
b'first'
b'do'
b'time'
b'than'
b'when'
b'over'
b'last'
b'new'
b'other'
b'her'
b'people'
b'into'
b'our'
b'there'
b'she'
b'could'
b'just'
b'years'
b'some'
b'three'
b'million'
b'them'
b'what'
b'so'
b'no'
b'like'
b'if'
b'only'
b'percent'
b'get'
b'did'
b'him'
b'game'
b'back'
b'because'
b'now'
b'before'
b'company'
b'any'
b'team'
b'against'
b'off'
b'most'
b'made'
b'through'
b'make'
b'second'
b'state'
b'well'
b'day'
b'season'
b'says'
b'week'
b'where'
b'while'
b'down'
b'being'
b'government'
b'your'
b'home'
b'going'
b'my'
b'good'
b'should'
b'many'
b'way'
b'those'
b'four'
b'during'
b'such'
b'may'
b'very'
b'how'
b'since'
b'work'

b'miles'
b'planned'
b'driving'
b'entire'
b'providing'
b'host'
b'ask'
b'design'
b'hearing'
b'save'
b'looked'
b'travel'
b'successful'
b'beginning'
b'movie'
b'adding'
b'forced'
b'toward'
b'ensure'
b'passed'
b'unit'
b'happen'
b'mean'
b'shooting'
b'gone'
b'winner'
b'hopes'
b'understand'
b'planning'
b'trip'
b'raise'
b'owner'
b'popular'
b'completed'
b'ways'
b'attention'
b'feet'
b'assets'
b'traffic'
b'environment'
b'interview'
b'consumers'
b'hands'
b'contact'
b'cars'
b'dead'
b'features'
b'challenge'
b'particularly'
b'basis'
b'violence'
b'stand'
b'consumer'
b'sense'
b'attorney'
b'driver'
b'highest'
b'posted'
b'finish'
b'limited'
b'source'
b'serve'
b'spot'
b'advantage'
b'scoring'
b'words'
b'applications'
b'reduce'
b'white'
b'appeared'
b'itself'
b'fuel'
b'simply'
b'fiscal'
b'plays'
b'account'
b'follow'
b'statements'
b'organizations'
b'pick'
b'continues'
b'session'
b'nuclear'
b'seconds'
b'daughter'
b'co'
b'sports'
b'caused'
b'injury'
b'effect'
b'selling'
b'middle'
b'cuts'
b'drop'
b'facility'
b'tru

b'dedicated'
b'tight'
b'lawyers'
b'sets'
b'linked'
b'providers'
b'investigators'
b'stands'
b'terrorism'
b'arm'
b'freedom'
b'notice'
b'sentenced'
b'coalition'
b'touch'
b'exercise'
b'settlement'
b'disaster'
b'assist'
b'electronic'
b'playoffs'
b'combination'
b'apply'
b'shared'
b'doctor'
b'comprehensive'
b'earn'
b'fill'
b'rock'
b'giant'
b'please'
b'suit'
b'owns'
b'electricity'
b'exciting'
b'winners'
b'beating'
b'protest'
b'rejected'
b'ticket'
b'farmers'
b'ride'
b'maker'
b'heading'
b'regulations'
b'models'
b'ends'
b'innovative'
b'upcoming'
b'writer'
b'performed'
b'films'
b'arms'
b'married'
b'prosecutors'
b'agents'
b'handle'
b'duty'
b'captain'
b'securities'
b'employment'
b'surprised'
b'worried'
b'ninth'
b'pointed'
b'magazine'
b'threatened'
b'headquarters'
b'participants'
b'visiting'
b'press_release'
b'fly'
b'primarily'
b'respectively'
b'transportation'
b'younger'
b'jury'
b'respond'
b'rural'
b'acting'
b'factor'
b'update'
b'island'
b'flat'
b'remove'
b'photo'
b'mary'
b'selection'
b'scheme'
b'im

b'typical'
b'membership'
b'hiring'
b'consideration'
b'civilian'
b'fairly'
b'climate_change'
b'investigate'
b'violations'
b'pipeline'
b'seeks'
b'crore'
b'hire'
b'theme'
b'stressed'
b'resort'
b'walks'
b'roles'
b'rebound'
b'reference'
b'specialist'
b'timing'
b'imagine'
b'rapidly'
b'angry'
b'riding'
b'violation'
b'attitude'
b'guest'
b'select'
b'tech'
b'barrels'
b'sensitive'
b'hosting'
b'demanded'
b'marijuana'
b'intent'
b'strengthen'
b'channels'
b'ministers'
b'newspapers'
b'referred'
b'testified'
b'column'
b'arrive'
b'rushed'
b'kilometers'
b'frame'
b'provision'
b'authorized'
b'fail'
b'completion'
b'traveled'
b'heavily'
b'acquire'
b'striking'
b'virus'
b'secured'
b'breaks'
b'attractive'
b'indeed'
b'approve'
b'survived'
b'intense'
b'somewhat'
b'aims'
b'crews'
b'protected'
b'checks'
b'distributed'
b'talented'
b'kicked'
b'permission'
b'judgment'
b'beer'
b'clothes'
b'coal'
b'packed'
b'promoting'
b'flood'
b'approached'
b'towns'
b'disappointing'
b'collaboration'
b'hopefully'
b'streak'
b'balls'
b'pl

b'install'
b'affecting'
b'rugby'
b'forest'
b'cloud'
b'contractor'
b'chest'
b'signature'
b'uniform'
b'accidents'
b'intervention'
b'tougher'
b'teenager'
b'gambling'
b'mortgages'
b'standings'
b'evaluation'
b'planet'
b'dressed'
b'underground'
b'exact'
b'conservation'
b'midfielder'
b'depend'
b'replacing'
b'headline'
b'questioning'
b'athlete'
b'wide_range'
b'comparable'
b'accomplished'
b'ordinance'
b'feared'
b'discussing'
b'riders'
b'copper'
b'unexpected'
b'administrators'
b'favorable'
b'painting'
b'comply'
b'headquartered'
b'gross'
b'picks'
b'diplomatic'
b'lighting'
b'tobacco'
b'feelings'
b'pays'
b'documentary'
b'addresses'
b'senators'
b'linebacker'
b'cleaning'
b'withdrawal'
b'brilliant'
b'custom'
b'priorities'
b'economist'
b'waited'
b'furniture'
b'sub'
b'turnover'
b'costly'
b'height'
b'volumes'
b'till'
b'grab'
b'completing'
b'sample'
b'equipped'
b'delegates'
b'favorites'
b'shortage'
b'publisher'
b'estate'
b'hitter'
b'globe'
b'evaluate'
b'sits'
b'venture'
b'shoppers'
b'designated'
b'tying'


b'invite'
b'disputed'
b'portable'
b'tuition'
b'intentions'
b'app'
b'launches'
b'refusing'
b'graduation'
b'raced'
b'posed'
b'renewable_energy'
b'ought'
b'publish'
b'securing'
b'sole'
b'couples'
b'clashes'
b'separated'
b'breath'
b'panic'
b'elaborate'
b'organizing'
b'mature'
b'directions'
b'meantime'
b'denies'
b'fighter'
b'motivated'
b'plug'
b'weighed'
b'viable'
b'harsh'
b'bitter'
b'reconstruction'
b'assurance'
b'gender'
b'spam'
b'dose'
b'supportive'
b'introducing'
b'outfit'
b'hat'
b'slid'
b'sacks'
b'tanks'
b'lottery'
b'tender'
b'instant'
b'stupid'
b'interface'
b'goalkeeper'
b'enemies'
b'arranged'
b'diplomats'
b'deploy'
b'youths'
b'subsequently'
b'contention'
b'median'
b'jurisdiction'
b'burglary'
b'slower'
b'advantages'
b'wheat'
b'worthy'
b'shame'
b'conflicts'
b'quietly'
b'benefited'
b'diluted_share'
b'complain'
b'inevitable'
b'heating'
b'desert'
b'mountains'
b'internationally'
b'mosque'
b'pressures'
b'ballots'
b'occasionally'
b'besides'
b'turnaround'
b'concessions'
b'remind'
b'hip'
b'ent

b'zoning'
b'vary'
b'architect'
b'medications'
b'defeating'
b'flash'
b'aggregate'
b'naked'
b'correspondent'
b'policemen'
b'uncle'
b'paperwork'
b'patch'
b'turf'
b'counterpart'
b'dip'
b'tent'
b'instantly'
b'cigarettes'
b'medals'
b'shortstop'
b'buys'
b'trader'
b'coordinated'
b'prepares'
b'finest'
b'announcements'
b'wounds'
b'retreat'
b'disorder'
b'regulated'
b'outright'
b'costing'
b'settings'
b'reversed'
b'classrooms'
b'incorporated'
b'strengthened'
b'quotes'
b'shooter'
b'bullish'
b'plaintiffs'
b'downs'
b'themes'
b'distributors'
b'semi_final'
b'revise'
b'precisely'
b'unidentified'
b'exempt'
b'gunman'
b'immigrant'
b'municipalities'
b'tissue'
b'sustainability'
b'rolls'
b'characteristics'
b'belonging'
b'refinery'
b'enthusiastic'
b'filming'
b'capitalize'
b'quarterbacks'
b'bay'
b'sake'
b'correctly'
b'lacking'
b'precise'
b'linking'
b'practiced'
b'creativity'
b'frequency'
b'lovely'
b'iconic'
b'recruits'
b'woods'
b'rivers'
b'unlimited'
b'libraries'
b'gates'
b'accomplishments'
b'dipped'
b'lyrics'
b

b'notices'
b'framing'
b'clues'
b'firefighter'
b'ward'
b'liter'
b'decorated'
b'embarrassed'
b'helm'
b'tightening'
b'collectively'
b'cheering'
b'fascinating'
b'sided'
b'mice'
b'technically'
b'reasonably'
b'notification'
b'replaces'
b'surprises'
b'collided'
b'lining'
b'archive'
b'coordinate'
b'shattered'
b'bandwidth'
b'mouse'
b'constituency'
b'welcomes'
b'ditch'
b'practically'
b'outdoors'
b'shouted'
b'restoring'
b'stabbing'
b'positively'
b'clause'
b'discretion'
b'defending_champion'
b'extremists'
b'drills'
b'abusive'
b'sympathy'
b'ailing'
b'unified'
b'grandson'
b'welcoming'
b'newer'
b'arise'
b'comprising'
b'portfolios'
b'territories'
b'pork'
b'notch'
b'misses'
b'sink'
b'throat'
b'allegation'
b'commercials'
b'reactor'
b'punched'
b'abducted'
b'retirees'
b'frames'
b'admitting'
b'demolition'
b'requesting'
b'hooked'
b'attributable'
b'themed'
b'disrupt'
b'laughs'
b'dean'
b'governmental'
b'judiciary'
b'horizon'
b'hunters'
b'portrait'
b'makeup'
b'emphasize'
b'landfill'
b'utilizing'
b'arc'
b'surgi

b'surgeon'
b'efficiencies'
b'analyzing'
b'bidders'
b'hectares'
b'hatred'
b'money_laundering'
b'prop'
b'accuses'
b'solving'
b'appropriately'
b'tainted'
b'mega'
b'slick'
b'unwanted'
b'tolerate'
b'artificial'
b'herein'
b'cows'
b'influences'
b'leaks'
b'flavors'
b'crush'
b'touchdown_passes'
b'binding'
b'lightweight'
b'enforced'
b'risks_associated'
b'overview'
b'pressured'
b'irresponsible'
b'laughter'
b'transplant'
b'females'
b'continuation'
b'thirty'
b'infants'
b'slate'
b'fatalities'
b'grand_slam'
b'readings'
b'stems'
b'societies'
b'retaining'
b'dunk'
b'lakes'
b'impress'
b'kiss'
b'aunt'
b'bracket'
b'donating'
b'terminated'
b'countered'
b'classmates'
b'dealership'
b'magnificent'
b'brains'
b'charm'
b'cycles'
b'catastrophe'
b'assistants'
b'buck'
b'truce'
b'whereabouts'
b'navy'
b'epidemic'
b'champ'
b'skiing'
b'humble'
b'assignments'
b'chief_economist'
b'geographic'
b'openings'
b'architectural'
b'pronounced'
b'physics'
b'answering'
b'backward'
b'variations'
b'nonetheless'
b'air_conditioning'
b'e

b'sauce'
b'flick'
b'merchant'
b'frightening'
b'prevalent'
b'prescription_drugs'
b'distributes'
b'collector'
b'congress'
b'boast'
b'nevertheless'
b'drummer'
b'scouting'
b'heavier'
b'wrestlers'
b'retains'
b'globalization'
b'subdued'
b'bruised'
b'respectful'
b'cult'
b'twisted'
b'sex_marriage'
b'myth'
b'survivor'
b'work_ethic'
b'ensemble'
b'deadlock'
b'governed'
b'cafeteria'
b'auditors'
b'harmony'
b'uncommon'
b'moisture'
b'spacecraft'
b'gravity'
b'swelling'
b'prevailed'
b'seamless'
b'serial'
b'mortar'
b'archived'
b'aerial'
b'artifacts'
b'hail'
b'prolific'
b'finalize'
b'destiny'
b'poles'
b'mediation'
b'sunshine'
b'extensions'
b'collectors'
b'ankle_injury'
b'headache'
b'derby'
b'dancer'
b'brace'
b'vandalism'
b'privileged'
b'justification'
b'butt'
b'rim'
b'un'
b'gearing'
b'freedoms'
b'canal'
b'honesty'
b'confiscated'
b'bribery'
b'recurring'
b'retreated'
b'passports'
b'startup'
b'integral_part'
b'simulation'
b'rents'
b'dessert'
b'skipped'
b'captures'
b'underwriting'
b'congratulate'
b'advanceme

b'offended'
b'inflammatory'
b'printers'
b'gameplay'
b'unreasonable'
b'utterly'
b'allowances'
b'deepening'
b'dams'
b'compelled'
b'tornadoes'
b'developmental'
b'circulating'
b'scales'
b'wildfires'
b'fours'
b'rests'
b'geared'
b'delete'
b'exhibitors'
b'provoked'
b'separating'
b'suicide_bombers'
b'screamed'
b'lag'
b'adequacy'
b'bachelor'
b'vacancies'
b'recess'
b'customary'
b'punching'
b'observer'
b'exemptions'
b'authenticity'
b'cheapest'
b'videotape'
b'observing'
b'validation'
b'guilty_plea'
b'adjourned'
b'dictatorship'
b'mailing'
b'surgeons'
b'invention'
b'wild_pitch'
b'biased'
b'outlining'
b'automobiles'
b'chess'
b'stricter'
b'interpret'
b'sour'
b'towel'
b'intuitive'
b'accessory'
b'tasting'
b'spine'
b'muster'
b'assaulting'
b'dining_room'
b'gestures'
b'leftist'
b'prescription_drug'
b'curtain'
b'derail'
b'perfection'
b'greenback'
b'pepper'
b'pubs'
b'sunlight'
b'challengers'
b'dubious'
b'subprime'
b'texts'
b'overcoming'
b'workload'
b'expiration'
b'republic'
b'repairing'
b'witnessing'
b'paren

b'veterinary'
b'guru'
b'traveler'
b'hugged'
b'sex_couples'
b'sex_offender'
b'exploits'
b'ashes'
b'staffer'
b'hoops'
b'painter'
b'chartered'
b'upped'
b'greatness'
b'saddened'
b'appointing'
b'spurt'
b'summed'
b'chin'
b'gem'
b'acknowledgment'
b'freak'
b'forbidden'
b'loudly'
b'renovate'
b'longstanding'
b'unconventional'
b'derailed'
b'toast'
b'hassle'
b'coincided'
b'clarification'
b'solo_homer'
b'browse'
b'mired'
b'volcano'
b'fund_raiser'
b'communicated'
b'favoring'
b'prohibiting'
b'extinction'
b'lookout'
b'arthritis'
b'tasty'
b'resisting'
b'discs'
b'endowment'
b'biopharmaceutical'
b'garner'
b'roses'
b'customize'
b'specification'
b'grievances'
b'recognizable'
b'seismic'
b'eco_friendly'
b'baht'
b'bowed'
b'foray'
b'meteorologist'
b'composure'
b'sync'
b'air_pollution'
b'dashed'
b'resellers'
b'geographical'
b'convene'
b'excel'
b'correlation'
b'overseen'
b'apprehended'
b'ducks'
b'unlocked'
b'unfinished'
b'et'
b'logistical'
b'textbook'
b'contending'
b'storey'
b'detonated'
b'footballer'
b'beforeha

b'moderates'
b'rub'
b'herbs'
b'immigration_reform'
b'flowed'
b'pilgrimage'
b'torque'
b'feasibility_study'
b'induction'
b'allergies'
b'east_coast'
b'energized'
b'multibillion_dollar'
b'dysfunctional'
b'tropical_storm'
b'statutes'
b'actresses'
b'downstairs'
b'pristine'
b'posture'
b'stripping'
b'respite'
b'rejoin'
b'relocating'
b'petty'
b'worrisome'
b'oils'
b'cruiser'
b'hormones'
b'jealous'
b'buzzing'
b'downplayed'
b'designated_hitter'
b'inflows'
b'furnishings'
b'lawful'
b'drifting'
b'foresee'
b'imbalance'
b'commanded'
b'turbine'
b'troublesome'
b'pals'
b'podcast'
b'drunken'
b'tweet'
b'stimulating'
b'instructional'
b'invalid'
b'culminating'
b'measurable'
b'gentlemen'
b'flank'
b'radically'
b'courtyard'
b'internationals'
b'bacon'
b'shameful'
b'facial'
b'fatality'
b'divine'
b'comrades'
b'stroll'
b'flare'
b'ramps'
b'stalking'
b'incorrectly'
b'systematically'
b'roaring'
b'lieutenant_governor'
b'standalone'
b'underestimated'
b'landmarks'
b'marrying'
b'regeneration'
b'centimeters'
b'roundup'
b'ex

b'monsters'
b'fooled'
b'enriched'
b'watering'
b'shorten'
b'peaking'
b'star_studded'
b'missionary'
b'assemblies'
b'rabbit'
b'embarking'
b'ventured'
b'segregation'
b'proficient'
b'humility'
b'chaplain'
b'invariably'
b'underdogs'
b'ploy'
b'wonderfully'
b'incarceration'
b'valves'
b'avenge'
b'hooks'
b'leverages'
b'tuning'
b'archdiocese'
b'soak'
b'salads'
b'discus'
b'carded'
b'compel'
b'messed'
b'paste'
b'daddy'
b'specialties'
b'spinach'
b'glue'
b'statues'
b'gripped'
b'prematurely'
b'fleets'
b'nut'
b'hello'
b'horse_racing'
b'gourmet'
b'characterize'
b'shrunk'
b'critique'
b'markedly'
b'strewn'
b'libelous'
b'lapses'
b'magistrates'
b'bashing'
b'walkers'
b'wineries'
b'expansions'
b'sophistication'
b'vent'
b'tuberculosis'
b'desktops'
b'sausage'
b'treaties'
b'hypertension'
b'amazingly'
b'arteries'
b'restrain'
b'flipping'
b'softened'
b'punishable'
b'warships'
b'philanthropic'
b'tuna'
b'eccentric'
b'bittersweet'
b'ablaze'
b'austerity'
b'delegations'
b'unsettled'
b'preach'
b'gems'
b'anthem'
b'glaring

b'elk'
b'sweater'
b'shouts'
b'halved'
b'tray'
b'denounce'
b'hapless'
b'molested'
b'ya'
b'quieter'
b'martial'
b'mildly'
b'rooftops'
b'rescues'
b'spotting'
b'analogy'
b'bounces'
b'serviced'
b'salvation'
b'sentimental'
b'spins'
b'engagements'
b'spicy'
b'fragrance'
b'derive'
b'newsletters'
b'tidy'
b'accommodating'
b'insect'
b'aftermarket'
b'dived'
b'canyon'
b'resisting_arrest'
b'uninterrupted'
b'pleads_guilty'
b'illustration'
b'dissolution'
b'complemented'
b'weaponry'
b'powerless'
b'mishap'
b'sprinted'
b'ref'
b'sucks'
b'skirts'
b'renaissance'
b'raged'
b'troupe'
b'continual'
b'vaccinations'
b'tigers'
b'revolving'
b'entails'
b'selects'
b'multicultural'
b'memos'
b'kph'
b'proponent'
b'lame'
b'punters'
b'lurking'
b'resembled'
b'imposes'
b'tile'
b'sponsorships'
b'plutonium'
b'emailed'
b'subprime_mortgage'
b'roller_coaster'
b'realistically'
b'adherence'
b'abound'
b'stalling'
b'flagging'
b'steering_wheel'
b'overdose'
b'rushes'
b'broker_dealer'
b'trenches'
b'insignificant'
b'squares'
b'triumphs'
b'

b'shattering'
b'keywords'
b'keepers'
b'geology'
b'implant'
b'practitioner'
b'moose'
b'unseat'
b'unseen'
b'ante'
b'denouncing'
b'refueling'
b'gaze'
b'fictitious'
b'religious_beliefs'
b'limp'
b'speculating'
b'antennas'
b'gay_couples'
b'spearheading'
b'elegance'
b'reggae'
b'unwarranted'
b'shady'
b'defaulted'
b'citywide'
b'upturn'
b'astronomers'
b'injure'
b'tracing'
b'networked'
b'thrashed'
b'substitution'
b'adopts'
b'beacon'
b'commit_suicide'
b'concentrates'
b'dec'
b'metabolism'
b'neared'
b'perched'
b'infrared'
b'nudity'
b'fingertips'
b'distracting'
b'blind_eye'
b'registrar'
b'specs'
b'dinosaur'
b'graded'
b'sixteen'
b'protectionism'
b'wrangling'
b'silently'
b'donates'
b'mathematical'
b'plummet'
b'broadest'
b'roadblock'
b'treadmill'
b'decimated'
b'preached'
b'painfully'
b'fatally'
b'visualization'
b'mad_cow_disease'
b'transplants'
b'sulfur'
b'plane_crashed'
b'euphoria'
b'subsided'
b'donuts'
b'millennium'
b'promo'
b'paltry'
b'strap'
b'dine'
b'subscribed'
b'pinnacle'
b'melodies'
b'tryouts'
b

b'sq_ft'
b'irritating'
b'spotty'
b'horrifying'
b'tenuous'
b'offensive_linemen'
b'microphones'
b'contingencies'
b'handbag'
b'billionaires'
b'leniency'
b'biometric'
b'nods'
b'catfish'
b'mastery'
b'chlorine'
b'precious_metal'
b'invoice'
b'scissors'
b'imaginable'
b'francs'
b'debtor'
b'hydraulic'
b'attentive'
b'illusions'
b'intermediary'
b'lagged'
b'dilute'
b'allocating'
b'legitimately'
b'emboldened'
b'marvel'
b'advantageous'
b'acronym'
b'culled'
b'underpinned'
b'joyous'
b'incompatible'
b'sewing'
b'pts'
b'regatta'
b'copyrighted'
b'chocolates'
b'forgiving'
b'poisonous'
b'pixels'
b'respondent'
b'circa'
b'pine'
b'canals'
b'unanticipated'
b'freelance_writer'
b'handshake'
b'elective'
b'confessions'
b'fenced'
b'disposing'
b'stormy'
b'boosters'
b'wellbeing'
b'admirers'
b'shipyard'
b'weakens'
b'predictably'
b'commendable'
b'breather'
b'spawn'
b'proclaim'
b'unbelievably'
b'springboard'
b'easily_accessible'
b'drenched'
b'initials'
b'simplistic'
b'dislocated'
b'shaving'
b'hinting'
b'spooked'
b'supplem

b'monastery'
b'countering'
b'clouded'
b'underwriter'
b'kisses'
b'profitably'
b'riskier'
b'stockholders_equity'
b'deletion'
b'mainframe'
b'lump_sum'
b'whaling'
b'buffalo'
b'additives'
b'hallmarks'
b'engraved'
b'flicked'
b'pooled'
b'biographies'
b'hoses'
b'captivated'
b'endlessly'
b'imperfect'
b'sprinting'
b'overloaded'
b'clumsy'
b'beginner'
b'staircase'
b'patriarch'
b'juices'
b'eyewitness'
b'bailing'
b'sole_possession'
b'bomb_blast'
b'firemen'
b'perfected'
b'medley'
b'exceedingly'
b'peanut'
b'unsold'
b'kittens'
b'buff'
b'grinning'
b'art_gallery'
b'refuted'
b'fashion_designer'
b'en_masse'
b'venturing'
b'underwritten'
b'pod'
b'vaguely'
b'occurrences'
b'increments'
b'dispense'
b'razed'
b'economists_surveyed'
b'archery'
b'physicist'
b'coal_mining'
b'flux'
b'thicker'
b'tease'
b'slab'
b'undesirable'
b'cultural_heritage'
b'bearded'
b'body_armor'
b'gangsters'
b'shippers'
b'hordes'
b'payers'
b'fetal'
b'fanatics'
b'resolute'
b'catalysts'
b'unincorporated'
b'motoring'
b'villas'
b'discard'
b'railro

b'uncompromising'
b'taxpayer_funded'
b'immersive'
b'fry'
b'sweetness'
b'modem'
b'mis'
b'launch_pad'
b'ardent'
b'alternates'
b'sympathize'
b'aggravating'
b'fathom'
b'hi'
b'golf_cart'
b'aggressiveness'
b'stomping'
b'federally_funded'
b'whisked'
b'aloft'
b'programmer'
b'outlandish'
b'normalization'
b'cautionary'
b'fades'
b'cursing'
b'qualitative'
b'lighten'
b'hateful'
b'tranquility'
b'reunions'
b'mixed_martial_arts'
b'torturing'
b'monologue'
b'intolerable'
b'butcher'
b'priesthood'
b'matures'
b'frenetic'
b'chewing'
b'reliance_thereon'
b'rabid'
b'ducked'
b'vascular'
b'sweaty'
b'lipstick'
b'evoked'
b'sightseeing'
b'iced'
b'coldest'
b'stalwarts'
b'shuttles'
b'sodomy'
b'specimen'
b'lesions'
b'conceive'
b'upward_trend'
b'oral_sex'
b'rioters'
b'complimented'
b'thirsty'
b'betterment'
b'heartache'
b'retreats'
b'tribesmen'
b'multinationals'
b'undersized'
b'underclassmen'
b'kitten'
b'interoperable'
b'illustrating'
b'fizzled'
b'accounts_receivable'
b'inscribed'
b'showpiece'
b'aficionados'
b'clustered

b'stances'
b'allowable'
b'composting'
b'clapped'
b'firehouse'
b'dwellers'
b'binds'
b'elitist'
b'liberating'
b'comet'
b'tweeting'
b'cheeky'
b'standardize'
b'shopkeepers'
b'falsified'
b'gift_certificate'
b'sorority'
b'cabinet_ministers'
b'proportionate'
b'atheist'
b'gag_order'
b'waiving'
b'reiterates'
b'continuum'
b'predicated'
b'oft'
b'distinctions'
b'leadoff_hitter'
b'hating'
b'counseled'
b'hardwood'
b'emigrated'
b'payer'
b'lighthouse'
b'parted'
b'curry'
b'ladders'
b'eavesdropping'
b'industrialization'
b'diligence'
b'reprise'
b'snared'
b'melancholy'
b'backseat'
b'designating'
b'sow'
b'peering'
b'mindless'
b'stay_afloat'
b'workstation'
b'undeniably'
b'chopping'
b'combustion'
b'soul_searching'
b'minimizes'
b'coffee_shops'
b'op_ed'
b'plying'
b'debtors'
b'inquiring'
b'highly_touted'
b'cliffs'
b'implored'
b'externally'
b'congratulating'
b'outbursts'
b'fund_raisers'
b'bulb'
b'pelted'
b'spinal'
b'biopsy'
b'bystander'
b'accretive'
b'macho'
b'herald'
b'yeast'
b'ingenious'
b'feces'
b'recurrent'


b'interfaith'
b'spacing'
b'unintended'
b'gold_mineralization'
b'malice'
b'steadied'
b'shooting_rampage'
b'impersonation'
b'chronicling'
b'cremated'
b'chronic_diseases'
b'intangible'
b'reptiles'
b'spruce'
b'depots'
b'broking'
b'lapsed'
b'research_institutes'
b'gambled'
b'lovable'
b'coy'
b'mastering'
b'tech_savvy'
b'loaf'
b'cello'
b'derelict'
b'waded'
b'seawater'
b'transcend'
b'mineral_resource'
b'highly_publicized'
b'haste'
b'amalgamation'
b'aerodynamic'
b'spontaneously'
b'formative'
b'newscast'
b'offensives'
b'unquestionably'
b'prodding'
b'twenties'
b'na'
b'metadata'
b'quagmire'
b'reefs'
b'rekindled'
b'ravine'
b'succumbing'
b'pruning'
b'plunges'
b'necessarily_condone'
b'invitational'
b'nanoparticles'
b'commando'
b'alternately'
b'nerd'
b'sheriffs'
b'pamphlet'
b'snuff'
b'unassuming'
b'rags'
b'manga'
b'digestive'
b'fathered'
b'de_la'
b'pleasant_surprise'
b'mash'
b'friendlies'
b'prodded'
b'concocted'
b'particulars'
b'gearbox'
b'someplace'
b'shootouts'
b'suffocated'
b'credence'
b'kickers'
b

b'applauds'
b'psychedelic'
b'glitter'
b'ranches'
b'luxury_hotels'
b'keynote_speech'
b'treatable'
b'irreverent'
b'erection'
b'precedents'
b'corral'
b'crucially'
b'light_hearted'
b'spiking'
b'overthrew'
b'clutched'
b'rotations'
b'extremist_groups'
b'chords'
b'blunt_force'
b'reinvest'
b'chipsets'
b'designations'
b'sinker'
b'dropouts'
b'write_offs'
b'curl'
b'mutually_beneficial'
b'gazing'
b'nailing'
b'rollback'
b'appraised'
b'pry'
b'crappy'
b'voiced_concern'
b'underdeveloped'
b'encased'
b'spaced'
b'unlawful_possession'
b'wretched'
b'sub_par'
b'slippers'
b'naught'
b'recaptured'
b'rhymes'
b'sandy'
b'seedlings'
b'swaying'
b'parliamentarian'
b'epidemics'
b'culling'
b'backboard'
b'mph_winds'
b'peaches'
b'dissipate'
b'studs'
b'lapped'
b'lamenting'
b'arrays'
b'wiretapping'
b'bargain_hunting'
b'cropping'
b'aspires'
b'newsprint'
b'boycotts'
b'income_earners'
b'reintroduced'
b'coups'
b'stents'
b'forage'
b'overbought'
b'facet'
b'fireball'
b'aggressor'
b'electric_motor'
b'da'
b'rebalancing'
b'nipped'


b'missile_strikes'
b'aggregates'
b'pearl'
b'purification'
b'inscription'
b'coped'
b'bettered'
b'breakers'
b'ratcheted'
b'dyed'
b'lectured'
b'pyrotechnics'
b'slit'
b'panda'
b'instantaneous'
b'modernity'
b'kangaroo'
b'digital_imaging'
b'walled'
b'taint'
b'cracker'
b'seeping'
b'peasant'
b'spied'
b'bun'
b'orally'
b'sizing'
b'squaring'
b'splashing'
b'homophobia'
b'sobbed'
b'bridal'
b'cancer_survivor'
b'holdup'
b'strangle'
b'calculus'
b'unsupervised'
b'commemorated'
b'pest_control'
b'fooling'
b'coupling'
b'horizontally'
b'ducking'
b'chaplains'
b'springtime'
b'curators'
b'ballot_boxes'
b'pled_guilty'
b'chagrin'
b'spiraled'
b'malfunctioning'
b'mansions'
b'affectionate'
b'comprehension'
b'pertains'
b'harbored'
b'fingernails'
b'abatement'
b'refinanced'
b'retroactively'
b'marketplaces'
b'haircuts'
b'crate'
b'acoustic_guitar'
b'flavorful'
b'foggy'
b'hangout'
b'boulevard'
b'smeared'
b'arch_rivals'
b'interspersed'
b'substitutions'
b'indecent_assault'
b'smokes'
b'opportune'
b'diseased'
b'credential'


b'bolsters'
b'semesters'
b'grinned'
b'sprinkling'
b'composites'
b'mourns'
b'underestimating'
b'unwieldy'
b'funnier'
b'recouped'
b'populate'
b'pays_tribute'
b'migraines'
b'sodas'
b'bright_yellow'
b'annulled'
b'cube'
b'communicator'
b'bandh'
b'blue_ribbon'
b'heartwarming'
b'overzealous'
b'pork_barrel'
b'monopolies'
b'price_gouging'
b'pronouncement'
b'dislikes'
b'waterfalls'
b'flashback'
b'hunk'
b'engagement_ring'
b'drug_addict'
b'shivering'
b'scoops'
b'racists'
b'consolidates'
b'promos'
b'accommodates'
b'tidbits'
b'trims'
b'rollers'
b'smug'
b'dislodge'
b'outsized'
b'woe'
b'bookmarks'
b'devout'
b'shortly_afterwards'
b'active_ingredient'
b'localization'
b'metric_ton'
b'white_supremacist'
b'ghastly'
b'widowed'
b'siphoned'
b'uncooperative'
b'concession_stand'
b'surveyors'
b'scribe'
b'attribution'
b'drag_racing'
b'biographical'
b'bemoaned'
b'slowest_pace'
b'plastic_surgeon'
b'nicked'
b'mayoral_candidate'
b'piers'
b'incapacitated'
b'stomachs'
b'mashed_potatoes'
b'farm_subsidies'
b'feline'
b'ba

b'slur'
b'repelled'
b'hemp'
b'mercenary'
b'chiropractor'
b'disseminating'
b'decoy'
b'microcosm'
b'sinkhole'
b'committing_suicide'
b'flounder'
b'clinician'
b'grossly'
b'symbolically'
b'sprouting'
b'broken_ribs'
b'chuckling'
b'halfcourt'
b'stalker'
b'ops'
b'forested'
b'jostling'
b'allocates'
b'midyear'
b'navigated'
b'largesse'
b'swanky'
b'vested_interests'
b'authoring'
b'empirical'
b'pooch'
b'ballparks'
b'subtlety'
b'gusher'
b'renewable_fuels'
b'rebuked'
b'primary_endpoint'
b'descendant'
b'mausoleum'
b'sixties'
b'atypical'
b'massacred'
b'actively_engaged'
b'friendlier'
b'pigeon'
b'indecision'
b'giggle'
b'harmonize'
b'email_newsletter'
b'livid'
b'divested'
b'nots'
b'kidney_transplant'
b'mentally_disabled'
b'vernacular'
b'bereavement'
b'conjecture'
b'merited'
b'spades'
b'microbial'
b'sustenance'
b'reformists'
b'multiculturalism'
b'optimizes'
b'ambushes'
b'parkland'
b'edict'
b'boe'
b'commonly_referred'
b'tuxedo'
b'kickoff_returns'
b'intranet'
b'inkling'
b'reinvested'
b'stitching'
b'tombs'
b

b'flagrant_offenders'
b'gobbled'
b'constructively'
b'rearing'
b'renown'
b'eaters'
b'sputtered'
b'bulldog'
b'evictions'
b'instigating'
b'pancake'
b'thronged'
b'markup'
b'vociferous'
b'blond_hair'
b'gambit'
b'anthropologist'
b'profane_comments'
b'epitomizes'
b'ceding'
b'biochemical'
b'friendliness'
b'lacerations'
b'meth_lab'
b'lengthening'
b'rebranded'
b'upholstery'
b'authenticate'
b'devoured'
b'lengthened'
b'painkiller'
b'smallpox'
b'hereditary'
b'quieted'
b'affectionately'
b'infielders'
b'decentralization'
b'downed_trees'
b'nominally'
b'regular_intervals'
b'boat_ramp'
b'midterm_election'
b'worth_mentioning'
b'erodes'
b'deeply_divided'
b'corralled'
b'setups'
b'playwrights'
b'inhibited'
b'afterlife'
b'shipyards'
b'ulcers'
b'greening'
b'reversals'
b'hibernation'
b'simulating'
b'watersheds'
b'ruckus'
b'plucky'
b'raccoon'
b'detectable'
b'watcher'
b'authenticated'
b'misfired'
b'contrarian'
b'horrid'
b'reassert'
b'subsistence'
b'jumpstart'
b'minefield'
b'encoded'
b'focal'
b'dissenters'
b'gran

b'propagation'
b'rd'
b'chubby'
b'blowouts'
b'ferret'
b'wrangle'
b'tamoxifen'
b'rollicking'
b'cowards'
b'blue_skies'
b'uttering'
b'launder'
b'lemons'
b'minimally_invasive'
b'ness'
b'invigorating'
b'toolbox'
b'sofas'
b'micrograms'
b'lotion'
b'lawn_chairs'
b'dangled'
b'flaring'
b'giggles'
b'ridding'
b'wildest_dreams'
b'skirted'
b'greenfield'
b'unabashed'
b'oceanfront'
b'protruding'
b'kidney_stones'
b'orthodoxy'
b'athletic_trainer'
b'pluralism'
b'indistinguishable'
b'sighs'
b'hollering'
b'atrial_fibrillation'
b'sedate'
b'manly'
b'aspired'
b'recessed'
b'picket_lines'
b'vultures'
b'freezers'
b'customer_centric'
b'regimens'
b'shelf_registration'
b'helplessness'
b'co_ops'
b'latex'
b'cardholder'
b'screwing'
b'lynching'
b'delving'
b'cell_lung_cancer'
b'caskets'
b'populous'
b'blazer'
b'graphically'
b'clean_sheets'
b'superstition'
b'reconstituted'
b'umpiring'
b'repo_rate'
b'negotiable'
b'remedied'
b'trickier'
b'downpours'
b'unconvinced'
b'darting'
b'stringer'
b'ma'
b'salvo'
b'skim'
b'prepayment'
b

b'landlines'
b'soundly'
b'undercover_agent'
b'grins'
b'polysilicon'
b'bagels'
b'gazed'
b'commandeered'
b'frontage'
b'thrift_store'
b'oozing'
b'shakes_hands'
b'bare_minimum'
b'multimillionaire'
b'geometric'
b'reacted_angrily'
b'luscious'
b'figurines'
b'fraternal'
b'panache'
b'daylight_saving'
b'deplete'
b'thump'
b'interviewers'
b'metabolic_syndrome'
b'memorable_moments'
b'drapes'
b'knocked_unconscious'
b'touchy'
b'manifold'
b'psychotherapy'
b'finality'
b'headscarf'
b'stormy_weather'
b'outcast'
b'fining'
b'scuffled'
b'tactile'
b'tree_limbs'
b'trampling'
b'hallucinations'
b'deepwater_drilling'
b'urn'
b'noncommittal'
b'bestow'
b'inert'
b'subtext'
b'replenished'
b'anathema'
b'cleantech'
b'volts'
b'reevaluate'
b'bachelor_degrees'
b'ballerina'
b'overshadowing'
b'bumper_stickers'
b'uncharted_territory'
b'unenforceable'
b'forgoing'
b'tort_reform'
b'jailers'
b'exasperation'
b'conveyor_belt'
b'heparin'
b'pawns'
b'purveyor'
b'downwind'
b'allergens'
b'permeates'
b'membranes'
b'hijackings'
b'tribal_

In [9]:
wordsim.pprint_w2vnews(result_original, result_debiased)
# Question: which WS?

+---------------+----------+---------------+--+--+--+
|               | EN-RG-65 | EN-WS-353-ALL |  |  |  |
+---------------+----------+---------------+--+--+--+
|     Before    |   nan    |      nan      |
| Hard-debiased |   nan    |      nan      |
+---------------+----------+---------------+--+--+--+


# Full W2vNEWS set

## 1: original word embeddings on RG & WS

Sources:

#### RG: H. Rubenstein and J. B. Goodenough. Contextual correlates of synonymy. Communications of the ACM, 8(10):627–633, 1965.

####  WS: L. Finkelstein, E. Gabrilovich, Y. Matias, E. Rivlin, Z. Solan, G. Wolfman, and E. Ruppin. Placing search in context: The concept  revisited. In WWW. ACM, 2001.


In [10]:
# Load google news word2vec
E = WordEmbedding('./embeddings/GoogleNews-vectors-negative300.bin')

*** Reading data from ./embeddings/GoogleNews-vectors-negative300.bin
(3000000, 300)
3000000 words of dimension 300 : </s>, in, for, that, ..., Bim_Skala_Bim, Mezze_Cafe, pulverizes_boulders, snowcapped_Caucasus
3000000 words of dimension 300 : </s>, in, for, that, ..., Bim_Skala_Bim, Mezze_Cafe, pulverizes_boulders, snowcapped_Caucasus


### Wordsim benchmarks
Code adapted from source 

#### embedding-evaluation: https://github.com/k-kawakami/embedding-evaluation

In [11]:
wordsim = Wordsim("en")
word2vec = wordsim.load_vector('./embeddings/GoogleNews-vectors-negative300.bin', binary=True)
result_original = wordsim.evaluate(word2vec)
wordsim.pprint(result_original)

load_vector
loaded vector 3000000 words found ..


IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



## 2: Debiased word embeddings on RG & WS


### Step 2a: Hard debiased

In [12]:
from debiaswe.debias import debias

In [13]:
# Lets load some gender related word lists to help us with debiasing
with open('./data/definitional_pairs.json', "r") as f:
    defs = json.load(f)
print("definitional", defs)

with open('./data/equalize_pairs.json', "r") as f:
    equalize_pairs = json.load(f)

with open('./data/gender_specific_seed.json', "r") as f:
    gender_specific_words = json.load(f)
# print("gender specific", len(gender_specific_words), gender_specific_words[:10])

definitional [['woman', 'man'], ['girl', 'boy'], ['she', 'he'], ['mother', 'father'], ['daughter', 'son'], ['gal', 'guy'], ['female', 'male'], ['her', 'his'], ['herself', 'himself'], ['Mary', 'John']]


In [14]:
debias(E, gender_specific_words, defs, equalize_pairs)
print("Saving to file...")
E.save('./embeddings/w2v_gnews_debiased_small.txt')
print("\n\nDone!\n")

3000000 words of dimension 300 : </s>, in, for, that, ..., Bim_Skala_Bim, Mezze_Cafe, pulverizes_boulders, snowcapped_Caucasus
{('gentlemen', 'ladies'), ('males', 'females'), ('FATHERHOOD', 'MOTHERHOOD'), ('FRATERNITY', 'SORORITY'), ('Father', 'Mother'), ('CONGRESSMAN', 'CONGRESSWOMAN'), ('dudes', 'gals'), ('KINGS', 'QUEENS'), ('dad', 'mom'), ('catholic_priest', 'nun'), ('boy', 'girl'), ('twin_brother', 'twin_sister'), ('CATHOLIC_PRIEST', 'NUN'), ('colt', 'filly'), ('COUNCILMAN', 'COUNCILWOMAN'), ('boys', 'girls'), ('King', 'Queen'), ('His', 'Her'), ('FATHERS', 'MOTHERS'), ('dads', 'moms'), ('prince', 'princess'), ('GELDING', 'MARE'), ('Spokesman', 'Spokeswoman'), ('Male', 'Female'), ('Wives', 'Husbands'), ('COLT', 'FILLY'), ('Brothers', 'Sisters'), ('Councilman', 'Councilwoman'), ('Fathers', 'Mothers'), ('SCHOOLBOY', 'SCHOOLGIRL'), ('his', 'her'), ('grandson', 'granddaughter'), ('HE', 'SHE'), ('KING', 'QUEEN'), ('Businessman', 'Businesswoman'), ('Fella', 'Granny'), ('Testosterone', 'E

MemoryError: 

In [None]:
# Do benchmark for hard-debiased
wordsim = Wordsim("en")
word2vec = wordsim.load_vector('./embeddings/w2v_gnews_debiased_small.txt')
result_debiased = wordsim.evaluate(word2vec)
print("ORIGINAL")
wordsim.pprint(result_original)
print("DEBIASED")
wordsim.pprint(result_debiased)

In [None]:
wordsim.pprint_w2vnews(result_original, result_debiased)
# Question: which WS?