In [1]:
import numpy as np
import random
import gensim
from nltk.corpus import words
from functools import reduce
import heapq
import os
import collections
from sklearn.cluster import KMeans
from sklearn import metrics

In [2]:
# Word vectors are read from a word2vec-format *text* file (binary=False).
# NOTE(review): the file name suggests 300-d GloVe 6B vectors converted to
# word2vec format - confirm against the conversion step.
model = gensim.models.KeyedVectors.load_word2vec_format(
    'glove.6B.300d.w2vformat.txt',
    binary=False,
)

In [20]:

# Card vocabulary for the game; `words` below is the lowercased version that
# is passed to new_game().
# The misspelled card "Pheonix" has been corrected to "Phoenix".
# NOTE(review): "NewYork", "IceCream" and "Scuba Diver" are multi-word cards;
# confirm that their lowercased forms exist in the embedding vocabulary
# ("scuba diver" contains a space, so it cannot be a single token).
words_upper = ["Hollywood", "Well", "Foot", "NewYork", "Spring", "Court", "Tube", "Point", "Tablet", "Slip", "Date", "Drill", "Lemon", "Bell", "Screen", "Fair", "Torch", "State", "Match", "Iron", "Block", "France", "Australia", "Limousine", "Stream", "Glove", "Nurse", "Leprechaun", "Play", "Tooth", "Arm", "Bermuda", "Diamond", "Whale", "Comic", "Mammoth", "Green", "Pass", "Missile", "Paste", "Drop", "Phoenix", "Marble", "Staff", "Figure", "Park", "Centaur", "Shadow", "Fish", "Cotton", "Egypt", "Theater", "Scale", "Fall", "Track", "Force", "Dinosaur", "Bill", "Mine", "Turkey", "March", "Contract", "Bridge", "Robin", "Line", "Plate", "Band", "Fire", "Bank", "Boom", "Cat", "Shot", "Suit", "Chocolate", "Roulette", "Mercury", "Moon", "Net", "Lawyer", "Satellite", "Angel", "Spider", "Germany", "Fork", "Pitch", "King", "Crane", "Trip", "Dog", "Conductor", "Part", "Bugle", "Witch", "Ketchup", "Press", "Spine", "Worm", "Alps", "Bond", "Pan", "Beijing", "Racket", "Cross", "Seal", "Aztec", "Maple", "Parachute", "Hotel", "Berry", "Soldier", "Ray", "Post", "Greece", "Square", "Mass", "Bat", "Wave", "Car", "Smuggler", "England", "Crash", "Tail", "Card", "Horn", "Capital", "Fence", "Deck", "Buffalo", "Microscope", "Jet", "Duck", "Ring", "Train", "Field", "Gold", "Tick", "Check", "Queen", "Strike", "Kangaroo", "Spike", "Scientist", "Engine", "Shakespeare", "Wind", "Kid", "Embassy", "Robot", "Note", "Ground", "Draft", "Ham", "War", "Mouse", "Center", "China", "Bolt", "Spot", "Piano", "Pupil", "Plot", "Lion", "Police", "Head", "Litter", "Concert", "Mug", "Vacuum", "Atlantis", "Straw", "Switch", "Skyscraper", "Laser", "Scuba Diver", "Africa", "Plastic", "Dwarf", "Lap", "Life", "Honey", "Horseshoe", "Unicorn", "Spy", "Pants", "Wall", "Paper", "Sound", "Ice", "Tag", "Web", "Fan", "Orange", "Temple", "Canada", "Scorpion", "Undertaker", "Mail", "Europe", "Soul", "Apple", "Pole", "Tap", "Mouth", "Ambulance", "Dress", "IceCream", "Rabbit", "Buck", "Agent", "Sock", "Nut", "Boot", "Ghost",
"Oil", "Superhero", "Code", "Kiwi", "Hospital", "Saturn", "Film", "Button", "Snowman", "Helicopter", "Log", "Princess", "Time", "Cook", "Revolution", "Shoe", "Mole", "Spell", "Grass", "Washer", "Game", "Beat", "Hole", "Horse", "Pirate", "Link", "Dance", "Fly", "Pit", "Server", "School", "Lock", "Brush", "Pool", "Star", "Jam", "Organ", "Berlin", "Face", "Luck", "Amazon", "Cast", "Gas", "Club", "Sink", "Water", "Chair", "Shark", "Jupiter", "Copper", "Jack", "Platypus", "Stick", "Olive", "Grace", "Bear", "Glass", "Row", "Pistol", "London", "Rock", "Van", "Vet", "Beach", "Charge", "Port", "Disease", "Palm", "Moscow", "Pin", "Washington", "Pyramid", "Opera", "Casino", "Pilot", "String", "Night", "Chest", "Yard", "Teacher", "Pumpkin", "Thief", "Bark", "Bug", "Mint", "Cycle", "Telescope", "Calf", "Air", "Box", "Mount", "Thumb", "Antarctica", "Trunk", "Snow", "Penguin", "Root", "Bar", "File", "Hawk", "Battery", "Compound", "Slug", "Octopus", "Whip", "America", "Ivory", "Pound", "Sub", "Cliff", "Lab", "Eagle", "Genius", "Ship", "Dice", "Hood", "Heart", "Novel", "Pipe", "Himalayas", "Crown", "Round", "India", "Needle", "Shop", "Watch", "Lead", "Tie", "Table", "Cell", "Cover", "Czech", "Back", "Bomb", "Ruler", "Forest", "Bottle", "Space", "Hook", "Doctor", "Ball", "Bow", "Degree", "Rome", "Plane", "Giant", "Nail", "Dragon", "Stadium", "Flute", "Carrot", "Wake", "Fighter", "Model", "Tokyo", "Eye", "Mexico", "Hand", "Swing", "Key", "Alien", "Tower", "Poison", "Cricket", "Cold", "Knife", "Church", "Board", "Cloak", "Ninja", "Olympus", "Belt", "Light", "Death", "Stock", "Millionaire", "Day", "Knight", "Pie", "Bed", "Circle", "Rose", "Change", "Cap", "Triangle", "Chick"]
# NOTE: this name shadows the `words` imported from nltk.corpus at the top of
# the notebook; it is kept because later cells call new_game(words).
words = [x.lower() for x in words_upper]

# generate new board
def new_game(words):
    """Deal a random 25-word board and split it into the four roles.

    Only words present in the global embedding ``model`` are eligible, so a
    card that has no vector (for example one containing a space) can never
    trigger a KeyError here or in later similarity look-ups on the board.

    Parameters
    ----------
    words : sequence of str
        Candidate (lowercased) card words.

    Returns
    -------
    tuple
        ``(board, p1, p2, neu, assassin, p1_vecs, p2_vecs, assassin_vec)``:
        the 25 sampled words, the first 9 (team 1), the next 8 (team 2), the
        next 7 (neutral), a one-element list holding the assassin word, and
        the ``model`` vectors for team 1, team 2 and the assassin.

    Raises
    ------
    ValueError
        From ``random.sample`` when fewer than 25 eligible words remain.
    """
    # Drop out-of-vocabulary entries before sampling.
    playable = [word for word in words if word in model]
    board = random.sample(playable, 25)

    p1 = board[:9]
    p2 = board[9:17]
    neu = board[17:24]
    assassin = [board[24]]  # kept as a list so it can index `model` directly

    p1_vecs = model[p1]
    p2_vecs = model[p2]
    assassin_vec = model[assassin]

    return board, p1, p2, neu, assassin, p1_vecs, p2_vecs, assassin_vec
    
# https://stackoverflow.com/questions/40828929/sklearn-mean-distance-from-centroid-of-each-cluster

def clustering(vecs, n=5):
    """Cluster word vectors with KMeans and measure each cluster's size and tightness.

    The statistics are computed from ``vecs`` itself, for however many
    vectors are passed in (the earlier version read the global ``p1_vecs``
    and assumed exactly 9 rows).

    Parameters
    ----------
    vecs : sequence of vectors
        One vector per word.
    n : int, optional
        Number of clusters requested from KMeans (default 5).

    Returns
    -------
    clusters : array of int
        Cluster label of each input vector, as returned by ``fit_predict``.
    mean_count : dict
        ``{cluster index: number of members}`` for every index in ``range(n)``.
    mean_of_cluster : list of float
        Mean Euclidean distance from the members of each cluster to its
        centroid; ``0.0`` for a cluster that received no members.
    """
    initial = KMeans(n_clusters=n)
    clusters = initial.fit_predict(vecs)
    centroids = initial.cluster_centers_

    # Accumulate, per cluster, the summed member-to-centroid distance and the
    # member count.
    mean_dists = dict.fromkeys(range(n), 0.0)
    mean_count = dict.fromkeys(range(n), 0)
    for vec, label in zip(vecs, clusters):
        label = int(label)  # plain int keeps the dict keys uniform
        mean_dists[label] += np.linalg.norm(centroids[label] - vec)
        mean_count[label] += 1

    # Guard the division: an empty cluster has no distances to average.
    mean_of_cluster = [
        mean_dists[i] / mean_count[i] if mean_count[i] else 0.0
        for i in range(n)
    ]

    return clusters, mean_count, mean_of_cluster

# finding largest cluster from list of clusters
def largest_cluster(clusters, mean_count, words=None):
    """Return the index of the most populated cluster and the words in it.

    Parameters
    ----------
    clusters : sequence of int
        Cluster label for each word, position-aligned with ``words``.
    mean_count : mapping or sequence
        Member count per cluster, indexable by ``0 .. len(mean_count) - 1``.
    words : sequence of str, optional
        Words that were clustered. Defaults to the notebook-level ``p1`` so
        existing two-argument calls keep working.

    Returns
    -------
    tuple
        ``(max_i, res)``: the winning cluster index (the lowest index wins a
        tie) and the list of words whose label equals it.
    """
    if words is None:
        words = p1  # backward-compatible fallback to the notebook global

    cur_max = mean_count[0]
    max_i = 0
    for i in range(1, len(mean_count)):
        # Strict comparison: on a tie the earlier cluster is kept.
        if mean_count[i] > cur_max:
            max_i = i
            cur_max = mean_count[i]

    res = [words[i] for i in range(len(clusters)) if clusters[i] == max_i]

    return max_i, res

# spymaster gives hint based on word cluster, opponent's words, and assassin
def give_hint(pos, neg, restriction=50000, board_words=None):
    """Pick the best hint word that does not overlap any word on the board.

    Candidates come from ``model.most_similar`` and are tried in the order
    it returns them. A candidate is rejected when it contains, or is
    contained in, any board word. Every candidate is checked against the
    whole board (the earlier loop tried to restart with ``i = 0``, which has
    no effect inside a ``for`` loop, so replacement candidates were only
    compared with the remaining board words).

    Parameters
    ----------
    pos : list of str
        Words the hint should be similar to.
    neg : list of str
        Words the hint should be dissimilar to.
    restriction : int, optional
        Passed to ``most_similar`` as ``restrict_vocab`` (default 50000).
    board_words : sequence of str, optional
        Words currently on the board. Defaults to the notebook-level
        ``board`` so existing calls keep working.

    Returns
    -------
    str
        The first admissible candidate.

    Raises
    ------
    IndexError
        When every candidate overlaps a board word.
    """
    if board_words is None:
        board_words = board  # backward-compatible fallback to the notebook global

    full_hint = model.most_similar(positive=pos, negative=neg, restrict_vocab=restriction)
    # Single-argument print: same text on Python 2, valid syntax on Python 3.
    print("full hint %s" % (full_hint,))

    for candidate, _score in full_hint:
        overlaps = any(candidate in word or word in candidate for word in board_words)
        if not overlaps:
            return candidate

    raise IndexError("every candidate hint overlaps a word on the board")

# evaluates hints for similarities to team's words
def hint_evaluator(remaining, hint, cluster, threshold=-0.15):
    """Count how many of the remaining words the hint plausibly covers.

    Words are ranked by ``model.similarity(hint, word)`` and the ranking is
    printed. The result is the number of words whose similarity is strictly
    above ``threshold``, capped at ``cluster``.

    Parameters
    ----------
    remaining : sequence of str
        Words to score against the hint.
    hint : str
        The hint word.
    cluster : int
        Upper bound on the returned count (the notebook passes the size of
        the cluster the hint was built from).
    threshold : float, optional
        Minimum similarity, exclusive, for a word to count (default -0.15,
        the value previously hard-coded).

    Returns
    -------
    int
        A value between 0 and ``cluster``; 0 when ``remaining`` is empty or
        ``cluster`` is not positive.
    """
    similarities = [(word, model.similarity(hint, word)) for word in remaining]
    sort_by_similarity = sorted(similarities, key=lambda tup: tup[1], reverse=True)
    print(sort_by_similarity)

    res = 0
    for _word, prob in sort_by_similarity:
        # The list is sorted descending, so the first miss ends the count.
        if res >= cluster or not (prob > threshold):
            break
        res += 1

    return res

# guesser agent compares the remaining words on the board with the hint and returns the `num` words with the highest similarities

def guesser(remaining, hint, num=1):
    """Return the ``num`` remaining words most similar to the hint.

    Every word in ``remaining`` is scored with
    ``model.similarity(hint, word)``; the full ranking is printed.

    Parameters
    ----------
    remaining : sequence of str
        Words still on the board.
    hint : str
        The hint word.
    num : int, optional
        How many guesses to return (default 1).

    Returns
    -------
    list of (str, float)
        Up to ``num`` ``(word, similarity)`` pairs, most similar first.
    """
    potential = [(word, model.similarity(hint, word)) for word in remaining]
    potential_sorted = sorted(potential, key=lambda tup: tup[1], reverse=True)
    # Single-argument print: same text on Python 2, valid syntax on Python 3.
    print(potential_sorted)
    return potential_sorted[:num]


In [21]:

# Deal a fresh board. The unpacked names (board, p1, p1_vecs, ...) are read
# as globals by the cells below.
board, p1, p2, neu, assassin, p1_vecs, p2_vecs, assassin_vec = new_game(words)

# Single-argument print(): same output on Python 2, valid on Python 3.
print(board)
print(p1)


['change', 'compound', 'ketchup', 'trip', 'port', 'play', 'soldier', 'ring', 'pin', 'car', 'deck', 'washer', 'snowman', 'scale', 'spine', 'atlantis', 'ice', 'stick', 'satellite', 'ruler', 'sound', 'dress', 'spot', 'rock', 'snow']
['change', 'compound', 'ketchup', 'trip', 'port', 'play', 'soldier', 'ring', 'pin']


In [22]:
# Cluster team 1's word vectors and build a hint around the largest cluster.
clusters, mean_count, mean_of_cluster = clustering(p1_vecs, n=5)

num, largest = largest_cluster(clusters, mean_count)
print(largest)

# No negative words are passed for now. To steer the hint away from the
# opponent's words and the assassin, pass the flat list `p2 + assassin`
# (not `[p2 + assassin]`, which nests the list):
# hint = give_hint(largest, p2 + assassin, restriction=20000)
hint = give_hint(largest, [], restriction=20000)
num_words = hint_evaluator(p1, hint, len(largest))

# "%s %s" reproduces the space-separated output of the old print statement.
print("%s %s" % (hint, num_words))
print(guesser(board, hint, num_words))


['change', 'trip', 'play']
full hint [(u'take', 0.6786340475082397), (u'next', 0.6770526170730591), (u'come', 0.6759542226791382), (u'going', 0.6758896708488464), (u'this', 0.6729804277420044), (u'way', 0.6664543747901917), (u'make', 0.6662427186965942), (u'what', 0.6587151288986206), (u'coming', 0.6571078300476074), (u'because', 0.6545442342758179)]
[('change', 0.5707897), ('play', 0.49331045), ('trip', 0.4262883), ('ring', 0.24220693), ('port', 0.21600766), ('soldier', 0.19556919), ('compound', 0.14017743), ('pin', 0.12916069), ('ketchup', -0.005581744)]
take 3
[('change', 0.5707897), ('play', 0.49331045), ('trip', 0.4262883), ('spot', 0.3621546), ('stick', 0.35080212), ('car', 0.31223822), ('scale', 0.29407406), ('sound', 0.24914451), ('ring', 0.24220693), ('dress', 0.23027161), ('ice', 0.22713318), ('snow', 0.22415909), ('rock', 0.22221188), ('port', 0.21600766), ('soldier', 0.19556919), ('satellite', 0.18247688), ('ruler', 0.17273253), ('deck', 0.16340013), ('compound', 0.14017743

In [51]:
# other choice: find largest count


# NOTE(review): trial1, trial2 and trial3 are not defined in any visible cell;
# presumably each holds a clustering() result (labels, counts, ...) from a
# cell that was removed or run out of order - confirm before Restart & Run All.
combined = [0, trial1[0], trial2[0], trial3[0]]

words1 = largest_cluster(trial1[0], trial1[1])
words2 = largest_cluster(trial2[0], trial2[1])

# Single-argument print(): same output on Python 2, valid on Python 3.
print(words1)

(1, ['alien', 'key', 'europe', 'alps', 'pound'])


In [23]:
# `new_game` returns `assassin` as a one-element list, so wrapping it again
# (`[assassin]`) would put a list inside the negatives. Accept either a list
# or a bare string so the negatives are always a flat list of words.
assassin_words = assassin if isinstance(assassin, list) else [assassin]

full_hint = model.most_similar(
    positive=words1[1],
    negative=list(p2) + assassin_words,
    restrict_vocab=50000
)

In [22]:
all_checked = False  # NOTE(review): never read in the visible cells

# Take the first candidate that neither contains nor is contained in any
# board word. Each candidate is checked against the whole board; the earlier
# loop reset `i = 0`, which has no effect inside a `for` loop, so replacement
# candidates were not rechecked against earlier board words.
for index, (candidate, _score) in enumerate(full_hint):
    if not any(candidate in word or word in candidate for word in board):
        break
else:
    raise IndexError("every candidate hint overlaps a word on the board")

hint = full_hint[index][0]
print(hint)

В


In [13]:
# Single-argument print(): same output on Python 2, valid on Python 3.
print(assassin)

theater


In [24]:
# Rank every board word by similarity to the current hint.
similarities = [(word, model.similarity(hint, word)) for word in board]
sort_by_similarity = sorted(similarities, key=lambda tup: tup[1], reverse=True)

# Look only at as many top-ranked words as the hint's cluster contained.
top_k = len(words1[1])

# Number of those top-ranked words whose similarity exceeds 0.2.
# Slicing (rather than indexing) stays safe if top_k exceeds the board size.
res = sum(1 for _word, prob in sort_by_similarity[:top_k] if prob > 0.2)

print(sort_by_similarity[:top_k])

[('snow', -0.063674614), ('amazon', -0.08624632), ('straw', -0.0966843), ('skyscraper', -0.10037614), ('box', -0.10054034)]


In [26]:
# Single-argument print(): same output on Python 2, valid on Python 3.
print(words1)

(1, ['space', 'grace', 'smuggler', 'time', 'temple'])
