In [4]:
import numpy as np
import random
import gensim
from nltk.corpus import words
from functools import reduce
import heapq
import os
import collections
from sklearn.cluster import KMeans
from sklearn import metrics
import copy

In [5]:
model = gensim.models.KeyedVectors.load_word2vec_format('glove.6B.300d.w2vformat.txt', binary=False)

In [42]:

words_upper = ["Hollywood", "Well", "Foot", "NewYork", "Spring", "Court", "Tube", "Point", "Tablet", "Slip", "Date", "Drill", "Lemon", "Bell", "Screen", "Fair", "Torch", "State", "Match", "Iron", "Block", "France", "Australia", "Limousine", "Stream", "Glove", "Nurse", "Leprechaun", "Play", "Tooth", "Arm", "Bermuda", "Diamond", "Whale", "Comic", "Mammoth", "Green", "Pass", "Missile", "Paste", "Drop", "Pheonix", "Marble", "Staff", "Figure", "Park", "Centaur", "Shadow", "Fish", "Cotton", "Egypt", "Theater", "Scale", "Fall", "Track", "Force", "Dinosaur", "Bill", "Mine", "Turkey", "March", "Contract", "Bridge", "Robin", "Line", "Plate", "Band", "Fire", "Bank", "Boom", "Cat", "Shot", "Suit", "Chocolate", "Roulette", "Mercury", "Moon", "Net", "Lawyer", "Satellite", "Angel", "Spider", "Germany", "Fork", "Pitch", "King", "Crane", "Trip", "Dog", "Conductor", "Part", "Bugle", "Witch", "Ketchup", "Press", "Spine", "Worm", "Alps", "Bond", "Pan", "Beijing", "Racket", "Cross", "Seal", "Aztec", "Maple", "Parachute", "Hotel", "Berry", "Soldier", "Ray", "Post", "Greece", "Square", "Mass", "Bat", "Wave", "Car", "Smuggler", "England", "Crash", "Tail", "Card", "Horn", "Capital", "Fence", "Deck", "Buffalo", "Microscope", "Jet", "Duck", "Ring", "Train", "Field", "Gold", "Tick", "Check", "Queen", "Strike", "Kangaroo", "Spike", "Scientist", "Engine", "Shakespeare", "Wind", "Kid", "Embassy", "Robot", "Note", "Ground", "Draft", "Ham", "War", "Mouse", "Center", "China", "Bolt", "Spot", "Piano", "Pupil", "Plot", "Lion", "Police", "Head", "Litter", "Concert", "Mug", "Vacuum", "Atlantis", "Straw", "Switch", "Skyscraper", "Laser", "Scuba Diver", "Africa", "Plastic", "Dwarf", "Lap", "Life", "Honey", "Horseshoe", "Unicorn", "Spy", "Pants", "Wall", "Paper", "Sound", "Ice", "Tag", "Web", "Fan", "Orange", "Temple", "Canada", "Scorpion", "Undertaker", "Mail", "Europe", "Soul", "Apple", "Pole", "Tap", "Mouth", "Ambulance", "Dress", "IceCream", "Rabbit", "Buck", "Agent", "Sock", "Nut", "Boot", "Ghost", "Oil", "Superhero", "Code", "Kiwi", "Hospital", "Saturn", "Film", "Button", "Snowman", "Helicopter", "Log", "Princess", "Time", "Cook", "Revolution", "Shoe", "Mole", "Spell", "Grass", "Washer", "Game", "Beat", "Hole", "Horse", "Pirate", "Link", "Dance", "Fly", "Pit", "Server", "School", "Lock", "Brush", "Pool", "Star", "Jam", "Organ", "Berlin", "Face", "Luck", "Amazon", "Cast", "Gas", "Club", "Sink", "Water", "Chair", "Shark", "Jupiter", "Copper", "Jack", "Platypus", "Stick", "Olive", "Grace", "Bear", "Glass", "Row", "Pistol", "London", "Rock", "Van", "Vet", "Beach", "Charge", "Port", "Disease", "Palm", "Moscow", "Pin", "Washington", "Pyramid", "Opera", "Casino", "Pilot", "String", "Night", "Chest", "Yard", "Teacher", "Pumpkin", "Thief", "Bark", "Bug", "Mint", "Cycle", "Telescope", "Calf", "Air", "Box", "Mount", "Thumb", "Antarctica", "Trunk", "Snow", "Penguin", "Root", "Bar", "File", "Hawk", "Battery", "Compound", "Slug", "Octopus", "Whip", "America", "Ivory", "Pound", "Sub", "Cliff", "Lab", "Eagle", "Genius", "Ship", "Dice", "Hood", "Heart", "Novel", "Pipe", "Himalayas", "Crown", "Round", "India", "Needle", "Shop", "Watch", "Lead", "Tie", "Table", "Cell", "Cover", "Czech", "Back", "Bomb", "Ruler", "Forest", "Bottle", "Space", "Hook", "Doctor", "Ball", "Bow", "Degree", "Rome", "Plane", "Giant", "Nail", "Dragon", "Stadium", "Flute", "Carrot", "Wake", "Fighter", "Model", "Tokyo", "Eye", "Mexico", "Hand", "Swing", "Key", "Alien", "Tower", "Poison", "Cricket", "Cold", "Knife", "Church", "Board", "Cloak", "Ninja", "Olympus", "Belt", "Light", "Death", "Stock", "Millionaire", "Day", "Knight", "Pie", "Bed", "Circle", "Rose", "Change", "Cap", "Triangle", "Chick"]
words = [x.lower() for x in words_upper]
for word in words :
    if ' ' in word :
        words.remove(word)


# generate new board
def new_game(words) :
    board = random.sample(words, 25)

    p1 = board[:9]
    p2 = board[9:17]
    neu = board[17:24]
    assassin = [board[24]]
    p1_vecs = model[p1]
    p2_vecs = model[p2]
    assassin_vec = model[assassin]
    
    return board, p1, p2, neu, assassin, p1_vecs, p2_vecs, assassin_vec
    
# https://stackoverflow.com/questions/40828929/sklearn-mean-distance-from-centroid-of-each-cluster

def clustering(vecs, n=5) :
    initial = KMeans(n_clusters=n)
    clusters = initial.fit_predict(vecs)
    centroids = initial.cluster_centers_
    
    # finding cluster size and tightness
    mean_dists = {}
    mean_count = {}
    for i in range(n):
        mean_dists[i] = 0
        mean_count[i] = 0
    
    for i in range(len(vecs)) :
        cluster = clusters[i]
        centroid = centroids[cluster]
        dist = np.linalg.norm(centroid-p1_vecs[i])
        mean_dists[cluster] += dist
        mean_count[cluster] += 1
    
    mean_of_cluster = []
    for i in range(n):
        mean_of_cluster.append(mean_dists[i]/mean_count[i])
    
    return clusters, mean_count, mean_of_cluster

# finding largest cluster from list of clusters
def largest_cluster(clusters, mean_count, player) :
    cur_max = mean_count[0]
    max_i = 0
    for i in range(len(mean_count)):
        if mean_count[i] > cur_max :
            max_i = i
            cur_max = mean_count[i]
    
    res = []
    
    for i in range(len(clusters)) :
        if clusters[i] == max_i :
            res.append(player[i])
            
    return max_i, res

# find tightest cluster from list of clusters
def tightest_cluster(clusters_list, mean_count_list, tightness_list, player) :
    
    current_max = None
    current_count = 0
    turn_max = 0
    largest_clusters = []
    
    for turn in range(3):
        clusters = clusters_list[turn]
        mean_count = mean_count_list[turn]
        tightness = tightness_list[turn]
        
        clusters_by_size = []
        max_i = 0
        current_size = 0

        for i in range(len(mean_count)):
            clusters_by_size.append((tightness[i], mean_count[i], i))

        sorted_clusters_by_size = sorted(clusters_by_size, key=lambda tup: tup[1], reverse=True)
        
        for i, cluster in enumerate(sorted_clusters_by_size):
            
            if i == 0:
                largest_clusters.append((cluster, turn))
            if cluster[1] > current_count:
                if (cluster[1] >= 6 and cluster[0] < 6.3) or (cluster[1] >= 5 and cluster[0] < 6) or (cluster[1] >= 4 and cluster[0] < 5.7) or cluster[0] < 5.5:
                    current_max = cluster
                    current_count = cluster[1]
                    turn_max = turn
                    break
            elif cluster[1] == current_count:
                if cluster[0] < current_max[0]:
                    current_max = cluster
                    current_count = cluster[1]
                    turn_max = turn
                    break
                    
    if current_max == None:
        largest_clusters_sorted = sorted(largest_clusters, key=lambda tup: tup[0][0])
        current_max = largest_clusters_sorted[0][0]
        current_count = largest_clusters_sorted[0][0][1]
        turn_max = largest_clusters_sorted[0][1]
        
    res = []
    
    for i in range(len(clusters_list[turn_max])) :
        if clusters_list[turn_max][i] == current_max[2] :
            res.append(player[i])
    
#     print "current max:", current_max
    return current_max[2], res

# spymaster gives hint based on word cluster, opponent's words, and assassin
def give_hint(pos, neg, restriction=50000):
    full_hint = model.most_similar(positive=pos, negative=neg, restrict_vocab=restriction)
    index = 0
    i = 0
    while i < len(board):
        if full_hint[index][0] in board[i] or board[i] in full_hint[index][0]:
            index += 1
            i = -1
        i += 1
    top_5_hints = []
    for i in range(5) :
        top_5_hints.append(full_hint[i][0])
        
    return full_hint[index][0]

# evalutes hints for similarities to team's words
def hint_evaluator(remaining, hint, cluster) :
    
    similarities = []
    for word in remaining:
        prob = model.similarity(hint, word)
        similarities.append((word, prob))
    
    sort_by_similarity = sorted(similarities, key=lambda tup: tup[1], reverse=True)
    res = 0
    for i in range(len(remaining)) :
        if sort_by_similarity[i][1] > 0.1 :
            res += 1
        
        if res >= cluster :
            break
    
    return res

# guesser agent compares remaining words on board with hint, and returns number of words with highest similarities

def guesser(remaining, hint, num=1) :
    
    potential = []
    for word in remaining:
        prob = model.similarity(hint, word)
        potential.append((word, prob))
    
    potential_sorted = sorted(potential, key=lambda tup: tup[1], reverse=True)
    return potential_sorted[:num]

def new_guesser(player_q, turn, hint, num_words) :
    new_q = []
    similarities = []
    unweighted = []
    guesser = []
    
    for item in player_q :
        similarity = model.similarity(hint, item[0])
        similarities.append((item[0], similarity))
        
    sort_by_similarity = sorted(similarities, key=lambda tup: tup[1], reverse=True)
    top_guesses = sort_by_similarity[:num_words]
    
    if turn :
        for item in player_q :
            added = False
            for guess in top_guesses:
                if item[0] == guess[0]:
                    guesser.append((item[0], item[1] + 1.75 * guess[1]))
                    new_prob = item[1] + guess[1]
                    new_q.append((item[0], new_prob))
                    added = True
            if added == False:
                new_q.append((item[0], item[1]))
                guesser.append((item[0], item[1]))
                
    else :        
        for item in player_q :
            added = False
            for guess in top_guesses:
                if item[0] == guess[0]:
                    new_prob = item[1] - guess[1]
                    new_q.append((item[0], new_prob))
                    added = True
            if added == False:
                new_q.append((item[0], item[1]))
        
    new_q = sorted(new_q, key=lambda tup: tup[1], reverse=True)
    guesser_sorted = sorted(guesser, key=lambda tup: tup[1], reverse=True)

#     print "unweighted:", sort_by_similarity
    
    return new_q, guesser_sorted

# Taking a turn
def take_turn(board, p1, p2, p1_q, p2_q, assassin, turn, cutoff) :
    
    p2_turn = not turn
#     print "board: ", board
    
    new_q1 = []
    new_q2 = []
    for item in p1_q :
        if item[0] in board :
            new_q1.append(item)
    
    for item2 in p2_q :
        if item2[0] in board :
            new_q2.append(item2)
    
    p1_q = new_q1
    p2_q = new_q2
            
#     print "edited p1_q", p1_q
#     print "edited p2_q", p2_q
    
    if turn :
        player = p1
        player_vecs = model[p1]
        n = max(len(p1)/2, 2)
    else :
        player = p2
        player_vecs = model[p2]
        n = max(len(p2)/2, 2)
        
    # clustering based on remaining words
    
    clusters_list = []
    mean_count_list = []
    mean_of_cluster_list = []
    
    for i in range(3):
        clusters_temp, mean_count_temp, mean_of_cluster_temp = clustering(player_vecs, min(n - 1 + i, len(player)))
        clusters_list.append(clusters_temp)
        mean_count_list.append(mean_count_temp)
        mean_of_cluster_list.append(mean_of_cluster_temp)
        
#     print "clusters:", clusters_list
#     print "mean count:", mean_count_list 
#     print "tightness:", mean_of_cluster_list
    
    num, tightest = tightest_cluster(clusters_list, mean_count_list, mean_of_cluster_list, player)
#     num, largest = largest_cluster(clusters, mean_count, player)
    
#     print "p1:", p1
#     print "p2:", p2
#     print "tightest cluster: " + str(tightest)
    
    if turn:
#         print "avoid:", [word for word in board if word not in p1]
#         print "p1:", p1
        hint = give_hint(tightest, assassin, restriction=50000)
    else:
        hint = give_hint(tightest, assassin, restriction=50000)
        
#     print "hint: ", hint
    
    num_words = len(tightest)
#     num_words = hint_evaluator(player, hint, len(largest))
    
    p1_q, guesses1 = new_guesser(p1_q, turn, hint, num_words)
    p2_q, guesses2 = new_guesser(p2_q, p2_turn, hint, num_words)

#     print "guesses1:", guesses1
#     print "p1_q:", p1_q
#     print ' '
#     print "guesses2:", guesses2
#     print "p2_q:", p2_q
#     print ' '
    
    if turn :
        guesses = []
        number = min(len(guesses1), range(num_words))
        for num in range(number):
            if guesses1[num][1] > cutoff:
                guesses.append(guesses1[num])

    else :
        guesses = []
        number = min(len(guesses2), range(num_words))
        for num in range(number):
            if guesses2[num][1] > cutoff:
                guesses.append(guesses2[num])
                
#     print "guesser: ", guesses
#     print ' '

    
    incorrect_guesses = 0
    assassin_count = 0
    for i in range(len(guesses)) :
        if assassin[0] == guesses[i][0] :
            assassin_count += 1
            if turn :
                return "Assassin :( Team 2 wins.", board, p1_q, p2_q, incorrect_guesses, assassin_count
            else : 
                return "Assassin :( Team 1 wins.", board, p1_q, p2_q, incorrect_guesses, assassin_count
            
        board.remove(guesses[i][0])
        
        if guesses[i][0] in p2 :
            p2.remove(guesses[i][0])
            if turn :
                incorrect_guesses += 1
#                 print "p2 incorrect guess:", guesses[i][0]
                break
        elif guesses[i][0] in p1 :
            p1.remove(guesses[i][0])
            if not turn :
                incorrect_guesses += 1
#                 print "p1 incorrect guess:", guesses[i][0]
                break
        else:
            incorrect_guesses += 1
#             print "neutral:", guesses[i][0]
            break

    if not p1 :
        return "Team 1 wins!", board, p1_q, p2_q, incorrect_guesses, assassin_count
    elif not p2 :
        return "Team 2 wins!", board, p1_q, p2_q, incorrect_guesses, assassin_count
    else :
        return "continue", board, p1_q, p2_q, incorrect_guesses, assassin_count


In [43]:

success = []
num_turns_to_win = []
avg_wrong = []
for x in range(100) :
    # create a new game
    board, p1, p2, neu, assassin, p1_vecs, p2_vecs, assassin_vec = new_game(words)
    p1_q = []
    p2_q = []
    
    for i in range(25) :
        p1_q.append((board[i], 0.0))
        p2_q.append((board[i], 0.0))
    
    # kepping track of # of incorrect guesses
    incorrect_guesses = 0
    
    game_end = "continue"
    turn_number = 1
    incorrect_guesses = 0
    assassin_total = 0
    
    cutoff = 0.09
    while turn_number < 20 and game_end == "continue" :
        game_end, board, p1_q, p2_q, wrong, assassin_count = take_turn(board, p1, p2, p1_q, p2_q, assassin, turn_number%2, cutoff)
        incorrect_guesses += wrong
        turn_number +=1 
        assassin_total += assassin_count
    
    num_turns_to_win.append(turn_number)
    avg_wrong.append(incorrect_guesses)
    # print game_end

print np.mean(num_turns_to_win)
print np.mean(avg_wrong)
print np.mean(assassin_total)


7.7
5.3
1.0


In [45]:
# other choice: find largest count


combined = [0, trial1[0], trial2[0], trial3[0]]

words1 = largest_cluster(trial1[0], trial1[1])
words2 = largest_cluster(trial2[0], trial2[1])

print words1

NameError: name 'trial1' is not defined

In [23]:
full_hint = model.most_similar(
    positive=words1[1],
    negative=p2 + [assassin],
    restrict_vocab=50000
)

In [22]:
index = 0
all_checked = False
for i, word in enumerate(board):
#         print word
#         print full_hint[index][0]
    if full_hint[index][0] in word or word in full_hint[index][0]:
        index += 1
        i = 0
    
hint = full_hint[index][0]
print hint

В


In [13]:
print assassin

theater


In [24]:
similarities = []
for word in board:
    prob = model.similarity(hint, word)
    similarities.append((word, prob))
    
sort_by_similarity = sorted(similarities, key=lambda tup: tup[1], reverse=True)

res = 0
for i in range(len(words1[1])) :
    if sort_by_similarity[i][1] > 0.2 :
        res += 1
        

print sort_by_similarity[:len(words1[1])]

[('snow', -0.063674614), ('amazon', -0.08624632), ('straw', -0.0966843), ('skyscraper', -0.10037614), ('box', -0.10054034)]


In [26]:
print words1

(1, ['space', 'grace', 'smuggler', 'time', 'temple'])
