In [1]:
import numpy as np
import random

from q1_softmax import softmax
from q2_gradcheck import gradcheck_naive
from q2_sigmoid import sigmoid, sigmoid_grad

def normalizeRows(x):
    """Scale every row of a 2-D array to unit Euclidean length.

    Each row is multiplied by the reciprocal of its L2 norm. The input
    array is not modified; a new array is returned.

    Note: a row consisting entirely of zeros has norm 0, so its
    reciprocal is infinite and the resulting row contains NaNs.
    """
    # Per-row L2 norm: square root of the sum of squares along axis 1.
    row_lengths = np.sqrt(np.sum(np.square(x), axis=1))
    inverse_lengths = 1 / row_lengths
    # Add a trailing axis so the per-row factor broadcasts across columns.
    return x * inverse_lengths[:, np.newaxis]


# Stand-in dataset: an instance of an empty class created on the fly, used
# purely as a namespace that attributes can be attached to.
dataset = type('dummy', (), {})()

def dummySampleTokenIdx():
    """Return a random integer token index in the closed range 0..4."""
    lowest_idx, highest_idx = 0, 4
    return random.randint(lowest_idx, highest_idx)

def getRandomContext(C):
    """Return a random center token and 2*C random context tokens.

    Tokens are drawn uniformly (with replacement) from "a".."e".

    Parameters
    ----------
    C : int
        Context half-width; the context list holds 2*C tokens.

    Returns
    -------
    (center, context) : (str, list of str)
        The center token followed by the list of context tokens.
    """
    tokens = ["a", "b", "c", "d", "e"]
    # Previously the `return` statement ended at a trailing comma, so only a
    # 1-tuple holding the center token was returned and the context list on
    # the following lines was never evaluated. Build both parts explicitly
    # (center first, then context, so the RNG is consumed in that order).
    center = tokens[random.randint(0, 4)]
    context = [tokens[random.randint(0, 4)] for _ in range(2 * C)]
    return center, context

# Attach the dummy helper functions to the stand-in dataset object.
dataset.sampleTokenIdx = dummySampleTokenIdx
dataset.getRandomContext = getRandomContext

# Seed both the stdlib and NumPy generators so the fixtures are reproducible.
random.seed(31415)
np.random.seed(9265)
# Ten random 3-component vectors, each row scaled to unit length.
dummy_vectors = normalizeRows(np.random.randn(10, 3))
# One unit-length 3-component vector; [0] unwraps the single row.
v_hat = normalizeRows(np.random.randn(1, 3))[0]
# Lookup from the five dummy tokens to integer indices 0..4.
dummy_tokens = dict([("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)])

In [148]:
def MYnegSamplingCostAndGradient(predicted,
                               target,
                               outputVectors,
                               random_sample,
                               K=10):
    """Negative-sampling cost and gradients for one predicted vector.

    Parameters
    ----------
    predicted : vector dotted against rows of ``outputVectors``
        (assumed 1-D -- confirm against callers).
    target : row index of the target vector in ``outputVectors``.
    outputVectors : 2-D array whose rows are output vectors.
    random_sample : sequence of row indices used as negative samples;
        repeated indices accumulate in ``grad``.
    K : accepted for interface compatibility; not used by this
        implementation (the number of samples is ``len(random_sample)``).

    Returns
    -------
    (cost, gradPred, grad)
        ``cost`` is the scalar loss, ``gradPred`` the gradient with respect
        to ``predicted`` and ``grad`` an array shaped like ``outputVectors``
        holding the gradient with respect to every output vector.
    """
    target_vector = outputVectors[target]
    negative_vectors = outputVectors[random_sample]

    target_score = target_vector.dot(predicted)
    negative_scores = negative_vectors.dot(predicted)

    # cost = -[log s(u_o . v) + sum_k log s(-u_k . v)]
    log_likelihood = (np.log(sigmoid(target_score)) +
                      np.sum(np.log(sigmoid(-negative_scores))))
    cost = - log_likelihood

    # The same scalar factor scales both target-related gradient terms.
    target_factor = - sigmoid(- target_score)

    gradPred = target_factor * target_vector + np.dot(
        sigmoid(negative_scores), negative_vectors)

    grad = np.zeros(outputVectors.shape)
    grad[target] = target_factor * predicted
    # Accumulate with += so an index sampled several times (or equal to the
    # target) receives every contribution.
    for row in random_sample:
        grad[row] += sigmoid(outputVectors[row].dot(predicted)) * predicted

    return cost, gradPred, grad

def ALTnegSamplingCostAndGradient(predicted, target, outputVectors,sampleIndexs,K=10):
    """Alternative negative-sampling cost and gradients for one predicted vector.

    Parameters
    ----------
    predicted : vector dotted against rows of ``outputVectors``
        (assumed 1-D -- confirm against callers).
    target : row index of the target vector in ``outputVectors``.
    outputVectors : 2-D array whose rows are output vectors.
    sampleIndexs : sequence of row indices used as negative samples;
        repeated indices accumulate in ``grad``.
    K : kept for backward compatibility. The number of negative samples is
        taken from ``sampleIndexs`` itself, so a mismatching ``K`` can no
        longer cause an IndexError or silently skip samples.

    Returns
    -------
    (cost, gradPred, grad)
        ``cost`` is the scalar loss, ``gradPred`` the gradient with respect
        to ``predicted`` and ``grad`` an array shaped like ``outputVectors``
        holding the gradient with respect to every output vector.
    """
    sampleVectors = outputVectors[sampleIndexs, :]

    # s(u_o . v) for the target and s(-u_k . v) for every negative sample.
    w_r_out = sigmoid(outputVectors[target].dot(predicted))
    w_r_k = sigmoid(- sampleVectors.dot(predicted))

    cost = - np.log(w_r_out) - np.sum(np.log(w_r_k))
    gradPred = outputVectors[target] * (w_r_out - 1) + (1 - w_r_k).dot(sampleVectors)

    grad = np.zeros(outputVectors.shape)
    grad[target] = predicted * (w_r_out - 1)
    # Walk the samples actually supplied (not range(K)) and accumulate with
    # += so duplicated indices receive every contribution.
    for row, weight in zip(sampleIndexs, 1 - w_r_k):
        grad[row] += predicted * weight

    return cost, gradPred, grad

In [93]:
# Index that the negative samples drawn in this cell must avoid.
# NOTE(review): the cost/gradient calls further down pass 6 as the target,
# not this value -- confirm which one is intended.
target = 1 
# Number of negative samples to draw.
K = 5 
random_sample = []
# Rejection sampling: keep drawing token indices until K of them differ
# from `target`. Duplicates among the accepted indices are allowed.
while len(random_sample) < K:
    pick_idx = dataset.sampleTokenIdx()
    if pick_idx != target:
        random_sample.append(pick_idx)
# Bare expression so the notebook displays the sampled indices.
random_sample 

[0, 4, 3, 0, 4]

In [149]:
# Evaluate the first implementation with target row 6 and the sampled
# negative indices; the notebook displays (cost, gradPred, grad).
MYnegSamplingCostAndGradient(v_hat,6,dummy_vectors,random_sample,5)

(4.7319514233077671,
 array([-2.13918848,  0.37526311,  0.14503597]),
 array([[-0.38785727,  0.19586566, -0.9666187 ],
        [ 0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ],
        [-0.25765128,  0.13011239, -0.64211906],
        [-0.40008065,  0.2020384 , -0.99708185],
        [ 0.        ,  0.        ,  0.        ],
        [ 0.12623282, -0.06374684,  0.3145977 ],
        [ 0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ]]))

In [150]:
# Evaluate the alternative implementation on the same inputs so its
# (cost, gradPred, grad) output can be compared with the first one.
ALTnegSamplingCostAndGradient(v_hat,6,dummy_vectors,random_sample,5)

(4.7319514233077671,
 array([-2.13918848,  0.37526311,  0.14503597]),
 array([[-0.38785727,  0.19586566, -0.9666187 ],
        [ 0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ],
        [-0.25765128,  0.13011239, -0.64211906],
        [-0.40008065,  0.2020384 , -0.99708185],
        [ 0.        ,  0.        ,  0.        ],
        [ 0.12623282, -0.06374684,  0.3145977 ],
        [ 0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        ]]))

In [110]:

# Manual scratch sum of scalar-times-component products.
# NOTE(review): the line break below has no continuation (no backslash or
# enclosing parentheses), so the first line is a separate expression whose
# value is discarded. The notebook displays only the value of the second
# line (its leading `+` is a unary plus), i.e. the sum of two terms, not
# five. Wrap both lines in parentheses if the full sum was intended.
0.529893462778*-0.96735714  + 0.529893462778*-0.59609459+ 0.529893462778*-0.56713774
+ 0.529893462778*-0.96735714  + 0.529893462778*-0.59609459 


-0.8284628510959546