In [2]:
import numpy as np

In [3]:
def softmax(x):
    """Compute the softmax function for each row of the input x.
    It is crucial that this function is optimized for speed because
    it will be used frequently in later code. 

    Arguments:
    x -- A D dimensional vector or N x D dimensional numpy matrix.
    Return:
    x -- You are allowed to modify x in-place
    """
    orig_shape = x.shape

    if len(x.shape) > 1:
        # Matrix
        tmp = np.max(x, axis=1)
        x -= tmp.reshape((x.shape[0], 1))
        x = np.exp(x)
        tmp = np.sum(x, axis=1)
        x /= tmp.reshape((x.shape[0], 1))
    else:
        # Vector
        tmp = np.max(x)
        x -= tmp
        x = np.exp(x)
        tmp = np.sum(x)
        x /= tmp

    assert x.shape == orig_shape
    return x

In [7]:
import numpy as np
outsideWordVecs=np.random.rand(100,10)
centerWordVecs=np.random.rand(100,10)

centerWordVector=centerWordVecs[1]

softmax(np.dot(outsideWordVecs,centerWordVector)).shape

(100,)

In [396]:
def func(x):
    outsideVectors[10]=x
    return -np.log(softmax(np.dot(outsideVectors,centerWordVec)))[outsideWordIdx],np.array(((y_c*y4)/y3)).repeat(10)

In [397]:
gradcheck_naive(func, outsideVectors[10], 'gradientText')

Gradient check failed for gradientText.
First gradient error found at index (0,) in the vector of gradients
Your gradient: 0.006209 	 Numerical gradient: 0.004677


In [398]:
outsideVectors=np.random.rand(100,10)
centerWordVec=np.random.rand(10)
outsideWordIdx=1

In [496]:
def func(x):
    y_=softmax(np.dot(outsideVectors,x))
    y=np.zeros(outsideVectors.shape[0])
    y[outsideWordIdx]=1
    
    return -np.log(y_[outsideWordIdx]),np.dot((y_[outsideWordIdx]-1),outsideVectors)

In [500]:
y_[outsideWordIdx]-1

-0.9924198937504909

In [495]:
gradcheck_naive(func,centerWordVec,'gradCenterVec')

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

In [492]:
def func2(x):
    y_=softmax(np.dot(x,centerWordVec))
    sub=np.zeros_like(y_)
    sub[outsideWordIdx]=1
    y_sub=y_-sub
    gradOutsideVecs=np.array([centerWordVec * y_sub[i] for i in range(len(y_sub))])
    
    return -np.log(y_[outsideWordIdx]),gradOutsideVecs

In [493]:
gradcheck_naive(func2,outsideVectors,'gradOutsideVecs')

Gradient check passed!. Read the docstring of the `gradcheck_naive` method in utils.gradcheck.py to understand what the gradient check does.


In [501]:

def sigmoid(x):
    """
    Compute the sigmoid function for the input here.
    Arguments:
    x -- A scalar or numpy array.
    Return:
    s -- sigmoid(x)
    """
    
    ### YOUR CODE HERE (~1 Line)
    s=1/(1+np.exp(-x))
    ### END YOUR CODE

    return s

In [524]:
print(centerWordVec.shape) #中心词
print(outsideVectors[outsideWordIdx].shape) #预测正样本
negSampleWordIndices=[i for i in range(10,21)]
print(outsideVectors[negSampleWordIndices].shape) #预测正样本

(10,)
(10,)
(11, 10)


2.7120182581552803

In [539]:
#目标函数
r=-np.log(sigmoid(np.dot(outsideVectors[outsideWordIdx],centerWordVec)))
l=-np.log(sigmoid(np.dot(-np.concatenate([outsideVectors[negSampleWordIndices]]),centerWordVec))).sum()
l+r

2.7634282773443037

In [557]:
#对V_c之偏导
left=((1-sigmoid(np.dot(-np.concatenate([outsideVectors[negSampleWordIndices]]),centerWordVec))) @ outsideVectors[negSampleWordIndices]).sum()
right=-centerWordVec*(1-sigmoid(np.dot(outsideVectors[outsideWordIdx],centerWordVec)))
left+right

array([1.06602457, 2.02811054, 0.89595356, 1.738278  , 0.29081038,
       1.1979591 , 2.15037812, 0.90683858, 3.01771636, 0.3440505 ])

In [618]:
def grad_v_c(x):
    centerWordVec=x
    tmpl=sigmoid(np.dot(outsideVectors[outsideWordIdx],centerWordVec))
    tmpr=sigmoid(np.dot(-np.concatenate([outsideVectors[negSampleWordIndices]]),centerWordVec))
    
    obj=-np.log(tmpl)-np.log(tmpr).sum()
    
    #对V_c之偏导
    grad=((1-tmpr) @\
          np.concatenate([outsideVectors[negSampleWordIndices]]))-\
    outsideVectors[outsideWordIdx]*(1-tmpl)
    return obj,grad

In [619]:
gradcheck_naive(grad_v_c,centerWordVec,'grad_v_c')

Gradient check passed!. Read the docstring of the `gradcheck_naive` method in utils.gradcheck.py to understand what the gradient check does.


In [716]:
def grad_v_c(x):
    outsideVectors[outsideWordIdx]=x
    tmpl=sigmoid(np.dot(outsideVectors[outsideWordIdx],centerWordVec))
    tmpr=sigmoid(np.dot(-np.concatenate([outsideVectors[negSampleWordIndices]]),centerWordVec))
    
    obj=-np.log(tmpl)-np.log(tmpr).sum()
    
    #对U0之偏导
    grad=-centerWordVec * (1-tmpl)
    return obj,grad

In [717]:
gradcheck_naive(grad_v_c,outsideVectors[outsideWordIdx],'grad_v_c')

Gradient check passed!. Read the docstring of the `gradcheck_naive` method in utils.gradcheck.py to understand what the gradient check does.


In [710]:
def grad_v_c(x):
    
    tmpl=sigmoid(np.dot(outsideVectors[outsideWordIdx],centerWordVec))
    tmpr=sigmoid(np.dot(-x,centerWordVec))
    
    obj=-np.log(tmpl)-np.log(tmpr).sum()
    
    #对Uw之偏导
    grad=np.array([centerWordVec * (1-i) for i in tmpr])
    return obj,grad

In [711]:
x=np.concatenate([outsideVectors[negSampleWordIndices]])
gradcheck_naive(grad_v_c,x,'grad_v_c')

Gradient check passed!. Read the docstring of the `gradcheck_naive` method in utils.gradcheck.py to understand what the gradient check does.


In [718]:
    grad1=-centerWordVec * (1-tmpl)
    grad2=np.array([centerWordVec * (1-i) for i in tmpr])


In [719]:
grad1.shape

(10,)

In [720]:
grad2.shape

(11, 10)

In [724]:
np.concatenate([grad1.reshape(1,-1),grad2])

(12, 10)

In [745]:
def gradcheck_naive(f, x, gradientText):
    """ Gradient check for a function f.
    Arguments:
    f -- a function that takes a single argument and outputs the
         loss and its gradients
    x -- the point (numpy array) to check the gradient at
    gradientText -- a string detailing some context about the gradient computation

    Notes:
    Note that gradient checking is a sanity test that only checks whether the
    gradient and loss values produced by your implementation are consistent with
    each other. Gradient check passing on its own doesn’t guarantee that you
    have the correct gradients. It will pass, for example, if both the loss and
    gradient values produced by your implementation are 0s (as is the case when
    you have not implemented anything). Here is a detailed explanation of what
    gradient check is doing if you would like some further clarification:
    http://ufldl.stanford.edu/tutorial/supervised/DebuggingGradientChecking/. 
    """
    rndstate = random.getstate()
    random.setstate(rndstate)
    fx, grad = f(x) # Evaluate function value at original point
    h = 1e-4        # Do not change this!

    # Iterate over all indexes ix in x to check the gradient.
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        ix = it.multi_index

        x[ix] += h # increment by h
        random.setstate(rndstate)
        fxh, _ = f(x) # evalute f(x + h)
        x[ix] -= 2 * h # restore to previous value (very important!)
        random.setstate(rndstate)
        fxnh, _ = f(x)
        x[ix] += h
        numgrad = (fxh - fxnh) / 2 / h

        # Compare gradients
        reldiff = abs(numgrad - grad[ix]) / max(1, abs(numgrad), abs(grad[ix]))
        if reldiff > 1e-6:
            print("Gradient check failed for %s." % gradientText)
            print("First gradient error found at index %s in the vector of gradients" % str(ix))
            print("Your gradient: %f \t Numerical gradient: %f" % (
                grad[ix], numgrad))
            return

        it.iternext() # Step to next dimension

    print("Gradient check passed!. Read the docstring of the `gradcheck_naive`"
    " method in utils.gradcheck.py to understand what the gradient check does.")

In [746]:
def normalizeRows(x):
    """ Row normalization function

    Implement a function that normalizes each row of a matrix to have
    unit length.
    """
    N = x.shape[0]
    x /= np.sqrt(np.sum(x**2, axis=1)).reshape((N,1)) + 1e-30
    return x


In [761]:
def getDummyObjects():
    """ Helper method for naiveSoftmaxLossAndGradient and negSamplingLossAndGradient tests """

    def dummySampleTokenIdx():
        return random.randint(0, 4)

    def getRandomContext(C):
        tokens = ["a", "b", "c", "d", "e"]
        return tokens[random.randint(0,4)], \
            [tokens[random.randint(0,4)] for i in range(2*C)]

    dataset = type('dummy', (), {})()
    dataset.sampleTokenIdx = dummySampleTokenIdx
    dataset.getRandomContext = getRandomContext

    random.seed(31415)
    np.random.seed(9265)
    dummy_vectors = normalizeRows(np.random.randn(10,3))
    dummy_tokens = dict([("a",0), ("b",1), ("c",2),("d",3),("e",4)])

    return dataset, dummy_vectors, dummy_tokens


In [768]:
def naiveSoftmaxLossAndGradient(
    centerWordVec,
    outsideWordIdx,
    outsideVectors,
    dataset
):
    ### YOUR CODE HERE (~6-8 Lines)
    y_=softmax(np.dot(outsideVectors,centerWordVec))
    loss=-np.log(y_)[outsideWordIdx]
    y=np.zeros(outsideVectors.shape[0])
    y[outsideWordIdx]=1
    gradCenterVec=np.dot((y_-y),outsideVectors)
    
    sub=np.zeros_like(y_)
    sub[outsideWordIdx]=1
    y_sub=y_-sub
    gradOutsideVecs=np.array([centerWordVec * y_sub[i] for i in range(len(y_sub))])
    
    
    ### Please use the provided softmax function (imported earlier in this file)
    ### This numerically stable implementation helps you avoid issues pertaining
    ### to integer overflow. 

    ### END YOUR CODE
    
    return loss, gradCenterVec, gradOutsideVecs

In [769]:

def test_naiveSoftmaxLossAndGradient():
    """ Test naiveSoftmaxLossAndGradient """
    dataset, dummy_vectors, dummy_tokens = getDummyObjects()

    print("==== Gradient check for naiveSoftmaxLossAndGradient ====")
    def temp(vec):
        loss, gradCenterVec, gradOutsideVecs = naiveSoftmaxLossAndGradient(vec, 1, dummy_vectors, dataset)
        return loss, gradCenterVec
    gradcheck_naive(temp, np.random.randn(3), "naiveSoftmaxLossAndGradient gradCenterVec")

    centerVec = np.random.randn(3)
    def temp(vec):
        loss, gradCenterVec, gradOutsideVecs = naiveSoftmaxLossAndGradient(centerVec, 1, vec, dataset)
        return loss, gradOutsideVecs
    gradcheck_naive(temp, dummy_vectors, "naiveSoftmaxLossAndGradient gradOutsideVecs")

In [770]:
test_naiveSoftmaxLossAndGradient()

==== Gradient check for naiveSoftmaxLossAndGradient ====
Gradient check passed!. Read the docstring of the `gradcheck_naive` method in utils.gradcheck.py to understand what the gradient check does.
Gradient check passed!. Read the docstring of the `gradcheck_naive` method in utils.gradcheck.py to understand what the gradient check does.


In [890]:
def negSamplingLossAndGradient(
    centerWordVec,
    outsideWordIdx,
    outsideVectors,
    dataset,
    K=10
):
    """ Negative sampling loss function for word2vec models

    Implement the negative sampling loss and gradients for a centerWordVec
    and a outsideWordIdx word vector as a building block for word2vec
    models. K is the number of negative samples to take.

    Note: The same word may be negatively sampled multiple times. For
    example if an outside word is sampled twice, you shall have to
    double count the gradient with respect to this word. Thrice if
    it was sampled three times, and so forth.

    Arguments/Return Specifications: same as naiveSoftmaxLossAndGradient
    """

    # Negative sampling of words is done for you. Do not modify this if you
    # wish to match the autograder and receive points!
    negSampleWordIndices = getNegativeSamples(outsideWordIdx, dataset, K)
    indices = [outsideWordIdx] + negSampleWordIndices

    ### YOUR CODE HERE (~10 Lines)
    ### Please use your implementation of sigmoid in here.
    tmpl=sigmoid(np.dot(outsideVectors[outsideWordIdx],centerWordVec))
    tmpr=sigmoid(np.dot(-np.concatenate([outsideVectors[negSampleWordIndices]]),centerWordVec))
    loss=-np.log(tmpl)-np.log(tmpr).sum()
    
    gradCenterVec=((1-tmpr) @ np.concatenate([outsideVectors[negSampleWordIndices]]))-outsideVectors[outsideWordIdx]*(1-tmpl)
    
    gradu0=-centerWordVec * (1-tmpl)
    graduw=np.array([centerWordVec * (1-i) for i in tmpr])
    grad=np.concatenate([gradu0.reshape(1,-1),graduw])
    gradOutsideVecs=np.zeros_like(outsideVectors)
    
    
    for k,v in enumerate(indices):
        gradOutsideVecs[v]+=grad[k]
    ### END YOUR CODE
    return loss, gradCenterVec, gradOutsideVecs


In [891]:
def getNegativeSamples(outsideWordIdx, dataset, K):
    """ Samples K indexes which are not the outsideWordIdx """

    negSampleWordIndices = [None] * K
    for k in range(K):
        newidx = dataset.sampleTokenIdx()
        while newidx == outsideWordIdx:
            newidx = dataset.sampleTokenIdx()
        negSampleWordIndices[k] = newidx
    return negSampleWordIndices


In [892]:
centerVec = np.random.randn(3)
def temp(vec):
        loss, gradCenterVec, gradOutsideVecs = negSamplingLossAndGradient(centerVec, 1, vec, dataset)
        return loss, gradOutsideVecs
gradcheck_naive(temp, dummy_vectors, "negSamplingLossAndGradient gradOutsideVecs")

Gradient check passed!. Read the docstring of the `gradcheck_naive` method in utils.gradcheck.py to understand what the gradient check does.


In [881]:
dummy_vectors.shape

(10, 3)

AttributeError: 'dummy' object has no attribute 'shape'

In [888]:
i=1

In [889]:
i++

SyntaxError: invalid syntax (1074925126.py, line 1)