In [None]:
import torch
import torch.nn as nn
import torch.autograd as ag
from torch.autograd import Variable

# Use .clone() to take an independent copy of a tensor's data: later in-place
# changes to the original (such as the weight update below) do not show up in
# the copy.

dtype = torch.FloatTensor
# dtype = torch.cuda.FloatTensor # Uncomment this to run on GPU

# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10

# Create random Tensors to hold input and outputs, and wrap them in Variables.
# Setting requires_grad=False indicates that we do not need to compute gradients
# with respect to these Variables during the backward pass.
x = Variable(torch.randn(N, D_in).type(dtype), requires_grad=False)
y = Variable(torch.randn(N, D_out).type(dtype), requires_grad=False)

# Create random Tensors for weights, and wrap them in Variables.
# Setting requires_grad=True indicates that we want to compute gradients with
# respect to these Variables during the backward pass.
w1 = Variable(torch.randn(D_in, H).type(dtype), requires_grad=True)
w2 = Variable(torch.randn(H, D_out).type(dtype), requires_grad=True)

learning_rate = 1e-6
for t in range(100):
    # Forward pass: compute predicted y using operations on Variables; these
    # are exactly the same operations we used to compute the forward pass using
    # Tensors, but we do not need to keep references to intermediate values since
    # we are not implementing the backward pass by hand.
    y_pred = x.mm(w1).clamp(min=0).mm(w2)

    # Compute and print loss using operations on Variables.
    # loss holds the summed squared error. Its value is read through loss.data;
    # reducing with .sum() and wrapping in float() yields a plain Python number
    # whether loss.data is a one-element or a zero-dimensional Tensor (indexing
    # a zero-dimensional Tensor with [0] raises in newer PyTorch releases).
    loss = (y_pred - y).pow(2).sum()
    print(t, float(loss.data.sum()))

    # Use autograd to compute the backward pass. This call will compute the
    # gradient of loss with respect to all Variables with requires_grad=True.
    # After this call w1.grad and w2.grad will be Variables holding the gradient
    # of the loss with respect to w1 and w2 respectively.
    loss.backward()

    # Snapshot w1 before it is updated; after the loop r therefore holds the
    # weights as they were just before the final update step.
    r = w1.data.clone()

    # Update weights using gradient descent; w1.data and w2.data are Tensors,
    # w1.grad and w2.grad are Variables and w1.grad.data and w2.grad.data are
    # Tensors.
    w1.data -= learning_rate * w1.grad.data
    w2.data -= learning_rate * w2.grad.data

    # Manually zero the gradients after updating weights
    w1.grad.data.zero_()
    w2.grad.data.zero_()

# Single-argument print(...) produces the same output on Python 2 and Python 3.
print(r)
print(w1)

In [None]:
# Check autograd on a one-element example: the prediction is 4 * x and it is
# compared against the target z with mean squared error.
x = ag.Variable(torch.Tensor([7]), requires_grad=True)
z_pred = x*4
z = ag.Variable(torch.Tensor([29]))
# loss is the criterion object; l is the loss value it produces.
loss = nn.MSELoss()
l = loss(z_pred,z)
l.backward()
# Expected gradient: d(l)/dx = 2 * (4*7 - 29) * 4 = -8.
# Single-argument print(...) produces the same output on Python 2 and Python 3.
print(x.grad.data)

In [None]:
def f1(x):
    """Return ``x`` squared, printing whether the result requires gradients.

    The ``requires_grad`` attribute of the squared value is printed before the
    value is returned, so ``x`` must be an object whose square exposes that
    attribute (the calls in this notebook pass autograd Variables).
    """
    y = x**2
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(y.requires_grad)
    return y

# Leaf Variable that the gradient is taken with respect to.
x = ag.Variable(torch.Tensor([7]), requires_grad=True)
# Target value; created without requires_grad.
z = ag.Variable(torch.Tensor([100]))

# f1 applied directly to x; v1 is printed but does not feed into the loss.
v1 = f1(x)

print(v1)
y=x*2
z_pred = f1(y)*4

# loss is the criterion object; l is the loss value it produces.
loss = nn.MSELoss()
l = loss(z_pred,z)
l.backward()
# z_pred = 4 * (2*x)**2 = 16 * x**2, so d(l)/dx = 2 * (z_pred - z) * 32 * x.
# Single-argument print(...) produces the same output on Python 2 and Python 3.
print(x.grad.data)
print(z.requires_grad)

In [27]:
# Build a small subset of the Friends dialogue dataset to experiment with the
# pipeline: copy the leading lines of the source file into testd.txt.
# The with-block closes both files even if reading or writing fails part-way.
with open('Friends-dialogues.txt','r') as fr, open('testd.txt','w') as fw:
    c = 0
    for l in fr:
        c+=1
        # c is incremented before the check, so at most 1000 lines are written
        # and c ends at 1001 when the source has more lines than that.
        if c>1000:
            break
        fw.write(l)
# Single-argument print(...) produces the same output on Python 2 and Python 3.
print(c)

1001


In [39]:
# Build the token counter that is passed on to the vocab constructor.
from collections import Counter

vc = Counter()
# The with-block guarantees the subset file is closed once counting finishes.
with open('testd.txt','r') as f:
    for l in f:
        # Counter.update tallies the elements of an iterable directly, so the
        # whitespace-split tokens of each line can be passed in as-is.
        vc.update(l.split())

In [48]:
# Create the vocab object from the token counter and time how long it takes.
from datetime import datetime
import vocab
st = datetime.now()
# vc is the token Counter built in the preceding cell.
vcb = vocab.Vocab(vc, wv_type = "glove.840B",min_freq=4)
# Elapsed construction time; single-argument print(...) produces the same
# output on Python 2 and Python 3.
print(datetime.now()-st)

loading word vectors from /home/rohith/Documents/NLP/Dialogue/glove.840B.300d.pt
0:00:02.695216


In [None]:
# Show the current timestamp (datetime is imported in the vocab-building cell).
# Single-argument print(...) produces the same output on Python 2 and Python 3.
print(datetime.now())


In [49]:
# Number of entries in the vocab object built from the token counter.
# Single-argument print(...) produces the same output on Python 2 and Python 3.
print(len(vcb))


302


In [50]:
# Total number of tokens tallied in vc (the sum of all per-token counts).
sum(count for count in vc.values())

6694

In [54]:
# Look up a made-up string and fetch the vector stored at its index.
# NOTE(review): presumably an out-of-vocabulary probe; the recorded output
# shows the string mapping to index 0 -- confirm against the vocab module.
t1 = vcb.stoi['asdewrfg']
t2 = vcb.vectors[t1]
# Join the two values with one space, as the Python 2 statement `print t1,t2`
# did, using a form that is also valid on Python 3.
print(str(t1) + ' ' + str(t2))

0 
 0.8716
 0.2908
 0.0019
 1.5223
-0.1070
 0.8144
 0.4027
-2.5946
 0.1252
-0.8074
 1.4320
-0.0174
-0.3091
-0.4098
 1.1060
-0.4929
 0.6959
-1.1187
-1.5049
 0.9039
-0.0105
 0.3491
-1.8101
 0.7075
-1.3758
 0.8249
 0.4382
 0.4464
 0.5642
 1.5773
-1.7994
-0.6132
 1.3629
-1.9990
 0.5763
-0.2677
-1.2049
 1.3254
-0.3328
 0.5087
-1.0881
-0.0032
 0.5018
 0.8686
 0.3798
-1.1093
-1.4197
-0.4378
 0.2340
-1.9110
-0.9681
-0.8744
 0.7364
 0.7422
 0.0935
-1.2482
-0.3731
 0.8598
-0.5227
 0.4105
-0.9191
-0.0411
 0.4965
-1.2949
-1.1591
-0.8937
 1.0341
-0.7163
-0.1538
 1.0399
-0.5660
-0.8800
-1.2356
-1.7444
 0.1662
-1.4279
 1.2391
-0.3425
-1.2758
-1.1501
 0.3714
-0.1100
-0.7125
-0.0806
-1.4208
-0.6153
-0.4746
 0.7826
-0.1784
-0.7601
 2.4096
-1.0755
-1.2065
-1.1670
 0.7543
-1.0266
 0.1047
-1.9392
-1.6212
-0.9009
-0.2818
 1.8831
 0.2788
 0.7098
 1.5300
 0.1875
 2.2754
-2.4403
-2.2746
-0.1048
 0.9708
 0.7532
-1.0815
 1.4522
-0.6690
 0.3554
-0.8030
-0.2798
 1.7356
-0.9828
-0.2455
-1.5503
-3.3189
 0.3010
 1.47

In [None]:
# Build a small subset of the Friends dialogue dataset to experiment with the
# pipeline: copy the leading lines of the source file into testd.txt.
# NOTE(review): this cell repeats the subset-building cell earlier in the
# notebook; running it again rewrites testd.txt.
# The with-block closes both files even if reading or writing fails part-way.
with open('Friends-dialogues.txt','r') as fr, open('testd.txt','w') as fw:
    c = 0
    for l in fr:
        c+=1
        # c is incremented before the check, so at most 1000 lines are written
        # and c ends at 1001 when the source has more lines than that.
        if c>1000:
            break
        fw.write(l)
# Single-argument print(...) produces the same output on Python 2 and Python 3.
print(c)