In [1]:
import os
import numpy as np

# Directory holding the per-sentence data files (trailing slash kept on purpose:
# other code may build paths by plain string concatenation).
datapath = 'data/'
# os.listdir returns entries in arbitrary, filesystem-dependent order; sort them so
# that positional indexing into `files` refers to the same file on every machine/run.
files = sorted(os.listdir(datapath))
def readFile(s):
    """Read one comma-separated data file and return its labels and features.

    Parameters
    ----------
    s : int
        Position of the file in the module-level ``files`` list; the file is
        looked up inside ``datapath``.

    Returns
    -------
    ys : list of int
        One tag per row, taken from column 1 and shifted to be 0-based.
        Per the original author's note the tags distinguish {verb, noun,
        adjective, adverb, preposition, pronoun, determiner, number,
        punctuation, other}.
    xs : list of list of int
        One 5-element feature vector per row, taken from columns 2..6.
        Columns 2, 5 and 6 are shifted by -1; columns 3 and 4 are used as is.

    Notes
    -----
    Column 0 is not used. Blank lines (e.g. a trailing empty line) are skipped
    instead of raising IndexError.
    """
    ys, xs = [], []
    # `with` guarantees the handle is closed even if a row fails to parse.
    with open(os.path.join(datapath, files[s]), 'r') as fh:
        for rawline in fh:
            if not rawline.strip():
                continue
            l = rawline.strip('\n').split(',')
            ys.append(int(l[1])-1)
            xs.append([int(l[2])-1, int(l[3]), int(l[4]), int(l[5])-1, int(l[6])-1])
    return ys, xs

In [2]:
import pyGM as gm
from pyGM import wmb
# Number of distinct tag values; every parameter table below has this many rows.
num_tags = 10
# Number of possible values for each of the five features in x.
feature_sizes = [1,2,2,201,201]
# Small random initial weights.
# NOTE(review): no RNG seed is set, so these values (and the file order used
# during training) differ from run to run.
# ThetaF[f][y, v]: weight tying tag y to value v of feature f.
ThetaF = [.001*np.random.rand(num_tags,feature_sizes[f]) for f in range(len(feature_sizes))]
# ThetaP[y, y']: weight tying the tag of one word to the tag of the next word.
ThetaP = .001*np.random.rand(num_tags,num_tags)
# 0/1 loss table: 0 on the diagonal (tags equal), 1 everywhere else.
Loss = 1.0 - np.eye(num_tags)

num_iter = 5                  # number of passes over the data
hamming = np.zeros(num_iter)  # per-pass accumulator for the Hamming loss
hinge = np.zeros(num_iter)    # per-pass accumulator for the hinge loss
eta = 0.01                    # weight of the squared-norm (L2) penalty on the parameters
alpha = 0.5                   # scale applied to each feature-count difference in the gradient
print (ThetaF[1][:,1])

[ 0.00091917  0.00050669  0.00072867  0.00030579  0.00090685  0.00027299
  0.00043459  0.00039775  0.00084228  0.00069654]


In [3]:
# Training loop: each iteration visits the data files in a fresh random order and,
# after every file, takes one gradient-style step on ThetaF / ThetaP using the
# loss-augmented decoding of that file.
# The loop variable is named `epoch` (not `iter`) so the builtin iter() is not shadowed.
for epoch in range(num_iter):
    print ('iteration:',epoch+1)
    N = 0            # words processed so far in this iteration
    num_files = 0    # files processed so far in this iteration
    # NOTE(review): the permutation covers 0..len(files)-2 and readFile receives s+1,
    # so files[0] is never read -- confirm that skipping it is intentional.
    for s in np.random.permutation(len(files)-1):
        # load data ys, xs
        ys,xs = readFile(s+1)
        ns = len(ys)
        if ns == 0:
            # Empty file: nothing to decode, and the parameter update divides by ns.
            continue
        # update the running number of words and files
        N += ns
        num_files += 1
        # Define random variables for the inference process:
        Y = [gm.Var(i,10) for i in range(ns)]
        # Build "prediction model" using parameters
        factors = []
        for i in range(ns):
            for j in range(len(feature_sizes)):
                # factor between y_i and feature j of x_i
                factor = gm.Factor([Y[i]],1)
                factor.table = np.exp(ThetaF[j][:,xs[i][j]])
                factors.append(factor)
            if i < ns-1:
                # factor between y_i and y_{i+1}
                factor = gm.Factor([Y[i],Y[i+1]],1)
                factor.table = np.exp(ThetaP)
                factors.append(factor)
        model_pred = gm.GraphModel(factors)
        # Copy factors and add extra Hamming factors for loss-augmented model
        factors_aug = [ f for f in factors ]
        factors_aug.extend( [gm.Factor([Y[i]],Loss[:,ys[i]]).exp() for i in range(ns)] )
        model_aug = gm.GraphModel(factors_aug)
        order = range(ns)  # eliminate in sequence (Markov chain on y)
        wt = 1e-4          # for max elimination in JTree implementation
        # Now, the most likely configuration of the prediction model (for prediction) is:
        yhat_pred = wmb.JTree(model_pred,order,wt).argmax()
        # Flatten the argmax result into a plain list by iterating over its keys.
        # presumably the keys iterate in word order -- TODO confirm against pyGM
        yhat_pred = [yhat_pred[i] for i in yhat_pred]
        # and the maximizing argument of the loss (for computing the gradient) is
        yhat_aug = wmb.JTree(model_aug,order,wt).argmax()
        yhat_aug = [yhat_aug[i] for i in yhat_aug]

        # hamming loss: number of positions where the prediction differs from ys
        h = Loss[np.asarray(ys).astype(int),np.asarray(yhat_pred).astype(int)].sum()
        hamming[epoch] += h
        # hinge loss: compare the parameter score of the prediction with that of ys
        score_y_pred, score_ys = 0,0
        for i in range(ns):
            for j in range(len(feature_sizes)):
                score_y_pred += ThetaF[j][yhat_pred[i]][xs[i][j]]
                score_ys += ThetaF[j][ys[i]][xs[i][j]]
            if i < ns-1:
                score_y_pred += ThetaP[yhat_pred[i],yhat_pred[i+1]]
                score_ys += ThetaP[ys[i],ys[i+1]]

        # eta * (sum of squared norms of all parameter tables) is the penalty term
        norms = [np.linalg.norm(ThetaF[j]) for j in range(len(feature_sizes))]
        norms.append(np.linalg.norm(ThetaP))
        # NOTE(review): this is evaluated at yhat_pred, while the gradient below uses
        # yhat_aug; the penalty term is also added once per file -- confirm both are intended.
        hinge[epoch] += h + score_y_pred - score_ys + eta*np.dot(norms,norms)

        # use yhat_pred & ys to keep a running estimate of your prediction accuracy & print it
        # Printed only after this file's losses are accumulated, so the sums and N
        # cover exactly the same words.
        if num_files%1000==0:
            print ("Hamming Loss:", hamming[epoch]/N)
            print ("Hinge Loss:", hinge[epoch]/N)

        # Gradient: +alpha for every feature/transition used by the loss-augmented
        # decoding, -alpha for every one used by the true tags.
        GradF = [np.zeros((10,feature_sizes[f])) for f in range(len(feature_sizes))]
        GradP = np.zeros((10,10))

        for i in range(ns):
            for j in range(len(feature_sizes)):
                GradF[j][yhat_aug[i]][xs[i][j]] += alpha
                GradF[j][ys[i]][xs[i][j]] -= alpha
            if i < ns-1:
                GradP[yhat_aug[i],yhat_aug[i+1]] += alpha
                GradP[ys[i],ys[i+1]] -= alpha

        # add the gradient of the penalty term eta*||Theta||^2
        GradF = [2*eta*ThetaF[j] + GradF[j] for j in range(len(feature_sizes))]
        GradP = 2 * eta * ThetaP + GradP

        # descent step, scaled by the number of words in this file
        ThetaF = [ThetaF[j] - GradF[j]/ns for j in range(len(feature_sizes))]
        ThetaP = ThetaP - GradP/ns

    # per-word averages for this iteration
    hamming[epoch] /= N
    hinge[epoch] /= N
    print ("Hamming Loss:",hamming[epoch])
    print ("Hinge Loss:",hinge[epoch])
                

iteration: 1
Hamming Loss: 0.214035537331
Hinge Loss: 0.35472984079
Hamming Loss: 0.189165016291
Hinge Loss: 0.328147767777
Hamming Loss: 0.17969533114
Hinge Loss: 0.318159290174
Hamming Loss: 0.175443244561
Hinge Loss: 0.313868392742
Hamming Loss: 0.172144785318
Hinge Loss: 0.309634110046
Hamming Loss: 0.170705385353
Hinge Loss: 0.30837991972
Hamming Loss: 0.17071958536
Hinge Loss: 0.308403990904
iteration: 2
Hamming Loss: 0.157870114405
Hinge Loss: 0.293292049359
Hamming Loss: 0.160832605407
Hinge Loss: 0.297792016089
Hamming Loss: 0.159881158843
Hinge Loss: 0.296520737171
Hamming Loss: 0.16015944725
Hinge Loss: 0.297149909949
Hamming Loss: 0.159874581514
Hinge Loss: 0.296650060622
Hamming Loss: 0.160133480067
Hinge Loss: 0.297272907065
Hamming Loss: 0.160168980084
Hinge Loss: 0.29733358672
iteration: 3
Hamming Loss: 0.164468911364
Hinge Loss: 0.304327407501
Hamming Loss: 0.16047970167
Hinge Loss: 0.298034652409
Hamming Loss: 0.161490858741
Hinge Loss: 0.300404680944
Hamming Loss: 0.