## Class formation

In [1]:
import numpy as np

class FastVector:
    """
    Minimal wrapper for fastvector embeddings.
    ```
    Usage:
        $ model = FastVector(vector_file='/path/to/wiki.en.vec')
        $ 'apple' in model
        > TRUE
        $ model['apple'].shape
        > (300,)
    ```
    """

    def __init__(self, vector_file='', transform=None):
        """Read in word vectors in fasttext format"""
        self.word2id = {}

        # Captures word order, for export() and translate methods
        self.id2word = []

        print('reading word vectors from %s' % vector_file)
        with open(vector_file, 'r') as f:
	    print ('1') 
            (self.n_words, self.n_dim) = \
            (int(x) for x in f.readline().rstrip('\n').split(' '))
            self.embed = np.zeros((self.n_words, self.n_dim))
            for i, line in enumerate(f):
                elems = line.rstrip('\n').split(' ')
                self.word2id[elems[0]] = i
                #print (elems[0])
                #ini=np.random.rand(300)
                self.embed[i] = elems[1:self.n_dim+1]
                self.id2word.append(elems[0])
        
        # Used in translate_inverted_softmax()
        self.softmax_denominators = None
        
        if transform is not None:
            print('Applying transformation to embedding')
            self.apply_transform(transform)
    
    def apply_cop(self, matrix,i):
        self.embed[i]=matrix[:]
    
    def export(self, outpath):
        """
        Transforming a large matrix of WordVectors is expensive. 
        This method lets you write the transformed matrix back to a file for future use
        :param The path to the output file to be written 
        """
        fout = open(outpath, "w")

        # Header takes the guesswork out of loading by recording how many lines, vector dims
        fout.write(str(self.n_words) + " " + str(self.n_dim) + "\n")
        for token in self.id2word:
            vector_components = ["%.6f" % number for number in self[token]]
            vector_as_string = " ".join(vector_components)

            out_line = token + " " + vector_as_string + "\n"
            fout.write(out_line)

        fout.close()
    
    
    @classmethod
    
    def __contains__(self, key):
        return key in self.word2id

    def __getitem__(self, key):
        return self.embed[self.word2id[key]]

In [2]:
import numpy as np

## convert to matrices

## import the word embeddings

In [5]:
#en_dictionary = FastVector1(vector_file='/home/apatra/fastText/fastText_multilingual-master/eng.vec')
mi_dictionary = FastVector(vector_file='/home/apatra/fastText/fastText_multilingual-master/model.vec')

#en_vector = en_dictionary["one"]
mi_vector = mi_dictionary["newt"]
#print(cosine_similarity(en_vector, mi_vector))
print mi_vector

reading word vectors from /home/apatra/fastText/fastText_multilingual-master/model.vec
1
[ 0.23514   -0.062265  -0.44914   -0.46058   -0.32173    0.22479
  0.59574   -0.16793   -0.20575    0.20684    0.32242   -0.10982
  0.21008   -0.5516    -0.22603    0.34196    0.082802  -0.34114
 -0.53359   -0.23387   -0.56995   -0.36627    0.57409    0.37144
 -0.021772   0.28574    0.40112   -0.23364   -0.44666   -0.27884
 -0.010244   0.47763    0.33714    0.10676   -0.30984   -0.14813
  0.19915   -0.15576   -0.14592   -0.64859    0.52664   -0.27724
 -0.0010243  0.41665   -0.28545   -0.022963   0.62326   -0.12353
 -0.37234   -0.064643   0.26404    0.25081   -0.39588   -0.033541
  0.29877   -0.43526    0.33414   -0.65083   -0.38522   -0.36271
 -0.24256    0.4016    -0.073898   0.13294    0.052566  -0.15048
  0.01158    0.65601   -0.32772    0.11772   -0.45992   -0.099391
  0.044374  -0.025823   0.23652    0.23804    0.086746  -0.093185
 -0.45943   -0.07592    0.23819   -0.20339    0.07404    0.2553

## Exporting the vecor to file format

In [6]:
mi_dictionary.export('/home/apatra/fastText/fastText_multilingual-master/micmaq2.vec')