# FToDTF - FastText on Distributed TensorFlow

In [9]:
from fasttext import FastText

In [10]:
import tensorflow as tf
import os

This demonstration uses the SICK (Sentences Involving Compositional Knowledge) dataset.

In [11]:
# Restrict TensorFlow logging to ERROR and above.
tf.logging.set_verbosity(tf.logging.ERROR)

# Paths are resolved against the directory the notebook is launched from.
dirpath = os.getcwd()
datapath = os.path.join(dirpath, "data", "sick.xls")  # Path to dataset
# The trailing "" component keeps the trailing path separator on the directory.
modelpath = os.path.join(dirpath, "models", "log_sick", "log", "")  # Path to pre-trained model

Initialise an instance of the `FastText` class with the dataset path and the model path.

In [12]:
# Construct the FastText object from the corpus path and the model directory.
fasttext_obj = FastText(
    input_corpus_path=datapath,
    modelpath=modelpath,
)

The `read_Dataset` method processes the dataset and returns the paths to the processed training and testing files.

In [13]:
# read_Dataset yields two paths: the processed training file and the processed
# test file. The second argument, 'sick', names the dataset being read.
train_data_path, test_data_path = fasttext_obj.read_Dataset(datapath, 'sick')

The dataset has been split into train and test based on labels in input dataset
85818
The input has been pre-processed.
The input has been written to batches - ready to train.


The `train` method trains the model. Here it loads a pre-trained model from `modelpath` instead, to save time.

In [14]:
# train() returns two values, unpacked here as `model` and `dict_map`; both are
# handed to predict_embedding, predict_similarity and evaluate in later cells.
model, dict_map = fasttext_obj.train(train_data_path)

Training starting..
Loading a pre-trained model from /home/khadutz95/FToDTF/models/log_sick/log/
85818


The `predict_embedding` method predicts embeddings for words as well as sentences. Here it is called with the single word 'man'.

In [15]:
# Request the embedding for a one-element list containing the word 'man'.
pred_embedding = fasttext_obj.predict_embedding(['man'], model, dict_map)
print('Predicted embedding is', pred_embedding)

Predicted embedding is [  9.53503419e-04  -1.66640175e-03   5.08175232e-03  -8.79818574e-04
  -2.76247971e-03  -3.41044576e-03  -1.84082473e-03   3.02710384e-03
  -4.98916674e-03   2.95923464e-03  -1.99349364e-03  -5.37431613e-03
   4.69870958e-03   4.09289077e-03   1.78318517e-03  -7.15276226e-04
  -3.60754319e-03   4.91821580e-03  -3.34097072e-04  -6.76173251e-04
  -4.77561029e-03   5.54034486e-04  -1.19791133e-03  -3.01777385e-04
  -2.07121298e-03  -4.06620512e-03  -1.08396821e-03  -5.02670091e-03
  -4.76232870e-03  -2.46936642e-03   5.32076973e-03   2.35938001e-04
   9.84408427e-04   3.70168686e-03   5.09325415e-04  -2.38307449e-03
  -4.14777501e-03   1.08828209e-03  -2.48081726e-03  -4.80687711e-03
   3.57740466e-03   1.69749977e-03  -2.29056319e-03  -3.89188994e-04
  -2.88278796e-04   3.85188498e-04  -5.25151379e-04   2.68329866e-03
   4.07350808e-03  -1.58096058e-03  -3.31692281e-03  -5.01742447e-03
   4.31086216e-03  -2.04864983e-03  -1.83256017e-03   5.25092985e-03
  -2.227889

The `predict_similarity` method predicts the similarity between two input sentences.

In [16]:
# Score how similar the two sentences are using the loaded model.
similarity = fasttext_obj.predict_similarity(
    'A girl is styling her hair',
    'A girl is brushing her hair',
    model,
    dict_map,
)
# The result is nested, so [0][0] extracts the single similarity value.
print(similarity[0][0])

0.816604239473


The `evaluate` method scores the model on the SICK test data.

In [17]:
# evaluate() returns three values for the test split; they are printed below
# under the labels Pearson Score, Mean Square Error and Spearman Correlation.
eval_score, mean_score, spearman_score = fasttext_obj.evaluate(
    model,
    test_data_path,
    'sick',
    dict_map,
)
print('Pearson Score', eval_score)
print('Mean Square Error', mean_score)
print('Spearman Correlation', spearman_score)

Pearson Score 0.534617116275
Mean Square Error 0.0753853091228
Spearman Correlation 0.506368980195
