# **FastText** - Predictions
This notebook downloads an already-trained fastText supervised model and uses it to generate predictions on the test set.


## Sources
Uses Facebook AI Research's Python implementation of fastText: https://github.com/facebookresearch/fastText/tree/master/python
## Reproducibility
After running this notebook, you will obtain the model used for Submission **#109984** on AIcrowd

| Accuracy | F1 |
|:---:|:---:|
| 85.9% | 86.0% |


### Loading the trained model

In [1]:
import numpy as np 
import pandas as pd 
import fasttext_models as mod
import os 
import wget
import fasttext

# Local directory and file name under which the trained fastText model is stored.
root = 'data/'
model_filename = root + "fasttext_trained_model.bin"
# NOTE(review): the original author tagged this link as "OLD URL" -- confirm it
# still resolves before relying on it.
model_url = 'https://api.onedrive.com/v1.0/shares/u!aHR0cHM6Ly8xZHJ2Lm1zL3UvcyFBclREZ3U5ejdJT1ZqcU0yOGxGTDgya0l4OGNlNGc_ZT1DMnNy/root/content'

# Make sure the target directory exists before anything is written into it.
os.makedirs(root, exist_ok=True)

# The model file is about 1 GB, so fetch it only when it is not already on
# disk; re-running the cell then reuses the existing copy.
if not os.path.exists(model_filename):
    wget.download(model_url, model_filename)

# Load the trained model; `model` is used by the prediction cell below.
model = fasttext.load_model(model_filename)

  0% [                                                                    ]          0 / 1047971507  0% [                                                                    ]       8192 / 1047971507  0% [                                                                    ]      16384 / 1047971507  0% [                                                                    ]      24576 / 1047971507  0% [                                                                    ]      32768 / 1047971507  0% [                                                                    ]      40960 / 1047971507  0% [                                                                    ]      49152 / 1047971507  0% [                                                                    ]      57344 / 1047971507  0% [                                                                    ]      65536 / 1047971507  0% [                                                                    ]      73728 / 1047971507

  0% [                                                                    ]    1245184 / 1047971507  0% [                                                                    ]    1253376 / 1047971507  0% [                                                                    ]    1261568 / 1047971507  0% [                                                                    ]    1269760 / 1047971507  0% [                                                                    ]    1277952 / 1047971507  0% [                                                                    ]    1286144 / 1047971507  0% [                                                                    ]    1294336 / 1047971507  0% [                                                                    ]    1302528 / 1047971507  0% [                                                                    ]    1310720 / 1047971507  0% [                                                                    ]    1318912 / 1047971507

  0% [                                                                    ]    1949696 / 1047971507  0% [                                                                    ]    1957888 / 1047971507  0% [                                                                    ]    1966080 / 1047971507  0% [                                                                    ]    1974272 / 1047971507  0% [                                                                    ]    1982464 / 1047971507  0% [                                                                    ]    1990656 / 1047971507  0% [                                                                    ]    1998848 / 1047971507  0% [                                                                    ]    2007040 / 1047971507  0% [                                                                    ]    2015232 / 1047971507  0% [                                                                    ]    2023424 / 1047971507

  0% [                                                                    ]    3055616 / 1047971507  0% [                                                                    ]    3063808 / 1047971507  0% [                                                                    ]    3072000 / 1047971507  0% [                                                                    ]    3080192 / 1047971507  0% [                                                                    ]    3088384 / 1047971507  0% [                                                                    ]    3096576 / 1047971507  0% [                                                                    ]    3104768 / 1047971507  0% [                                                                    ]    3112960 / 1047971507  0% [                                                                    ]    3121152 / 1047971507  0% [                                                                    ]    3129344 / 1047971507

  0% [                                                                    ]    4423680 / 1047971507  0% [                                                                    ]    4431872 / 1047971507  0% [                                                                    ]    4440064 / 1047971507  0% [                                                                    ]    4448256 / 1047971507  0% [                                                                    ]    4456448 / 1047971507  0% [                                                                    ]    4464640 / 1047971507  0% [                                                                    ]    4472832 / 1047971507  0% [                                                                    ]    4481024 / 1047971507  0% [                                                                    ]    4489216 / 1047971507  0% [                                                                    ]    4497408 / 1047971507

  0% [                                                                    ]    5971968 / 1047971507  0% [                                                                    ]    5980160 / 1047971507  0% [                                                                    ]    5988352 / 1047971507  0% [                                                                    ]    5996544 / 1047971507  0% [                                                                    ]    6004736 / 1047971507  0% [                                                                    ]    6012928 / 1047971507  0% [                                                                    ]    6021120 / 1047971507  0% [                                                                    ]    6029312 / 1047971507  0% [                                                                    ]    6037504 / 1047971507  0% [                                                                    ]    6045696 / 1047971507

  0% [                                                                    ]    7962624 / 1047971507  0% [                                                                    ]    7970816 / 1047971507  0% [                                                                    ]    7979008 / 1047971507  0% [                                                                    ]    7987200 / 1047971507  0% [                                                                    ]    7995392 / 1047971507  0% [                                                                    ]    8003584 / 1047971507  0% [                                                                    ]    8011776 / 1047971507  0% [                                                                    ]    8019968 / 1047971507  0% [                                                                    ]    8028160 / 1047971507  0% [                                                                    ]    8036352 / 1047971507

  0% [                                                                    ]    9084928 / 1047971507  0% [                                                                    ]    9093120 / 1047971507  0% [                                                                    ]    9101312 / 1047971507  0% [                                                                    ]    9109504 / 1047971507  0% [                                                                    ]    9117696 / 1047971507  0% [                                                                    ]    9125888 / 1047971507  0% [                                                                    ]    9134080 / 1047971507  0% [                                                                    ]    9142272 / 1047971507  0% [                                                                    ]    9150464 / 1047971507  0% [                                                                    ]    9158656 / 1047971507

100% [....................................................................] 1047971507 / 1047971507



### Make predictions on the test set

In [2]:
# Prepare test set
test_url = 'https://api.onedrive.com/v1.0/shares/u!aHR0cHM6Ly8xZHJ2Lm1zL3QvcyFBclREZ3U5ejdJT1ZqcDR5Q3hoWXM4T2FJd1JLenc_ZT1hSXh0/root/content'
test_filename = root + 'test.txt'
# Reuse the test file when it is already on disk instead of downloading it again.
if not os.path.exists(test_filename):
    wget.download(test_url, test_filename)

# Each line starts with an index followed by a comma; keep only the text after it.
test_tweets = []
with open(test_filename, encoding='utf-8') as f:
    for line in f:
        # Cut at the first comma only, so commas inside the tweet are preserved.
        _, _, tweet = line.partition(',')
        # Strip just the line terminator: a final line without a trailing
        # newline must not lose its last real character.
        test_tweets.append(tweet.rstrip('\n'))

# Generate predictions
# Map the fastText label strings onto the integer classes.
res = {'__label__0': 0, '__label__1': 1}
# predict(...)[0] holds the predicted labels per tweet; with k=1 the first
# entry of each is the top label.
predictions = np.array([res[el[0]] for el in model.predict(test_tweets, k=1)[0]])

# Save predictions
save_filename = 'submission_fasttext_predictions.csv'
mod.save_pred(save_filename, predictions)

  0% [                                                                            ]      0 / 817297  1% [                                                                            ]   8192 / 817297  2% [.                                                                           ]  16384 / 817297  3% [..                                                                          ]  24576 / 817297  4% [...                                                                         ]  32768 / 817297  5% [...                                                                         ]  40960 / 817297  6% [....                                                                        ]  49152 / 817297  7% [.....                                                                       ]  57344 / 817297  8% [......                                                                      ]  65536 / 817297  9% [......                                                                      ]  73728 / 817297

 98% [..........................................................................  ] 802816 / 817297 99% [........................................................................... ] 811008 / 817297100% [............................................................................] 817297 / 817297