In [5]:
!pip install fasttext



In [22]:
import fasttext

In [79]:
import fasttext

class ImplementFastText:
    """Small wrapper around a trained fastText classifier.

    The wrapper keeps a reference to a model object and exposes two
    operations on it: ``score`` (evaluate on a labelled test file) and
    ``predict`` (return the top tags for a question).

    Attributes:
        model: The model object set by ``load_model``; ``None`` until then.
        model_score: Raw value returned by the last ``model.test`` call.
        result: Raw value returned by the last ``model.predict`` call.
        precision, recall, f_score: Unrounded metrics from the last
            ``score`` call (0.0 before the first call).
    """

    def __init__(self):
        """Create a wrapper with no model attached and zeroed metrics."""
        self.model = None
        self.model_score = None
        self.result = None
        self.precision = 0.0
        self.recall = 0.0
        self.f_score = 0.0

    def load_model(self, model):
        """Attach an already-built model object to this wrapper.

        Args:
            model: Object exposing ``test(path)`` and ``predict(texts, k=...)``.
                To use a model stored on disk, load it first and pass the
                loaded object in.
        """
        self.model = model

    def score(self, test_path='./Data/fasttext/test.txt'):
        """Evaluate the attached model on a labelled test file.

        Args:
            test_path: Path handed to ``model.test``. Defaults to the
                notebook's test split.

        Returns:
            dict with ``num_samples`` plus ``precision``, ``recall`` and
            ``f_score`` rounded to three decimals.
        """
        self.model_score = self.model.test(test_path)

        num_samples = self.model_score[0]
        self.precision = self.model_score[1]
        self.recall = self.model_score[2]

        # Guard the harmonic mean: when both precision and recall are 0 the
        # denominator is 0 and the original expression raised
        # ZeroDivisionError. The F-score is defined as 0 in that case.
        denominator = self.precision + self.recall
        if denominator:
            self.f_score = 2 * ((self.precision * self.recall) / denominator)
        else:
            self.f_score = 0.0

        return {
            'num_samples': num_samples,
            'precision': round(self.precision, 3),
            'recall': round(self.recall, 3),
            'f_score': round(self.f_score, 3)
        }

    def predict(self, question, num_tags=5):
        """Return the top predicted tags with their probabilities.

        Args:
            question: Either a single text or a list of texts. A single
                string is wrapped in a list so the model always answers in
                its batched (list-of-lists) form. Only the predictions for
                the first text are returned.
            num_tags: Number of labels requested from the model (``k``).

        Returns:
            dict mapping tag name (``__label__`` prefix removed) to its
            probability rounded to three decimals. Empty when the model
            returns no predictions.
        """
        if isinstance(question, str):
            question = [question]

        self.result = self.model.predict(question, k=num_tags)
        all_labels, all_probabilities = self.result[0], self.result[1]

        # Nothing to unpack when no text was supplied.
        if len(all_labels) == 0:
            return {}

        tags = [label.replace('__label__', '') for label in all_labels[0]]
        # Cast to built-in float so the dict holds plain Python numbers
        # whatever numeric type the model hands back.
        probability_score = [
            round(float(probability), 3) for probability in all_probabilities[0]]

        return dict(zip(tags, probability_score))

In [80]:

def get_predictions(question, no_of_tags, model=None, model_path='./model.bin'):
    """Get predicted tags for a question together with the model's test scores.

    The original body called ``load_model()`` without the model argument it
    requires, so it always raised ``TypeError``. A model is now either
    supplied by the caller or loaded from ``model_path``.

    Args:
        question: Text to classify; it is passed to the model as a
            one-element list.
        no_of_tags: Number of tags to return.
        model: Optional already-trained model object. When ``None`` the
            model is read from ``model_path``.
        model_path: File to load when ``model`` is not given. The default
            mirrors the path in the commented-out line of
            ``ImplementFastText.load_model`` -- NOTE(review): confirm this
            is where the trained model is saved.

    Returns:
        Tuple ``(result, score)``: the tag-to-probability dict from
        ``ImplementFastText.predict`` and the metrics dict from
        ``ImplementFastText.score``.
    """
    if model is None:
        model = fasttext.load_model(model_path)

    ft_obj = ImplementFastText()
    ft_obj.load_model(model)
    result = ft_obj.predict([question], num_tags=no_of_tags)
    score = ft_obj.score()

    return result, score

In [81]:
# Hyper-parameter grid for the search.
# Dividing by 10 (instead of multiplying by 0.1) avoids drifted values such as
# 0.30000000000000004 ending up in the recorded results.
learning_rate = [step / 10 for step in range(1, 11)]  # 0.1 .. 1.0 in steps of 0.1
epochs = list(range(5, 55, 5))  # 5 .. 50 in steps of 5
ngrams = list(range(1, 6))  # 1 .. 5

In [84]:
# Exhaustive grid search: train one model per (lr, epoch, ngram) combination
# and record its test metrics as [lr, epoch, ngram, metrics_dict].
scores = []
for lr, epoch, ngram in (
    (rate, n_epochs, n_gram)
    for rate in learning_rate
    for n_epochs in epochs
    for n_gram in ngrams
):
    model = fasttext.train_supervised(
        input="./Data/fasttext/train.txt",
        lr=lr,
        epoch=epoch,
        wordNgrams=ngram,
        bucket=200000,
        dim=50,
        loss='ova',
    )
    ft_obj = ImplementFastText()
    ft_obj.load_model(model)
    score = ft_obj.score()
    scores.append([lr, epoch, ngram, score])

In [85]:
# Dump every grid-search record, one per line.
for record in scores:
    print(record)

[0.1, 5, 1, {'num_samples': 859, 'precision': 0.389, 'recall': 0.165, 'f_score': 0.232}]
[0.1, 5, 2, {'num_samples': 859, 'precision': 0.331, 'recall': 0.14, 'f_score': 0.197}]
[0.1, 5, 3, {'num_samples': 859, 'precision': 0.331, 'recall': 0.14, 'f_score': 0.197}]
[0.1, 5, 4, {'num_samples': 859, 'precision': 0.331, 'recall': 0.14, 'f_score': 0.197}]
[0.1, 5, 5, {'num_samples': 859, 'precision': 0.331, 'recall': 0.14, 'f_score': 0.197}]
[0.1, 10, 1, {'num_samples': 859, 'precision': 0.522, 'recall': 0.221, 'f_score': 0.311}]
[0.1, 10, 2, {'num_samples': 859, 'precision': 0.384, 'recall': 0.163, 'f_score': 0.229}]
[0.1, 10, 3, {'num_samples': 859, 'precision': 0.331, 'recall': 0.14, 'f_score': 0.197}]
[0.1, 10, 4, {'num_samples': 859, 'precision': 0.331, 'recall': 0.14, 'f_score': 0.197}]
[0.1, 10, 5, {'num_samples': 859, 'precision': 0.331, 'recall': 0.14, 'f_score': 0.197}]
[0.1, 15, 1, {'num_samples': 859, 'precision': 0.603, 'recall': 0.256, 'f_score': 0.359}]
[0.1, 15, 2, {'num_sam

In [89]:
# f_score of every grid-search record, in the same order as `scores`.
ff = [record[3]['f_score'] for record in scores]

In [90]:
# Highest f_score collected in `ff`; displayed as the cell output.
max(ff)

0.436

In [91]:
# Print every configuration that reaches the best f_score.
# The threshold is computed from `scores` instead of being a number pasted
# from an earlier run's output, so the cell still prints after a re-run that
# produces different metrics.
best_f_score = max((i[3]['f_score'] for i in scores), default=None)
for i in scores:
    if i[3]['f_score'] == best_f_score:
        print(i)

[1.0, 5, 1, {'num_samples': 859, 'precision': 0.732, 'recall': 0.311, 'f_score': 0.436}]


In [94]:
# recall of every grid-search record, in the same order as `scores`.
reca = [record[3]['recall'] for record in scores]

In [95]:
# Highest recall collected in `reca`; displayed as the cell output.
max(reca)

0.311

In [98]:
# Print every configuration that reaches the best recall.
# The threshold is computed from `scores` instead of being a number pasted
# from an earlier run's output, so the cell still prints after a re-run that
# produces different metrics.
best_recall = max((i[3]['recall'] for i in scores), default=None)
for i in scores:
    if i[3]['recall'] == best_recall:
        print(i)

[1.0, 5, 1, {'num_samples': 859, 'precision': 0.732, 'recall': 0.311, 'f_score': 0.436}]


In [106]:
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.stem import PorterStemmer
from nltk.tokenize import TreebankWordTokenizer
# Module-level NLTK helpers used by prep(): a Treebank word tokenizer and a
# WordNet lemmatizer (bound to the short name `n`).
tokenizer=TreebankWordTokenizer()
n=WordNetLemmatizer()

def prep(text):
    """Normalise raw text into a space-separated string of lemmas.

    Steps, in order: drop words of one or two characters (with any
    non-word characters directly before them), delete a few punctuation
    marks outright, turn the remaining punctuation, newlines and every
    non-letter run into spaces, lowercase, tokenize, lemmatize, and
    finally filter out English stop words plus a small custom list.

    Relies on the module-level ``tokenizer`` and ``n`` (lemmatizer).
    """
    # Short words go first, together with the non-word characters before them.
    text = re.sub(r'\W*\b\w{1,2}\b', '', text)
    # These characters are removed without leaving a gap ...
    text = re.sub(r'[?|!|\'|"|#|_]', '', text)
    # ... while these become a space so neighbouring words stay apart.
    text = re.sub(r'[,|.|;|:|(|)|{|}|\|/|<|>]|-', ' ', text)
    text = text.replace("\n", " ")
    # Anything that is still not an ASCII letter or a space collapses to one space.
    text = re.sub('[^a-z A-Z]+', ' ', text).lower()

    lemmas = [n.lemmatize(token) for token in tokenizer.tokenize(text)]

    # NLTK's English stop words plus number words, connectives and a few extra terms.
    ignored = set(stopwords.words('english'))
    ignored.update(['zero','one','two','three','four','five','six','seven','eight','nine','ten',
                    'may','also','across','among','beside','however','yet','within',
                    'integer', 'number', 'contain', 'line', 'first'])

    return ' '.join(lemma for lemma in lemmas if lemma not in ignored)
hi = '''
Bob is playing a game of Spaceship Solitaire. The goal of this game is to build a spaceship. In order to do this, he first needs to accumulate enough resources for the construction. There are 𝑛 types of resources, numbered 1 through 𝑛. Bob needs at least 𝑎𝑖 pieces of the 𝑖-th resource to build the spaceship. The number 𝑎𝑖 is called the goal for resource 𝑖.

Each resource takes 1 turn to produce and in each turn only one resource can be produced. However, there are certain milestones that speed up production. Every milestone is a triple (𝑠𝑗,𝑡𝑗,𝑢𝑗), meaning that as soon as Bob has 𝑡𝑗 units of the resource 𝑠𝑗, he receives one unit of the resource 𝑢𝑗 for free, without him needing to spend a turn. It is possible that getting this free resource allows Bob to claim reward for another milestone. This way, he can obtain a large number of resources in a single turn.

The game is constructed in such a way that there are never two milestones that have the same 𝑠𝑗 and 𝑡𝑗, that is, the award for reaching 𝑡𝑗 units of resource 𝑠𝑗 is at most one additional resource.

A bonus is never awarded for 0 of any resource, neither for reaching the goal 𝑎𝑖 nor for going past the goal — formally, for every milestone 0<𝑡𝑗<𝑎𝑠𝑗.

A bonus for reaching certain amount of a resource can be the resource itself, that is, 𝑠𝑗=𝑢𝑗.

Initially there are no milestones. You are to process 𝑞 updates, each of which adds, removes or modifies a milestone. After every update, output the minimum number of turns needed to finish the game, that is, to accumulate at least 𝑎𝑖 of 𝑖-th resource for each 𝑖∈[1,𝑛].

Input
The first line contains a single integer 𝑛 (1≤𝑛≤2⋅105) — the number of types of resources.

The second line contains 𝑛 space separated integers 𝑎1,𝑎2,…,𝑎𝑛 (1≤𝑎𝑖≤109), the 𝑖-th of which is the goal for the 𝑖-th resource.

The third line contains a single integer 𝑞 (1≤𝑞≤105) — the number of updates to the game milestones.

Then 𝑞 lines follow, the 𝑗-th of which contains three space separated integers 𝑠𝑗, 𝑡𝑗, 𝑢𝑗 (1≤𝑠𝑗≤𝑛, 1≤𝑡𝑗<𝑎𝑠𝑗, 0≤𝑢𝑗≤𝑛). For each triple, perform the following actions:

First, if there is already a milestone for obtaining 𝑡𝑗 units of resource 𝑠𝑗, it is removed.
If 𝑢𝑗=0, no new milestone is added.
If 𝑢𝑗≠0, add the following milestone: "For reaching 𝑡𝑗 units of resource 𝑠𝑗, gain one free piece of 𝑢𝑗."
Output the minimum number of turns needed to win the game.
Output
Output 𝑞 lines, each consisting of a single integer, the 𝑖-th represents the answer after the 𝑖-th update.
'''
# Overwrite the raw sample text with its cleaned form from prep(); note that
# re-running this line alone would clean the already-cleaned text again.
hi = prep(hi)


In [107]:

# Retrain with the configuration the grid search printed as best
# (lr=1.0, epoch=5, wordNgrams=1), then evaluate it and tag the sample text.
model = fasttext.train_supervised(
    input="./Data/fasttext/train.txt",
    lr=1.0,
    epoch=5,
    wordNgrams=1,
    bucket=200000,
    dim=50,
    loss='ova',
)
ft_obj = ImplementFastText()
ft_obj.load_model(model)
score = ft_obj.score()
result = ft_obj.predict([hi], num_tags=2)
# Predicted tags first, test metrics second, each on its own line.
print(result, score, sep='\n')

{'dp': 0.508, 'greedy': 0.314}
{'num_samples': 859, 'precision': 0.728, 'recall': 0.309, 'f_score': 0.433}
