In [5]:
import pandas as pd
from sacremoses import MosesTokenizer

import fasttext

In [2]:
mtk = MosesTokenizer()

In [3]:
test = pd.read_csv('test.csv', index_col='review_id')
test

Unnamed: 0_level_0,review
review_id,Unnamed: 1_level_1
1,"Great danger, cool, motif and cantik2 jg model..."
2,One of the shades don't fit well
3,Very comfortable
4,Fast delivery. Product expiry is on Dec 2022. ...
5,it's sooooo cute! i like playing with the glit...
...,...
60423,Product has been succesfully ordered and shipp...
60424,Opening time a little scared. Fear dalemnya de...
60425,The product quality is excellent. The origina...
60426,They 're holding up REALLY well also .


In [4]:
def clean(series):
    series = series.str.lower()

    series = series.str.replace(r'’', '\'')
    series = series.str.replace(r'#[a-z]+', ' ')
    series = series.str.replace(r'@[a-z]+', ' ')
    series = series.str.replace(r'[\+\*=@#<>\(\)\[\]\^_]+', ' ')
    series = series.str.replace(r'[,!?;.:]+', '. ')

    series = series.apply(mtk.tokenize)

    return series

test['review_clean'] = clean(test['review'])
test

Unnamed: 0_level_0,review,review_clean
review_id,Unnamed: 1_level_1,Unnamed: 2_level_1
1,"Great danger, cool, motif and cantik2 jg model...","[great, danger., cool., motif, and, cantik2, j..."
2,One of the shades don't fit well,"[one, of, the, shades, don, &apos;t, fit, well]"
3,Very comfortable,"[very, comfortable]"
4,Fast delivery. Product expiry is on Dec 2022. ...,"[fast, delivery., product, expiry, is, on, dec..."
5,it's sooooo cute! i like playing with the glit...,"[it, &apos;s, sooooo, cute., i, like, playing,..."
...,...,...
60423,Product has been succesfully ordered and shipp...,"[product, has, been, succesfully, ordered, and..."
60424,Opening time a little scared. Fear dalemnya de...,"[opening, time, a, little, scared., fear, dale..."
60425,The product quality is excellent. The origina...,"[the, product, quality, is, excellent., the, o..."
60426,They 're holding up REALLY well also .,"[they, &apos;, re, holding, up, really, well, ..."


In [6]:
model = fasttext.load_model('fasttext_dim300_lr0001_epoch200_wordNgrams2.bin')
model.words[:10]



['</s>',
 '.',
 'the',
 'good',
 'is',
 'product',
 'quality',
 'very',
 '👍',
 'delivery']

In [7]:
test['review_clean'] = test['review_clean'].str.join(' ')
test

Unnamed: 0_level_0,review,review_clean
review_id,Unnamed: 1_level_1,Unnamed: 2_level_1
1,"Great danger, cool, motif and cantik2 jg model...",great danger. cool. motif and cantik2 jg model...
2,One of the shades don't fit well,one of the shades don &apos;t fit well
3,Very comfortable,very comfortable
4,Fast delivery. Product expiry is on Dec 2022. ...,fast delivery. product expiry is on dec 2022. ...
5,it's sooooo cute! i like playing with the glit...,it &apos;s sooooo cute. i like playing with th...
...,...,...
60423,Product has been succesfully ordered and shipp...,product has been succesfully ordered and shipp...
60424,Opening time a little scared. Fear dalemnya de...,opening time a little scared. fear dalemnya de...
60425,The product quality is excellent. The origina...,the product quality is excellent. the original...
60426,They 're holding up REALLY well also .,they &apos; re holding up really well also .


In [8]:
test['label'] = test['review_clean'].apply(model.predict)
test

Unnamed: 0_level_0,review,review_clean,label
review_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,"Great danger, cool, motif and cantik2 jg model...",great danger. cool. motif and cantik2 jg model...,"((__label__3,), [0.6427798867225647])"
2,One of the shades don't fit well,one of the shades don &apos;t fit well,"((__label__2,), [0.4472169280052185])"
3,Very comfortable,very comfortable,"((__label__4,), [0.49385765194892883])"
4,Fast delivery. Product expiry is on Dec 2022. ...,fast delivery. product expiry is on dec 2022. ...,"((__label__4,), [0.2408541440963745])"
5,it's sooooo cute! i like playing with the glit...,it &apos;s sooooo cute. i like playing with th...,"((__label__4,), [0.38820934295654297])"
...,...,...,...
60423,Product has been succesfully ordered and shipp...,product has been succesfully ordered and shipp...,"((__label__4,), [0.3749113082885742])"
60424,Opening time a little scared. Fear dalemnya de...,opening time a little scared. fear dalemnya de...,"((__label__3,), [0.3956567645072937])"
60425,The product quality is excellent. The origina...,the product quality is excellent. the original...,"((__label__5,), [0.49430492520332336])"
60426,They 're holding up REALLY well also .,they &apos; re holding up really well also .,"((__label__4,), [0.3741464614868164])"


In [10]:
test['rating'] = test['label'].apply(lambda x: x[0][0][-1])
test

Unnamed: 0_level_0,review,review_clean,label,rating
review_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,"Great danger, cool, motif and cantik2 jg model...",great danger. cool. motif and cantik2 jg model...,"((__label__3,), [0.6427798867225647])",3
2,One of the shades don't fit well,one of the shades don &apos;t fit well,"((__label__2,), [0.4472169280052185])",2
3,Very comfortable,very comfortable,"((__label__4,), [0.49385765194892883])",4
4,Fast delivery. Product expiry is on Dec 2022. ...,fast delivery. product expiry is on dec 2022. ...,"((__label__4,), [0.2408541440963745])",4
5,it's sooooo cute! i like playing with the glit...,it &apos;s sooooo cute. i like playing with th...,"((__label__4,), [0.38820934295654297])",4
...,...,...,...,...
60423,Product has been succesfully ordered and shipp...,product has been succesfully ordered and shipp...,"((__label__4,), [0.3749113082885742])",4
60424,Opening time a little scared. Fear dalemnya de...,opening time a little scared. fear dalemnya de...,"((__label__3,), [0.3956567645072937])",3
60425,The product quality is excellent. The origina...,the product quality is excellent. the original...,"((__label__5,), [0.49430492520332336])",5
60426,They 're holding up REALLY well also .,they &apos; re holding up really well also .,"((__label__4,), [0.3741464614868164])",4


In [11]:
test['rating']

review_id
1        3
2        2
3        4
4        4
5        4
        ..
60423    4
60424    3
60425    5
60426    4
60427    4
Name: rating, Length: 60427, dtype: object

In [12]:
test['rating'].to_csv('submission.csv')