In [34]:
import gc
import warnings

import lightgbm as lgb
import pandas as pd
from scipy.sparse import csr_matrix, hstack
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_selection import SelectFromModel
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

In [35]:
# Language labels to score; each label names one output column and one
# saved model file ('lang_detect_lgbmodel_<label>.txt').
class_names = [
    'hindi',
    'tamil',
    'english',
]

In [36]:
# Sample sentences to classify; each inner list is one single-column row.
input_val = [
    ['mujhe choti se ek help chahie hei'],
    ['home loan ka maximum tenure kya hai'],
    ['Does your card offer any unique benefits'],
    ['card epadi activate panradu'],
]

In [37]:
# Wrap the sample rows in a one-column DataFrame named 'text'.
test = pd.DataFrame(data=input_val, columns=['text'])

In [38]:
# Preview the first rows of the input frame.
test.head(n=5)

Unnamed: 0,text
0,mujhe choti se ek help chahie hei
1,home loan ka maximum tenure kya hai
2,Does your card offer any unique benefits
3,card epadi activate panradu


In [39]:
# Series of raw sentences that feeds the vectorizer.
test_text = test.loc[:, 'text']

In [40]:
# Word-level TF-IDF over unigrams and bigrams, capped at 50,000 features.
# NOTE(review): the vectorizer is fitted on the inference sentences
# themselves, so its vocabulary and column order come only from these rows.
# The boosters loaded later were presumably trained on features from a
# different fitted vectorizer -- confirm, and load that fitted vectorizer
# here instead of re-fitting.
word_vectorizer = TfidfVectorizer(
    analyzer='word',
    token_pattern=r'\w{1,}',
    ngram_range=(1, 2),
    strip_accents='unicode',
    sublinear_tf=True,
    max_features=50000,
)
# fit() returns the vectorizer itself, so the transform can be chained.
test_word_features = word_vectorizer.fit(test_text).transform(test_text)
print('Word TFIDF 3/3')

Word TFIDF 3/3


In [41]:
# Stack the feature blocks column-wise; only the word-level TF-IDF block
# is present here.
feature_blocks = [test_word_features]
test_features = hstack(feature_blocks)
print('HStack 2/2')

HStack 2/2


In [42]:
# Result frame: starts with the input sentences; the scoring loop adds one
# column per language label.
submission = pd.DataFrame({'text': test['text']})
submission

Unnamed: 0,text
0,mujhe choti se ek help chahie hei
1,home loan ka maximum tenure kya hai
2,Does your card offer any unique benefits
3,card epadi activate panradu


In [43]:
# Score the sentences with one saved LightGBM booster per language label and
# store each model's predictions in a column named after that label.
for class_name in class_names:
    print(class_name)
    # Model files are resolved relative to the current working directory.
    model_path = 'lang_detect_lgbmodel_' + class_name + '.txt'
    model = lgb.Booster(model_file=model_path)
    print(model)

    # predict_disable_shape_check=True suppresses LightGBM's own feature-count
    # check, so a mismatch would otherwise pass silently. Report it
    # explicitly: the recorded output shows identical scores for different
    # sentences (e.g. 0.739912 on three rows), which is what misaligned
    # feature columns would produce.
    n_expected = model.num_feature()
    n_given = test_features.shape[1]
    if n_given != n_expected:
        warnings.warn(
            "Feature count mismatch for '{}': model expects {} features but "
            "test_features has {}; predictions may be meaningless.".format(
                class_name, n_expected, n_given),
            RuntimeWarning,
        )

    submission[class_name] = model.predict(
        test_features, predict_disable_shape_check=True)

hindi
<lightgbm.basic.Booster object at 0x000001C9ADF62880>
tamil
<lightgbm.basic.Booster object at 0x000001C9ADED29D0>
english
<lightgbm.basic.Booster object at 0x000001C9ADF24430>


In [44]:
# Preview the per-language scores next to each sentence.
submission.head(n=5)

Unnamed: 0,text,hindi,tamil,english
0,mujhe choti se ek help chahie hei,0.051994,0.739912,0.135805
1,home loan ka maximum tenure kya hai,0.479105,0.739912,0.135805
2,Does your card offer any unique benefits,0.009673,0.739912,0.135805
3,card epadi activate panradu,0.002329,0.093376,0.660318
