In [None]:
!pip install tensorflow==2.8.0 keras==2.8.0
!pip install h5py==2.10.0
!pip install nltk

In [6]:
import keras
import numpy as np
from sklearn.preprocessing import LabelEncoder
from keras.models import Sequential, model_from_json
from keras.layers import Dense, Embedding, LSTM
from keras.preprocessing.sequence import pad_sequences
from keras.preprocessing.text import tokenizer_from_json
import json
import nltk
nltk.download("stopwords")
from nltk.corpus import stopwords
from string import punctuation
russian_stopwords = stopwords.words("russian")

class ClassifierLSTM:
    """Text classifier built from a saved Keras model, a tokenizer and a label encoder.

    The three artifacts are loaded from disk in ``__init__``; ``predict`` then
    cleans one line of text, converts it to a padded token sequence and maps
    the model output back to a label through the encoder.
    """

    # Length every token sequence is padded/truncated to before inference.
    # NOTE(review): presumably the sequence length used at training time - confirm.
    MAX_SEQUENCE_LEN = 69

    # Translation table turning every ASCII punctuation character into a space.
    _PUNCT_TO_SPACE = str.maketrans(punctuation, ' ' * len(punctuation))

    def __init__(self, weights='lstm', tokenizer_path='lstm_tokenizer.json',
                 classes_path='lstm_encoder_classes.npy'):
        """Load the model, tokenizer and label-encoder classes from disk.

        Args:
            weights: Path passed to ``keras.models.load_model``.
            tokenizer_path: JSON file whose decoded content is handed to
                ``tokenizer_from_json``.
            classes_path: ``.npy`` file holding the ``LabelEncoder`` classes.
        """
        # Honour the caller-supplied path instead of a hard-coded 'lstm'.
        self.model = keras.models.load_model(weights)

        with open(tokenizer_path) as f:
            self.tokenizer = tokenizer_from_json(json.load(f))

        self.encoder = LabelEncoder()
        self.encoder.classes_ = np.load(classes_path)

    def preprocess(self, line):
        """Return ``line`` lower-cased, without ASCII punctuation or stop words.

        Punctuation is replaced by spaces, the text is split on single spaces,
        and tokens that are blank or listed in ``russian_stopwords`` are
        dropped. The remaining tokens are joined with single spaces.
        """
        # A set gives constant-time membership tests for every token.
        stop_words = frozenset(russian_stopwords)
        tokens = line.translate(self._PUNCT_TO_SPACE).lower().split(' ')
        kept = [tok for tok in tokens if tok.strip() and tok not in stop_words]
        return ' '.join(kept)

    def predict(self, line):
        """Classify a single line of text.

        Args:
            line: Raw text to classify.

        Returns:
            The value produced by ``encoder.inverse_transform`` for the most
            probable class, minus 1 (a one-element array), or the plain
            integer ``0`` when that value would be negative.
        """
        cleaned = self.preprocess(line)

        # Tokenize the cleaned argument itself, not a notebook-level global.
        sequences = self.tokenizer.texts_to_sequences([cleaned])
        padded = pad_sequences(sequences, maxlen=self.MAX_SEQUENCE_LEN)
        probabilities = self.model.predict(padded, batch_size=1, verbose=1)
        class_index = np.argmax(probabilities, axis=1)
        pred = self.encoder.inverse_transform(class_index) - 1
        # Clamp negative results to 0 (kept as a plain int for compatibility).
        if pred < 0:
            pred = 0

        return pred
        
# Build the classifier once; the same instance serves every query below.
myLSTMmodel = ClassifierLSTM()

# Classify each sample description in turn and print the predicted label.
for input_line in (
    'изделия прочие пластмасс изделия прочих материалов товарных позиций 3901 3914 прочие прочие прочие прочие',
    'прочие семена плоды прочих масличных культур',
):
    res = myLSTMmodel.predict(input_line)
    print(res)



[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\vasiliev\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


[39]
[9]
