In [4]:
from nltk.corpus import stopwords
import fasttext.util
import pandas as pd
import numpy as np
import fasttext
import telebot
import pickle
import dill
import ast
import re

In [5]:
def _parse_vacancy_text(vacancy_text):
    """Turn ``"<field>: <value>"`` lines into a ``{model_column: value}`` dict.

    Blank and whitespace-only lines are skipped. Field labels are translated
    through ``model_name_dict``. All dots are removed from the value and the
    result is stripped; the ``required_experience`` value additionally gets
    its first letter upper-cased.

    Raises:
        ValueError: if a non-blank line has no ``:`` separator or its label
            is not a key of ``model_name_dict``.
    """
    parsed = {}
    for raw_line in vacancy_text.splitlines():
        line = raw_line.strip()
        if not line:
            continue

        # partition() splits on the FIRST colon only, so a value that itself
        # contains ':' is kept whole instead of being truncated.
        label, separator, value = line.partition(':')
        if not separator:
            raise ValueError(f"expected '<field>: <value>', got {line!r}")

        label = label.strip()
        if label not in model_name_dict:
            raise ValueError(
                f"unknown field {label!r}; expected one of {sorted(model_name_dict)}"
            )
        column = model_name_dict[label]

        value = value.replace('.', '').strip()
        if column == 'required_experience':
            # Slicing (not indexing) keeps an empty value from raising.
            value = value[:1].upper() + value[1:]
        parsed[column] = value
    return parsed


def _mean_word_vector(text):
    """Average the ``ft`` word vectors of the space-separated tokens of ``text``."""
    # split(' ') is deliberate: it never yields an empty list, so the mean is
    # always defined, and tokenisation stays exactly as it was before.
    vectors = [ft.get_word_vector(token) for token in text.split(' ')]
    return np.mean(vectors, axis=0).tolist()


def make_predict(vacancy_text):
    """Predict a salary from a free-text vacancy/resume description.

    Args:
        vacancy_text: Multi-line string with one ``"<field>: <value>"`` pair
            per line; field labels are the keys of ``model_name_dict``.

    Returns:
        The model's prediction for the single described row, passed through
        ``round()``.

    Raises:
        ValueError: if a line is malformed, a label is unknown, or one of the
            six fields the feature pipeline reads is absent.
    """
    row = _parse_vacancy_text(vacancy_text)

    # Every one of these is read below; fail early with a readable message
    # instead of an attribute/key error deep inside the feature code.
    required = (
        'custom_position', 'city_id', 'required_experience',
        'education_name', 'schedule', 'work_skills_str',
    )
    absent = [column for column in required if column not in row]
    if absent:
        raise ValueError(f"missing required fields: {absent}")

    # Cities outside the known list share a single bucket.
    if row['city_id'] not in top_10_city:
        row['city_id'] = 'Остальные'

    row['custom_position'] = ' '.join(
        word for word in row['custom_position'].split(' ')
        if word not in stopwords_list
    )

    if row['education_name'] == 'высшее (бакалавр)':
        row['education_name'] = 'высшее'

    # Indicator column "<column>_<value>" for each categorical field.
    for column in ('city_id', 'required_experience', 'schedule', 'education_name'):
        row[f"{column}_{row[column]}"] = True

    vector_professions = _mean_word_vector(row['custom_position'])
    vector_skills = _mean_word_vector(row['work_skills_str'])
    row['vector_professions'] = vector_professions
    row['vector_skills'] = vector_skills

    # One numeric column per embedding component, numbered from 1. The count
    # follows the actual vector length rather than a hard-coded 100.
    row.update(
        (f"feature_prof_{k}", component)
        for k, component in enumerate(vector_professions, start=1)
    )
    row.update(
        (f"feature_skill_{k}", component)
        for k, component in enumerate(vector_skills, start=1)
    )

    # Any model column this row did not produce is an indicator that is off.
    # Each column is filled exactly once (the previous loop re-assigned the
    # whole list of missing columns on every iteration).
    for column in fit_columns:
        row.setdefault(column, False)

    data = pd.DataFrame([row])

    predict = model.predict(data[model.feature_names_])[0]
    return round(predict)

In [6]:
# Field labels accepted in a /predict message -> column names used by make_predict.
model_name_dict = {
    'Профессия': 'custom_position',
    'Город': 'city_id',
    'Опыт работы': 'required_experience',
    'Образование': 'education_name',
    'График': 'schedule',
    'Навыки': 'work_skills_str',
}

# Text-category columns (the four that make_predict expands into indicator columns).
obj_columns = ['schedule', 'education_name', 'city_id', 'required_experience']

# Target dimensionality handed to fasttext.util.reduce_model below.
dim = 100


def _unpickle(path):
    """Return the single object pickled in the file at ``path``."""
    # NOTE(review): pickle.load can execute arbitrary code from the file;
    # only point this at artefacts you produced or otherwise trust.
    with open(path, 'rb') as stream:
        return pickle.load(stream)


# Load the Russian fastText model, then shrink its vectors in place to `dim`.
ft = fasttext.load_model('cc.ru.300.bin')
fasttext.util.reduce_model(ft, dim)

# Fitted regressor; its feature_names_ is the column list predictions must supply.
model = _unpickle('ctb_reg.dill')
fit_columns = model.feature_names_

# City identifiers are compared against user-typed text, hence normalised to str.
top_10_city = list(map(str, _unpickle('top_10_city.dill')))

stopwords_list = stopwords.words("russian")



In [None]:
# Bot client used by the handlers below and by the polling call.
# NOTE(review): <TOKEN> is a placeholder, not valid Python — substitute the
# bot's API token before running. Prefer reading it from an environment
# variable or a secrets store over pasting the literal into the notebook.
bot = telebot.TeleBot(<TOKEN>)

@bot.message_handler(commands=['start'])
def send_welcome(message):
    """Answer /start with a greeting and a template of the /predict message.

    The reply (in Russian) lists the six fields expected after /predict and
    the accepted options for experience, education and schedule.
    """
    welcome_text = """
    Привет! Присылай свое резюме c пометкой /predict и я предскажу твою зарплату! Пример:

    /predict
    Профессия: <название>.
    Город: Москва.
    Опыт работы: <нет опыта, от 1 года до 3 лет, от 3 до 6 лет, более 6 лет>.
    Образование: <любое, среднее профессиональное, среднее, неполное высшее, высшее>.
    График: <сменный график, полный рабочий день, вахта, свободный график, удаленная работа, частичная занятость>.
    Навыки: <навыки через запятую>.
    """
    bot.reply_to(message, welcome_text)

@bot.message_handler(commands=['predict'])
def simplify_text(message):
    """Handle /predict: run the salary model on the message body and reply.

    The text after the command token is passed to ``make_predict``. If the
    body is empty or cannot be parsed, the user gets a hint about the
    expected format instead of no answer at all.
    """
    # Drop the first whitespace-delimited token (the command itself) rather
    # than a fixed 8 characters: in group chats the command may arrive as
    # "/predict@<bot_name>", and a fixed slice would leave "@<bot_name>"
    # glued to the first field.
    parts = message.text.split(maxsplit=1)
    input_text = parts[1] if len(parts) > 1 else ''

    if not input_text.strip():
        bot.reply_to(message, "Пришли резюме после команды /predict. Пример формата — в /start.")
        return

    try:
        salary = make_predict(input_text)
    except (KeyError, IndexError, ValueError, AttributeError):
        # These are the errors make_predict raises for malformed input
        # (unknown or missing field, line without ':'). Other failures are
        # not input problems and are left to propagate.
        bot.reply_to(
            message,
            "Не удалось разобрать резюме. Каждое поле должно быть на отдельной "
            "строке в виде «Название: значение». Пример — в /start.",
        )
        return

    bot.reply_to(message, f"Твоя ожидаемая зарплата: {salary}")

# Start fetching updates so the handlers registered on `bot` receive messages.
# NOTE(review): presumably blocks this cell until interrupted, and may stop on
# an unhandled handler exception — confirm against the pyTelegramBotAPI docs.
bot.polling()