In [1]:
from flask import Flask, render_template, request
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import re
from catboost import CatBoostClassifier
import pickle
import threading
import os
from IPython.display import display, HTML

# ========== Part 1: Vacancy and article analysis ==========
# Load the vacancy and article datasets once, when the module is executed.
# Both paths are relative, so they resolve against the current working directory.
# Columns of df_jobs read later in this file: key_skills, title, company, salary, link.
df_jobs = pd.read_excel('IT_vacancies_analyst.xlsx')
# Columns of df_articles read later in this file: Hashtags, Rating, Title, URL.
df_articles = pd.read_excel('all_habr_.xlsx')

def preprocess_skills(skills):
    """Normalize a comma-separated skill string for vectorization.

    The text is stripped of punctuation, split on commas, lower-cased,
    de-duplicated and sorted, then joined back with commas.

    Args:
        skills: Raw skill string such as ``"Python, SQL, C++"``. Any
            non-string value (``None``, ``NaN`` from an empty spreadsheet
            cell) is treated as "no skills".

    Returns:
        A comma-joined string of unique, lower-cased skills in sorted order,
        or an empty string when there are none.
    """
    if not isinstance(skills, str):
        # re.sub() raises TypeError on floats, which is what empty cells become.
        return ''
    # '+' and '#' are kept so that "C", "C++" and "C#" stay distinct skills;
    # 'ё'/'Ё' lie outside the 'а-я' range and must be listed explicitly.
    cleaned = re.sub(r'[^a-zA-Zа-яА-ЯёЁ0-9+#,\s]', '', skills)
    tokens = {token.strip().lower() for token in cleaned.split(',')}
    # "a,,b" or a trailing comma would otherwise produce an empty skill.
    tokens.discard('')
    return ','.join(sorted(tokens))

def _tokenize_skills(text):
    """Split a comma-joined skill string into tokens, skipping empty ones."""
    return [token for token in text.split(',') if token]


def find_top_jobs(user_skills, df_jobs, n=5):
    """Return the ``n`` vacancies whose skills are most similar to the user's.

    Vacancy skills and the user's skills are normalized with
    ``preprocess_skills``, vectorized with TF-IDF (one token per skill) and
    ranked by cosine similarity.

    Args:
        user_skills: Comma-separated skills entered by the user.
        df_jobs: Vacancy table with a ``key_skills`` column. It is not
            modified; the result is built on a copy.
        n: Maximum number of vacancies to return.

    Returns:
        A DataFrame with up to ``n`` rows of ``df_jobs`` ordered from most to
        least similar, plus a ``processed_skills`` column. Empty when
        ``n <= 0`` or when no vacancy lists any skill.
    """
    user_skills = preprocess_skills(user_skills)
    # Empty spreadsheet cells are read as NaN; turn them into '' before preprocessing.
    processed = df_jobs['key_skills'].fillna('').astype(str).apply(preprocess_skills)

    def _select(positions):
        # Copy the selected rows so the caller's (module-level) frame is never mutated.
        selected = df_jobs.iloc[positions].copy()
        selected['processed_skills'] = processed.iloc[positions].to_numpy()
        return selected

    # A "[-n:]"-style slice with n == 0 would select every row, and TF-IDF
    # cannot be fitted when no vacancy contributes a single token.
    if n <= 0 or not processed.ne('').any():
        return _select([])

    # token_pattern=None silences the "token_pattern will not be used" warning
    # that sklearn emits when a custom tokenizer is supplied.
    vectorizer = TfidfVectorizer(tokenizer=_tokenize_skills, token_pattern=None)
    skills_matrix = vectorizer.fit_transform(processed)
    user_vec = vectorizer.transform([user_skills])
    similarities = cosine_similarity(user_vec, skills_matrix).flatten()

    top_indices = np.argsort(similarities)[::-1][:n]
    return _select(top_indices)
    
def find_missing_skills_per_job(user_skills, jobs):
    """List, for every vacancy, the required skills the user does not have.

    Skills are compared case-insensitively after trimming whitespace.

    Args:
        user_skills: Comma-separated skills entered by the user.
        jobs: DataFrame with ``title``, ``company``, ``salary``, ``link`` and
            ``key_skills`` columns. A non-string ``key_skills`` value (for
            example NaN) is treated as "no skills listed".

    Returns:
        A DataFrame with one row per vacancy and the columns ``title``,
        ``company``, ``salary``, ``link``, ``missing_skills``, ``all_skills``
        and ``is_perfect_match``. ``missing_skills`` is a list of skills, or a
        single congratulation message when nothing is missing; check
        ``is_perfect_match`` to tell the two apart. The columns are present
        even when ``jobs`` is empty.
    """
    columns = [
        'title', 'company', 'salary', 'link',
        'missing_skills', 'all_skills', 'is_perfect_match',
    ]
    user_skill_set = {
        skill.strip().lower() for skill in user_skills.split(',') if skill.strip()
    }

    result = []
    for _, job in jobs.iterrows():
        raw_skills = job['key_skills']
        if not isinstance(raw_skills, str):
            # Empty spreadsheet cells arrive as NaN, which has no .split().
            raw_skills = ''
        job_skills = (skill.strip().lower() for skill in raw_skills.split(','))
        # dict.fromkeys drops repeated skills while keeping their original order;
        # empty tokens (from ",," or a trailing comma) are not real skills.
        missing = list(dict.fromkeys(
            skill for skill in job_skills
            if skill and skill not in user_skill_set
        ))

        missing_skills_message = missing if missing else ["Вы идеальный кандидат! 🎉"]

        result.append({
            'title': job['title'],
            'company': job['company'],
            'salary': job['salary'],
            'link': job['link'],
            'missing_skills': missing_skills_message,
            'all_skills': job['key_skills'],
            'is_perfect_match': not missing,
        })
    return pd.DataFrame(result, columns=columns)

def find_articles_for_skills(skills, df_articles, n_per_skill=1):
    """Pick the highest-rated articles whose hashtags mention each skill.

    Matching is a case-insensitive substring search of the skill inside the
    ``Hashtags`` column.

    Args:
        skills: Iterable of skill names; duplicates, blank strings and
            non-string values are ignored.
        df_articles: DataFrame with ``Hashtags``, ``Rating``, ``Title`` and
            ``URL`` columns. Missing hashtags never match.
        n_per_skill: Maximum number of articles to return for each skill.

    Returns:
        A DataFrame with the columns ``skill``, ``title``, ``rating`` and
        ``url``; rows follow the first-seen order of ``skills`` and, within a
        skill, descending rating. Empty (but with these columns) when nothing
        matches.
    """
    # Hoisted out of the loop; NaN/None hashtags become '' so the boolean mask
    # never contains NA values, which pandas refuses to index with.
    hashtags = df_articles['Hashtags'].fillna('').astype(str)

    articles_list = []
    # dict.fromkeys de-duplicates while keeping order, so the output is stable
    # between runs (iteration order of a set of strings is not).
    for skill in dict.fromkeys(skills):
        if not isinstance(skill, str) or not skill.strip():
            # A blank skill is a substring of every hashtag string.
            continue
        matches = df_articles[hashtags.str.contains(skill, case=False, regex=False)]
        for _, article in matches.nlargest(n_per_skill, 'Rating').iterrows():
            articles_list.append({
                'skill': skill,
                'title': article['Title'],
                'rating': article['Rating'],
                'url': article['URL'],
            })
    return pd.DataFrame(articles_list, columns=['skill', 'title', 'rating', 'url'])

# ========== Part 2: Position prediction ==========
MODEL_PATH = 'catboost_model.cbm'
PICKLE_PATH = 'catboost_model.pkl'


def load_or_train_model():
    """Load the CatBoost classifier, training a demo model as a last resort.

    Sources are tried in this order:

    1. the native CatBoost file at ``MODEL_PATH``;
    2. the pickle at ``PICKLE_PATH``;
    3. a freshly trained demo model fitted on synthetic data, which is then
       saved to both paths on a best-effort basis.

    A failure to load from one source is printed and the next one is tried.

    Returns:
        A fitted model object.
    """
    if os.path.exists(MODEL_PATH):
        try:
            model = CatBoostClassifier()
            model.load_model(MODEL_PATH)
            print("Модель успешно загружена из .cbm файла")
            return model
        except Exception as e:
            print(f"Ошибка загрузки .cbm файла: {e}")

    if os.path.exists(PICKLE_PATH):
        try:
            # NOTE(security): unpickling executes arbitrary code; this file
            # must only ever come from a trusted source.
            with open(PICKLE_PATH, 'rb') as f:
                model = pickle.load(f)
            print("Модель успешно загружена из .pkl файла")
            return model
        except Exception as e:
            print(f"Ошибка загрузки .pkl файла: {e}")

    print("Создаем демо-модель...")
    from sklearn.datasets import make_classification
    # make_classification requires
    #   n_classes * n_clusters_per_class <= 2 ** n_informative;
    # with the defaults (n_informative=2, n_clusters_per_class=2) five classes
    # raise ValueError, so n_informative is set explicitly.
    X, y = make_classification(
        n_samples=1000,
        n_features=15,
        n_informative=5,
        n_classes=5,
        random_state=42,
    )
    model = CatBoostClassifier(iterations=100, verbose=0)
    model.fit(X, y)
    try:
        model.save_model(MODEL_PATH)
        with open(PICKLE_PATH, 'wb') as f:
            pickle.dump(model, f)
        print("Демо-модель создана и сохранена")
    except Exception as e:
        # Persisting is only a cache for the next start; a read-only or full
        # disk must not discard the model that was just trained.
        print(f"Не удалось сохранить демо-модель: {e}")
    return model

# Load (or train) the classifier once when the module is executed;
# the position_predictor view below reads this module-level instance.
model = load_or_train_model()

# ========== Flask application ==========
# HTML templates are looked up in the 'templates' folder.
app = Flask(__name__, template_folder='templates')

@app.route('/')
def home():
    """Render the landing page where the user chooses a service."""
    landing_page = render_template('index.html')
    return landing_page

@app.route('/job_analyzer', methods=['GET', 'POST'])
def job_analyzer():
    """Match the submitted skills against vacancies and suggest articles.

    GET renders the input form. POST reads the ``skills`` form field, finds
    the most similar vacancies, works out which skills the user lacks for
    each of them and looks up articles covering those skills.

    Returns:
        The rendered ``job_analyzer.html`` form (GET) or the rendered
        ``job_results.html`` page (POST).
    """
    if request.method != 'POST':
        return render_template('job_analyzer.html')

    user_skills = request.form['skills']

    top_jobs = find_top_jobs(user_skills, df_jobs)
    # Converting to records first also copes with an empty result frame.
    jobs = find_missing_skills_per_job(user_skills, top_jobs).to_dict('records')

    # For a perfect match 'missing_skills' holds a congratulation message,
    # not skills; it must not be used as an article search term.
    all_missing_skills = []
    for job in jobs:
        if not job['is_perfect_match']:
            all_missing_skills.extend(job['missing_skills'])

    articles_df = find_articles_for_skills(all_missing_skills, df_articles)

    return render_template(
        'job_results.html',
        skills=user_skills,
        jobs=jobs,
        articles=articles_df.to_dict('records'),
    )

@app.route('/position_predictor', methods=['GET', 'POST'])
def position_predictor():
    """Predict the most likely positions from the submitted profile.

    GET renders the input form. POST converts the form fields into a
    one-row feature frame, aligns it with the model's feature list and
    returns the five classes with the highest predicted probability.

    Returns:
        The rendered ``position_results.html`` page with a
        ``recommendations`` list of ``{'position', 'probability'}`` dicts, or
        the ``position_predictor.html`` form (with an ``error`` message when
        processing the request failed).
    """
    if request.method != 'POST':
        return render_template('position_predictor.html')

    try:
        seniority = request.form.get('seniority')
        # NOTE(review): the keys are presumably the training column names and
        # must stay byte-for-byte identical to them — confirm against the
        # training notebook before editing any of them.
        form_data = {
            'Age': int(request.form.get('Age', 25)),
            'Gender': int(request.form.get('Gender', 1)),
            'Number of vacation days': int(request.form.get('vacation_days', 20)),
            # Each one-hot flag is set by the seniority value of the same name.
            'Seniority level_Lead': seniority == 'Lead',
            'Seniority level_Middle': seniority == 'Middle',
            'Seniority level_Senior': seniority == 'Senior',
            'Your main technology / programming language_C++': 'tech_cpp' in request.form,
            'Your main technology / programming language_Java': 'tech_java' in request.form,
            'Your main technology / programming language_Javascript': 'tech_js' in request.form,
            'Your main technology / programming language_Python': 'tech_python' in request.form,
            'Employment status_Part-time employee': int(request.form.get('employment_status', 1)),
            'Сontract duration_Unlimited contract': request.form.get('contract') == 'True',
            'Company size_micro': request.form.get('company_size') == 'True',
            'Company type_Professional Services': request.form.get('company_type') == 'True',
            'Monthly Salary (EUR)': int(request.form.get('salary', 10000)),
        }

        # Put the columns in the model's own feature order: features the form
        # does not provide are filled with False, and form fields the model
        # does not know are dropped.
        df = pd.DataFrame([form_data]).reindex(
            columns=list(model.feature_names_), fill_value=False
        )

        probabilities = model.predict_proba(df)[0]
        classes = model.classes_
        sorted_probs = sorted(
            zip(classes, probabilities), key=lambda item: item[1], reverse=True
        )

        recommendations = [
            {'position': pos, 'probability': f"{prob*100:.1f}"}
            for pos, prob in sorted_probs[:5]
        ]

        return render_template('position_results.html', recommendations=recommendations)

    except Exception as e:
        # Request boundary: show the problem on the form instead of a 500 page.
        error_msg = f"Ошибка обработки запроса: {str(e)}"
        return render_template('position_predictor.html', error=error_msg)

def run_flask():
    """Run the Flask server on all interfaces, port 5000, without debug mode or the reloader."""
    server_options = {
        'host': '0.0.0.0',
        'port': 5000,
        'debug': False,
        'use_reloader': False,
    }
    app.run(**server_options)


# Serve from a separate daemon thread, which does not keep the process alive on exit.
thread = threading.Thread(target=run_flask, daemon=True)
thread.start()

Модель успешно загружена из .cbm файла


In [None]:
http://localhost:5000/