In [2]:
from __future__ import annotations

import re
import sys
from abc import ABC, abstractmethod
from dataclasses import dataclass
import logging
from pathlib import Path
from typing import Optional

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

logging.basicConfig(
    level=logging.INFO,
    format='%(levelname)s: %(message)s',
    stream=sys.stdout
)

In [3]:
@dataclass
class PipelineContext:
    """
    Context for processing data pipeline.

    Attributes:
        csv_path: Path to the original CSV file.
        df: dataframe with data (default None).
        X: features (default None).
        y: target (default None).
    """
    csv_path: Path
    df: Optional[pd.DataFrame] = None
    X: Optional[np.ndarray] = None
    y: Optional[np.ndarray] = None

In [4]:
class Handler(ABC):
    """
    Abstract handler for implementing a chain of responsibility.

    Methods:
        set_next(handler): Sets the next handler in the chain.
        handle(ctx): Processes the data context and passes it to the next handler in the chain.
        _process(ctx): Abstract method for specific processing, must be implemented in subclasses.
    """
    def __init__(self):
        self._next: Optional["Handler"] = None

    def set_next(self, handler: "Handler") -> "Handler":
        self._next = handler
        return handler

    def handle(self, ctx: PipelineContext) -> PipelineContext:
        ctx = self._process(ctx)
        if self._next:
            return self._next.handle(ctx)
        return ctx

    @abstractmethod
    def _process(self, ctx: PipelineContext) -> PipelineContext:
        ...

In [5]:
class LoadCSVHandler(Handler):
    """
    Handler for loading data from a CSV file.

    Methods:
        _process(ctx): Loads data from a CSV file into the context.
    """
    def _process(self, ctx: PipelineContext) -> PipelineContext:
        logging.info(f"LoadCSVHandler: Starting to load {ctx.csv_path}")
        ctx.df = pd.read_csv(
            ctx.csv_path,
            sep=",",
            quotechar='"',
            engine="python",
            encoding="utf-8",
            index_col=0
        )
        logging.info(f"LoadCSVHandler: Loaded {ctx.csv_path} with {ctx.df.shape[0]} rows and {ctx.df.shape[1]} columns")
        return ctx

In [5]:
class ParseGenderAgeBirthdayHandler(Handler):
    """
    Handler for parsing gender, age, and birthday information from the "Пол, возраст" column.

    Methods:
        _process(ctx): Extracts new columns for gender, age, and birthday month from the raw text column.
    """
    def _process(self, ctx: PipelineContext) -> PipelineContext:
        logging.info(f"ParseGenderAgeBirthdayHandler: Starting to parse gender, age and birthday month")
        df = ctx.df.copy()
        
        # document possible gender values
        male_values = ['Мужчина', 'Male']
        # female_values = ['Женщина', 'Female']
        # also encode them with 0 - male, 1 - female
        def extract_gender(value: str) -> str:
            raw_gender = value.split(',')[0].strip()
            if raw_gender in male_values:
                return 0 # Male
            return 1 # Female

        def extract_age(value: str) -> str:
            data = value.split(',')
            if len(data) < 2:
                return -1
            raw_age = data[1].strip().replace('\xa0', ' ')
            raw_age = raw_age.split(' ')[0]
            return int(raw_age)

        def extract_birthday_month(value: str) -> str:
            data = value.split(',')
            if len(data) < 3:
                return -1
            raw_birthday_month = data[2].strip().replace('\xa0', ' ')
            raw_birthday_month = raw_birthday_month.split(' ')[-2]
            match raw_birthday_month:
                case 'January' | 'января':
                    return 0
                case 'February' | 'февраля':
                    return 1
                case 'March' | 'марта':
                    return 2
                case 'April' | 'апреля':
                    return 3
                case 'May' | 'мая':
                    return 4
                case 'June' | 'июня':
                    return 5
                case 'July' | 'июля':
                    return 6
                case 'August' | 'августа':
                    return 7
                case 'September' | 'сентября':
                    return 8    
                case 'October' | 'октября':
                    return 9
                case 'November' | 'ноября':
                    return 10
                case 'December' | 'декабря':
                    return 11
                case _:
                    return -1

        df["gender"] = df["Пол, возраст"].apply(extract_gender)
        df["age"] = df["Пол, возраст"].apply(extract_age)
        df["birthday_month"] = df["Пол, возраст"].apply(extract_birthday_month)

        df = df.drop(columns=["Пол, возраст"])

        ctx.df = df
        logging.info(f"ParseGenderAgeHandler: Parsed gender, age and birthday month")
        return ctx

In [6]:
class ParseSalaryHandler(Handler):
    """
    Handler for parsing salary information from the "ЗП" column and converting it to rubles.

    Methods:
        _process(ctx): Extracts new columns for salary from the raw text column and converts it to rubles.
    """
    def _process(self, ctx: PipelineContext) -> PipelineContext:
        logging.info(f"ParseSalaryHandler: Starting to parse salary")
        df = ctx.df.copy()

        # approximate 2020 currency rates
        currency_rates = {
            'руб.': 1.0,
            'USD': 73.35,
            'RUB': 1.0,
            'KZT': 0.18,
            'бел. руб.': 2.28,
            'EUR': 85.86,
            'грн.': 2.72,
            'сум': 0.005,
            'KGS': 0.98,
            'UAH': 2.5,
            'BYN': 2.5,
            'AZN': 41.1,
            'som': 0.005,
        }
        
        def extract_salary(value: str) -> str:
            value = value.replace('\xa0', ' ').strip().split(' ')
            number = ''
            currency = ''

            for idx, cur in enumerate(value):
                if cur.isdigit():
                    number += cur
                else:
                    currency = ' '.join(value[idx:])
                    break
            return currency_rates[currency.strip()] * float(number)

        df["salary_rub"] = df["ЗП"].apply(extract_salary)

        df = df.drop(columns=["ЗП"])

        ctx.df = df
        logging.info(f"ParseSalaryHandler: Parsed salary")
        return ctx

In [7]:
class ParseJobHandler(Handler):
    """
    Handler for parsing job information from the "Ищет работу на должность:" column.

    Methods:
        _process(ctx): Extracts new columns for job from the raw text column.
    """
    def _process(self, ctx: PipelineContext) -> PipelineContext:
        logging.info(f"ParseJobHandler: Starting to parse job")
        df = ctx.df.copy()

        # due to pie chart of the distrubution of jobs, we can see 18007 different jobs.
        # However, there are only 133 jobs that are included more than 50 times.
        # So let's take only them - other jobs will be called "other" (as they make to much noise).

        # Also 133 jobs are much better for one-hot encoding (than 18007).
        job_count = df['Ищет работу на должность:'].value_counts()[:133]

        def extract_job(value: str) -> str:
            if value in job_count:
                return value
            return "other"

        df["job"] = df["Ищет работу на должность:"].apply(extract_job)

        df = df.drop(columns=["Ищет работу на должность:"])

        ctx.df = df
        logging.info(f"ParseJobHandler: Parsed job")
        return ctx

In [8]:
class ParseCityHandler(Handler):
    """
    Handler for parsing city information from the "Город" column.

    Methods:
        _process(ctx): Extracts new columns for city from the raw text column.
    """
    def _process(self, ctx: PipelineContext) -> PipelineContext:
        logging.info(f"ParseCityHandler: Starting to parse city")
        df = ctx.df.copy()

        # group cities by regions
        regions_map = {
            "Moscow & Oblast": [
                "Москва", "Moscow", "Зеленоград", "Подольск", "Балашиха", "Химки", "Мытищи", 
                "Королев", "Люберцы", "Красногорск", "Одинцово", "Домодедово", "Щелково", 
                "Серпухов", "Раменское", "Долгопрудный", "Реутов", "Пушкино", "Лобня"
            ],
            "Saint Petersburg & Oblast": [
                "Санкт-Петербург", "Saint Petersburg", "Гатчина", "Выборг", "Всеволожск", 
                "Сосновый Бор", "Кириши", "Тихвин", "Сертолово"
            ],
            "Central Federal District": [
                "Воронеж", "Ярославль", "Рязань", "Тверь", "Тула", "Липецк", "Курск", 
                "Брянск", "Иваново", "Белгород", "Владимир", "Калуга", "Орел", "Смоленск", 
                "Тамбов", "Кострома", "Старый Оскол"
            ],
            "Volga Federal District": [
                "Казань", "Kazan", "Нижний Новгород", "Самара", "Уфа", "Пермь", "Саратов", 
                "Тольятти", "Ижевск", "Ульяновск", "Оренбург", "Пенза", "Набережные Челны", 
                "Чебоксары", "Киров", "Саранск", "Стерлитамак", "Йошкар-Ола"
            ],
            "South and North Caucasus Federal District": [
                "Краснодар", "Ростов-на-Дону", "Волгоград", "Сочи", "Ставрополь", "Астрахань", 
                "Севастополь", "Симферополь", "Новороссийск", "Таганрог", "Махачкала", 
                "Владикавказ", "Грозный", "Майкоп", "Пятигорск"
            ],
            "Ural Federal District": [
                "Екатеринбург", "Yekaterinburg", "Челябинск", "Тюмень", "Магнитогорск", 
                "Сургут", "Нижневартовск", "Курган", "Новый Уренгой", "Ноябрьск", "Ханты-Мансийск"
            ],
            "Siberian Federal District": [
                "Новосибирск", "Novosibirsk", "Красноярск", "Омск", "Томск", "Барнаул", 
                "Иркутск", "Кемерово", "Новокузнецк", "Абакан", "Братск", "Ангарск"
            ],
            "Far Eastern Federal District": [
                "Владивосток", "Хабаровск", "Улан-Удэ", "Чита", "Благовещенск", "Якутск", 
                "Петропавловск-Камчатский", "Южно-Сахалинск", "Находка"
            ],
            "Kazakhstan": [
                "Алматы", "Almaty", "Нур-Султан", "Астана", "Astana", "Шымкент", "Актобе", 
                "Караганда", "Атырау", "Актау", "Павлодар", "Уральск"
            ],
            "Belarus": [
                "Минск", "Minsk", "Гомель", "Витебск", "Могилев", "Гродно", "Брест"
            ],
            "Other countries / CIS": [
                "Киев", "Kyiv", "Ташкент", "Бишкек", "Тбилиси", "Баку", "Ереван", "Рига", "Вильнюс"
            ]
        }

        def extract_city(value: str) -> str:
            city_name = value.split(',')[0].strip()
            for region, cities in regions_map.items():
                if city_name in cities:
                    return region
            return "Other"

        df["city"] = df["Город"].apply(extract_city)

        df = df.drop(columns=["Город"])

        ctx.df = df
        logging.info(f"ParseCityHandler: Parsed city")
        return ctx

In [9]:
class ParseEmploymentHandler(Handler):
    """
    Handler for parsing employment information from the "Занятость" column.

    Methods:
        _process(ctx): Extracts new columns for employment from the raw text column.
    """
    def _process(self, ctx: PipelineContext) -> PipelineContext:
        logging.info(f"ParseEmploymentHandler: Starting to parse employment")
        df = ctx.df.copy()

        # group employment
        employment_map = {
            "full_time": ["полная занятость", "full time"],
            "part_time": ["частичная занятость", "part time"],
            "project": ["проектная работа", "project work"],
            "internship": ["стажировка", "work placement"],
            "volunteering": ["волонтерство", "volunteering"]
        }

        for column_name, keywords in employment_map.items():
            def check_employment(value: str) -> int:
                value_lower = value.lower()
                if any(keyword in value_lower for keyword in keywords):
                    return 1
                return 0
            
            df[f"emp_{column_name}"] = df["Занятость"].apply(check_employment)

        df = df.drop(columns=["Занятость"])

        ctx.df = df
        logging.info(f"ParseEmploymentHandler: Parsed employment")
        return ctx

In [None]:
class ParseWorkScheduleHandler(Handler):
    """
    Handler for parsing work schedule information from the "График" column.

    Methods:
        _process(ctx): Extracts new columns for work schedule from the raw text column.
    """
    def _process(self, ctx: PipelineContext) -> PipelineContext:
        logging.info(f"ParseWorkScheduleHandler: Starting to parse work schedule")
        df = ctx.df.copy()

        schedule_map = {
            "full_day": ["полный день", "full day"],
            "flexible": ["гибкий график", "flexible schedule"],
            "shift": ["сменный график", "shift schedule"],
            "remote": ["удаленная работа", "remote working"],
            "rotation": ["вахтовый метод", "rotation based work"]
        }

        for column_name, keywords in schedule_map.items():
            def check_schedule(value: str) -> int:
                value_lower = value.lower()
                if any(keyword in value_lower for keyword in keywords):
                    return 1
                return 0
            
            df[f"sch_{column_name}"] = df["График"].apply(check_schedule)

        df = df.drop(columns=["График"])

        ctx.df = df
        logging.info(f"ParseWorkScheduleHandler: Parsed work schedule")
        return ctx

In [36]:
class ParseExperienceHandler(Handler):
    """
    Handler for parsing experience information from the "Опыт (двойное нажатие для полной версии)" column.

    Methods:
        _process(ctx): Extracts new columns for experience from the raw text column.
    """
    def _process(self, ctx: PipelineContext) -> PipelineContext:
        logging.info(f"ParseExperienceHandler: Starting to parse experience")
        df = ctx.df.copy()

        def check_experience(value: str) -> int:
            years_pattern = r'(\d+)\s*(?:год|года|лет)'
            months_pattern = r'(\d+)\s*(?:месяц|месяца|месяцев)'
            
            experience_part = value.split('\n')[0]
            
            years = re.search(years_pattern, experience_part)
            months = re.search(months_pattern, experience_part)
            total_months = 0
            if years:
                total_months += int(years.group(1)) * 12
            if months:
                total_months += int(months.group(1))
                
            return total_months
        
        df["experience_months"] = df["Опыт (двойное нажатие для полной версии)"].apply(check_experience)

        df = df.drop(columns=["Опыт (двойное нажатие для полной версии)"])

        ctx.df = df
        logging.info(f"ParseExperienceHandler: Parsed experience")
        return ctx

In [37]:
class ParseLastPlaceHandler(Handler):
    """
    Handler for parsing last place information from the "Последенее/нынешнее место работы" column.

    Methods:
        _process(ctx): Removes column for last place from the dataframe.
    """
    def _process(self, ctx: PipelineContext) -> PipelineContext:
        logging.info(f"ParseLastPlaceHandler: Starting to parse last place")
        df = ctx.df.copy()

        df = df.drop(columns=["Последенее/нынешнее место работы"])

        ctx.df = df
        logging.info(f"ParseLastPlaceHandler: Parsed last place")
        return ctx

In [43]:
class ParseLastJobHandler(Handler):
    """
    Handler for parsing last job information from the "Последеняя/нынешняя должность" column.

    Methods:
        _process(ctx): Extracts new columns for last job from the raw text column.
    """
    def _process(self, ctx: PipelineContext) -> PipelineContext:
        logging.info(f"ParseLastJobHandler: Starting to parse last job")
        df = ctx.df.copy()

        # lets take jobs from jobs column and parse only them
        jobs = df['job'].value_counts()

        def extract_job(value: str) -> str:
            if value in jobs:
                return value
            return "other"

        df["last_job"] = df["Последеняя/нынешняя должность"].apply(extract_job)

        df = df.drop(columns=["Последеняя/нынешняя должность"])

        ctx.df = df
        logging.info(f"ParseLastJobHandler: Parsed last job")
        return ctx

In [49]:
class ParseEducationHandler(Handler):
    """
    Handler for parsing education information from the "Образование и ВУЗ" column.

    Methods:
        _process(ctx): Extracts new columns for education from the raw text column.
    """
    def _process(self, ctx: PipelineContext) -> PipelineContext:
        logging.info(f"ParseEducationHandler: Starting to parse education")
        df = ctx.df.copy()

        education_map = {
            "incomplete_higher": ["неоконченное высшее", "incomplete higher"],
            "higher": ["высшее образование", "higher education"],
            "secondary_special": ["среднее специальное", "secondary special"],
            "secondary": ["среднее образование", "secondary education"]
        }

        for column_name, keywords in education_map.items():
            def extract_level(value: str) -> int:
                value_lower = value.lower()
                if any(keyword in value_lower for keyword in keywords):
                    return 1
                return 0
            
            df[f"edu_{column_name}"] = df["Образование и ВУЗ"].apply(extract_level)

        df = df.drop(columns=["Образование и ВУЗ"])

        ctx.df = df
        logging.info(f"ParseEducationHandler: Parsed education")
        return ctx

In [76]:
class ParseResumeHandler(Handler):
    """
    Handler for parsing resume information from the "Обновление резюме" column. 
    Splits resume in "old" (more than 1 year) and "not old" (less than 1 year).

    Methods:
        _process(ctx): Extracts new columns for resume from the raw text column.
    """
    def _process(self, ctx: PipelineContext) -> PipelineContext:
        logging.info(f"ParseResumeHandler: Starting to parse resume")
        df = ctx.df.copy()

        def extract_oldness(value: str) -> str:
            try:
                year = int(value.split('.')[2].split(' ')[0])
            except Exception as e:
                logging.error(f"ParseResumeHandler: Error extracting oldness: {e}")
                year = 0
            return 0 if year > 2018 else 1

        df["old_resume"] = df["Обновление резюме"].apply(extract_oldness)

        df = df.drop(columns=["Обновление резюме"])

        ctx.df = df
        logging.info(f"ParseResumeHandler: Parsed resume")
        return ctx

In [79]:
class ParseAutoHandler(Handler):
    """
    Handler for parsing auto information from the "Авто" column. 

    Methods:
        _process(ctx): Extracts new columns for auto from the raw text column.
    """
    def _process(self, ctx: PipelineContext) -> PipelineContext:
        logging.info(f"ParseAutoHandler: Starting to parse auto")
        df = ctx.df.copy()

        def extract_auto(value: str) -> str:
            match value:
                case 'Имеется собственный автомобиль':
                    return 1
                case 'Не указано':
                    return 0
                case _:
                    return 0

        df["auto"] = df["Авто"].apply(extract_auto)

        df = df.drop(columns=["Авто"])

        ctx.df = df
        logging.info(f"ParseAutoHandler: Parsed auto")
        return ctx

In [92]:
class EncodeCategoricalFeaturesHandler(Handler):
    """
    Handler for encoding categorical features. 

    Methods:
        _process(ctx): Encodes categorical features.
    """
    def _process(self, ctx: PipelineContext) -> PipelineContext:
        logging.info(f"EncodeCategoricalFeaturesHandler: Start with {ctx.df.shape[1]} features")
        df = ctx.df.copy()

        cat_cols = df.select_dtypes(include="object").columns
        df = pd.get_dummies(df, columns=cat_cols, drop_first=True)

        ctx.df = df
        logging.info(f"EncodeCategoricalFeaturesHandler: Updated df with {ctx.df.shape[1]} features")
        return ctx

In [106]:
class SplitDataHandler(Handler):
    """
    Handler for splitting the dataset into features and target.

    Methods:
        _process(ctx): Splits the dataset into features and target.
    """
    def _process(self, ctx: PipelineContext) -> PipelineContext:
        logging.info(f"SplitDataHandler: Splitting data into features and target")
        df = ctx.df.copy()

        ctx.X = df.drop(columns=["salary_rub"])
        ctx.y = df["salary_rub"]

        ctx.df = None
        logging.info(f"SplitDataHandler: df was split into X and y.")
        return ctx

In [109]:
class SaveDataHandler(Handler):
    """
    Handler for saving the dataset into X.npy and y.npy files.

    Methods:
        _process(ctx): Saves the dataset into X.npy and y.npy files.
    """
    def _process(self, ctx: PipelineContext) -> PipelineContext:
        logging.info(f"SaveDataHandler: Saving data")
        np.save("X.npy", ctx.X)
        np.save("y.npy", ctx.y)
        logging.info(f"SaveDataHandler: Data was saved to X.npy and y.npy files")
        return ctx


In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

class ParseDescriptionNLPHandler(Handler):
    """Handler for NLP processing of experience/skills description."""
    def _process(self, ctx: PipelineContext) -> PipelineContext:
        """
        Extract TF-IDF features from experience description.
        
        Args:
            ctx: Pipeline context containing the dataframe.
            
        Returns:
            PipelineContext: Context updated with TF-IDF features.
        """
        logging.info("ParseDescriptionNLPHandler: Starting NLP processing")
        df = ctx.df.copy()
        
        text_col = 'Опыт (двойное нажатие для полной версии)'
            
        if text_col not in df.columns:
            logging.warning(f"ParseDescriptionNLPHandler: Column '{text_col}' not found. Skipping NLP.")
            return ctx
        
        tfidf = TfidfVectorizer(
            max_features=50,
            ngram_range=(1, 2),
            binary=True
        )

        def extract_text(value: str) -> int:
            text = ''
            try:
                value_splitted = value.split('месяц')
                if len(value_splitted) > 1:
                    text = value_splitted[1]
                    return text
                value_splitted = value.split('год')
                if len(value_splitted) > 1:
                    text = value_splitted[1]
                    return text
                value_splitted = value.split('лет')
                if len(value_splitted) > 1:
                    text = value_splitted[1]
                    return text
            except Exception as error:
                logging.info(error)
            return text
        
        texts = df[text_col].apply(extract_text).astype(str)
        
        try:
            tfidf_matrix = tfidf.fit_transform(texts)
            feature_names = [f"tfidf_{name}" for name in tfidf.get_feature_names_out()]
            
            tfidf_df = pd.DataFrame(
                tfidf_matrix.toarray(), 
                columns=feature_names, 
                index=df.index
            )
            
            df = pd.concat([df, tfidf_df], axis=1)
            logging.info(f"ParseDescriptionNLPHandler: Added {len(feature_names)} TF-IDF features")
            
        except Exception as e:
            logging.error(f"ParseDescriptionNLPHandler: NLP failed: {e}")
            
        ctx.df = df
        return ctx

In [32]:
def build_pipeline() -> Handler:
    """
    Builds the full data processing pipeline by chaining together all handlers in the required order.

    Returns:
        Handler: The first handler in the pipeline (LoadCSVHandler).
    """
    load = LoadCSVHandler()
    ne = ParseDescriptionNLPHandler()
    # gender_age = ParseGenderAgeBirthdayHandler()
    # salary = ParseSalaryHandler()
    # job = ParseJobHandler()
    # city = ParseCityHandler()
    # employment = ParseEmploymentHandler()
    # work_schedule = ParseWorkScheduleHandler()
    # experience = ParseExperienceHandler()
    # last_place = ParseLastPlaceHandler()
    # last_job = ParseLastJobHandler()
    # education = ParseEducationHandler()
    # resume = ParseResumeHandler()
    # auto = ParseAutoHandler()
    # encode_categorical_features = EncodeCategoricalFeaturesHandler()

    # split_data = SplitDataHandler()

    # save_data = SaveDataHandler()

    load.set_next(ne)\
    #     .set_next(salary)\
    #     .set_next(job)\
    #     .set_next(city)\
    #     .set_next(employment)\
    #     .set_next(work_schedule)\
    #     .set_next(experience)\
    #     .set_next(last_place)\
    #     .set_next(last_job)\
    #     .set_next(education)\
    #     .set_next(resume)\
    #     .set_next(auto)\
    #     .set_next(encode_categorical_features)\
    #     .set_next(split_data)\
    #     .set_next(save_data)
    
    return load

pipeline = build_pipeline()

ctx = PipelineContext(csv_path=Path("hh.csv"))
ctx = pipeline.handle(ctx)

INFO: LoadCSVHandler: Starting to load hh.csv
INFO: LoadCSVHandler: Loaded hh.csv with 66945 rows and 12 columns
INFO: ParseDescriptionNLPHandler: Starting NLP processing
INFO: list index out of range
INFO: list index out of range
INFO: list index out of range
INFO: list index out of range
INFO: list index out of range
INFO: list index out of range
INFO: list index out of range
INFO: list index out of range
INFO: list index out of range
INFO: list index out of range
INFO: list index out of range
INFO: list index out of range
INFO: list index out of range
INFO: list index out of range
INFO: list index out of range
INFO: list index out of range
INFO: list index out of range
INFO: list index out of range
INFO: list index out of range
INFO: list index out of range
INFO: list index out of range
INFO: list index out of range
INFO: list index out of range
INFO: list index out of range
INFO: list index out of range
INFO: list index out of range
INFO: list index out of range
INFO: list index ou

In [33]:
ctx.df


Unnamed: 0,"Пол, возраст",ЗП,Ищет работу на должность:,Город,Занятость,График,Опыт (двойное нажатие для полной версии),Последенее/нынешнее место работы,Последеняя/нынешняя должность,Образование и ВУЗ,...,tfidf_работы полный,tfidf_разработка,tfidf_руб,tfidf_руб информационные,tfidf_телеком,tfidf_технологии,tfidf_технологии интернет,tfidf_удаленная,tfidf_удаленная работа,tfidf_частичная
0,"Мужчина , 42 года , родился 6 октября 1976",27 000 руб.,Старший администратор,"Липецк , не готов к переезду , не готов к кома...",полная занятость,полный день,Опыт работы 6 лет 1 месяц\n\nСтарший администр...,Трц Ривьера,Старший администратор по сохраности активов,Среднее специальное образование 1994 Спту 26,...,0.232768,0.000000,0.193313,0.193486,0.185914,0.180019,0.186313,0.000000,0.000000,0.000000
1,"Мужчина , 41 год , родился 14 января 1978",60 000 руб.,Старший IT инженер,"Сочи , не готов к переезду , готов к командиро...","частичная занятость, полная занятость","гибкий график, полный день",Опыт работы 19 лет 2 месяца\n\nСтарший IT инже...,"Эскорт-Центр, Москва, ЗАО",Старший инженер сервисного центра,Высшее образование 1999 СОЧИНСКИЙ ГОС. УНИВЕР...,...,0.000000,0.000000,0.161124,0.161268,0.154957,0.150043,0.155290,0.000000,0.000000,0.243929
2,"Мужчина , 44 года , родился 21 февраля 1975",65 000 руб.,"Системный администратор, Системный инженер","Липецк , готов к переезду , готов к командировкам",полная занятость,"удаленная работа, полный день, вахтовый метод",Опыт работы 19 лет 9 месяцев\n\nСистемный адми...,"СПССПК ""ЭКОПТИЦА""",Системный администратор,Высшее образование 2003 Московский государств...,...,0.000000,0.000000,0.173794,0.173950,0.167142,0.161842,0.167501,0.255887,0.257147,0.000000
3,"Мужчина , 43 года , родился 9 февраля 1976",70 000 руб.,"Web-программист, Web-администратор, Web-мастер","Москва , м. Марьино , не готов к переезду , г...",полная занятость,"удаленная работа, гибкий график, полный день",Опыт работы 5 лет 9 месяцев\n\nWeb-программист...,"ГБУ ""МДОО""",специалист отдела интернет-проектов,Высшее образование 2002 Российский государств...,...,0.000000,0.216893,0.153364,0.153501,0.147494,0.142817,0.147811,0.225807,0.226918,0.000000
4,"Мужчина , 39 лет , родился 14 июня 1979",45 000 руб.,Старший системный администратор \ начальник от...,"Тула , не готов к переезду , готов к редким ко...","частичная занятость, полная занятость","полный день, сменный график",Опыт работы 13 лет 5 месяцев\n\nСтарший систем...,"ООО ""СервисСофт""",Старший системный администратор,Высшее образование 2007 Российский государств...,...,0.225303,0.000000,0.187112,0.187280,0.179951,0.174245,0.180338,0.000000,0.000000,0.283273
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
66940,"Мужчина , 22 года , родился 4 сентября 1996",50 000 руб.,Программист Python,"Иркутск , хочу переехать (Москва, Россия, Друг...","частичная занятость, проектная работа, полная...","гибкий график, полный день, сменный график, у...",Опыт работы 1 год\n\nИюнь 2018 — Август 2018 ...,Индивидуальное предпринимательство / частная п...,Программист Python,Среднее образование 2012 МБОУ СОШ №7,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
66941,"Мужчина , 26 лет , родился 2 декабря 1992",120 000 руб.,Руководитель отдела по работе с ключевыми клие...,"Москва , м. Новогиреево , готов к переезду (С...",полная занятость,полный день,Опыт работы 7 лет 9 месяцев\n\nАпрель 2018 — п...,RDP Group,Региональный менеджер по работе с ключевыми кл...,Высшее образование 2016 Национальный институт...,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
66942,"Мужчина , 55 лет , родился 21 февраля 1964",80 000 руб.,программист,"Санкт-Петербург , м. Гражданский проспект , г...","частичная занятость, проектная работа, полная...","гибкий график, полный день, удаленная работа",Опыт работы 34 года 6 месяцев\n\nДекабрь 1984 ...,ВНИИЭФ,Ведущий инженер-программист,Высшее образование 1989 Московский Инженерно-ф...,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
66943,"Женщина , 32 года , родилась 23 февраля 1987",70 000 руб.,Руководитель проектов,"Самара , не готова к переезду , готова к коман...",полная занятость,полный день,Опыт работы 9 лет 2 месяца\n\nМай 2014 — по на...,МегаФон,Старший менеджер по развитию систем самообслуж...,Высшее образование 2012 Самарская гуманитарна...,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
