In [1]:
%pip install ipywidgets


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m23.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [42]:
import pandas as pd
import ipywidgets as widgets
import importlib  
try:
  constants = importlib.import_module("knowledge-bases.hw1.constants")
except:
  print('')
from constants import get_output_paths, OUTPUT_FILE_NAME




In [40]:
df.dtypes

URL             object
Company         object
Job             object
Salary          object
Location        object
Views            int64
Responses      float64
Category        object
Employment      object
Experience      object
English         object
Domain          object
Description     object
dtype: object

In [87]:
df = pd.read_csv(get_output_paths(OUTPUT_FILE_NAME)[0])

def extract_from_salary(salary_range):
    if pd.isna(salary_range):
        return None
    if "до" in salary_range:
        return None
    salary_parts = salary_range.split('-')
    return int(salary_parts[0])

def extract_to_salary(salary_range):
    if pd.isna(salary_range):
        return None
    if "до" in salary_range:
        return salary_range.split()[1]
    salary_parts = salary_range.split('-')
    if len(salary_parts) == 1:
        return None
    salary_to = int(salary_parts[1])
    return salary_to

def extract_countries(location):
    return ",".join(location.split("(")[0].replace(" ", "").split(','))

def extract_cities(location):
    countries_cities = location.split("(")
    if len(countries_cities) == 1:
        return None
    return ",".join(countries_cities[1].replace(" ", "").replace(")", "").split(','))

def extract_english(english):
    if pd.isna(english):
        return None
    return english.replace("Англійська:", "").replace(" ", "")

df['Salary_from'] = df['Salary'].apply(extract_from_salary)
df['Salary_to'] = df['Salary'].apply(extract_to_salary)
df['Countries'] = df['Location'].apply(extract_countries)
df['Cities'] = df['Location'].apply(extract_cities)
df['English'] = df['English'].apply(extract_english)
df['Salary_from'] = pd.to_numeric(df['Salary_from'], errors='coerce')
df['Salary_to'] = pd.to_numeric(df['Salary_to'], errors='coerce')

df[~df['Salary'].isna()].head(5)

Unnamed: 0,URL,Company,Job,Salary,Location,Views,Responses,Category,Employment,Experience,English,Domain,Description,Salary_from,Salary_to,Countries,Cities
6,https://djinni.co/jobs/606318-storage-inzhener,Object First,Storage інженер,1200-2000,Україна (Київ),12,1.0,Sysadmin,Тільки віддалено,3.0,,Домен: Security,Обов’язки:Виконання різноманітних видів ручног...,1200.0,2000.0,Україна,Київ
7,https://djinni.co/jobs/597673-finance-analyst,First.ua,Finance Analyst,2000-3000,Україна (Київ),7,,Other,Тільки офіс,2.0,,Домен: Gambling,Шукаємо:-Має 2+ роки досвіду на позиції Financ...,2000.0,3000.0,Україна,Київ
19,https://djinni.co/jobs/606308-middle-senior-cl...,itsoft,Middle/Senior Cloud Operations Specialist,4000-6000,"Вірменія, Ізраїль, Україна",20,3.0,DevOps,Тільки віддалено,3.0,Upper-Intermediate,Домен: Hardware / IoT,General informationCommit is the leading syste...,4000.0,6000.0,"Вірменія,Ізраїль,Україна",
21,https://djinni.co/jobs/598576-head-of-digital-...,Supplax,Head of digital marketing,1200-1700,Україна,5,1.0,Marketing,Тільки віддалено,2.0,Intermediate,Домен: Advertising / Marketing,Хто ми?Ми працюємо по моделі Убер. Займаємося ...,1200.0,1700.0,Україна,
27,https://djinni.co/jobs/606304-trainee-affiliat...,Infinite Plus,Trainee Affiliate Manager,300-500,Україна,25,3.0,Marketing,Тільки віддалено,,Upper-Intermediate,Домен: E-commerce / Marketplace,We are looking for Trainee Affiliate Manager t...,300.0,500.0,Україна,


In [181]:
from ipywidgets import Box, VBox, widgets
import math

category_dropdown = widgets.Dropdown( options=list(set(df['Category'].values)), description='Category:')
experience_dropdown = widgets.Dropdown( options=[x for x in list(set(df['Experience'].values)) if not math.isnan(x)], description='Experience:')
country_dropdown = widgets.Dropdown( options=list(set([y.strip() for x in list(set(df['Countries'].values)) for y in x.split(',')])), value= "Україна", description='Country:')

multi_cities = [x for x in list(set(df['Cities'].values)) if not(type(x) == float and math.isnan(x))]
city_dropdown = widgets.Dropdown( options=list(set([y.strip() for x in multi_cities if x for y in x.split(',') if '+' not in y.strip()])), description='City:')
english_dropdown = widgets.Dropdown( options=list(set(df['English'].values)), description='English:')
employment_dropdown = widgets.Dropdown( options=list(set([x for x in df['Employment'].values if ',' not in x])), description='Employment:')

multi_domains = [x.replace("Домен: ", "") for x in list(set(df['Domain'].values)) if not(type(x) == float and math.isnan(x))]
domains_multiselect = widgets.SelectMultiple(
    options=[y.strip() for x in multi_domains for y in x.split('/')],
    description='Not interested in Domains:',
)

salary_from_slider = widgets.IntSlider(
  value=1000,
    min=0,
    max=max([x for x in df['Salary_to'].values if not math.isnan(x)]),
    step=500,
    description='Salary from:',
    continuous_update=False,
    orientation='horizontal',
    readout=True,
    readout_format='d'
)
widget_list = [
  category_dropdown, 
  experience_dropdown, 
  salary_from_slider, 
  country_dropdown,
  city_dropdown, 
  english_dropdown, 
  employment_dropdown, 
  domains_multiselect
  ]

changed_idxs = set()
for widget in widget_list:
  widget.observe(lambda change: changed_idxs.add(widget_list.index(change.owner)), names='value')

Box([VBox(children=widget_list)])

Box(children=(VBox(children=(Dropdown(description='Category:', options=('Lead', 'Rust', 'Data Analyst', 'Sales…

In [183]:
# Посада                    -> Job
job = "Data"
# Категорія                 -> Category
category = category_dropdown.value
# Навички                   -> FULL_TEXT in Description
skills = ["python"]
# Досвід роботи             -> Experience
experience = experience_dropdown.value
# Зарплатні очікування      -> Salary [estimate]
salary_from = salary_from_slider.value 
# Країна перебування        -> Location
country = country_dropdown.value
# Місто перебування         -> Location
city = city_dropdown.value
# Рівень англійської        -> English
english = english_dropdown.value
# Варіанти зайнятості       -> Employment
employment = employment_dropdown.value
# Не розглядаю              -> Domain
domains = domains_multiselect.value

(job,category,skills,experience,salary_from,country,city,english,employment,domains, changed_idxs)

('Data',
 'Lead',
 ['python'],
 1.0,
 1000,
 'Україна',
 'Львів',
 'Upper-Intermediate',
 'Тільки віддалено',
 (),
 set())

In [210]:
Job_bool =   (df['Job'].str.contains(job, case=False, na=False) if job else True) 
Category_bool =   (df['Category'].str.lower() == category.lower() if 0 in changed_idxs else True) 
Description_bool =   (df['Description'].str.contains('|'.join(skills), case=False, na=False) if len(skills) else True) 
Experience_bool =   (df['Experience'] <= experience if 1 in changed_idxs else True) 
Salary_bool =   (df['Salary'].isna() | ((df['Salary_from'] <= salary_from) & (df['Salary_to'] >= salary_from)) if 2 in changed_idxs else True) 
Countries_bool =   (df['Countries'].str.contains(country, case=False, na=False) if 3 in changed_idxs else True) 
Cities_bool =   (df['Cities'].str.contains(city, case=False, na=False) if 4 in changed_idxs else True) 
English_bool =   (df['English'].isna() | (df['English'] == english) if 5 in changed_idxs else True) 
Employment_bool =   (df['Employment'].str.contains(employment, case=False, na=False) if 6 in changed_idxs else True) 
Domain_bool =  (~df['Domain'].str.contains('|'.join(domains), case=False, na=False) if 7 in changed_idxs else True)


In [211]:
(
  0 if type(Job_bool) == bool else (Job_bool==True).sum(),
  0 if type(Category_bool) == bool else (Category_bool==True).sum(),
  0 if type(Description_bool) == bool else (Description_bool==True).sum(),
  0 if type(Experience_bool) == bool else (Experience_bool==True).sum(),
  0 if type(Salary_bool) == bool else (Salary_bool==True).sum(),
  0 if type(Countries_bool) == bool else (Countries_bool==True).sum(),
  0 if type(Cities_bool) == bool else (Cities_bool==True).sum(),
  0 if type(English_bool) == bool else (English_bool==True).sum(),
  0 if type(Employment_bool) == bool else (Employment_bool==True).sum(),
  0 if type(Domain_bool) == bool else (Domain_bool==True).sum(),
)


(36, 0, 153, 0, 0, 0, 0, 0, 0, 0)

In [212]:

df[
  Job_bool &
  Category_bool &
  Description_bool &
  Experience_bool &
  Salary_bool &
  Countries_bool &
  Cities_bool &
  English_bool &
  Employment_bool &
  Domain_bool
  ]



Unnamed: 0,URL,Company,Job,Salary,Location,Views,Responses,Category,Employment,Experience,English,Domain,Description,Salary_from,Salary_to,Countries,Cities
9,https://djinni.co/jobs/598678-junior-middle-da...,BetterMe,Junior/Middle Data Analyst,,Україна (Київ),20,3.0,Data Analyst,Office або Remote,1.0,,Домен: Healthcare / MedTech,ABOUT US:BetterMe is a health & wellness platf...,,,Україна,Київ
73,https://djinni.co/jobs/606275-data-analyst,12go.asia,Data Analyst,2500-3000,Україна (Київ),81,23.0,Data Analyst,Тільки віддалено,3.0,Intermediate,Домен: Travel / Tourism,About 12Go.Asia: - Leading multi-modal OTA (On...,2500.0,3000.0,Україна,Київ
76,https://djinni.co/jobs/580922-middle-data-engi...,Sigma Software,Middle Data Engineer (Healthcare domain),,"Болгарія, Чехія, Угорщина, Польща,...",6,1.0,Data Engineer,Office або Remote,2.0,Upper-Intermediate,Домен: Healthcare / MedTech,Sigma Software is looking for a motivated Data...,,,"Болгарія,Чехія,Угорщина,Польща,Португалія",
163,https://djinni.co/jobs/606196-data-scientist-r...,Grid Dynamics,Data Scientist (Retraining Opportunity),,"Україна (Дніпро, Київ, Львів, Харків)",42,2.0,Data Science,Office або Remote,3.0,Upper-Intermediate,,We have a unique retraining opportunity for in...,,,Україна,"Дніпро,Київ,Львів,Харків"
169,https://djinni.co/jobs/606193-data-scientist,AMLBot,Data Scientist,,Україна (Київ),44,7.0,Data Science,Office або Remote,3.0,Intermediate,Домен: Blockchain / Crypto,AMLBot is looking for an experienced Data Scie...,,,Україна,Київ
217,https://djinni.co/jobs/606153-lead-data-analyst,dok.ua,Lead Data Analyst,,Україна (Київ),14,1.0,Data Analyst,Гібридна робота,3.0,Intermediate,Домен: E-commerce / Marketplace,🔊 dok.ua - працюємо як e-commerce з 10-річним ...,,,Україна,Київ
218,https://djinni.co/jobs/606159-middle-data-engi...,GR8 Tech,Middle Data Engineer for Data Models Team,,"Чехія, Польща, Україна",67,3.0,Data Engineer,Office або Remote,2.0,Upper-Intermediate,Домен: Gambling,About your key responsibilities and impact:- W...,,,"Чехія,Польща,Україна",
349,https://djinni.co/jobs/606020-data-quality-ass...,Dash Financial Technologies,Data Quality Assurance Engineer,5000-5500,Сполучені Штати,157,24.0,QA Automation,Тільки віддалено,5.0,Upper-Intermediate,Домен: Fintech,Please make sure you have read the Job Descrip...,5000.0,5500.0,СполученіШтати,
356,https://djinni.co/jobs/606015-middle-big-data-...,Adjutor,Middle Big Data Engineer,до 5600,Україна,56,10.0,Data Engineer,Тільки віддалено,3.0,Upper-Intermediate,Домен: Machine Learning / Big Data,Our partner is a one-stop custom software deve...,,5600.0,Україна,
401,https://djinni.co/jobs/605997-senior-data-scie...,Eastern Peak,Senior Data Science / Machine Learning Engineer,,"Україна, Ізраїль, Велика Британія",40,11.0,Data Science,Тільки віддалено,5.0,Upper-Intermediate,,We are looking for an experienced Data Science...,,,"Україна,Ізраїль,ВеликаБританія",
