In [1]:
import pandas as pd, skill_cat, multiprocessing.dummy
from jobs_skills_weights import *

In [2]:
# Reload the category and stemming configuration before any data is loaded.
for _module in (skill_cat, stemming):
    _module._refresh()

In [3]:
# Load the job postings table (indexed by job id, one row per posting).
# NOTE(review): `bookmarked=False` is forwarded to get_jobs; confirm whether it
# excludes bookmarked jobs or merely does not require them.
jobs = get_jobs(bookmarked=False)

In [4]:
# Sanity check: number of job postings loaded.
len(jobs)

100

In [5]:
# Peek at the first rows to confirm the columns
# (company_name, location, role, url, excitement).
jobs.head(3)

Unnamed: 0_level_0,company_name,location,role,url,excitement
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
bd59469c-29f4-41b9-9001-5b7902d766e1,Ansys,"Madrid, Community of Madrid, Spain",R&D Engineer II - Python f/m,https://www.linkedin.com/jobs/search/?currentJ...,4
0124e218-2d33-41e8-907b-5228ea386455,Treibacher Industrie AG,"Althofen, Carinthia, Austria",AI/ML Specialist – Software Engineer (m/f/d),https://www.linkedin.com/jobs/search/?geoId=91...,4
35b60a7d-f9a3-4f63-b8ab-4db8db49fca8,Helsing,"Paris, Île-de-France, France",Deployed AI Engineer,https://www.linkedin.com/jobs/search/?currentJ...,5


# What is the goal here?
0. Most Teal skills are categorized into Resume skills
1. Teal skills get high priority in the resume
2. Resume skills in popular categories get medium priority in the resume

In [6]:
def pooldict(func, keys, pool):
    """Apply ``func`` to every key on ``pool`` and return ``{key: func(key)}``.

    Args:
        func: Callable taking a single key.
        keys: Iterable of hashable keys. It is materialised once, so one-shot
            iterators (generators, ``iter(...)``) are supported.
        pool: Object exposing ``map(func, iterable)`` that returns results in
            input order, e.g. ``multiprocessing.dummy.Pool``.

    Returns:
        dict mapping each key to its result. Duplicate keys keep the last result.
    """
    # The keys are needed twice (once for map, once for zip). A one-shot
    # iterator would be exhausted by the first use and produce an empty dict.
    keys = list(keys)
    return dict(zip(keys, pool.map(func, keys)))


# Thread-backed pool (multiprocessing.dummy wraps threading), default worker count.
pool = multiprocessing.dummy.Pool()

In [7]:
# Call job_skills once per job id, concurrently on the thread pool.
# The result maps job id -> whatever job_skills returns for that id
# (presumably the raw, unparsed skills payload; verify against job_skills).
raw_skills_data = pooldict(job_skills, jobs.index, pool)

In [29]:
# Reload the category and stemming configuration so that the (already
# downloaded) raw data is parsed with the latest settings.
for _module in (skill_cat, stemming):
    _module._refresh()

# Parse each job's raw payload, then stack the results under a three-level index.
parsed_by_job = {}
for job_id, raw_skills in raw_skills_data.items():
    parsed_by_job[job_id] = parse_skills_data(raw_skills)
jobs_skills_data = pd.concat(parsed_by_job, names=["id", "teal category", "skill name"])

# Filter out unusable entries.
# Teal categories excluded from the analysis; use set() to keep every category.
ignored_teal_categories = {"emphasis", "company attributes"}

# skipped_skills.md holds one skill name per line (*.md only so that I get text
# highlighting in an IDE). Blank lines and lines starting with '#' are ignored.
# - Explicit UTF-8 keeps non-ASCII skill names from depending on the platform's
#   default encoding.
# - Entries are stripped so that stray leading/trailing whitespace in the file
#   cannot silently prevent a skill from being skipped.
with open("personal_track/skipped_skills.md", "r", encoding="utf-8") as skipped_skills_f:
    skipped_skills = {
        line.strip()
        for line in skipped_skills_f.read().splitlines()
        if line.strip() and not line.lstrip().startswith("#")
    }

# Both names in the query are index levels; .copy() detaches the result so the
# column assignments that follow do not write into a view of the parsed data.
jobs_skills_data = jobs_skills_data.query(
    "`teal category` not in @ignored_teal_categories and not (`skill name`.isin(@skipped_skills))"
).copy(deep=True)

# Build a globally comparable score per row so that rows can be summed across jobs:
#   (row count / total count within its job) * job excitement / total excitement.
total_excitement = jobs["excitement"].sum()
job_count_sums = jobs_skills_data["count"].groupby(level="id").sum()

# Per-job series are aligned to the multi-indexed rows via the shared "id" level.
share_within_job = jobs_skills_data["count"].div(job_count_sums)
excitement_weighted = share_within_job.mul(jobs["excitement"])
jobs_skills_data["share of total excitement"] = excitement_weighted.div(total_excitement)

# Flag rows whose skill name has a category (shorthand for "I have it in my
# resume"), so later cells can group or filter on it.
skill_names = jobs_skills_data.index.get_level_values("skill name")
jobs_skills_data["is categorized"] = [
    skill in skill_cat.skill_to_categories for skill in skill_names
]

In [30]:
# Top 20 skills without a category, ranked by their summed share of total excitement.
uncategorized_rows = pd.IndexSlice[False, :, :]  # "is categorized" == False, any category / skill
(
    jobs_skills_data
    .groupby(["is categorized", "teal category", "skill name"])
    .sum()
    .loc[uncategorized_rows, :]
    .droplevel("is categorized")
    .sort_values("share of total excitement", ascending=False)
    .head(20)
)

Unnamed: 0_level_0,Unnamed: 1_level_0,count,share of total excitement
teal category,skill name,Unnamed: 2_level_1,Unnamed: 3_level_1
functional abilities,events,19,0.003716
functional abilities,computer vision,30,0.003676
general abilities,best practices,21,0.003098
general abilities,recruitment,16,0.002853
general abilities,passion,18,0.002656
functional abilities,signal processing,13,0.002416
functional abilities,interview process,8,0.002334
functional abilities,acquisition,14,0.002236
functional abilities,development experience,11,0.002069
functional abilities,sustainability,9,0.002049


In [12]:
# List [url, count] for the jobs that mention one skill within one Teal
# category, most frequent first.
teal_category = "general abilities"
skill_name = "management"
matching_rows = jobs_skills_data.query(
    "`skill name` == @skill_name and `teal category` == @teal_category"
)
(
    jobs.join(matching_rows, how="left")
    .sort_values("count", ascending=False)
    .loc[:, ["url", "count"]]
    .values.tolist()
)

[]

In [13]:
# List [url, count] for the jobs that mention one skill in any Teal category,
# most frequent first.
skill_name = "relationships"
matching_rows = jobs_skills_data.query("`skill name` == @skill_name")
(
    jobs.join(matching_rows, how="left")
    .sort_values("count", ascending=False)
    .loc[:, ["url", "count"]]
    .values.tolist()
)

[]

In [25]:
# Fraction of the excitement score carried by categorized skills: overall
# (printed) and per Teal category (displayed). A category with no categorized
# rows has no match in the numerator and is reported as 0.
share_col = ["share of total excitement"]
categorized_rows = jobs_skills_data[jobs_skills_data["is categorized"]]

print("Share of Total Excitement which fits a category:\n\n")
overall_ratio = categorized_rows[share_col].sum() / jobs_skills_data[share_col].sum()
print(overall_ratio.fillna(0))

per_category_ratio = (
    categorized_rows.groupby("teal category").sum()
    / jobs_skills_data.groupby("teal category").sum()
)
per_category_ratio[share_col].fillna(0)

Share of Total Excitement which fits a category:


share of total excitement    0.585257
dtype: float64


Unnamed: 0_level_0,share of total excitement
teal category,Unnamed: 1_level_1
certifications,0.0
functional abilities,0.50236
general abilities,0.696839
platform,0.756946


In [27]:
# Mean count of categorized rows relative to the mean count of all rows:
# overall (printed) and per Teal category (displayed). A category with no
# categorized rows has no match in the numerator and is reported as 0.
count_col = ["count"]
categorized_rows = jobs_skills_data[jobs_skills_data["is categorized"]]

print("Density of categorized counts vs all counts:\n\n")
overall_density = categorized_rows[count_col].mean() / jobs_skills_data[count_col].mean()
print(overall_density.fillna(0))

per_category_density = (
    categorized_rows.groupby("teal category").mean()
    / jobs_skills_data.groupby("teal category").mean()
)
per_category_density[count_col].fillna(0)

Density of categorized counts vs all counts:


count    1.27767
dtype: float64


Unnamed: 0_level_0,count
teal category,Unnamed: 1_level_1
certifications,0.0
functional abilities,1.269034
general abilities,1.290344
platform,1.0725
