In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
# Load the cleaned job-postings parquet (it includes a `skill` column) and
# print its column dtypes and memory footprint.
df_jobs_category = pd.read_parquet(path='../data/cleaned-sgjobdata-withskills.parquet')
df_jobs_category.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6263478 entries, 0 to 6263477
Data columns (total 14 columns):
 #   Column                  Dtype  
---  ------                  -----  
 0   job_id                  string 
 1   title                   string 
 2   company                 string 
 3   min_exp                 Int64  
 4   positionlevels          string 
 5   posting_date            string 
 6   num_applications        Int64  
 7   num_views               Int64  
 8   num_vacancies           Int64  
 9   categories              string 
 10  average_salary          Float64
 11  average_salary_cleaned  Float64
 12  jobtitle_cleaned        string 
 13  skill                   string 
dtypes: Float64(2), Int64(4), string(8)
memory usage: 704.9 MB


In [2]:
# Load the cleaned parquet under the name used by the skill-level cells,
# then print its column dtypes and memory footprint.
df_jobs_skill = pd.read_parquet(path='../data/cleaned-sgjobdata-withskills.parquet')
df_jobs_skill.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6263478 entries, 0 to 6263477
Data columns (total 13 columns):
 #   Column                  Dtype  
---  ------                  -----  
 0   job_id                  string 
 1   title                   string 
 2   company                 string 
 3   min_exp                 Int64  
 4   positionlevels          string 
 5   num_applications        Int64  
 6   num_views               Int64  
 7   num_vacancies           Int64  
 8   categories              string 
 9   average_salary          Float64
 10  average_salary_cleaned  Float64
 11  jobtitle_cleaned        string 
 12  skill                   string 
dtypes: Float64(2), Int64(4), string(7)
memory usage: 657.1 MB


In [3]:
# Row count per skill, most frequent first.
df_jobs_skill['skill'].value_counts()

skill
Stakeholder Management                     156298
Change Management                           91700
Business Negotiation                        90049
Project Management                          69485
Market Research                             67281
                                            ...  
Capital Management                              1
Rolling Stock Car Body Maintenance              1
Rolling Stock Brake Systems Maintenance         1
Rolling Stock Bogie Maintenance                 1
Immersive Video Editing                         1
Name: count, Length: 2053, dtype: Int64

In [5]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Animated horizontal bar chart: for every posting month, the skills with the
# most unique job postings, with a slider to step through the months.

TOP_N_SKILLS = 10        # number of bars shown per month
FRAME_DURATION_MS = 300  # duration of each slider step / transition


def top_skills_for_month(skill_counts, month, n=TOP_N_SKILLS):
    """Return the `n` skills with the most unique postings in `month`.

    Parameters
    ----------
    skill_counts : DataFrame with columns 'year_month' (str), 'skill' and
        'job_id' (the unique-posting count for that month/skill pair).
    month : str label to select, as it appears in 'year_month'.
    n : how many skills to keep.

    Returns
    -------
    DataFrame sorted by ascending count, so the largest bar is drawn at the
    top of a horizontal bar chart.
    """
    in_month = skill_counts[skill_counts['year_month'] == month]
    return in_month.nlargest(n, 'job_id').sort_values('job_id', ascending=True)


def skills_bar(top_skills):
    """Build the horizontal bar trace for one month's top-skills table."""
    return go.Bar(y=top_skills['skill'], x=top_skills['job_id'], orientation='h')


def chart_title(month):
    """Return the figure title for the given month label."""
    return f'Top {TOP_N_SKILLS} Skills by Month: {month}'


# Fail early with an actionable message instead of a bare KeyError when the
# loaded frame does not carry posting dates.
if 'posting_date' not in df_jobs_skill.columns:
    raise KeyError(
        "df_jobs_skill has no 'posting_date' column; load a dataset that "
        "includes posting dates before building the monthly skill chart."
    )

# Parse before using `.dt`: the accessor only works on datetime-like values,
# and pd.to_datetime leaves an already-parsed column unchanged.
df_jobs_skill['year_month'] = pd.to_datetime(df_jobs_skill['posting_date']).dt.to_period('M')

# Unique job postings per (month, skill); the count lands in the 'job_id' column.
skill_by_month = (
    df_jobs_skill
    .groupby(['year_month', 'skill'])['job_id']
    .nunique()
    .reset_index()
)
# String labels are used for frame names and slider step labels.
skill_by_month['year_month'] = skill_by_month['year_month'].astype(str)

# 'YYYY-MM' labels sort chronologically as plain strings.
months = sorted(skill_by_month['year_month'].unique())
if not months:
    raise ValueError('No dated job postings available to build the skill chart.')

# One animation frame per month. Each frame carries its own title so the
# heading follows the slider instead of staying on the first month.
frames = []
for month in months:
    month_data = top_skills_for_month(skill_by_month, month)
    frames.append(go.Frame(
        data=[skills_bar(month_data)],
        layout=go.Layout(title_text=chart_title(month)),
        name=month,
    ))

# The figure starts on the earliest month.
initial_month = months[0]
initial_data = top_skills_for_month(skill_by_month, initial_month)

fig = go.Figure(data=[skills_bar(initial_data)], frames=frames)

# Use one x-range for all frames, as Plotly's animation docs recommend, so no
# month's bars fall outside the axis. The overall maximum count is always in
# some month's top N, so it bounds every frame.
x_max = float(skill_by_month['job_id'].max()) * 1.05

fig.update_layout(
    title=chart_title(initial_month),
    xaxis_title='Number of Unique Job Postings',
    yaxis_title='Skill',
    xaxis_range=[0, x_max],
    height=600,
    width=1000,
    sliders=[{
        'active': 0,
        'steps': [
            {
                'args': [[f.name], {'frame': {'duration': FRAME_DURATION_MS}, 'mode': 'immediate'}],
                'label': f.name,
                'method': 'animate',
            }
            for f in frames
        ],
        'transition': {'duration': FRAME_DURATION_MS},
    }],
)

fig.show()