A skill gap analysis is typically used to compare the skills employees currently have against the skills required for a job or project, and to identify the gaps.

In [5]:
import pandas as pd

# Levels the employee currently holds, one (skill, level) record per row.
# Levels are assumed to be on a 1-5 scale.
employee_skills = pd.DataFrame(
    [
        ('Python', 3),
        ('SQL', 4),
        ('Machine Learning', 2),
        ('Communication', 5),
        ('Leadership', 2),
    ],
    columns=['Skill', 'Employee Level'],
)

# Levels the role/project asks for; includes a skill the employee does not list.
required_skills = pd.DataFrame(
    [
        ('Python', 4),
        ('SQL', 4),
        ('Machine Learning', 4),
        ('Communication', 4),
        ('Leadership', 3),
        ('Project Management', 4),
    ],
    columns=['Skill', 'Required Level'],
)

# Left join from the requirements so every required skill keeps a row.
# A skill the employee lacks has no match (NaN), which fillna turns into level 0.
merged = required_skills.merge(employee_skills, how='left', on='Skill').fillna(0)

# Positive value = shortfall, negative = surplus, zero = requirement met exactly.
merged['Skill Gap'] = merged['Required Level'].sub(merged['Employee Level'])


def _gap_status(gap):
    """Label one gap value as 'Gap', 'Overqualified', or 'Met'."""
    if gap > 0:
        return 'Gap'
    if gap < 0:
        return 'Overqualified'
    return 'Met'


merged['Gap Status'] = merged['Skill Gap'].map(_gap_status)

# Full table first, then only the rows where the employee falls short.
print("Skill Gap Analysis:")
print(merged)

print("\nSkills with Gaps:")
print(merged.loc[merged['Gap Status'].eq('Gap')])

Skill Gap Analysis:
                Skill  Required Level  Employee Level  Skill Gap  \
0              Python               4             3.0        1.0   
1                 SQL               4             4.0        0.0   
2    Machine Learning               4             2.0        2.0   
3       Communication               4             5.0       -1.0   
4          Leadership               3             2.0        1.0   
5  Project Management               4             0.0        4.0   

      Gap Status  
0            Gap  
1            Met  
2            Gap  
3  Overqualified  
4            Gap  
5            Gap  

Skills with Gaps:
                Skill  Required Level  Employee Level  Skill Gap Gap Status
0              Python               4             3.0        1.0        Gap
2    Machine Learning               4             2.0        2.0        Gap
4          Leadership               3             2.0        1.0        Gap
5  Project Management               4             0.0        4.0        Gap

In [2]:
import numpy as np

# Example data as NumPy arrays; position i in both arrays refers to the same skill.
required = np.array([4, 4, 4, 4, 3, 4])
employee = np.array([3, 4, 2, 5, 2, 0])

# Element-wise difference: positive where the requirement exceeds the current level.
gap = np.subtract(required, employee)

# Summary: the raw differences, then how many of them are strictly positive.
print("Skill Gap Array:", gap)
print("Number of gaps:", np.count_nonzero(gap > 0))


Skill Gap Array: [ 1  0  2 -1  1  4]
Number of gaps: 4


Weighted Skill Gap Analysis

In [3]:
import pandas as pd

# One record per skill: (skill, required level, employee level, weight).
# The weight is the multiplier applied to that skill's raw gap.
skills = pd.DataFrame(
    [
        ('Python', 4, 3, 3),
        ('SQL', 4, 4, 2),
        ('ML', 4, 2, 4),
        ('Communication', 4, 5, 1),
    ],
    columns=['Skill', 'Required', 'Employee', 'Weight'],
)

# Weighted gap per skill = (required - employee) * weight.
# A surplus gives a negative value (Communication: -1), so it offsets
# deficits in the total computed below rather than being ignored.
skills['Gap'] = skills['Required'].sub(skills['Employee']).mul(skills['Weight'])

# Net weighted gap across all skills.
total_gap = skills['Gap'].sum()
print("Weighted Skill Gap:", total_gap)


Weighted Skill Gap: 10


Handle multiple employees at once using a matrix (pivot table)

In [6]:
def skill_gap_matrix(levels, requirements):
    """Build an employee-by-skill matrix with one '<skill> Gap' column per required skill.

    Parameters
    ----------
    levels : pandas.DataFrame
        Long-format rows with columns 'Employee', 'Skill' and 'Employee Level'.
    requirements : pandas.DataFrame
        Rows with columns 'Skill' and 'Required Level'. If a skill is listed
        more than once, the first row is used.

    Returns
    -------
    pandas.DataFrame
        Indexed by employee. Holds one column per skill with the employee's
        level (0 where the employee has no entry for that skill), followed by
        '<skill> Gap' = required level - employee level for every required skill.

    Raises
    ------
    ValueError
        Raised by ``DataFrame.pivot`` when `levels` contains the same
        (Employee, Skill) pair more than once.
    """
    matrix = levels.pivot(index='Employee', columns='Skill', values='Employee Level')

    # Look the required levels up once instead of re-filtering the frame per skill.
    required_levels = (
        requirements.drop_duplicates(subset='Skill')
        .set_index('Skill')['Required Level']
    )

    # A required skill that no employee has produces no pivot column; add it at
    # level 0 so the gap computation below does not raise KeyError.
    for skill in required_levels.index:
        if skill not in matrix.columns:
            matrix[skill] = 0

    # Employees without an entry for a skill are treated as level 0.
    matrix = matrix.fillna(0)

    for skill, level in required_levels.items():
        matrix[f'{skill} Gap'] = level - matrix[skill]

    return matrix


# Sample data: one row per (employee, skill) pair.
data = {
    'Employee': ['Alice', 'Bob', 'Alice', 'Bob'],
    'Skill': ['Python', 'Python', 'SQL', 'SQL'],
    'Employee Level': [3, 4, 4, 2]
}

df = pd.DataFrame(data)
required = pd.DataFrame({'Skill': ['Python', 'SQL'], 'Required Level': [4, 4]})

# Employee skill matrix with a gap column for each required skill.
pivot = skill_gap_matrix(df, required)

print(pivot)


Skill     Python  SQL  Python Gap  SQL Gap
Employee                                  
Alice          3    4           1        0
Bob            4    2           0        2
