In [2]:
# =========================================
# Skill Gap Analyzer
# 07_skillgap_features.ipynb
# Cell 0: Load Cleaned Skill Data
# =========================================

import pandas as pd

# Reload cleaned datasets
job_df = pd.read_csv("skillgap_job_cleaned.csv")
resume_df = pd.read_csv("skillgap_resume_cleaned.csv")

# Convert string lists back to Python lists
job_df["job_skills"] = job_df["job_skills"].apply(eval)
resume_df["resume_skills"] = resume_df["resume_skills"].apply(eval)

print("Job DF shape:", job_df.shape)
print("Resume DF shape:", resume_df.shape)

print("\nSample job skills:")
print(job_df["job_skills"].iloc[0])

print("\nSample resume skills:")
print(resume_df["resume_skills"].iloc[0])


Job DF shape: (22000, 2)
Resume DF shape: (962, 2)

Sample job skills:
['microsoft office', 'technical support', 'remote desktop']

Sample resume skills:
['machine learning', 'numpy', 'sql', 'tableau', 'git', 'pandas', 'css', 'java', 'python', 'html', 'deep learning', 'scikit', 'computer vision', 'docker', 'javascript']


In [5]:
# =========================================
# Cell 2: Save Skill Gap Feature Dataset
# =========================================

import os

# Create directory if not exists
os.makedirs("/content/data/features", exist_ok=True)

# Save dataset
skillgap_features_df.to_csv(
    "/content/data/features/skillgap_features.csv",
    index=False
)

print("✅ Skill Gap feature dataset saved successfully!")
print("Saved at: /content/data/features/skillgap_features.csv")


records = []

# Use first job profile as reference (demo)
job_required_skills = set(job_df["job_skills"].iloc[0])

N = 100  # number of resumes to process (scalable)

for i in range(min(N, len(resume_df))):
    candidate_skills = set(resume_df["resume_skills"].iloc[i])
    missing_skills = job_required_skills - candidate_skills

    if len(job_required_skills) > 0:
        readiness_score = (
            (len(job_required_skills) - len(missing_skills))
            / len(job_required_skills)
        ) * 100
    else:
        readiness_score = 0

    records.append({
        "resume_category": resume_df["Category"].iloc[i],
        "job_required_skills": list(job_required_skills),
        "candidate_skills": list(candidate_skills),
        "missing_skills": list(missing_skills),
        "readiness_score": round(readiness_score, 2)
    })

skillgap_features_df = pd.DataFrame(records)

print("Skill Gap Feature Dataset Shape:", skillgap_features_df.shape)
skillgap_features_df.head()


✅ Skill Gap feature dataset saved successfully!
Saved at: /content/data/features/skillgap_features.csv
Skill Gap Feature Dataset Shape: (100, 5)


Unnamed: 0,resume_category,job_required_skills,candidate_skills,missing_skills,readiness_score
0,Data Science,"[microsoft office, remote desktop, technical s...","[sql, machine learning, deep learning, scikit,...","[microsoft office, remote desktop, technical s...",0.0
1,Data Science,"[microsoft office, remote desktop, technical s...","[machine learning, git, sklearn, aws, keras, p...","[microsoft office, remote desktop, technical s...",0.0
2,Data Science,"[microsoft office, remote desktop, technical s...","[machine learning, excel, deep learning, linux...","[microsoft office, remote desktop, technical s...",0.0
3,Data Science,"[microsoft office, remote desktop, technical s...","[machine learning, deep learning, git, python,...","[microsoft office, remote desktop, technical s...",0.0
4,Data Science,"[microsoft office, remote desktop, technical s...","[python, java, data analysis]","[microsoft office, remote desktop, technical s...",0.0


In [4]:
# =========================================
# Cell 2: Save Skill Gap Feature Dataset
# =========================================

import os

# Create directory if not exists
os.makedirs("/content/data/features", exist_ok=True)

# Save dataset
skillgap_features_df.to_csv(
    "/content/data/features/skillgap_features.csv",
    index=False
)

print("✅ Skill Gap feature dataset saved successfully!")
print("Saved at: /content/data/features/skillgap_features.csv")


✅ Skill Gap feature dataset saved successfully!
Saved at: /content/data/features/skillgap_features.csv
