In [2]:
# Notebook banner: name the stage and state what the model is for.
print(
    "Model Building",
    "Matching the skill set from the resume with the actual needed skill set",
    sep="\n",
)

Model Building
Matching the skill set from the resume with the actual needed skill set


In [4]:
# Preprocessing

import pandas as pd
import ast
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MultiLabelBinarizer

# Load the role/skills table. The `Skills` column is parsed with
# ast.literal_eval, i.e. each cell is expected to hold a Python literal stored
# as text (presumably a list of skill names -- confirm against the CSV).
DATA_PATH = "ultimate_roles_skills_dataset.csv"  # Replace with your file path
df = pd.read_csv(DATA_PATH)
df['Skills'] = df['Skills'].map(ast.literal_eval)

# Turn every skill collection into a binary indicator row (one column per
# distinct skill seen while fitting); the `Role` column is the target.
mlb = MultiLabelBinarizer()
X = mlb.fit_transform(df['Skills'])
y = df['Role']

# Hold out a fixed fraction of rows for evaluation; the seed pins the shuffle
# so the same rows land in the test set on every run.
TEST_FRACTION = 0.2
SEED = 42
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=TEST_FRACTION,
    random_state=SEED,
)


In [5]:
# Random Forest Classifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Fixed seed so the fitted forest, and therefore the reported scores, are
# reproducible from one run to the next.
rf = RandomForestClassifier(random_state=42)
rf.fit(X_train, y_train)
rf_preds = rf.predict(X_test)

# NOTE(review): the recorded run scores 1.0 on the held-out split. A perfect
# score may indicate identical rows shared by train and test -- TODO confirm
# by checking the dataset for duplicates before trusting this number.
print("Random Forest Accuracy:", accuracy_score(y_test, rf_preds))

# Use a newline as the separator: with print's default " " separator the
# report's first (header) line gains a leading space and no longer lines up
# with the per-class rows beneath it.
print("Classification Report:", classification_report(y_test, rf_preds), sep="\n")


Random Forest Accuracy: 1.0
Classification Report:
                            precision    recall  f1-score   support

              AI Engineer       1.00      1.00      1.00         6
            AI Researcher       1.00      1.00      1.00         8
             BI Developer       1.00      1.00      1.00        14
         Business Analyst       1.00      1.00      1.00        14
           Cloud Engineer       1.00      1.00      1.00         9
              DL Engineer       1.00      1.00      1.00         6
             Data Analyst       1.00      1.00      1.00         9
            Data Engineer       1.00      1.00      1.00         7
           Data Scientist       1.00      1.00      1.00        10
          DevOps Engineer       1.00      1.00      1.00        12
         Digital Marketer       1.00      1.00      1.00         7
     Full Stack Developer       1.00      1.00      1.00         9
         Graphic Designer       1.00      1.00      1.00        13
         

In [6]:
# Logistic Regression
from sklearn.linear_model import LogisticRegression

# max_iter is raised to 1000 to give the solver more iterations to converge.
lr = LogisticRegression(max_iter=1000)
lr.fit(X_train, y_train)
lr_preds = lr.predict(X_test)

print("Logistic Regression Accuracy:", accuracy_score(y_test, lr_preds))

# Use a newline as the separator: with print's default " " separator the
# report's first (header) line gains a leading space and no longer lines up
# with the per-class rows beneath it.
print("Classification Report:", classification_report(y_test, lr_preds), sep="\n")


Logistic Regression Accuracy: 1.0
Classification Report:
                            precision    recall  f1-score   support

              AI Engineer       1.00      1.00      1.00         6
            AI Researcher       1.00      1.00      1.00         8
             BI Developer       1.00      1.00      1.00        14
         Business Analyst       1.00      1.00      1.00        14
           Cloud Engineer       1.00      1.00      1.00         9
              DL Engineer       1.00      1.00      1.00         6
             Data Analyst       1.00      1.00      1.00         9
            Data Engineer       1.00      1.00      1.00         7
           Data Scientist       1.00      1.00      1.00        10
          DevOps Engineer       1.00      1.00      1.00        12
         Digital Marketer       1.00      1.00      1.00         7
     Full Stack Developer       1.00      1.00      1.00         9
         Graphic Designer       1.00      1.00      1.00        13
   

In [7]:
# SVM
from sklearn.svm import SVC

# Support-vector classifier with the library's default settings.
svm = SVC()
svm.fit(X_train, y_train)
svm_preds = svm.predict(X_test)

print("SVM Accuracy:", accuracy_score(y_test, svm_preds))

# Use a newline as the separator: with print's default " " separator the
# report's first (header) line gains a leading space and no longer lines up
# with the per-class rows beneath it.
print("Classification Report:", classification_report(y_test, svm_preds), sep="\n")


SVM Accuracy: 1.0
Classification Report:
                            precision    recall  f1-score   support

              AI Engineer       1.00      1.00      1.00         6
            AI Researcher       1.00      1.00      1.00         8
             BI Developer       1.00      1.00      1.00        14
         Business Analyst       1.00      1.00      1.00        14
           Cloud Engineer       1.00      1.00      1.00         9
              DL Engineer       1.00      1.00      1.00         6
             Data Analyst       1.00      1.00      1.00         9
            Data Engineer       1.00      1.00      1.00         7
           Data Scientist       1.00      1.00      1.00        10
          DevOps Engineer       1.00      1.00      1.00        12
         Digital Marketer       1.00      1.00      1.00         7
     Full Stack Developer       1.00      1.00      1.00         9
         Graphic Designer       1.00      1.00      1.00        13
            HR Spec

In [8]:
# Example (assuming you've trained Random Forest model)
import joblib

# Persist both pieces needed at inference time: the fitted classifier and the
# binarizer that encodes skill lists into the feature layout it was trained on.
model_path = "job_role_predictor.pkl"
binarizer_path = "skill_binarizer.pkl"

joblib.dump(rf, model_path)
joblib.dump(mlb, binarizer_path)


['skill_binarizer.pkl']

In [9]:
import joblib

# Restore the saved classifier and skill binarizer from disk. Note that `mlb`
# is rebound here to the object read back from the file.
model_file = "job_role_predictor.pkl"
binarizer_file = "skill_binarizer.pkl"

model = joblib.load(model_file)
mlb = joblib.load(binarizer_file)


In [10]:
# Example user skills
sample_skills = ['Artificial Intelligence',"data visualization", 'Data Analytics', 'Natural Language Processing (NLP)']

# Align the requested skills with the vocabulary the binarizer was fitted on.
# `mlb.transform` drops labels it has never seen (emitting only a warning), so
# a skill that differs from the training spelling just by case or surrounding
# whitespace would silently contribute nothing to the prediction.
known_skills = set(mlb.classes_)
canonical_by_key = {str(known).strip().casefold(): known for known in mlb.classes_}

matched_skills, unknown_skills = [], []
for skill in sample_skills:
    if skill in known_skills:
        # Exact spelling always wins.
        matched_skills.append(skill)
        continue
    # Fall back to a case- and whitespace-insensitive lookup.
    canonical = canonical_by_key.get(str(skill).strip().casefold())
    if canonical is not None:
        matched_skills.append(canonical)
    else:
        unknown_skills.append(skill)

if unknown_skills:
    # Make the dropped inputs visible instead of relying on a library warning.
    print("Ignoring skills not seen during training:", unknown_skills)

if not matched_skills:
    # An all-zero feature vector carries no information; refuse to predict.
    raise ValueError(
        "None of the supplied skills are known to the skill binarizer; cannot predict a role."
    )

# Transform using same skill binarizer
sample_vector = mlb.transform([matched_skills])

# Predict role
predicted_role = model.predict(sample_vector)
print("Predicted Role:", predicted_role[0])


Predicted Role: AI Engineer


