In [1]:
import pandas as pd

# Load the jobs dataset (path is relative to the notebook's directory) and preview it.
DATA_CSV = '../data/jobs_dataset_with_features.csv'
df = pd.read_csv(DATA_CSV)
df.head()

Unnamed: 0,Role,Features
0,Social Media Manager,5 to 15 Years Digital Marketing Specialist M.T...
1,Frontend Web Developer,"2 to 12 Years Web Developer BCA HTML, CSS, Jav..."
2,Quality Control Manager,0 to 12 Years Operations Manager PhD Quality c...
3,Wireless Network Engineer,4 to 11 Years Network Engineer PhD Wireless ne...
4,Conference Manager,1 to 12 Years Event Manager MBA Event planning...


In [2]:
# (rows, columns) of the dataset as loaded from the CSV
df.shape

(1615940, 2)

In [4]:
# Rows per role, most frequent first — used to judge class imbalance
df['Role'].value_counts()

Role
Interaction Designer            20580
Network Administrator           17470
User Interface Designer         14036
Social Media Manager            13945
User Experience Designer        13935
                                ...  
Inventory Control Specialist     3342
Budget Analyst                   3335
Clinical Nurse Manager           3324
Social Science Researcher        3321
Paid Advertising Specialist      3306
Name: count, Length: 376, dtype: int64

In [3]:
# Keep only the roles that appear at least `min_count` times.
min_count = 6500
role_counts = df['Role'].value_counts()

# Roles whose frequency falls below the threshold are the ones to discard.
is_rare = role_counts.lt(min_count)
dropped_classes = role_counts.loc[is_rare].index

# Select the surviving rows and renumber them from zero.
keep_rows = ~df['Role'].isin(dropped_classes)
filtered_df = df.loc[keep_rows].reset_index(drop=True)

# Sanity check: every remaining role should now meet the threshold.
filtered_df['Role'].value_counts()

Role
Interaction Designer          20580
Network Administrator         17470
User Interface Designer       14036
Social Media Manager          13945
User Experience Designer      13935
                              ...  
Benefits Coordinator           6839
Research Analyst               6830
Administrative Coordinator     6803
IT Support Specialist          6799
UI/UX Designer                 6743
Name: count, Length: 61, dtype: int64

In [6]:
# Down-sample to 10,000 rows to keep vectorization and training fast.
# The seed is pinned (same value as the split and the model use) so the sample,
# and therefore every downstream result, is reproducible across kernel restarts.
# NOTE(review): this rebinds `df`, which the filtering cell reads as the full
# dataset; re-running that cell after this one would operate on the sample.
df = filtered_df.sample(n=10000, random_state=42)

In [7]:
# Confirm the sample has the requested number of rows
df.shape

(10000, 2)

In [8]:
# Peek at the sampled rows; the index still carries the row labels from filtered_df
df.head()

Unnamed: 0,Role,Features
288644,Database Administrator,2 to 8 Years Systems Administrator PhD Databas...
184555,Wedding Planner,2 to 11 Years Event Planner BBA Wedding planni...
128302,Database Administrator,4 to 8 Years Systems Administrator BBA Databas...
394073,Investment Advisor,3 to 12 Years Financial Advisor MBA Investment...
116926,Social Media Manager,4 to 8 Years Digital Marketing Specialist M.Co...


In [9]:
# Rows per role within the sample, most frequent first
df['Role'].value_counts()

Role
Interaction Designer        398
Network Administrator       347
Social Media Manager        260
User Experience Designer    254
Procurement Analyst         248
                           ... 
Financial Analyst           123
Supply Chain Manager        120
Water Resources Engineer    120
Wedding Planner             116
Front-End Developer         114
Name: count, Length: 61, dtype: int64


### TF-IDF & LabelEncoder

In [11]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Text features and the role label to predict.
X, y = df['Features'], df['Role']

# Map each role name to an integer class id.
encoder = LabelEncoder()
encoder.fit(y)
y_encoder = encoder.transform(y)

# Hold out 20% of the rows for evaluation; the seed fixes the split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_encoder,
    test_size=0.2,
    random_state=42,
)

# Learn the TF-IDF vocabulary on the training text only, then reuse it for the test text.
vectorizer = TfidfVectorizer()
X_train_vectorized = vectorizer.fit_transform(X_train)
X_test_vectorized = vectorizer.transform(X_test)

In [28]:
# Number of distinct roles in the sample (missing values, if any, count as one)
df['Role'].nunique(dropna=False)

61

In [33]:
# Build a plain {class id: role name} lookup from the fitted encoder.
# Ids are cast to built-in int so the mapping holds no NumPy integer keys.
encoded_ids = encoder.transform(encoder.classes_)
labels = {}
for encoded, original in zip(encoded_ids, encoder.classes_):
    labels[int(encoded)] = original
labels

{0: 'Account Executive',
 1: 'Administrative Coordinator',
 2: 'Automation Tester',
 3: 'Backend Developer',
 4: 'Benefits Coordinator',
 5: 'Business Intelligence Analyst',
 6: 'Client Relationship Manager',
 7: 'Content Creator',
 8: 'Content Strategist',
 9: 'Customer Success Manager',
 10: 'Customer Support Specialist',
 11: 'Data Analyst',
 12: 'Data Entry Specialist',
 13: 'Data Scientist',
 14: 'Database Administrator',
 15: 'Demand Planner',
 16: 'DevOps Engineer',
 17: 'Event Coordinator',
 18: 'Event Planner',
 19: 'Executive Assistant',
 20: 'Family Law Attorney',
 21: 'Financial Analyst',
 22: 'Front-End Developer',
 23: 'Frontend Developer',
 24: 'IT Project Manager',
 25: 'IT Support Specialist',
 26: 'Inside Sales Representative',
 27: 'Interaction Designer',
 28: 'Inventory Manager',
 29: 'Investment Advisor',
 30: 'Manufacturing Engineer',
 31: 'Market Research Analyst',
 32: 'Market Researcher',
 33: 'Network Administrator',
 34: 'Network Security Analyst',
 35: 'Offi

In [12]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Fit a random forest on the TF-IDF features; the seed keeps the forest reproducible.
model = RandomForestClassifier(random_state=42).fit(X_train_vectorized, y_train)

# Accuracy on the data the model was trained on.
y_pred_train = model.predict(X_train_vectorized)
accuracy_train = accuracy_score(y_train, y_pred_train)

# Accuracy on the held-out split.
y_pred_test = model.predict(X_test_vectorized)
accuracy_test = accuracy_score(y_test, y_pred_test)

# NOTE(review): the recorded run shows 1.0 on both splits — worth checking whether
# the Features text already contains the role (label leakage) before trusting it.
print('Accuracy of Train:', accuracy_train)
print('Accuracy of Test: ', accuracy_test)

Accuracy of Train: 1.0
Accuracy of Test:  1.0


## Dumping the files

In [35]:
# Dumping the pickle files
import pickle
import os
import json

# Destination folder for everything the recommender needs at inference time.
artifacts_path = '../artifacts/job_recommendation'
os.makedirs(artifacts_path, exist_ok=True)

vectorizer_path = os.path.join(artifacts_path, 'vectorizer.pkl')
encoder_path = os.path.join(artifacts_path, 'encoder.pkl')
labels_path = os.path.join(artifacts_path, 'labels.json')
model_path = os.path.join(artifacts_path, 'model.pkl')

# The fitted scikit-learn objects are pickled one per file.
for pickle_path, fitted_obj in (
    (vectorizer_path, vectorizer),
    (encoder_path, encoder),
    (model_path, model),
):
    with open(pickle_path, 'wb') as file:
        pickle.dump(fitted_obj, file)

# The id -> role mapping is stored as JSON.
# Note: JSON object keys are always strings, so the integer ids come back as
# '0', '1', ... when this file is loaded.
with open(labels_path, 'w') as file:
    json.dump(labels, file)

In [24]:
# Example usage: a sample resume as plain text, kept in `resume_file` for the
# prediction call further down. Despite the name, this is a string, not a file.
resume_file = """Objective:
A creative and detail-oriented Designer with a passion for visual communication and brand identity seeking opportunities to leverage design skills in a dynamic and collaborative environment.

Education:
- Bachelor of Fine Arts in Graphic Design, XYZ College, GPA: 3.7/4.0
- Diploma in Web Design, ABC Institute, GPA: 3.9/4.0

Skills:
- Proficient in Adobe Creative Suite (Photoshop, Illustrator, InDesign)
- Strong understanding of typography, layout, and color theory
- Experience in both print and digital design
- Ability to conceptualize and execute design projects from concept to completion
- Excellent attention to detail and time management skills

Experience:
Graphic Designer | XYZ Design Studio
- Created visually appealing graphics for various marketing materials, including brochures, flyers, and social media posts
- Collaborated with clients to understand their design needs and deliver creative solutions that align with their brand identity
- Worked closely with the marketing team to ensure consistency in brand messaging across all platforms

Freelance Designer
- Designed logos, branding materials, and website layouts for small businesses and startups
- Managed multiple projects simultaneously while meeting tight deadlines and maintaining quality standards
- Established and maintained strong client relationships through clear communication and exceptional service

Projects:
- Rebranding Campaign for XYZ Company: Led a team to redesign the company's logo, website, and marketing collateral, resulting in a 30% increase in brand recognition
- Packaging Design for ABC Product Launch: Developed eye-catching packaging designs for a new product line, contributing to a successful launch and positive customer feedback

Certifications:
- Adobe Certified Expert (ACE) in Adobe Illustrator
- Responsive Web Design Certification from Udemy

Languages:
- English (Native)
- Spanish (Intermediate)
"""

Predicted Category: 58


In [46]:
# Move the working directory up one level (presumably to the project root, so the
# later `from utils.common import ...` resolves — confirm against the repo layout).
# A bare os.chdir('../') climbs one more level on every re-run of the cell;
# remembering the starting directory makes the cell idempotent within a kernel session.
if 'NOTEBOOK_DIR' not in globals():
    NOTEBOOK_DIR = os.getcwd()
os.chdir(os.path.join(NOTEBOOK_DIR, '..'))

In [47]:
import os
# Show where the kernel is currently running from
current_dir = os.getcwd()
print(current_dir)


d:\Portfolio Projects\Resume Screening


In [50]:
from utils.common import job_recommendation

# Run the project's recommendation helper on the sample resume, passing the fitted
# vectorizer, the trained model and the id -> role mapping built in this notebook.
# NOTE(review): job_recommendation is imported from utils.common and is not shown
# here; judging by the recorded output it returns a role name — confirm.
predicted = job_recommendation(resume_file, vectorizer, model, labels)
print("Predicted Category:", predicted)

Predicted Category: User Interface Designer
