## Libraries

In [1]:
import pandas as pd

## Import Dataset

In [2]:
df = pd.read_csv("jobs_dataset_with_features.csv")

## EDA

In [3]:
df.sample(10)

Unnamed: 0,Role,Features
37895,HVAC Systems Designer,5 to 15 Years Mechanical Designer BCA HVAC sys...
3266,Brand Marketing Analyst,4 to 11 Years Brand Manager PhD Marketing anal...
600932,Content Creator,0 to 15 Years Social Media Coordinator M.Tech ...
1072307,Performance Tester,4 to 10 Years Software Tester MBA Performance ...
1201491,Nursing Director,4 to 10 Years Nurse Manager BBA Nursing manage...
1172197,Legal Assistant,2 to 13 Years Legal Secretary M.Tech Legal res...
147279,Manufacturing Engineer,1 to 11 Years Mechanical Engineer B.Tech Manuf...
368779,Executive Assistant,2 to 10 Years Administrative Assistant M.Tech ...
90306,Purchasing Coordinator,1 to 10 Years Procurement Coordinator PhD Proc...
479946,Personal Assistant,3 to 10 Years Executive Assistant B.Com Organi...


In [4]:
df.shape

(1615940, 2)

In [5]:
df['Role'].value_counts()

Interaction Designer            20580
Network Administrator           17470
User Interface Designer         14036
Social Media Manager            13945
User Experience Designer        13935
                                ...  
Inventory Control Specialist     3342
Budget Analyst                   3335
Clinical Nurse Manager           3324
Social Science Researcher        3321
Paid Advertising Specialist      3306
Name: Role, Length: 376, dtype: int64

In [6]:
# Dropping classes with less than 6500 instances
min_count = 5000
role_counts = df['Role'].value_counts()
dropped_classes = role_counts[role_counts < min_count].index
filtered_df = df[~df['Role'].isin(dropped_classes)].reset_index(drop=True)

# Checking the updated role counts
filtered_df['Role'].value_counts()

Interaction Designer          20580
Network Administrator         17470
User Interface Designer       14036
Social Media Manager          13945
User Experience Designer      13935
                              ...  
Benefits Coordinator           6839
Research Analyst               6830
Administrative Coordinator     6803
IT Support Specialist          6799
UI/UX Designer                 6743
Name: Role, Length: 61, dtype: int64

In [7]:
df['Role'].value_counts()

Interaction Designer            20580
Network Administrator           17470
User Interface Designer         14036
Social Media Manager            13945
User Experience Designer        13935
                                ...  
Inventory Control Specialist     3342
Budget Analyst                   3335
Clinical Nurse Manager           3324
Social Science Researcher        3321
Paid Advertising Specialist      3306
Name: Role, Length: 376, dtype: int64

In [8]:
len(filtered_df['Role'].value_counts())

61

In [9]:
l = []

for role in df['Role']:
    l.append(role)

print(l[:500])


['Social Media Manager', 'Frontend Web Developer', 'Quality Control Manager', 'Wireless Network Engineer', 'Conference Manager', 'Quality Assurance Analyst', 'Classroom Teacher', 'User Interface Designer', 'Interaction Designer', 'Wedding Consultant', 'Performance Testing Specialist', 'Family Law Attorney', 'Mechanical Design Engineer', 'Network Security Analyst', 'Sales Account Manager', 'Product Brand Manager', 'School Social Worker', 'Content Creator', 'Deliverability Analyst', 'HR Coordinator', 'Legal Secretary', 'Family Nurse Practitioner', 'Account Strategist', 'Backend Developer', 'Supply Chain Coordinator', 'Supply Chain Coordinator', 'B2B Sales Consultant', 'Structural Engineer', 'Security Operations Center (SOC) Analyst', 'Front-End Developer', 'Tax Planner', 'Event Coordinator', 'Clinical Psychologist', 'Electrical Engineer', 'Lighting Designer', 'Business Intelligence Analyst', 'Content Writer', 'User Interface Designer', 'Personal Tax Consultant', 'Automation Tester', 'Sal

In [10]:
df = filtered_df.sample(n=10000)

In [11]:
df.head()

Unnamed: 0,Role,Features
457983,Automation Tester,4 to 8 Years Software Tester M.Com Test automa...
24999,Data Analyst,1 to 11 Years Marketing Analyst BBA Data analy...
406962,Systems Administrator,5 to 9 Years Network Administrator M.Com Syste...
116109,Content Strategist,1 to 8 Years Content Writer B.Tech Content str...
494289,UX/UI Designer,0 to 10 Years UI Developer M.Com User experien...


## TF-IDF Vectorization

In [12]:
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Splitting the data into features (X) and target (y)
X = df['Features']
y = df['Role']

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# TF-IDF vectorization
tfidf_vectorizer = TfidfVectorizer()
X_train_tfidf = tfidf_vectorizer.fit_transform(X_train)
X_test_tfidf = tfidf_vectorizer.transform(X_test)

## Model Initialization

In [13]:
# RandomForestClassifier
rf_classifier = RandomForestClassifier()
rf_classifier.fit(X_train_tfidf, y_train)

RandomForestClassifier()

In [14]:
# Predictions
y_pred = rf_classifier.predict(X_test_tfidf)


In [15]:
# Accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 1.0


## Recommendation

In [16]:
import re

# Clean resume function
def cleanResume(txt):
    cleanText = re.sub('http\S+\s', ' ', txt)
    cleanText = re.sub('RT|cc', ' ', cleanText)
    cleanText = re.sub('#\S+\s', ' ', cleanText)
    cleanText = re.sub('@\S+', ' ', cleanText)  
    cleanText = re.sub('[%s]' % re.escape("""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""), ' ', cleanText)
    cleanText = re.sub(r'[^\x00-\x7f]', ' ', cleanText) 
    cleanText = re.sub('\s+', ' ', cleanText)
    return cleanText

# Prediction and Category Name - Recommend top 5 jobs
def job_recommendation(resume_text):
    # Clean the resume text
    resume_text = cleanResume(resume_text)
    # Transform the cleaned text with the vectorizer
    resume_tfidf = tfidf_vectorizer.transform([resume_text])
    
    # Get probabilities for each class
    probabilities = rf_classifier.predict_proba(resume_tfidf)[0]
    # Get the top 5 categories based on probability scores
    top_5_indices = probabilities.argsort()[-5:][::-1]
    top_5_categories = [rf_classifier.classes_[index] for index in top_5_indices]
    
    return top_5_categories

In [17]:
# Example-1

resume_file = """Objective:
A creative and detail-oriented Designer with a passion for visual communication and brand identity seeking opportunities to leverage design skills in a dynamic and collaborative environment.

Education:
- Bachelor of Fine Arts in Graphic Design, XYZ College, GPA: 3.7/4.0
- Diploma in Web Design, ABC Institute, GPA: 3.9/4.0

Skills:
- Proficient in Adobe Creative Suite (Photoshop, Illustrator, InDesign)
- Strong understanding of typography, layout, and color theory
- Experience in both print and digital design
- Ability to conceptualize and execute design projects from concept to completion
- Excellent attention to detail and time management skills

Experience:
Graphic Designer | XYZ Design Studio
- Created visually appealing graphics for various marketing materials, including brochures, flyers, and social media posts
- Collaborated with clients to understand their design needs and deliver creative solutions that align with their brand identity
- Worked closely with the marketing team to ensure consistency in brand messaging across all platforms

Freelance Designer
- Designed logos, branding materials, and website layouts for small businesses and startups
- Managed multiple projects simultaneously while meeting tight deadlines and maintaining quality standards
- Established and maintained strong client relationships through clear communication and exceptional service

Projects:
- Rebranding Campaign for XYZ Company: Led a team to redesign the company's logo, website, and marketing collateral, resulting in a 30% increase in brand recognition
- Packaging Design for ABC Product Launch: Developed eye-catching packaging designs for a new product line, contributing to a successful launch and positive customer feedback

Certifications:
- Adobe Certified Expert (ACE) in Adobe Illustrator
- Responsive Web Design Certification from Udemy

Languages:
- English (Native)
- Spanish (Intermediate)
"""
predicted_category = job_recommendation(resume_file)
print("Predicted Category:", predicted_category)

Predicted Category: ['Retirement Planner', 'User Interface Designer', 'UI/UX Designer', 'User Experience Designer', 'Network Administrator']


In [18]:
# Example-2

resume_file = """Objective:
Dedicated and results-oriented Banking professional with a strong background in financial analysis and customer service seeking opportunities to contribute to a reputable financial institution. Eager to leverage expertise in risk management, investment strategies, and relationship building to drive business growth and client satisfaction.

Education:
- Bachelor of Business Administration in Finance, XYZ University, GPA: 3.8/4.0
- Certified Financial Analyst (CFA) Level I Candidate

Skills:
- Proficient in financial modeling and analysis using Excel, Bloomberg Terminal, and other financial software
- Extensive knowledge of banking products and services, including loans, mortgages, and investment products
- Strong understanding of regulatory compliance and risk management practices in the banking industry
- Excellent communication and interpersonal skills, with a focus on building rapport with clients and colleagues
- Ability to work efficiently under pressure and adapt to changing market conditions

Experience:
Financial Analyst | ABC Bank
- Conducted financial analysis and risk assessment for corporate clients, including credit analysis, financial statement analysis, and cash flow modeling
- Developed customized financial solutions to meet clients' needs and objectives, resulting in increased revenue and client retention
- Collaborated with cross-functional teams to identify new business opportunities and optimize existing processes

Customer Service Representative | DEF Bank
- Provided exceptional customer service to bank clients, addressing inquiries, resolving issues, and promoting banking products and services
- Processed transactions accurately and efficiently, including deposits, withdrawals, and account transfers
- Educated customers on various banking products and services, helping them make informed financial decisions

Internship | GHI Investments
- Assisted portfolio managers with investment research and analysis, including industry and company-specific research, financial modeling, and performance analysis
- Prepared investment presentations and reports for clients, highlighting investment opportunities and performance metrics
- Conducted market research and analysis to identify trends and opportunities in the financial markets

Certifications:
- Certified Financial Planner (CFP)
- Series 7 and Series 63 Securities Licenses

Languages:
- English (Native)
- Spanish (Proficient)

"""
predicted_category = job_recommendation(resume_file)
print("Predicted Category:", predicted_category)

Predicted Category: ['Retirement Planner', 'Customer Success Manager', 'Portfolio Manager', 'Market Researcher', 'Data Analyst']


In [19]:
# Example-3

resume_file = """Objective:
Data Scientist with hands-on experience in machine learning, statistical analysis, and data visualization. Passionate about leveraging data insights to solve complex problems and support data-driven decision-making within innovative organizations.

Education:

Master of Science in Data Science, University of DEF, GPA: 3.9/4.0
Bachelor of Science in Statistics, XYZ University, GPA: 3.8/4.0
Skills:

Proficient in Python, R, SQL, and data visualization libraries (Matplotlib, Seaborn)
Experience with machine learning frameworks such as Scikit-Learn, TensorFlow, and PyTorch
Strong knowledge of statistical analysis, data mining, and predictive modeling
Skilled in data visualization and storytelling using tools like Tableau and Power BI
Familiar with Big Data technologies, including Hadoop and Spark
Experience:

Data Scientist | GHI Tech Solutions

Developed and optimized machine learning models to predict customer churn, improving retention strategies and increasing customer lifetime value
Collaborated with cross-functional teams to integrate data-driven insights into product development and marketing strategies
Created interactive dashboards and visualizations to communicate complex data insights to non-technical stakeholders
Data Science Intern | DEF Analytics

Performed exploratory data analysis (EDA) on large datasets, identifying patterns and trends to inform business decisions
Assisted in building a recommendation engine that increased product recommendations accuracy by 20%
Conducted A/B testing and statistical analysis to evaluate the impact of new features and marketing campaigns
Certifications:

IBM Data Science Professional Certificate
Certified Machine Learning Specialist
Languages:

English (Native)
Spanish (Intermediate)
"""
predicted_category = job_recommendation(resume_file)
print("Predicted Category:", predicted_category)

Predicted Category: ['Retirement Planner', 'Automation Tester', 'Research Analyst', 'Market Research Analyst', 'Data Scientist']


In [20]:
# Example-4

resume_file = """Objective:
Results-driven HR Specialist with experience in recruitment, employee relations, and talent management. Dedicated to fostering positive workplace culture and driving organizational success through effective HR practices.

Education:

Bachelor of Arts in Human Resources Management, University of XYZ, GPA: 3.6/4.0
Skills:

Proficient in HRIS systems (e.g., Workday, ADP) and applicant tracking systems (ATS)
Strong knowledge of labor laws, employee relations, and conflict resolution
Experienced in talent acquisition, onboarding, and training
Excellent interpersonal and communication skills
Experience:

HR Specialist | ABC Corporation

Managed full-cycle recruitment for multiple departments, reducing time-to-hire by 25% through streamlined processes
Conducted onboarding sessions and employee orientation, improving new hire integration and retention
Acted as a liaison between employees and management to resolve workplace conflicts and promote positive relationships
Certifications:

SHRM Certified Professional (SHRM-CP)
Languages:

English (Native)
Spanish (Proficient)
"""
predicted_category = job_recommendation(resume_file)
print("Predicted Category:", predicted_category)

Predicted Category: ['Benefits Coordinator', 'Procurement Manager', 'Training Coordinator', 'Network Administrator', 'Retirement Planner']


In [21]:
# Example-5

resume_file = """Objective:
Experienced and safety-focused Commercial Pilot with over 1,500 flight hours and a strong commitment to ensuring passenger safety and satisfaction. Eager to bring in-depth knowledge of aviation procedures to a reputable airline.

Education:

Bachelor of Science in Aviation, XYZ Aviation Academy
Commercial Pilot License (CPL)
Skills:

Proficient in navigation and aviation technology, including flight management systems and radar equipment
Strong knowledge of safety protocols, weather interpretation, and emergency response
Excellent situational awareness, decision-making, and communication skills
Experience:

First Officer | ABC Airlines

Operated domestic and international flights, ensuring compliance with aviation safety regulations and company standards
Collaborated with crew members to deliver excellent in-flight service and manage passenger concerns
Conducted pre-flight and post-flight inspections, identifying and reporting issues for timely resolution
Certifications:

Federal Aviation Administration (FAA) Commercial Pilot License
Instrument Rating
Languages:

English (Native)
French (Basic)
"""
predicted_category = job_recommendation(resume_file)
print("Predicted Category:", predicted_category)

Predicted Category: ['Retirement Planner', 'Network Administrator', 'Executive Assistant', 'Database Administrator', 'Systems Administrator']


## Save Models

In [22]:
import pickle
pickle.dump(rf_classifier,open('rf_classifier_job_recommendation.pkl','wb'))
pickle.dump(tfidf_vectorizer,open('tfidf_vectorizer_job_recommendation.pkl','wb'))