Project: 2 Job Recommendation System

Problem Statement:
Match users with relevant jobs based on their skills and resume.

In [42]:
import os
import ast
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns



In [43]:
os.chdir(os.getcwd().replace('notebooks', 'data'))

df = pd.read_csv('data.csv')


In [44]:
df.columns = df.columns.str.replace(' ', '_')


In [45]:
df.columns


Index(['company', 'education', 'experience', 'industry', 'jobdescription',
       'jobid', 'joblocation_address', 'jobtitle', 'numberofpositions',
       'payrate', 'postdate', 'site_name', 'skills', 'uniq_id'],
      dtype='object')

In [46]:
selected_columns = [
    'education',
    'experience',
    'industry',
    'jobdescription',
    'joblocation_address',
    'jobtitle',
    'skills'
]


In [47]:
df = df[selected_columns]


In [48]:
df.sample(5)


Unnamed: 0,education,experience,industry,jobdescription,joblocation_address,jobtitle,skills
20932,,0 - 5 yrs,Internet / Ecommerce,Job Description Send me Jobs like this Once ...,"Mumbai, Chennai, Pune, Hyderabad, Bengaluru, D...",Freelance Sales Manager/ Channel Partner for E...,Sales
7451,UG: Any Graduate - Any Specialization PG:Any P...,7 - 10 yrs,Banking / Financial Services / Broking,Job Description Send me Jobs like this Quali...,Delhi/NCR(National Capital Region),Manager - Collection - NBFC,Financial Services
945,,1 - 4 yrs,Recruitment / Staffing,Job Description Send me Jobs like this Dutie...,Bengaluru,Hiring for IT Sourcer for Bangalore Location.,HR
15514,UG: Any Graduate - Any Specialization PG:Any P...,11 - 16 yrs,Internet / Ecommerce,Job Description Send me Jobs like this Must ...,"Gurgaon , Bengaluru/Bangalore",Oracle Commerce ( Atg)- Manager,IT Software - eCommerce
20328,UG: Any Graduate - Any Specialization,0 - 5 yrs,Strategy / Management Consulting Firms,Job Description Send me Jobs like this This ...,Hyderabad,Hiring Recruiting Executive - Freshers Welcome,HR


In [49]:
df.shape


(22000, 7)

In [50]:
df.isna().sum()


education              1996
experience                4
industry                  5
jobdescription            4
joblocation_address     501
jobtitle                  0
skills                  528
dtype: int64

In [51]:
df.dropna(inplace=True)


In [52]:
df.shape


(19077, 7)

In [53]:
df['education'].unique()


array(['UG: B.Tech/B.E. - Any Specialization PG:Any Postgraduate - Any Specialization, Post Graduation Not Required',
       'UG: B.Tech/B.E. - Any Specialization PG:MBA/PGDM - Any Specialization',
       'UG: Any Graduate - Any Specialization PG:Any Postgraduate Doctorate:Doctorate Not Required',
       ..., 'PG:MS/M.Sc(Science) - Any Specialization, Statistics',
       'UG: B.Tech/B.E. - Any Specialization PG:MCA - Computers, M.Tech - Any Specialization, MS/M.Sc(Science) - Any Specialization Doctorate:Doctorate Not Required',
       'UG: B.Tech/B.E. - Computers PG:MS/M.Sc(Science) - Computers Doctorate:Doctorate Not Required'],
      dtype=object)

In [54]:
df['education'].nunique()


2265

In [55]:
df['education'] = df['education'].apply(lambda x: [c.replace(' ', '') for c in x.split(', ')])


In [56]:
df['experience'].unique()


array(['0 - 1 yrs', '0 - 0 yrs', '4 - 8 yrs', '11 - 15 yrs', '6 - 8 yrs',
       '2 - 5 yrs', '2 - 7 yrs', '1 - 3 yrs', '1 - 5 yrs', '2 - 4 yrs',
       '3 - 8 yrs', '5 - 7 yrs', '5 - 10 yrs', '6 - 10 yrs',
       '10 - 12 yrs', '4 - 6 yrs', '1 - 6 yrs', '3 - 6 yrs', '3 - 7 yrs',
       '4 - 9 yrs', '3 - 5 yrs', '5 - 8 yrs', '9 - 12 yrs', '7 - 12 yrs',
       '10 - 15 yrs', '8 - 12 yrs', '10 - 16 yrs', '5 - 9 yrs',
       '0 - 5 yrs', '10 - 20 yrs', '0 - 4 yrs', '10 - 18 yrs',
       '0 - 2 yrs', '6 - 11 yrs', '2 - 3 yrs', '1 - 4 yrs', '7 - 9 yrs',
       '2 - 6 yrs', '1 - 2 yrs', '8 - 10 yrs', '0 - 3 yrs', '4 - 7 yrs',
       '7 - 10 yrs', '15 - 20 yrs', '8 - 13 yrs', '1 - 1 yrs',
       '3 - 4 yrs', '9 - 14 yrs', '6 - 9 yrs', '13 - 15 yrs',
       '15 - 25 yrs', '14 - 22 yrs', '4 - 5 yrs', '7 - 11 yrs',
       '16 - 24 yrs', '12 - 15 yrs', '7 - 8 yrs', '5 - 6 yrs',
       '20 - 25 yrs', '10 - 13 yrs', '12 - 18 yrs', '6 - 7 yrs',
       '10 - 14 yrs', '12 - 20 yrs', '15 - 17 yrs', '20

In [57]:
def convert_experience(experience : str):
    if 'yrs' in experience:
        splited_experience = experience.split(' ')
        from_range, to_range = int(splited_experience[0]), int(splited_experience[2])
        experiences = []
        for i in range(from_range, to_range+1):

            if i == 0:
                experiences.append(f'Fresher')
            elif i == 1:
                experiences.append(f'{i}YearExperience')
            else:
                experiences.append(f'{i}YearsExperience')
    else:
        experiences = ['Fresher']
    
    return experiences


In [58]:
convert_experience(df['experience'][30])


['3YearsExperience',
 '4YearsExperience',
 '5YearsExperience',
 '6YearsExperience',
 '7YearsExperience']

In [59]:
df['experience'] = df['experience'].apply(convert_experience)


In [60]:
df['industry'].unique()


array(['Media / Entertainment / Internet',
       'Advertising / PR / MR / Event Management',
       'IT-Software / Software Services',
       'Banking / Financial Services / Broking',
       'Aviation / Aerospace Firms',
       'Industrial Products / Heavy Machinery', 'FMCG / Foods / Beverage',
       'Recruitment / Staffing', 'Internet / Ecommerce',
       'Travel / Hotels / Restaurants / Airlines / Railways',
       'BPO / Call Centre / ITES', 'Pharma / Biotech / Clinical Research',
       'Real Estate / Property', 'Insurance', 'Publishing',
       'Retail / Wholesale',
       'Automobile / Auto Anciliary / Auto Components',
       'Government / Defence', 'Accounting / Finance',
       'Textiles / Garments / Accessories',
       'Semiconductors / Electronics', 'Medical / Healthcare / Hospitals',
       'Education / Teaching / Training', 'Legal',
       'Courier / Transportation / Freight / Warehousing', 'Telecom/ISP',
       'NGO / Social Services / Regulators / Industry Association

In [61]:
df['industry'] = df['industry'].apply(lambda x: [ i.replace(' ', '') for i in x.split('/')])


In [62]:
df['jobdescription'] = df['jobdescription'].apply(lambda x: x.split(' '))


In [63]:
df['joblocation_address'] = df['joblocation_address'].apply(lambda x: [ i.replace(' ', '') for i in x.split('/')])


In [64]:
df['skills'].unique()


array(['ITES', 'Marketing', 'IT Software - Application Programming',
       'Accounts', 'Production', 'Sales', 'IT Software - Other',
       'IT Software - Mobile', 'Engineering Design', 'Financial Services',
       'Hotels', 'IT Software - QA & Testing', 'HR', 'Supply Chain',
       'IT Software - Network Administration', 'Architecture', 'Legal',
       'Journalism', 'IT Software - DBA', 'Strategy', 'Design',
       'Defence Forces', 'IT Software - Mainframe',
       'IT Software - Embedded', 'IT Software - Middleware', 'Teaching',
       'Medical', 'IT Software - System Programming',
       'IT Software - Client/Server Programming', 'Site Engineering',
       'IT Software - eCommerce', 'IT Software - Telecom Software',
       'Fashion Designing', 'IT Hardware', 'IT Software - ERP',
       'Analytics & Business Intelligence', 'Executive Assistant', 'TV',
       'Top Management', 'Travel', 'Export', 'IT Software - Systems',
       'Packaging', 'Shipping', 'Beauty/Fitness/Spa Services']

In [65]:
df['skills'] = df['skills'].apply(lambda x: [x.replace(' ', '')])


In [66]:
df.sample(10)


Unnamed: 0,education,experience,industry,jobdescription,joblocation_address,jobtitle,skills
13765,[UG:AnyGraduatePG:MBA/PGDM],"[10YearsExperience, 11YearsExperience, 12Years...","[Medical, Healthcare, Hospitals]","[Job, Description, , Send, me, Jobs, like, th...","[Mumbai,maharashtra]",Head Sales MND,[Sales]
14126,[UG:B.Tech/B.E.PG:PostGraduationNotRequiredDoc...,"[3YearsExperience, 4YearsExperience, 5YearsExp...","[IT-Software, SoftwareServices]","[Job, Description, , Send, me, Jobs, like, th...",[Gurgaon],SME,[ITSoftware-ApplicationProgramming]
15412,[UG:AnyGraduate-AnySpecializationPG:M.Tech-Any...,"[3YearsExperience, 4YearsExperience, 5YearsExp...","[Automobile, AutoAnciliary, AutoComponents]","[Job, Description, , Send, me, Jobs, like, th...","[Bengaluru, Bangalore]",Diagnostic Verification Engineer,[EngineeringDesign]
20119,[UG:AnyGraduatePG:PostGraduationNotRequiredDoc...,"[2YearsExperience, 3YearsExperience, 4YearsExp...","[Textiles, Garments, Accessories]","[Job, Description, , Send, me, Jobs, like, th...","[Delhi,Delhi]",merchandiser,[FashionDesigning]
2653,[UG:B.Tech/B.E.PG:PostGraduationNotRequiredDoc...,"[2YearsExperience, 3YearsExperience, 4YearsExp...","[IT-Software, SoftwareServices]","[Job, Description, , Send, me, Jobs, like, th...","[Hyderabad, Secunderabad,Hyderabad, Secunderabad]",Android Developer,[ITSoftware-ApplicationProgramming]
10219,[UG:AnyGraduate-AnySpecializationPG:AnyPostgra...,"[2YearsExperience, 3YearsExperience, 4YearsExp...","[NGO, SocialServices, Regulators, IndustryAsso...","[Job, Description, , Send, me, Jobs, like, th...","[Faridabad,Bengaluru, Bangalore]",Sr. Fund Raising Executive Donor Services (70%...,[Marketing]
20303,[UG:AnyGraduate-AnySpecializationPG:MBA/PGDM-A...,"[Fresher, 1YearExperience, 2YearsExperience]","[BPO, CallCentre, ITES]","[Job, Description, , Send, me, Jobs, like, th...",[Hyderabad],Hiring for Voice& Accent Trainer in Hyderabad,[ITES]
11330,[UG:B.Tech/B.E.PG:PostGraduationNotRequiredDoc...,"[3YearsExperience, 4YearsExperience, 5YearsExp...","[Telecom, ISP, ]","[Job, Description, , Send, me, Jobs, like, th...","[Bengaluru, Bangalore,karnataka]",Analyst - Data Enablement,[ITSoftware-DBA]
4342,"[UG:AnyGraduate-AnySpecialization, GraduationN...","[Fresher, 1YearExperience, 2YearsExperience, 3...","[Automobile, AutoAnciliary, AutoComponents]","[Job, Description, , Send, me, Jobs, like, th...","[Bengaluru, Bangalore,Chennai,Hyderabad, Secun...",E - Test Engineer,[EngineeringDesign]
3134,[UG:B.Tech/B.E.PG:PostGraduationNotRequiredDoc...,"[2YearsExperience, 3YearsExperience, 4YearsExp...","[IT-Software, SoftwareServices]","[Job, Description, , Send, me, Jobs, like, th...","[Guntur,Hyderabad, Secunderabad]",Programmer Analysts,[ITSoftware-ApplicationProgramming]


In [68]:
df['tags'] = df['education'] + df['experience'] + df['skills'] + df['jobdescription'] + df['industry'] + df['joblocation_address']
 

In [70]:
df = df[['jobtitle', 'tags']]


In [71]:
df.sample(10)


Unnamed: 0,jobtitle,tags
8549,Content Strategist - Startup,[UG:AnyGraduate-AnySpecializationPG:AnyPostgra...
11321,"Medical Coding JOBS - North, EAST OR WEST Lead...","[UG:BDS-Dentistry, B.Pharma-Pharmacy, B.Sc-Bio..."
5656,Limited Vacancy for Aegis Ecommerce for Voice ...,[UG:AnyGraduate-AnySpecializationPG:AnyPostgra...
3100,MT - Finance CA,[UG:AnyGraduate-AnySpecializationPG:AnyPostgra...
3000,Medical Coding Transcription - We Will Help Yo...,[UG:AnyGraduate-AnySpecializationPG:AnyPostgra...
5388,Housekeeping Assistant,[UG:BHMPG:PostGraduationNotRequiredDoctorate:A...
20972,National Sales Manager - Generics Division | M...,"[UG:AnyGraduate-AnySpecialization, 10YearsExpe..."
9997,Data Analytics Manager,[UG:AnyGraduate-AnySpecializationPG:M.Tech-Com...
3223,"Job | Walk-in for ""HR Executive""_27/02/2016 (S...","[UG:AnyGraduate-AnySpecialization, GraduationN..."
3046,Automation Engineer - PLC,[UG:AnyGraduatePG:PostGraduationNotRequiredDoc...


In [73]:
df.to_csv('clean.csv')
