In [1]:
import pandas as pd
# Load the raw job postings from the CSV in the working directory.
data = pd.read_csv(filepath_or_buffer="jobs.csv")

In [2]:
# Preview the first five rows of the loaded frame.
data.head(n=5)

Unnamed: 0,Role,Features
0,Social Media Manager,5 to 15 Years Digital Marketing Specialist M.T...
1,Frontend Web Developer,"2 to 12 Years Web Developer BCA HTML, CSS, Jav..."
2,Quality Control Manager,0 to 12 Years Operations Manager PhD Quality c...
3,Wireless Network Engineer,4 to 11 Years Network Engineer PhD Wireless ne...
4,Conference Manager,1 to 12 Years Event Manager MBA Event planning...


In [3]:
# (row count, column count) of the frame as loaded from the CSV.
data.shape

(1615940, 2)

In [4]:
# Drop every role that has fewer than `min_count` rows.
min_count = 6500
role_counts = data['Role'].value_counts()

# Labels of the roles that fall below the threshold.
dropped_classes = role_counts.index[(role_counts < min_count).to_numpy()]

# Keep the remaining rows and renumber them from zero.
filtered_data = (
    data.loc[~data['Role'].isin(dropped_classes)]
    .reset_index(drop=True)
)

# Per-role row counts after filtering.
filtered_data['Role'].value_counts()

Role
Interaction Designer          20580
Network Administrator         17470
User Interface Designer       14036
Social Media Manager          13945
User Experience Designer      13935
                              ...  
Benefits Coordinator           6839
Research Analyst               6830
Administrative Coordinator     6803
IT Support Specialist          6799
UI/UX Designer                 6743
Name: count, Length: 61, dtype: int64

In [5]:
# Number of distinct roles left after filtering (missing values are not counted).
filtered_data['Role'].nunique()

61

In [6]:
# Work on a 10,000-row random subset of the filtered frame.
# random_state pins the draw so re-running the notebook selects the same rows.
# NOTE: this rebinds `data`; the full frame read from the CSV is no longer
# reachable under that name after this cell runs.
data = filtered_data.sample(n=10000, random_state=42)

In [7]:
# Preview the first five sampled rows (the index keeps the pre-sample row labels).
data.head(n=5)

Unnamed: 0,Role,Features
439746,Account Executive,4 to 14 Years Sales Representative MCA Sales s...
387030,User Experience Designer,5 to 9 Years UX/UI Designer B.Com User-centere...
90330,Database Administrator,1 to 13 Years Systems Administrator BBA Databa...
115577,DevOps Engineer,1 to 8 Years Software Engineer MCA Automation ...
480321,Sustainable Design Specialist,3 to 9 Years Architect BBA Sustainable design ...


In [8]:
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Inputs are the free-text feature strings; targets are the role labels.
X, y = data['Features'], data['Role']

# Hold out 20% of the rows for evaluation; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit the TF-IDF vocabulary on the training split only, then apply that same
# fitted vectorizer to the test split.
tfidf_vectorizer = TfidfVectorizer()
X_train_tfidf = tfidf_vectorizer.fit_transform(X_train)
X_test_tfidf = tfidf_vectorizer.transform(X_test)

In [9]:
# Random forest training is randomized, so an unseeded estimator yields a
# different model on every run. Fixing random_state makes the fitted model
# repeatable for a given training set.
rfc = RandomForestClassifier(random_state=42)
rfc.fit(X_train_tfidf, y_train)

In [10]:

# Score the classifier on the held-out split.
y_pred = rfc.predict(X_test_tfidf)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
# NOTE(review): the recorded run printed 1.0. A perfect score may mean identical
# Features strings appear in both train and test; confirm before relying on it.
print("Accuracy:", accuracy)

Accuracy: 1.0


In [14]:
import re
import warnings
# Silence every warning from this point on (no category/module filter is given).
# NOTE(review): a blanket filter can hide useful diagnostics; consider narrowing it.
warnings.filterwarnings("ignore")
def cleanResume(txt):
    """Normalize raw resume text before it is vectorized.

    Steps, in order: remove URLs, the standalone tokens ``RT`` and ``cc``,
    hashtags and @-mentions; replace ASCII punctuation and non-ASCII
    characters with spaces; collapse every whitespace run to one space.

    Args:
        txt: Resume text as a ``str``.

    Returns:
        The cleaned string. It is not stripped, so a single leading or
        trailing space may remain.
    """
    # All patterns are raw strings: sequences such as backslash-S in a normal
    # literal are invalid escapes and raise a SyntaxWarning on recent Pythons.

    # No trailing-whitespace requirement, so a URL at the very end is removed too.
    cleanText = re.sub(r'http\S+', ' ', txt)
    # Word boundaries stop "RT"/"cc" from being cut out of ordinary words
    # (e.g. "successful", "account").
    cleanText = re.sub(r'\b(?:RT|cc)\b', ' ', cleanText)
    # Hashtags, including one that ends the text.
    cleanText = re.sub(r'#\S+', ' ', cleanText)
    # @-mentions.
    cleanText = re.sub(r'@\S+', '  ', cleanText)
    # Every ASCII punctuation character becomes a space.
    cleanText = re.sub('[%s]' % re.escape(r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""), ' ', cleanText)
    # Anything outside 7-bit ASCII becomes a space.
    cleanText = re.sub(r'[^\x00-\x7f]', ' ', cleanText)
    # Collapse newlines, tabs and repeated spaces.
    cleanText = re.sub(r'\s+', ' ', cleanText)
    return cleanText


def job_recommendation(resume_text):
    """Predict a role label for a single resume.

    The text is cleaned with ``cleanResume``, converted to TF-IDF features by
    the module-level ``tfidf_vectorizer``, and classified by the module-level
    ``rfc`` model.

    Args:
        resume_text: Raw resume text.

    Returns:
        The first (and only) prediction produced by ``rfc.predict``.
    """
    cleaned = cleanResume(resume_text)
    # The vectorizer expects an iterable of documents, hence the one-item list.
    features = tfidf_vectorizer.transform([cleaned])
    predictions = rfc.predict(features)
    return predictions[0]

In [15]:
# Sample resume (a legal/attorney profile) used as a one-off smoke test of
# job_recommendation below.
resume='''
Objective:
Dedicated and driven legal professional with a passion for advocating for justice and defending the rights of clients. Seeking a challenging position as an Advocate in a reputable law firm where I can apply my expertise in litigation, negotiation, and legal research to provide effective representation and achieve favorable outcomes for clients. Committed to upholding the principles of integrity, professionalism, and ethical conduct in all legal proceedings.

Education:
- Juris Doctor (JD), Law School, XYZ University
- Bachelor of Arts in Political Science, ABC College, GPA: 3.7/4.0
- Licensed Attorney, State Bar Association

Skills:
- Extensive experience in civil and criminal litigation, representing clients in court hearings, trials, and appellate proceedings
- Strong research and analytical skills, conducting legal research, drafting legal documents, and preparing persuasive arguments
- Excellent oral and written communication skills, articulating legal concepts and arguments effectively to judges, juries, and opposing counsel
- Proven track record of negotiating favorable settlements and agreements on behalf of clients, resolving disputes through mediation and arbitration
- Ability to work collaboratively with clients, colleagues, and experts to develop comprehensive legal strategies and achieve successful outcomes

Experience:
Associate Attorney | Law Firm LLP
- Handled a diverse caseload of civil and criminal matters, including personal injury, family law, employment law, and criminal defense
- Conducted legal research, drafted pleadings, motions, briefs, and other legal documents, and prepared for court hearings and trials
- Represented clients in negotiations, mediations, and settlement conferences, securing favorable outcomes and settlements
- Assisted senior attorneys in trial preparation, witness preparation, and courtroom presentations, contributing to successful trial outcomes

Legal Intern | Public Defender's Office
- Assisted attorneys in the defense of indigent clients in criminal cases, conducting legal research, drafting motions, and preparing for court appearances
- Interviewed clients, witnesses, and law enforcement officers, gathering evidence and information to support defense strategies
- Observed courtroom proceedings, including arraignments, hearings, and trials, and assisted in trial preparation and case management
- Participated in client counseling sessions, explaining legal rights, options, and potential outcomes to clients facing criminal charges

Law Clerk | Legal Aid Organization
- Provided legal assistance to low-income individuals and families in civil matters, such as landlord-tenant disputes, consumer rights, and family law matters
- Conducted intake interviews, assessed clients' legal needs, and provided information about available legal services and resources
- Drafted legal documents, including pleadings, petitions, and correspondence, and assisted clients in completing court forms and applications
- Collaborated with attorneys and social workers to address clients' legal and non-legal needs, connecting them with community resources and support services

Certifications:
- Certified Mediator, State Bar Association
- Trial Advocacy Certificate, Law School

Languages:
- English (Native)
- Spanish (Proficient)

'''

# Run the full clean -> vectorize -> classify path on the sample and show the label.
predicted_category = job_recommendation(resume)
print("Predicted Category:", predicted_category)

Predicted Category: Paralegal


In [16]:
import pickle
# Persist the fitted classifier and the fitted vectorizer to disk.
# The original passed open(...) inline, leaving both handles unclosed; `with`
# guarantees each file is flushed and closed even if pickling raises.
with open('rfc_job_recommendation.pkl', 'wb') as model_file:
    pickle.dump(rfc, model_file)
with open('tfidf_vectorizer_job_recommendation.pkl', 'wb') as vectorizer_file:
    pickle.dump(tfidf_vectorizer, vectorizer_file)