In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the pre-cleaned resume dataset into a DataFrame.
# NOTE(review): the absolute '/content/...' path looks like a Colab upload
# location — adjust it when running the notebook elsewhere.
df = pd.read_csv('/content/clean_resume_data.csv')

In [None]:
# Preview the first rows to check which columns were loaded.
df.head()

Unnamed: 0,ID,Category,Feature
0,16852973,HR,hr administrator marketing associate hr admini...
1,22323967,HR,hr specialist hr operations summary media prof...
2,33176873,HR,hr director summary years experience recruitin...
3,27018550,HR,hr specialist summary dedicated driven dynamic...
4,17812897,HR,hr manager skill highlights hr skills hr depar...


In [None]:
# Number of resumes per category, to see how balanced the classes are.
df['Category'].value_counts()

Unnamed: 0_level_0,count
Category,Unnamed: 1_level_1
INFORMATION-TECHNOLOGY,120
BUSINESS-DEVELOPMENT,120
FINANCE,118
ADVOCATE,118
ACCOUNTANT,118
ENGINEERING,118
CHEF,118
AVIATION,117
FITNESS,117
SALES,116


In [None]:
# (rows, columns) of the loaded frame.
df.shape

(2484, 3)

In [None]:
from sklearn.utils import resample

# Row count of the largest category; every category is resampled to this size.
max_count = df['Category'].value_counts().max()

# One resampled frame per category, in order of first appearance in `df`.
# Categories smaller than `max_count` are sampled with replacement (rows get
# duplicated); a category already at `max_count` is sampled without
# replacement. The copies keep their original index labels, so any later
# train/test split has to keep copies of one row on the same side to avoid
# leaking training rows into the test set.
balanced_data = [
    resample(
        rows,
        replace=len(rows) < max_count,
        n_samples=max_count,
        random_state=42,
    )
    for rows in (df[df['Category'] == label] for label in df['Category'].unique())
]

balanced_df = pd.concat(balanced_data)

In [None]:
# Per-category row counts after resampling.
balanced_df['Category'].value_counts()


Unnamed: 0_level_0,count
Category,Unnamed: 1_level_1
HR,120
DESIGNER,120
ARTS,120
BANKING,120
PUBLIC-RELATIONS,120
CONSTRUCTION,120
ACCOUNTANT,120
ENGINEERING,120
APPAREL,120
FINANCE,120


In [None]:
# Drop rows with any missing value. This runs after the balancing step, so
# categories that contained missing rows can end up below the balanced size.
balanced_df = balanced_df.dropna()

In [None]:
from sklearn.model_selection import GroupShuffleSplit

X = balanced_df['Feature']
y = balanced_df['Category']

# `balanced_df` was built by oversampling, so it contains exact copies of the
# same resume. Those copies still carry the index label of the row they were
# drawn from. A plain random split scatters copies of one resume across both
# train and test, which lets the model "memorise" test rows and inflates the
# reported accuracy. Splitting by that label keeps every copy of a resume on
# a single side of the split.
groups = balanced_df.index.to_numpy()

# NOTE: test_size here is the share of *distinct resumes* held out, so the
# share of rows in the test set is only approximately 20%.
splitter = GroupShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
train_pos, test_pos = next(splitter.split(X, y, groups=groups))

X_train, X_test = X.iloc[train_pos], X.iloc[test_pos]
y_train, y_test = y.iloc[train_pos], y.iloc[test_pos]

# Guard against regressions: no original resume may appear on both sides.
assert set(X_train.index).isdisjoint(X_test.index)

In [None]:
# Turn resume text into TF-IDF features. The vectorizer is fitted on the
# training texts only; the test texts are transformed with that fitted
# vectorizer.
tfidf_vectorizer = TfidfVectorizer()
X_train_tfidf = tfidf_vectorizer.fit_transform(X_train)
X_test_tfidf = tfidf_vectorizer.transform(X_test)

In [None]:
from sklearn.model_selection import ParameterGrid, RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier
# classification_report is used below; import it here so the cell also runs
# on a fresh kernel.
from sklearn.metrics import accuracy_score, classification_report

# Search space for the random forest.
param_grid = {
    'n_estimators': [100, 200],
    'max_depth': [20, None],
    'min_samples_split': [2, 5],
    'min_samples_leaf': [1],
    'bootstrap': [True, False],
}

# The space is a small finite grid. Requesting more iterations than it has
# combinations only makes scikit-learn warn and evaluate every combination
# once, so cap n_iter at the grid size.
n_candidates = len(ParameterGrid(param_grid))

# `rf_classifier` is only the template handed to the search; the search fits
# its own copies, so use `best_rf` (not `rf_classifier`) for predictions.
rf_classifier = RandomForestClassifier(random_state=42)
rf_random = RandomizedSearchCV(
    estimator=rf_classifier,
    param_distributions=param_grid,
    n_iter=min(100, n_candidates),
    scoring='accuracy',
    cv=3,
    verbose=2,
    random_state=42,
    n_jobs=-1,
)

rf_random.fit(X_train_tfidf, y_train)
print("Best parameters:", rf_random.best_params_)

# Best configuration, refitted on the whole training set by the search.
best_rf = rf_random.best_estimator_
y_pred = best_rf.predict(X_test_tfidf)

accuracy = accuracy_score(y_test, y_pred)
print("Optimized Accuracy:", accuracy)
print(classification_report(y_test, y_pred))
# accuracy was about 0.85 in the originally recorded run

Fitting 3 folds for each of 16 candidates, totalling 48 fits




Best parameters: {'n_estimators': 200, 'min_samples_split': 2, 'min_samples_leaf': 1, 'max_depth': None, 'bootstrap': False}
Optimized Accuracy: 0.8506944444444444
                        precision    recall  f1-score   support

            ACCOUNTANT       0.83      0.95      0.89        21
              ADVOCATE       1.00      0.76      0.86        29
           AGRICULTURE       0.94      0.74      0.83        23
               APPAREL       1.00      0.86      0.92        21
                  ARTS       1.00      0.64      0.78        22
            AUTOMOBILE       1.00      1.00      1.00        19
              AVIATION       0.91      1.00      0.95        30
               BANKING       1.00      0.78      0.88        23
                   BPO       1.00      1.00      1.00        15
  BUSINESS-DEVELOPMENT       0.54      0.72      0.62        18
                  CHEF       0.90      0.96      0.93        28
          CONSTRUCTION       0.89      1.00      0.94        25
   

In [None]:

# Number of most important terms to show. Plotting one bar per vocabulary
# term produces an unreadable, unlabeled chart.
TOP_N = 20

feature_importances = best_rf.feature_importances_
feature_names = tfidf_vectorizer.get_feature_names_out()

# argsort is ascending, so the last TOP_N positions are the most important
# terms; barh then draws the largest bar at the top.
sorted_indices = feature_importances.argsort()[-TOP_N:]
bar_positions = range(len(sorted_indices))

fig, ax = plt.subplots(figsize=(10, 8))
ax.barh(bar_positions, feature_importances[sorted_indices], align='center')
# Label each bar with the term it belongs to.
ax.set_yticks(list(bar_positions))
ax.set_yticklabels(feature_names[sorted_indices])
ax.set_xlabel('Importance')
ax.set_ylabel('Features')
ax.set_title(f'Top {len(sorted_indices)} Feature Importances')
plt.show()


In [None]:
import re
def cleanResume(txt):
    """Strip URLs, tags, mentions, punctuation and non-ASCII characters from text.

    The substitutions are applied in a fixed order and each match is replaced
    by whitespace; runs of whitespace are then collapsed to a single space.
    Leading/trailing whitespace is collapsed but not stripped.

    Args:
        txt: Raw resume text.

    Returns:
        The cleaned text as a single string.
    """
    # Raw strings keep the regex escapes (\S, \s) out of Python's own
    # string-escape processing, which warns about them on current versions.
    substitutions = (
        # URLs, only when followed by whitespace (a URL at the very end of
        # the text is left for the punctuation step to break up).
        (r'http\S+\s', ' '),
        # NOTE(review): also matches inside words ('accounting' -> 'a ounting').
        # Left unchanged because the training data was presumably cleaned the
        # same way — confirm before tightening.
        (r'RT|cc', ' '),
        # Hashtags followed by whitespace.
        (r'#\S+\s', ' '),
        # Mentions and e-mail domains (everything from '@' to the next space).
        (r'@\S+', '  '),
        # ASCII punctuation.
        ('[%s]' % re.escape(r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""), ' '),
        # Anything outside the ASCII range.
        (r'[^\x00-\x7f]', ' '),
        # Collapse whitespace runs last, after all the replacements above.
        (r'\s+', ' '),
    )

    cleanText = txt
    for pattern, replacement in substitutions:
        cleanText = re.sub(pattern, replacement, cleanText)
    return cleanText

def predict_category(resume_text):
    """Predict the job category of a single resume.

    The text is cleaned with ``cleanResume``, vectorised with the fitted
    ``tfidf_vectorizer`` and classified with ``best_rf``.

    Args:
        resume_text: Raw resume text.

    Returns:
        The predicted category label for the resume.
    """
    cleaned_text = cleanResume(resume_text)
    resume_tfidf = tfidf_vectorizer.transform([cleaned_text])
    # Use the fitted model selected by the hyper-parameter search.
    # `rf_classifier` is only the template passed to that search and is never
    # fitted itself, so predicting with it fails on a fresh kernel.
    predicted_category = best_rf.predict(resume_tfidf)[0]
    return predicted_category

In [None]:

resume_file = """Objective:
A creative and detail-oriented Designer with a passion for visual communication and brand identity seeking opportunities to leverage design skills in a dynamic and collaborative environment.

Education:
- Bachelor of Fine Arts in Graphic Design, XYZ College, GPA: 3.7/4.0
- Diploma in Web Design, ABC Institute, GPA: 3.9/4.0

Skills:
- Proficient in Adobe Creative Suite (Photoshop, Illustrator, InDesign)
- Strong understanding of typography, layout, and color theory
- Experience in both print and digital design
- Ability to conceptualize and execute design projects from concept to completion
- Excellent attention to detail and time management skills

Experience:
Graphic Designer | XYZ Design Studio
- Created visually appealing graphics for various marketing materials, including brochures, flyers, and social media posts
- Collaborated with clients to understand their design needs and deliver creative solutions that align with their brand identity
- Worked closely with the marketing team to ensure consistency in brand messaging across all platforms

Freelance Designer
- Designed logos, branding materials, and website layouts for small businesses and startups
- Managed multiple projects simultaneously while meeting tight deadlines and maintaining quality standards
- Established and maintained strong client relationships through clear communication and exceptional service

Projects:
- Rebranding Campaign for XYZ Company: Led a team to redesign the company's logo, website, and marketing collateral, resulting in a 30% increase in brand recognition
- Packaging Design for ABC Product Launch: Developed eye-catching packaging designs for a new product line, contributing to a successful launch and positive customer feedback

Certifications:
- Adobe Certified Expert (ACE) in Adobe Illustrator
- Responsive Web Design Certification from Udemy

Languages:
- English (Native)
- Spanish (Intermediate)
"""
# Classify the sample resume defined above and print the predicted label.
predicted_category = predict_category(resume_file)
print("Predicted Category:", predicted_category)

Predicted Category: DESIGNER


In [None]:

resume_file = """Chirag Jawa
 +91 9729958758 # chiragjawa55@gmail.com ï LinkedIn § Github
Summary
Highly motivated Computer Engineering student with a strong background in Python, machine learning, and web
development. Experienced in developing dynamic web applications. Proven ability in data analysis and project management.
Seeking a role to apply my technical skills and contribute to innovative projects in a challenging environment.
Education
Thapar Institute of Engineering and Technology Patiala, Punjab
Bachelors of Engineering in Computer Engineering Sep 2022 – June 2026
• Cumulative GPA : 8.62/10.0
Dhruv Public School Pundri , Kaithal , Haryana
12th CBSE (87percent) 2022
Dhruv Public School Pundri , Kaithal , Haryana
10th CBSE (90percent) 2020
Relevant Coursework
• Data Structures
• Operating Systems
• Database Management
• Artificial Intelligence
• OOPS
• Algorithm Analysis
• DBMS
Experience
ECON CLUB 2023-Present
Core Member Patiala, Punjab
• Developed a website for the society event, incorporating interactive elements and real-time updates.
• Led the technical team to create a virtual event interface, enhancing user engagement by 30%.
Summer ELC , 10 June 2024 - 20 July 2024
Internship T.I.E.T-Patiala
• Engineered a 3D Printed Robotic Arm with gesture and vision control, improving safety in hazardous environments.
• Implemented AI algorithms for precise control and operation of the robotic arm.
Projects
Dream Analyser ChatBot | AI,Python July 2023
∗ Developed an AI-powered chatbot using Python to analyze and interpret user dreams based on dream psychology.
∗ Implemented machine learning techniques to improve response accuracy by 25%.
WDS Stock Market Exchange Interface | HTML,CSS,JS October 2023
∗ Created a dynamic web application for a virtual stock market using HTML, CSS, and JavaScript.
∗ Enabled real-time stock data updates, enhancing user experience and interaction.
∗ LINK : https://wolfofdalalstreet9.vercel.app/
PR website for event | HTML,CSS,JS October 2023
∗ This is the website made for publicity of the event.
∗ LINK : https://wds-finalprwebsite.vercel.app/
Various Self Projects | HTML,CSS,JS,Bootstrap October 2023
∗ Book issuing website page : Chirag Library (chiragjawa.github.io) /
∗ Food Service website :Document (chiragjawa.github.io)
∗ ATG : https://atgwebsite.netlify.app/
∗ PR Website : https://ipluefaauction.vercel.app/
Skills
- Programming Languages : C++, Python, JavaScript, SQL
- Web Technologies : HTML, CSS, ReactJS, Bootstrap,NodeJS , Expressjs
-Tools : Git, GitHub, Visual Studio Code -Database : MYSQL , Mongodb
"""
# Classify the sample resume defined above and print the predicted label.
predicted_category = predict_category(resume_file)
print("Predicted Category:", predicted_category)

Predicted Category: ENGINEERING
