## Import Libraries

In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

## Load the Dataset

In [None]:
# Location of the resume CSV. Set the RESUME_DATASET_PATH environment variable
# to run the notebook on another machine; without it the original author's
# local path is used, so existing setups keep working unchanged.
DATA_PATH = os.environ.get(
    'RESUME_DATASET_PATH',
    '/Users/ahsanali/Desktop/Resume_dataset/UpdatedResumeDataSet.csv',
)
df = pd.read_csv(DATA_PATH)

In [None]:
# Preview the first rows to confirm the CSV parsed into the expected columns.
df.head()

In [None]:
# (rows, columns) of the loaded frame.
df.shape


### Exploring Categories ###


In [None]:
# Number of rows for each distinct value of the 'Category' column.
df['Category'].value_counts()

In [None]:
# Distinct category labels, using attribute-style column access.
df.Category.unique()

In [None]:
# Same query as the previous cell, written with bracket-style column access.
df['Category'].unique()

In [None]:
# Rows per category. value_counts() orders its result by frequency, so the
# wedge labels are taken from the same Series: df['Category'].unique() returns
# names in first-appearance order and would attach counts to the wrong names.
counts = df['Category'].value_counts()
labels = counts.index

# `figsize` is the parameter that controls the canvas size; `figure=` is not.
plt.figure(figsize=(10, 10))
plt.pie(
    counts,
    labels=labels,
    autopct='%1.1f%%',
    shadow=True,
    # One colour per wedge rather than cycling through three.
    colors=plt.cm.plasma(np.linspace(0, 1, len(counts))),
)
plt.title('Share of resumes per category')
plt.show()

### Data Cleaning ###

In [None]:
import re

def cleanResume(txt):
    """Strip web/social-media noise and punctuation from a resume string.

    Steps, in order: remove URLs, the standalone tokens ``RT`` and ``cc``,
    hashtags and @-mentions; replace ASCII punctuation and non-ASCII
    characters with spaces; collapse every whitespace run to one space.

    Parameters
    ----------
    txt : str
        Raw resume text.

    Returns
    -------
    str
        The cleaned text. Leading/trailing single spaces are not stripped.
    """
    # Raw strings keep `\S` / `\s` / `\b` as regex escapes rather than
    # (invalid) Python string escapes.
    # No trailing `\s` requirement: a URL or hashtag that ends the text is
    # removed too.
    cleantext = re.sub(r'http\S+', ' ', txt)
    # Word boundaries stop "RT"/"cc" being cut out of ordinary words such as
    # "ARTS" or "account".
    cleantext = re.sub(r'\b(?:RT|cc)\b', ' ', cleantext)
    cleantext = re.sub(r'#\S+', ' ', cleantext)
    cleantext = re.sub(r'@\S+', ' ', cleantext)
    cleantext = re.sub('[%s]' % re.escape(r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""), ' ', cleantext)
    cleantext = re.sub(r'[^\x00-\x7f]', ' ', cleantext)
    cleantext = re.sub(r'\s+', ' ', cleantext)
    return cleantext

In [None]:
# Run every resume through cleanResume, then display the row labelled 1 as a
# spot check of the cleaned text.
df['Resume'] = df['Resume'].apply(cleanResume)
df['Resume'].loc[1]

### Encode Category Names as Integer Labels ###

In [None]:
from sklearn.preprocessing import LabelEncoder

In [None]:
# Swap each category name for an integer code. Fitting and transforming happen
# in a single call; the fitted encoder is kept in `le`.
le = LabelEncoder()
df['Category'] = le.fit_transform(df['Category'])


### Vectorization ###

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

In [None]:
# Learn the TF-IDF vocabulary from the cleaned resumes (English stop words
# dropped) and build the document-term matrix in one pass.
# NOTE(review): the vectorizer is fitted on every row before the train/test
# split, so held-out documents contribute to the IDF weights - confirm this is
# acceptable for the reported accuracy.
tfidf = TfidfVectorizer(stop_words='english')
required_txt = tfidf.fit_transform(df['Resume'])

### Splitting the Data ###

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    required_txt,
    df['Category'],
    test_size=0.2,
    random_state=42,
)

### Train Model and Report Accuracy ###

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.multiclass import OneVsRestClassifier
from sklearn.metrics import accuracy_score

# Wrap k-nearest-neighbours in a one-vs-rest classifier, fit it on the
# training split and print accuracy on the held-out split.
base_knn = KNeighborsClassifier()
clf = OneVsRestClassifier(base_knn).fit(X_train, y_train)
ypred = clf.predict(X_test)
test_accuracy = accuracy_score(y_test, ypred)
print(test_accuracy)

## Save the Model

In [None]:
import pickle

# Persist the fitted vectorizer and classifier. The `with` blocks close (and
# flush) each file as soon as its dump finishes instead of leaving the handle
# open.
# NOTE(review): '.pk1' / '.pk2' look like typos for '.pkl'; the names are kept
# as-is because other code may load these exact files - confirm.
with open('tfidf.pk1', 'wb') as tfidf_file:
    pickle.dump(tfidf, tfidf_file)
with open('clf.pk2', 'wb') as clf_file:
    pickle.dump(clf, clf_file)

In [None]:
# Sample resume text; the prediction cell cleans, vectorizes and classifies
# this string.
sample_resume = """
ZULFIQAR

EXPERIENCE	SKILLS
I have 2 years experience in website front end and backend development,	
Learning or writing a code everyday one of them my passionate works, Now I’m also a student or just studying.	Web Development.
HTML, CSS.
I ’m also teaching WordPress online to a few of my students.	MSWord, PPT, Excel, Inpage.
Logo Designing.
EDUCATION	
CERTIFICATION
The Orbit Institute, Lahore — BS Software Engineering
NOV- 2019 - DEC- 2023	Wordpress
Now I’m Currently Studying In Semester-IV	Freelancing.
HTML, CSS
Govt. College, Lahore — ICS	MSOmce.
MARCH- 2018 - APRIL- 2019	
	HOBBIES
PROJECTS	Internet Surﬁng
Codding
Mother’s Happy Kid’s — Business Website
Design By Tanzeem Zulﬁqar.	Listening Music
Welcome to Mother's Happy Kid's (mothershappykids.com.pk)	
	LANGUAGES

English
"""

In [None]:
# Second sample resume. The prediction cell does not use it; that cell reads
# `sample_resume`.
# The national ID number, phone numbers and e-mail address are replaced with
# placeholders so the notebook can be shared without exposing them.
# NOTE(review): name, date of birth and street address are left as written -
# confirm whether they should be anonymised as well.
resume_sample_1 = """
Ahsan Ali

Father’s name:    M.Akram              Date of birth:  21-08-1998             C.N.I.C # 00000-0000000-0
Contact:   0300-0000000, 0300-0000000
Email: name@example.com

Address: BMC Stop Ram Pur, Rehmat Town Manawan G.T Road Bata Pur Lahore.
Personal profile:
A hardworking and adaptable individual who maintains a professional manner in the work place. Able to work under pressure in a methodical and responsible way whilst being able to meet deadlines. Mature and responsible, who
always strives to achieve the highest standard possible, with excellent communication skills (verbal and written) and the ability to relate to wide range of people. Seeking a challenging position where my skills can be further
developed and polished. I believe that I have the creativity and enthusiasm on any opportunity provided.

Personal Experience:

(2 Year)   > Assistant Accounts Manager         > Data entry operator. at Marhaba Internation Trading Co.

(2 Year & 7 Month)   >   Accounts Dept.    Manage Accounts, Generate Sale Invoice and handling Bank Transactions at Al Mehboob Shawl China Center. (to till)

Responsibilities:

1.  Manage Accounts, Generate Sale Invoice and handling Bank Transactions.
2.  Maintain all records for office use.
Professional skills:

1.  Data Analytic (Python, Machine learning, Deep learning)
2. Internet, Emails,
Qualification:
1. Martic in Arts from National scholars Academy.(2011 to 2013)
2. I.Com From Govt Islamia College Chiniot.(2013 to 2015)
3.Dip in AI and Data Analytics From Boston Institute of Analytics.(Feb 2024 to Aug 2024)
Hobbies:

1. Reading Books
Language:

Urdu, English
"""

## Testing For Prediction

In [None]:
# Round-trip the trained classifier through disk: write it to 'clf.pkl', then
# read it back so the prediction below runs on the persisted model.
with open('clf.pkl', 'wb') as file:
    pickle.dump(clf, file)

# NOTE: pickle.load can execute arbitrary code; only load files written by
# this notebook or another trusted source.
with open('clf.pkl', 'rb') as file:
    clf = pickle.load(file)

# Clean the input resume with the same routine applied to the training data
cleaned_Resume = cleanResume(sample_resume)

# Vectorize the cleaned text with the already-fitted TfidfVectorizer
input_features = tfidf.transform([cleaned_Resume])

# Predict the encoded category id with the reloaded classifier
prediction_id = clf.predict(input_features)[0]

# Build the id -> name table from the fitted LabelEncoder rather than a
# hand-typed dict, so the names cannot drift from the codes the model was
# trained on. Assumes `le` was fitted on the original category names, i.e.
# the encoding cell ran exactly once on a fresh kernel.
Category_Mappaing = {int(code): str(name) for code, name in enumerate(le.classes_)}

category_name = Category_Mappaing.get(prediction_id, 'unknown')
print('Predicted Category:', category_name)
print(prediction_id)

In [None]:
import numpy as np

# Encoded category ids and their names as parallel arrays: position i of one
# corresponds to position i of the other.
numerical_codes = np.array([6, 12, 0, 1, 24, 16, 22, 14, 5, 15, 4, 21, 2, 11, 18, 20, 8, 17, 19, 7, 13, 10, 9, 3, 23])
job_titles = np.array([
    'Data Science', 'HR', 'Advocate', 'Arts', 'Web Designing',
    'Mechanical Engineer', 'Sales', 'Health and fitness',
    'Civil Engineer', 'Java Developer', 'Business Analyst',
    'SAP Developer', 'Automation Testing', 'Electrical Engineering',
    'Operations Manager', 'Python Developer', 'DevOps Engineer',
    'Network Security Engineer', 'PMO', 'Database', 'Hadoop',
    'ETL Developer', 'DotNet Developer', 'Blockchain', 'Testing'
])

# Fail loudly if the two arrays are ever edited out of step.
assert len(numerical_codes) == len(job_titles), 'codes and titles must pair up one-to-one'

# .tolist() turns NumPy scalars into plain int / str, so the dictionary prints
# cleanly and can be serialised without NumPy-specific handling.
code_to_title = dict(zip(numerical_codes.tolist(), job_titles.tolist()))

# Print the dictionary
print(code_to_title)
