# Important Libraries

In [1]:
import os

import joblib
import neattext.functions as nfx
import pandas as pd
import spacy
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC

# Load the dataset

In [2]:
# Location of the job-title / job-description CSV.
# Set the JOB_DATA_CSV environment variable to point at your own copy of the
# file; when it is unset, the original author's local path is used as the
# fallback so existing behaviour is unchanged.
DATA_PATH = os.environ.get(
    "JOB_DATA_CSV",
    r"C:\Users\HP 440 G5\Downloads\job_title_des.csv",
)
df = pd.read_csv(DATA_PATH)
df.head(10)

Unnamed: 0.1,Unnamed: 0,Job Title,Job Description
0,0,Flutter Developer,We are looking for hire experts flutter develo...
1,1,Django Developer,PYTHON/DJANGO (Developer/Lead) - Job Code(PDJ ...
2,2,Machine Learning,"Data Scientist (Contractor)\n\nBangalore, IN\n..."
3,3,iOS Developer,JOB DESCRIPTION:\n\nStrong framework outside o...
4,4,Full Stack Developer,job responsibility full stack engineer – react...
5,5,Java Developer,Software Developer - Integration*\nImmediate O...
6,6,Full Stack Developer,senior full stack developer \- 1800026h cwt lo...
7,7,JavaScript Developer,"Job Description:\n\nReactJS + NodeJs, Azure Fu..."
8,8,DevOps Engineer,Main Responsibilities and Deliverables:\nManag...
9,9,Software Engineer,"Overview\n\n\nBased in Silicon Valley, Tintri ..."


# Checking Data 

In [3]:
# (rows, columns) of the loaded frame.
df.shape

(2277, 3)

In [4]:
# List the column names of the raw frame before any cleanup.
df.columns

Index(['Unnamed: 0', 'Job Title', 'Job Description'], dtype='object')

In [5]:
# Drop the 'Unnamed: 0' column, which is neither the label nor the text.
# errors='ignore' keeps this cell idempotent: re-running it after the column
# is already gone is a no-op instead of raising KeyError.
df = df.drop(columns=['Unnamed: 0'], errors='ignore')

In [6]:
# Confirm which columns remain after dropping 'Unnamed: 0'.
df.columns

Index(['Job Title', 'Job Description'], dtype='object')

In [7]:
# Number of rows per job title; 'Job Title' is the target (y) used for
# training further down, so this shows the class balance.
df['Job Title'].value_counts()


Job Title
JavaScript Developer      166
Java Developer            161
Software Engineer         160
Node js developer         160
iOS Developer             159
PHP Developer             156
Flutter Developer         155
DevOps Engineer           155
Django Developer          152
Machine Learning          152
Backend Developer         147
Network Administrator     145
Database Administrator    139
Full Stack Developer      138
Wordpress Developer       132
Name: count, dtype: int64

In [8]:
# Load the spaCy "en_core_web_sm" pipeline once; preprocess() below reads this
# module-level `nlp` object on every call.
nlp = spacy.load("en_core_web_sm")

In [9]:
def preprocess(text):
    """Normalise a piece of free text into a space-separated token string.

    The text is passed through neattext's ``remove_userhandles`` and
    ``remove_stopwords`` helpers, lowercased, and tokenised with the
    module-level spaCy pipeline ``nlp``. Only alphabetic, non-stop-word
    tokens are kept; each is replaced by its lemma, except the token
    "data", which is kept verbatim.

    Parameters
    ----------
    text : str
        Raw text (a job description or a single skill). Non-string values,
        such as the NaN pandas produces for missing cells, and blank strings
        are treated as empty input.

    Returns
    -------
    str
        The kept tokens joined by single spaces, or "" when nothing is kept.
    """
    # Missing CSV cells arrive as float NaN (or None); the text helpers below
    # only accept strings, so treat anything that is not usable text as empty
    # instead of raising a TypeError in the middle of a DataFrame.apply.
    if not isinstance(text, str) or not text.strip():
        return ""

    text = nfx.remove_userhandles(text)
    text = nfx.remove_stopwords(text)
    doc = nlp(text.lower())

    # The input was lowercased before tokenisation, so token.text is already
    # lowercase and can be compared to "data" directly.
    # "data" bypasses lemmatisation -- presumably to keep the lemmatizer from
    # rewriting it (e.g. to "datum"); confirm against the model's output.
    return " ".join(
        "data" if token.text == "data" else token.lemma_
        for token in doc
        if token.is_alpha and not token.is_stop
    )


In [10]:
# Run every raw job description through preprocess() and store the result
# alongside the original text.
cleaned_descriptions = df['Job Description'].apply(preprocess)
df['Clean_Description'] = cleaned_descriptions

# Preview raw vs. cleaned text for the first three rows.
preview_columns = ['Job Description', 'Clean_Description']
df[preview_columns].head(3)


Unnamed: 0,Job Description,Clean_Description
0,We are looking for hire experts flutter develo...,look hire expert flutter developer eligible po...
1,PYTHON/DJANGO (Developer/Lead) - Job Code(PDJ ...,python django developer lead job strong python...
2,"Data Scientist (Contractor)\n\nBangalore, IN\n...",data scientist contractor bangalore responsibi...


In [11]:
# Check that the 'Clean_Description' column is now present.
df.columns

Index(['Job Title', 'Job Description', 'Clean_Description'], dtype='object')

In [12]:
# Features are the cleaned descriptions; the label is the job title.
X, y = df['Clean_Description'], df['Job Title']

# Hold out 20% of the rows for evaluation. The split is seeded and stratified
# on the label so each job title keeps its share in both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)


In [13]:
# Bag-of-words counts feeding a logistic-regression classifier.
log_reg_steps = [
    ('vectorizer', CountVectorizer()),
    ('classifier', LogisticRegression(max_iter=200)),
]
log_reg_pipeline = Pipeline(steps=log_reg_steps)
log_reg_pipeline.fit(X_train, y_train)

# Score the fitted pipeline on the held-out split and report it.
log_reg_score = log_reg_pipeline.score(X_test, y_test)
print(f"Logistic Regression Accuracy: {log_reg_score:.4f}")

Logistic Regression Accuracy: 0.8026


In [14]:
# Same bag-of-words features, this time with a linear-kernel SVM.
# Pipeline.fit returns the pipeline itself, so construction and fitting chain.
svm_pipeline = Pipeline(
    steps=[
        ('vectorizer', CountVectorizer()),
        ('classifier', SVC(kernel='linear')),
    ]
).fit(X_train, y_train)

# Score the fitted pipeline on the held-out split and report it.
svm_score = svm_pipeline.score(X_test, y_test)
print(f"Support Vector Machine Accuracy: {svm_score:.4f}")


Support Vector Machine Accuracy: 0.7675


In [15]:
# Bag-of-words counts feeding a 100-tree random forest.
rf_pipeline = Pipeline([
    ('vectorizer', CountVectorizer()),
    # Seed the forest (same value as the train/test split) so the reported
    # accuracy is reproducible; unseeded, it changes on every run.
    ('classifier', RandomForestClassifier(n_estimators=100, random_state=42)),
])
rf_pipeline.fit(X_train, y_train)

# Score the fitted pipeline on the held-out split and report it.
rf_score = rf_pipeline.score(X_test, y_test)
print(f"Random Forest Accuracy: {rf_score:.4f}")


Random Forest Accuracy: 0.7961


In [None]:
# Interactive demo: read comma-separated skills, run each one through
# preprocess() and the fitted logistic-regression pipeline, and print the
# distinct job titles predicted. Typing 'exit' ends the loop.
print("\n=== Job Recommendation System === (Type 'exit' to quit)")
while True:
    user_input = input("Enter your skills (comma separated): ")
    # Strip before comparing so " exit" or "exit " also end the session.
    if user_input.strip().lower() == "exit":
        print("Goodbye!")
        break

    skills = [skill.strip() for skill in user_input.split(',') if skill.strip()]
    recommended_jobs = set()

    for skill in skills:
        clean_skill = preprocess(skill)
        # preprocess() can reduce a skill to "" (stop words, digits, symbols).
        # Predicting on an empty document would yield a label unrelated to
        # what the user typed, so such skills are skipped.
        if not clean_skill:
            continue
        predicted_job = log_reg_pipeline.predict([clean_skill])[0]
        recommended_jobs.add(predicted_job)

    # Nothing usable was entered: say so instead of printing an empty list.
    if not recommended_jobs:
        print("\nNo recognizable skills entered. Please try again.\n")
        continue

    print("\nRecommended Jobs Based on Your Skills:")
    # Sorted so the listing order is stable; set iteration order is not.
    for job in sorted(recommended_jobs):
        print(f"- {job}")
    print()



=== Job Recommendation System === (Type 'exit' to quit)


Enter your skills (comma separated):  python,java,sql



Recommended Jobs Based on Your Skills:
- Django Developer
- Software Engineer
- Network Administrator

