# Job Post Classification

In [36]:
import re

import ipywidgets as widgets
import pandas as pd
import spacy
from IPython.display import Markdown, display
from joblib import load

# Restore the fitted artifacts used by the classification section.
# NOTE: joblib.load unpickles the file contents, so only load artifacts that
# come from a trusted source.
tfidf_vectorizer = load("tfidf_vectorizer.joblib")
rf_model = load("random_forest_model.joblib")

# spaCy English pipeline used by preprocess_text for lemmas and stop words.
nlp = spacy.load("en_core_web_sm")

print("Models and vectorizer loaded successfully.")

Models and vectorizer loaded successfully.


In [37]:
def preprocess_text(text):
    """Normalise raw text into a space-separated string of lemmas.

    Steps, in order: drop every character that is not an ASCII letter or
    whitespace, trim and lowercase, remove any run starting with ``http``,
    then lemmatise with the module-level spaCy pipeline ``nlp`` while
    discarding stop words and non-alphabetic tokens.

    NOTE(review): presumably this mirrors the preprocessing applied when the
    TF-IDF vectorizer was fitted -- confirm before changing any step.

    Args:
        text: Raw input string.

    Returns:
        The kept lemmas joined by single spaces (possibly an empty string).
    """
    letters_only = re.sub(r'[^a-zA-Z\s]', '', text)
    normalised = letters_only.strip().lower()
    # Punctuation is already gone at this point, so this removes the
    # letters-only remnant of a URL (e.g. "httpsexamplecom").
    without_urls = re.sub(r'http\S+', '', normalised)

    lemmas = []
    for token in nlp(without_urls):
        if not token.is_alpha or token.is_stop:
            continue
        lemmas.append(token.lemma_)
    return " ".join(lemmas)

In [38]:
def predict_job_description():
    """Classify the text in ``job_description_text`` and show the result.

    Reads the text area's current value, runs it through ``preprocess_text``,
    vectorizes it with ``tfidf_vectorizer`` and writes the first prediction of
    ``rf_model`` into ``prediction_output``.

    Blank input, or input that is empty after preprocessing, is reported to
    the user instead of being sent to the model: an empty document would be
    vectorized as an all-zero row and the resulting "prediction" would not
    reflect any actual text.

    Returns:
        None. The outcome is written to ``prediction_output.value``.
    """
    raw_text = job_description_text.value
    if not raw_text.strip():
        prediction_output.value = 'Please enter a job description first.'
        return

    # Preprocess the input text
    preprocessed_text = preprocess_text(raw_text)
    if not preprocessed_text:
        prediction_output.value = 'No usable words found in the job description.'
        return

    # Vectorize the preprocessed text
    vectorized_text = tfidf_vectorizer.transform([preprocessed_text])

    # Make a prediction
    prediction = rf_model.predict(vectorized_text)

    # Display the prediction
    prediction_output.value = f'Prediction: {prediction[0]}'

# Build the classification UI: a text area for the job description, a button
# that triggers the prediction, and a label that shows the result.
job_description_text = widgets.Textarea(
    placeholder='Type the job description here...',
    description='Input:',
    layout={'width': '400px', 'height': '200px'},
)
prediction_output = widgets.Label(value='Prediction will be displayed here...')
predict_button = widgets.Button(description="Predict")

# on_click passes the button instance to the callback; the predictor takes no
# arguments, so the instance is discarded here.
predict_button.on_click(lambda _button: predict_job_description())

# Render the controls in the cell output.
display(job_description_text, predict_button, prediction_output)

Textarea(value='', description='Input:', layout=Layout(height='200px', width='400px'), placeholder='Type the j…

Button(description='Predict', style=ButtonStyle())

Label(value='Prediction will be displayed here...')

# Job Similarity

In [52]:
from gensim.models import Doc2Vec

# Load the saved Doc2Vec model from "similarity_model.model"; it is used by
# get_relevant_titles below.
# NOTE(review): assumes the file was produced by a prior training run and
# comes from a trusted source -- confirm.
model_sim = Doc2Vec.load("similarity_model.model")
print("Model loaded successfully.")

def get_relevant_titles(job_title, topn=10):
    """Return the entries of ``model_sim`` most similar to ``job_title``.

    The title is split on whitespace, embedded with
    ``model_sim.infer_vector`` and compared against the document vectors in
    ``model_sim.dv``.

    Args:
        job_title: Free-text job title to look up.
        topn: Maximum number of similar entries to return. Defaults to 10,
            the value that was previously hard-coded.

    Returns:
        A pandas DataFrame with the columns ``'Job Title'`` and
        ``'Similarity Score'``, one row per match in the order returned by
        ``most_similar``. The frame is empty (same columns) when
        ``job_title`` contains no tokens.
    """
    columns = ['Job Title', 'Similarity Score']

    tokens = job_title.split()
    if not tokens:
        # An empty token list gives the model nothing to infer from, so any
        # "neighbours" would be arbitrary; return no matches instead.
        return pd.DataFrame(columns=columns)

    # Infer vector for the input job title
    vector = model_sim.infer_vector(tokens)
    # Get most similar titles from the model
    similar_docs = model_sim.dv.most_similar(positive=[vector], topn=topn)
    # Create a DataFrame to display the results nicely
    return pd.DataFrame(similar_docs, columns=columns)

def on_predict_button_clicked(b):
    """Button callback: list the titles most similar to the entered one.

    Reads ``job_title_input.value``, looks up similar titles with
    ``get_relevant_titles`` and renders them as numbered Markdown lines in
    the ``output`` widget, replacing whatever was shown before.

    Args:
        b: The button instance passed by ``on_click``; unused.
    """
    job_title = job_title_input.value.strip()

    # Everything runs inside the Output context so that anything displayed,
    # printed or raised during the lookup is shown in the widget area.
    with output:
        output.clear_output()

        if not job_title:
            display(Markdown("Please enter a job title first."))
            return

        relevant_titles = get_relevant_titles(job_title)

        # The header reports the number of rows actually returned, which is
        # 10 whenever the model yields the default number of matches.
        display(Markdown(f"### Top {len(relevant_titles)} Relevant Job Titles"))

        scored_titles = zip(
            relevant_titles['Job Title'],
            relevant_titles['Similarity Score'],
        )
        for rank, (title, score) in enumerate(scored_titles, start=1):
            # Round the similarity score for a compact display.
            rounded_score = round(float(score), 3)
            display(Markdown(f"{rank}. **{title}** - Similarity Score: {rounded_score}"))


Model loaded successfully.


In [53]:
import ipywidgets as widgets
from IPython.display import display, Markdown
import pandas as pd

# Build the similarity UI: a single-line title input, a trigger button, and an
# Output area that the click handler writes its results into.
job_title_input = widgets.Text(
    placeholder='Enter a job title...',
    description='Job Title:',
)
predict_button = widgets.Button(description='Find Similar Titles')
output = widgets.Output()

# Hook the handler up to the button.
predict_button.on_click(on_predict_button_clicked)

# Render the controls in the cell output.
display(job_title_input, predict_button, output)

Text(value='', description='Job Title:', placeholder='Enter a job title...')

Button(description='Find Similar Titles', style=ButtonStyle())

Output()