In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report, accuracy_score
import re


In [None]:
# Fetch the resume dataset directly from GitHub (this cell needs network access).
url = "https://raw.githubusercontent.com/benedicta-kelechi/datasets/main/resume_data.csv"
df = pd.read_csv(url)
# Later cells read the 'Resume' and 'Category' columns of this frame.
# Display the first rows as the cell output for a quick visual check.
df.head()


In [None]:
def clean_text(text):
    """Normalise a resume string before it is vectorised.

    Deletes every character that is neither a word character (letter, digit,
    underscore) nor whitespace, then lower-cases what is left.

    Parameters
    ----------
    text : str or missing value
        Raw resume text. Anything that is not a ``str`` -- ``None``, or the
        float ``NaN`` pandas produces for an empty CSV cell -- is treated as
        an empty document instead of raising ``TypeError`` inside ``re.sub``.

    Returns
    -------
    str
        The cleaned, lower-cased text (``''`` for missing input).
    """
    # Guard first: re.sub only accepts str, and a single missing resume would
    # otherwise abort the whole Series.apply over the dataset.
    if not isinstance(text, str):
        return ''
    return re.sub(r'[^\w\s]', '', text).lower()

# Run the cleaner element-wise over the raw resume text and keep the result
# in its own column so the original 'Resume' text stays available.
df['cleaned_resume'] = df['Resume'].map(clean_text)


In [None]:
# Features are the cleaned resume texts; labels are the job categories.
X, y = df['cleaned_resume'], df['Category']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:
# TF-IDF features: English stop words removed, vocabulary capped at 3000 terms.
vectorizer = TfidfVectorizer(
    max_features=3000,
    stop_words='english',
)

# Fit the vocabulary on the training texts only, then reuse that fitted
# vectorizer for the held-out texts.
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)


In [None]:
# Multinomial Naive Bayes classifier with default settings, trained on the
# TF-IDF matrix of the training resumes.
model = MultinomialNB()
model.fit(X=X_train_tfidf, y=y_train)


In [None]:
# Predict a category for every held-out resume.
y_pred = model.predict(X_test_tfidf)

# Overall fraction of correct predictions on the test split.
test_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", test_accuracy)

# Per-category precision / recall / F1 breakdown.
per_class_report = classification_report(y_test, y_pred)
print("\nClassification Report:\n", per_class_report)


In [None]:
# Smoke test: classify one hand-written resume snippet with the trained model.
sample_resume = """
Experienced web developer with knowledge in HTML, CSS, JavaScript, and backend frameworks such as Node.js and Django.
"""
# Apply the same cleaning used for the training data before vectorising.
sample_clean = clean_text(sample_resume)
# transform() is given a one-element list holding the single cleaned text.
sample_vector = vectorizer.transform([sample_clean])
prediction = model.predict(sample_vector)

# Only the first element of the prediction is printed, since one text was passed.
print("Predicted Job Role:", prediction[0])
