In [1]:
import spacy
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [5]:
# Load the spaCy pipeline once and keep it in the module-level name `nlp`;
# preprocess_text() calls it for every resume.
SPACY_MODEL_NAME = "en_core_web_sm"
nlp = spacy.load(SPACY_MODEL_NAME)

In [66]:
# Toy labelled corpus: every entry is a (resume text, job category) pair.
# There are five categories with two resumes each, listed as two passes over
# the same category order. Row order is significant because the seeded
# train/test split later in the notebook depends on it.
data = [
    (
        "Experienced software developer skilled in Python, Java, and machine learning.",
        "Software Developer",
    ),
    (
        "Certified public accountant with experience in auditing and tax planning.",
        "Accountant",
    ),
    (
        "Marketing manager with expertise in SEO, content creation, and campaign strategy.",
        "Marketing",
    ),
    (
        "Data scientist proficient in R, Python, and data visualization.",
        "Data Scientist",
    ),
    (
        "Human resources specialist focusing on recruitment and employee relations.",
        "HR",
    ),
    (
        "Software engineer with experience in web development, Python, and cloud computing.",
        "Software Developer",
    ),
    (
        "Accountant with skills in financial reporting, auditing, and taxation.",
        "Accountant",
    ),
    (
        "Digital marketing specialist skilled in SEO, PPC, and social media strategy.",
        "Marketing",
    ),
    (
        "Machine learning engineer experienced with Python, TensorFlow, and data analytics.",
        "Data Scientist",
    ),
    (
        "HR manager handling recruitment, onboarding, and organizational development.",
        "HR",
    ),
]


In [67]:
# Tabulate the (text, label) pairs: first tuple element -> "resume",
# second tuple element -> "category".
df = pd.DataFrame(
    data=data,
    columns=["resume", "category"],
)


In [68]:
def preprocess_text(text):
    """Reduce ``text`` to a space-separated string of lower-cased lemmas.

    The text is passed through the module-level ``nlp`` callable. A token is
    kept only if its ``is_stop`` flag is false and its ``is_alpha`` flag is
    true; each kept token contributes ``token.lemma_.lower()``.

    Parameters
    ----------
    text : str
        Raw text to normalise.

    Returns
    -------
    str
        The kept lemmas joined by single spaces (an empty string when no
        token survives the filter).
    """
    kept_lemmas = []
    for token in nlp(text):
        # Skip stop words and anything that is not flagged as alphabetic.
        if token.is_stop or not token.is_alpha:
            continue
        kept_lemmas.append(token.lemma_.lower())
    return ' '.join(kept_lemmas)

In [69]:
# Add a normalised copy of every resume; the TF-IDF step reads this column.
df['cleaned_text'] = df['resume'].map(preprocess_text)

In [70]:
# Target labels, one per resume.
y = df['category']

# TF-IDF features with the vectorizer's default settings.
# NOTE(review): the vectorizer is fit on every row of df here, i.e. before
# any train/test separation has taken place.
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(df['cleaned_text'])

In [76]:
# Split the cleaned *text* rather than the pre-computed matrix X.
# X was produced by a vectorizer fit on every row, so its vocabulary and IDF
# weights already contain information from the rows that end up in the test
# set. Splitting first and fitting TF-IDF on the training rows only keeps the
# held-out rows genuinely unseen.
#
# The split arguments are unchanged (half the rows held out, fixed seed,
# stratified on the label), so the same rows land in each partition as before.
text_train, text_test, y_train, y_test = train_test_split(
    df['cleaned_text'],
    y,
    test_size=0.5,
    random_state=42,
    stratify=y,
)

# Rebind `vectorizer` to the train-only fit so that any later
# `vectorizer.transform(...)` produces columns matching what the classifier
# is trained on.
vectorizer = TfidfVectorizer()
X_train = vectorizer.fit_transform(text_train)  # learn vocabulary + IDF from training rows
X_test = vectorizer.transform(text_test)        # reuse them unchanged for the held-out rows

In [77]:
# Fit a logistic-regression classifier on the training features
# (iteration cap set to 1000), then label the held-out rows.
clf = LogisticRegression(max_iter=1000).fit(X_train, y_train)
y_pred = clf.predict(X=X_test)

In [78]:
# Share of held-out resumes whose predicted category equals the true one.
accuracy = accuracy_score(
    y_true=y_test,
    y_pred=y_pred,
)
print("Accuracy:", accuracy)

Accuracy: 0.6
