In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
import re
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import nltk

In [2]:
# Fetch the NLTK resources used by preprocess_text.
# 'punkt' is the tokenizer model used by older NLTK releases; newer releases
# (3.8.2+) look up 'punkt_tab' instead, so both are requested to keep
# word_tokenize working regardless of the installed version.
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('stopwords')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\dell\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\dell\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [3]:
# Load the resume dataset; the path is resolved relative to the notebook's working directory.
data = pd.read_csv(filepath_or_buffer="UpdatedResumeDataSet.csv")

In [4]:
# Discard every row that has a missing value in any column.
data = data.dropna()

In [5]:
# Preprocessing function
# Stop-word sets keyed by language, filled on first use so the corpus file is
# read once instead of once per document.
_STOP_WORDS_BY_LANGUAGE = {}


def _get_stop_words(language='english'):
    """Return the NLTK stop-word list for ``language`` as a frozenset, loading it once."""
    if language not in _STOP_WORDS_BY_LANGUAGE:
        _STOP_WORDS_BY_LANGUAGE[language] = frozenset(stopwords.words(language))
    return _STOP_WORDS_BY_LANGUAGE[language]


def preprocess_text(text):
    """Normalise one document for TF-IDF vectorisation.

    Steps, in order: delete every character that is neither a word character
    nor whitespace, lowercase, tokenize with ``word_tokenize``, and drop
    English stop words.

    Parameters
    ----------
    text : str
        Raw document text.

    Returns
    -------
    str
        The surviving tokens joined by single spaces ("" if none survive).
    """
    # Punctuation is deleted rather than replaced by a space, so a token such
    # as "e-commerce" collapses to "ecommerce".
    text = re.sub(r'[^\w\s]', '', text)
    tokens = word_tokenize(text.lower())
    stop_words = _get_stop_words('english')
    return " ".join(token for token in tokens if token not in stop_words)

In [7]:
# Run every resume through preprocess_text and keep the result in a new column
data['Cleaned_Resume'] = data['Resume'].map(preprocess_text)

In [9]:
# Features are the cleaned resume text; the target is the job category.
X, y = data['Cleaned_Resume'], data['Category']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
# NOTE(review): the split is not stratified, so rare categories may be unevenly
# represented in the test set - consider stratify=y.
# NOTE(review): if the CSV contains duplicate resumes, a random split can place
# copies on both sides and inflate the test score - TODO confirm with
# data.duplicated().sum().
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [10]:
# Turn the cleaned text into TF-IDF features.
# The vectorizer is fitted on the training split only; the test split is
# transformed with that already-fitted vectorizer.
tfidf = TfidfVectorizer()
X_train_tfidf = tfidf.fit_transform(raw_documents=X_train)
X_test_tfidf = tfidf.transform(raw_documents=X_test)

In [11]:
# Linear-kernel support vector classifier trained on the TF-IDF training matrix.
svm = SVC(random_state=42, kernel='linear')
svm.fit(X=X_train_tfidf, y=y_train)

In [12]:
# Score the classifier on the held-out split
y_pred = svm.predict(X_test_tfidf)

test_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", test_accuracy)

per_class_report = classification_report(y_test, y_pred)
print("Classification Report:\n", per_class_report)

Accuracy: 0.9948186528497409
Classification Report:
                            precision    recall  f1-score   support

                 Advocate       1.00      1.00      1.00         3
                     Arts       1.00      1.00      1.00         6
       Automation Testing       1.00      1.00      1.00         5
               Blockchain       1.00      1.00      1.00         7
         Business Analyst       0.80      1.00      0.89         4
           Civil Engineer       1.00      1.00      1.00         9
             Data Science       1.00      1.00      1.00         5
                 Database       1.00      1.00      1.00         8
          DevOps Engineer       1.00      0.93      0.96        14
         DotNet Developer       1.00      1.00      1.00         5
            ETL Developer       1.00      1.00      1.00         7
   Electrical Engineering       1.00      1.00      1.00         6
                       HR       1.00      1.00      1.00        12
        

In [13]:
# Example resumes for testing
# Five hand-written, free-text snippets used as a qualitative sanity check of
# the trained pipeline. They are raw text and are cleaned before prediction.
test_resumes = [
    # 1. Data analysis / machine-learning profile
    """Skills: Proficient in Python, R, SQL, and Tableau. Hands-on experience with machine learning algorithms like regression, SVM, and decision trees. Performed data visualization using Matplotlib and Seaborn. Worked on a fraud detection project using Random Forest and K-means clustering. Developed predictive models for sales forecasting.""",
    
    # 2. Java / backend web development profile
    """Skills: Proficient in Java, Spring Boot, and REST API development. Experience with building scalable backend systems. Developed e-commerce web applications using React and Node.js. Integrated payment gateways like Stripe and PayPal. Implemented microservices architecture for a high-traffic application.""",
    
    # 3. Network security / penetration-testing profile
    """Skills: Knowledge of penetration testing, network security, and vulnerability assessment. Hands-on experience with tools like Metasploit, Wireshark, and Burp Suite. Implemented intrusion detection systems and security protocols for enterprise networks. Performed malware analysis and threat hunting in large datasets.""",
    
    # 4. Human-resources profile
    """Skills: Proficient in employee management systems, performance tracking, and recruitment strategies. Experienced in talent acquisition and training programs. Developed HR policies to improve workplace productivity. Conducted employee engagement surveys and created action plans based on feedback.""",
    
    # 5. Digital-marketing profile
    """Skills: Expertise in digital marketing strategies, including SEO, PPC, and social media campaigns. Developed marketing plans and analyzed campaign performance using tools like Google Analytics and HubSpot. Conducted market research to identify customer needs and industry trends."""
]

# Clean the sample resumes with the same routine used on the training data
test_resumes_cleaned = list(map(preprocess_text, test_resumes))

# Encode them with the already-fitted vectorizer (transform only, no re-fitting)
test_resumes_tfidf = tfidf.transform(test_resumes_cleaned)

# One predicted category per sample resume, in input order
predictions = svm.predict(test_resumes_tfidf)

# Report the results, numbering the resumes from 1
for resume_number, predicted_category in enumerate(predictions, start=1):
    print(f"Resume {resume_number} Prediction: {predicted_category}")


Resume 1 Prediction: Data Science
Resume 2 Prediction: Java Developer
Resume 3 Prediction: HR
Resume 4 Prediction: HR
Resume 5 Prediction: Data Science
