## Prediction 

In [1]:
# Import required libraries
import pickle
import re
import numpy as np

# Load the saved components
def _load_pickle(path):
    """Unpickle and return the object stored at ``path``, closing the file afterwards."""
    # NOTE(security): unpickling runs whatever code the file embeds, so only
    # load artifacts that were produced by a trusted training run.
    with open(path, 'rb') as fh:
        return pickle.load(fh)


tfidf = _load_pickle('tfidf.pkl')        # vectorizer; used below via .transform()
svc_model = _load_pickle('clf.pkl')      # classifier; used below via .predict()
le = _load_pickle('encoder.pkl')         # label encoder; used below via .inverse_transform()

In [2]:
# Define the cleanResume function (same as used in training)
def cleanResume(txt):
    """Normalise raw resume text before it is vectorised.

    The substitutions run in this order, each replacing its match with
    whitespace:

    1. ``http`` followed by non-space characters *and* one trailing whitespace
       character (a URL at the very end of the text is therefore not matched
       here; its punctuation is still removed in step 5).
    2. The literal substrings ``RT`` and ``cc`` wherever they occur, including
       inside longer words (``account`` becomes ``a ount``).
    3. ``#`` followed by non-space characters and one trailing whitespace.
    4. ``@`` followed by non-space characters.
    5. Every ASCII punctuation character.
    6. Every non-ASCII character.
    7. Runs of whitespace collapse to a single space. Nothing is stripped, so
       one leading and/or trailing space can remain.

    The matching behaviour is intentionally left untouched: this function is
    meant to be the same cleaning that was applied in training, so altering
    what is removed would feed the vectorizer text it was not fitted on.
    Only the pattern literals were changed to raw strings, which keeps the
    compiled patterns identical while avoiding invalid-escape warnings.

    Args:
        txt: Resume text as a ``str``.

    Returns:
        The cleaned ``str``.
    """
    cleanText = re.sub(r'http\S+\s', ' ', txt)
    cleanText = re.sub(r'RT|cc', ' ', cleanText)
    cleanText = re.sub(r'#\S+\s', ' ', cleanText)
    cleanText = re.sub(r'@\S+', '  ', cleanText)
    # Character class built from the ASCII punctuation set (backslash included).
    cleanText = re.sub('[%s]' % re.escape(r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""), ' ', cleanText)
    cleanText = re.sub(r'[^\x00-\x7f]', ' ', cleanText)
    cleanText = re.sub(r'\s+', ' ', cleanText)
    return cleanText

  cleanText = re.sub('http\S+\s', ' ', txt)
  cleanText = re.sub('#\S+\s', ' ', cleanText)
  cleanText = re.sub('@\S+', '  ', cleanText)
  cleanText = re.sub('[%s]' % re.escape("""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""), ' ', cleanText)
  cleanText = re.sub('\s+', ' ', cleanText)


In [3]:
# Test the loaded files end to end on one sample resume to ensure they work properly
print("Testing saved files...")
print("="*30)

try:
    # Touch the three loaded objects before claiming success, so a kernel where
    # the loading cell never ran fails here (NameError) instead of after the
    # success line has already been printed.
    assert all(component is not None for component in (tfidf, svc_model, le)), \
        "a saved component is None"
    print("✅ All files loaded successfully!")

    # Test with a sample resume
    sample_resume = "Experienced software engineer with Python, machine learning, and data analysis skills"

    # Preprocess the sample: clean, vectorise, then convert to a dense array
    cleaned_sample = cleanResume(sample_resume)
    vectorized_sample = tfidf.transform([cleaned_sample])
    vectorized_sample = vectorized_sample.toarray()

    # Make prediction and map the predicted label back to its category name
    prediction = svc_model.predict(vectorized_sample)
    predicted_category = le.inverse_transform(prediction)[0]

    print("\n🧪 Test Prediction:")
    print(f"   Sample Resume: '{sample_resume}'")
    print(f"   Predicted Category: {predicted_category}")

    print("\n✅ Model deployment ready!")

except Exception as e:
    # Every failure in the block lands here (missing object, cleaning,
    # vectorising, predicting), so report the exception type instead of
    # attributing everything to file loading.
    print(f"❌ Smoke test failed ({type(e).__name__}): {e}")

Testing saved files...
✅ All files loaded successfully!

🧪 Test Prediction:
   Sample Resume: 'Experienced software engineer with Python, machine learning, and data analysis skills'
   Predicted Category: Data Science

✅ Model deployment ready!


In [4]:
# Function to predict the category of a resume
def pred(input_resume):
    """Predict the category name for one resume.

    The text is cleaned with ``cleanResume``, vectorised with the module-level
    ``tfidf`` object, converted to a dense array, classified by ``svc_model``,
    and the predicted label is translated back to a name through ``le``.

    Args:
        input_resume: Raw resume text.

    Returns:
        The first (and only) decoded category for the given resume.
    """
    # Single-document batch -> dense feature rows
    features = tfidf.transform([cleanResume(input_resume)]).toarray()

    # Encoded label(s) for the batch
    label_ids = svc_model.predict(features)

    # Decode and unwrap the single result
    return le.inverse_transform(label_ids)[0]

In [5]:
# Test with multiple resume examples
# Each entry is (heading printed before the prediction, resume text).
RESUME_EXAMPLES = [
    ("Data Science Resume:", """I am a data scientist specializing in machine
learning, deep learning, and computer vision. With
a strong background in mathematics, statistics,
and programming, I am passionate about
uncovering hidden patterns and insights in data.
I have extensive experience in developing
predictive models, implementing deep learning
algorithms, and designing computer vision
systems. My technical skills include proficiency in
Python, Sklearn, TensorFlow, and PyTorch."""),
    ("\nHealth and Fitness Resume:", """
Jane Smith is a certified personal trainer with over 5 years of experience in helping individuals achieve their fitness goals. Specializing in weight loss, strength training, and sports conditioning, Jane has developed personalized workout routines for clients of all ages and fitness levels. She has extensive knowledge in nutrition and exercise science, and uses this to create holistic health and fitness programs that are tailored to individual needs."""),
    ("\nNetwork Security Resume:", """
John Doe is an experienced Network Security Engineer with over 7 years of expertise in designing, implementing, and managing network security infrastructures. Specializing in safeguarding critical network systems, John has worked with various organizations to protect against cyber threats, data breaches, and unauthorized access. He is proficient in deploying firewalls, intrusion detection systems (IDS), VPNs, and network monitoring tools."""),
]

print("🔍 TESTING RESUME EXAMPLES")
print("="*50)

# `myresume` and `result` are rebound on every pass, so once the loop ends they
# hold the last example and its prediction.
for heading, myresume in RESUME_EXAMPLES:
    print(heading)
    result = pred(myresume)
    print(f"Prediction: {result}")

🔍 TESTING RESUME EXAMPLES
Data Science Resume:
Prediction: Data Science

Health and Fitness Resume:
Prediction: Health and fitness

Network Security Resume:
Prediction: Network Security Engineer


In [6]:
# Interactive prediction function
def interactive_prediction():
    """
    Prompt for resume text in a loop and print the predicted category each time.

    Every non-blank entry is passed unchanged to ``pred``. The loop ends when
    the entry, ignoring case and surrounding whitespace, equals ``quit``, or
    when the prompt can no longer be read (EOF) or is interrupted.

    Returns:
        None. All results are written to standard output.
    """
    print("🎯 INTERACTIVE RESUME CATEGORY PREDICTOR")
    print("="*45)
    print("Enter a resume text to predict its category.")
    print("Type 'quit' to exit.\n")

    while True:
        try:
            user_input = input("Enter resume text: ")
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or an interrupted prompt: leave the loop cleanly
            # rather than surfacing a traceback.
            print("\n👋 Goodbye!")
            break

        # Normalise once so the quit check and the blank check treat
        # surrounding whitespace the same way (previously "quit " was sent
        # to the model as if it were a resume).
        command = user_input.strip()

        if command.lower() == 'quit':
            print("👋 Goodbye!")
            break

        if command == "":
            print("⚠️  Please enter some text.")
            continue

        category = pred(user_input)
        print("\n📊 Prediction Result:")
        print(f"   Category: {category}\n")
        print("-" * 40)

# Uncomment the line below to run interactive mode
# interactive_prediction()