<h1>AI-ML Research: Career Recommendation Engine</h1>

In [1]:
import warnings
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
warnings.filterwarnings('ignore')

<b>ML Concepts:</b><br>
<b>- Content-Based Filtering (TF-IDF + Cosine Similarity)</b><br>
<b>- Classification (Random Forest)</b><br>
<b>- Natural Language Processing</b><br>
<hr>
<b>ML Algorithm: Content-Based Filtering</b><br>
<b>Technique: TF-IDF Vectorization + Cosine Similarity</b><br>
<b>Purpose: Match student skills with job requirements</b><br>
<hr>
<b>ML Algorithm: Collaborative Filtering (User-Based)</b><br>
<b>Concept: Recommend based on similar students' preferences</b><br>
<hr>
<b>ML Concept: Hybrid Recommendation System</b><br>
<b>Combines: Content-Based + Collaborative Filtering</b><br>

In [5]:

class CareerRecommendationAI:
    """Demo career recommender that combines two simple techniques.

    * Content-based filtering: the student's skill text and a small built-in
      catalogue of job descriptions are TF-IDF vectorised and ranked by cosine
      similarity.
    * Collaborative filtering (user-based): the student's skill tokens are
      compared (Jaccard overlap) with a small built-in table of other students'
      courses, and the closest student's courses are suggested.

    Attributes:
        student_data: DataFrame set by ``load_datasets`` (``None`` before that).
        job_data: DataFrame set by ``load_datasets`` (``None`` before that).
        career_data: DataFrame set by ``load_datasets`` (``None`` before that).
        tfidf_vectorizer: ``TfidfVectorizer`` re-fitted on every call to
            ``content_based_recommendation``.
        classifier: ``RandomForestClassifier`` created in ``__init__``; no
            method of this class uses it.
    """

    # Directory the notebook originally read its CSV files from. Pass
    # ``data_dir`` to ``load_datasets`` to read from another location.
    DEFAULT_DATA_DIR = "C:/Users/ommji_mttma5p/OneDrive/Desktop/BPUT/career_platform/datasets"

    # (job title, required-skills text) pairs used by the content-based step.
    # Kept as pairs so a title can never drift out of sync with its description.
    JOB_CATALOG = (
        ("Data Scientist", "python java sql machine learning data analysis"),
        ("Web Developer", "javascript html css react web development"),
        ("Project Manager", "communication teamwork leadership project management"),
        ("Data Analyst", "python data science statistics analytics"),
        ("Software Engineer", "java spring hibernate software development"),
        ("Cloud Engineer", "cloud computing aws docker devops"),
        ("Digital Marketer", "digital marketing seo social media"),
        ("Mobile Developer", "mobile development android ios flutter"),
    )

    # Simulated student -> courses interactions used by the collaborative step.
    STUDENT_COURSES = {
        'student1': ['python', 'data_science', 'machine_learning'],
        'student2': ['python', 'web_development', 'javascript'],
        'student3': ['java', 'software_engineering', 'algorithms'],
        'student4': ['python', 'data_analysis', 'statistics'],
        'student5': ['communication', 'leadership', 'project_management'],
    }

    # Fixed confidence attached to collaborative suggestions in the hybrid list
    # (the collaborative step produces no score of its own).
    DEFAULT_COLLAB_CONFIDENCE = 75.0

    def __init__(self):
        self.student_data = None
        self.job_data = None
        self.career_data = None
        self.tfidf_vectorizer = TfidfVectorizer(max_features=50, stop_words='english')
        self.classifier = RandomForestClassifier(n_estimators=100, random_state=42)

    def load_datasets(self, data_dir=None):
        """Load the student, job-posting and career CSV files.

        Args:
            data_dir: Directory containing ``StudentsPerformance.csv``,
                ``postings.csv`` and ``AI-based Career Recommendation System.csv``.
                Defaults to ``DEFAULT_DATA_DIR``.

        Returns:
            Tuple ``(student_data, job_data)``. The student frame gains a
            ``skills`` text column built from its three score columns. The
            career frame is kept on ``self.career_data``.

        Raises:
            KeyError: if the student file lacks a ``math score``,
                ``writing score`` or ``reading score`` column.
        """
        base_dir = Path(self.DEFAULT_DATA_DIR if data_dir is None else data_dir)

        # Student data
        students = pd.read_csv(base_dir / "StudentsPerformance.csv")
        # Vectorised equivalent of formatting each row with an f-string.
        students['skills'] = (
            "math_" + students['math score'].astype(str)
            + " writing_" + students['writing score'].astype(str)
            + " reading_" + students['reading score'].astype(str)
        )
        self.student_data = students

        # Job data from LinkedIn
        self.job_data = pd.read_csv(base_dir / "postings.csv")

        # Career recommendation dataset (kept so callers can use it later)
        self.career_data = pd.read_csv(base_dir / "AI-based Career Recommendation System.csv")

        print("📊 Loaded Multiple Datasets for Career Recommendation")
        print(f"   - Student Records: {len(self.student_data)}")
        print(f"   - Job Postings: {len(self.job_data)}")
        print(f"   - Career Paths: {len(self.career_data)}")

        return self.student_data, self.job_data

    def content_based_recommendation(self, student_skills, top_n=5):
        """Rank the built-in job catalogue against a skills string.

        Args:
            student_skills: Whitespace-separated skills text. ``None`` is
                treated as an empty string for vectorisation.
            top_n: Maximum number of jobs to return; values <= 0 return ``[]``.

        Returns:
            List of dicts with keys ``job_title``, ``similarity_score``
            (cosine similarity as a percentage rounded to 2 decimals),
            ``required_skills`` and ``algorithm``, best match first.
        """
        print(f"\n🎯 ML Step 1: Content-Based Filtering for {student_skills}")

        job_titles = [title for title, _ in self.JOB_CATALOG]
        job_descriptions = [skills for _, skills in self.JOB_CATALOG]

        # Row 0 is the student; rows 1.. are the jobs, sharing one vocabulary.
        all_texts = [student_skills or ""] + job_descriptions
        tfidf_matrix = self.tfidf_vectorizer.fit_transform(all_texts)

        cosine_similarities = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:]).flatten()

        # Reverse first, then truncate: ``[-top_n:]`` would return every job
        # when top_n == 0 because ``[-0:]`` is the whole array.
        limit = max(int(top_n), 0)
        top_indices = cosine_similarities.argsort()[::-1][:limit]

        recommendations = [
            {
                'job_title': job_titles[idx],
                'similarity_score': float(round(cosine_similarities[idx] * 100, 2)),
                'required_skills': job_descriptions[idx],
                'algorithm': 'TF-IDF + Cosine Similarity',
            }
            for idx in top_indices
        ]

        print("✅ Content-Based Recommendations Generated")
        for rec in recommendations:
            print(f"   🎯 {rec['job_title']}: {rec['similarity_score']}% match")

        return recommendations

    @staticmethod
    def _profile_has_skill(profile_words, course):
        """Return True when ``course`` occurs in ``profile_words`` as whole words.

        ``course`` uses underscores between words (``data_science``); it matches
        when those words appear consecutively in ``profile_words``.
        """
        phrase = course.lower().split('_')
        width = len(phrase)
        return any(
            profile_words[start:start + width] == phrase
            for start in range(len(profile_words) - width + 1)
        )

    def collaborative_filtering_recommendation(self, student_profile):
        """Suggest courses taken by the most similar simulated student.

        Similarity is the Jaccard overlap between the profile's whitespace
        tokens and each entry of ``STUDENT_COURSES``; on ties the first entry
        wins. Courses the profile already mentions are skipped.

        Args:
            student_profile: Whitespace-separated skills text. ``None`` is
                treated as an empty profile.

        Returns:
            List of dicts with keys ``course``, ``recommended_by`` and
            ``algorithm``.
        """
        print("\n🎯 ML Step 2: Collaborative Filtering")

        profile_text = (student_profile or "").lower()
        student_skills = set(profile_text.split())
        # Word list with underscores split, for whole-word "already known" checks.
        profile_words = profile_text.replace('_', ' ').split()

        def jaccard(courses):
            course_set = set(courses)
            union = student_skills | course_set
            return len(student_skills & course_set) / len(union) if union else 0.0

        # max() returns the first maximal entry, matching a stable descending sort.
        similar_student = max(
            self.STUDENT_COURSES,
            key=lambda name: jaccard(self.STUDENT_COURSES[name]),
        )

        # Whole-word matching: a plain substring test would treat "javascript"
        # as knowing "java" and would miss "data science" for "data_science".
        recommendations = [
            {
                'course': course.replace('_', ' ').title(),
                'recommended_by': 'Similar Student Profile',
                'algorithm': 'Collaborative Filtering',
            }
            for course in self.STUDENT_COURSES[similar_student]
            if not self._profile_has_skill(profile_words, course)
        ]

        print("✅ Collaborative Filtering Recommendations")
        for rec in recommendations[:3]:
            print(f"   📚 Learn: {rec['course']}")

        return recommendations

    def hybrid_recommendation(self, student_skills):
        """Merge the top 3 job matches with up to 2 course suggestions.

        Calls both recommenders, so their console output is printed as well.

        Args:
            student_skills: Whitespace-separated skills text.

        Returns:
            List of dicts with keys ``type``, ``recommendation``,
            ``confidence`` and ``source``; job roles first, then courses.
        """
        print("\n🚀 ML Step 3: Hybrid Recommendation System")

        content_recs = self.content_based_recommendation(student_skills, top_n=3)
        collab_recs = self.collaborative_filtering_recommendation(student_skills)

        hybrid_recs = [
            {
                'type': 'Job Role',
                'recommendation': rec['job_title'],
                'confidence': rec['similarity_score'],
                'source': 'Content-Based Filtering',
            }
            for rec in content_recs
        ]
        hybrid_recs.extend(
            {
                'type': 'Skill Development',
                'recommendation': rec['course'],
                'confidence': self.DEFAULT_COLLAB_CONFIDENCE,  # Default confidence
                'source': 'Collaborative Filtering',
            }
            for rec in collab_recs[:2]
        )

        print("✅ Hybrid Recommendations Generated")
        print("   Combining best of both approaches:")
        print("   - Content-Based: Skill-to-Job matching")
        print("   - Collaborative: Peer-based learning paths")

        return hybrid_recs

    def explain_ml_concepts(self):
        """Print a short description of each ML concept used by this class."""
        concepts = {
            'content_based_filtering': {
                'algorithm': 'TF-IDF + Cosine Similarity',
                'purpose': 'Match text similarity between student skills and job descriptions',
                'input': 'Student skills text, Job description text',
                'output': 'Similarity scores and job recommendations',
                'advantage': 'No need for historical data, works with content alone'
            },
            'collaborative_filtering': {
                'algorithm': 'User-Based Collaborative Filtering',
                'purpose': 'Recommend based on similar users preferences',
                'input': 'User-item interaction matrix',
                'output': 'Recommendations from similar user behavior',
                'advantage': 'Discovers unexpected but relevant recommendations'
            },
            'hybrid_system': {
                'approach': 'Combine Content-Based + Collaborative Filtering',
                'purpose': 'Leverage strengths of both methods',
                'benefit': 'More accurate and diverse recommendations',
                'business_value': 'Personalized career paths for each student'
            }
        }

        print("\n🧠 ML Concepts in Career Recommendation")
        for concept, details in concepts.items():
            print(f"\n📖 {concept.replace('_', ' ').title()}:")
            for key, value in details.items():
                print(f"   {key.replace('_', ' ').title()}: {value}")



In [6]:
# Execute career recommendation
# Build the recommender and read the student / job-posting / career CSV files;
# load_datasets() prints a row-count summary and returns the first two frames.
recommender = CareerRecommendationAI()
student_data, job_data = recommender.load_datasets()

# Sample student profile
# Whitespace-separated skills; both recommenders treat it as plain text.
sample_student = "python java sql communication problem solving"

# Generate recommendations
# Note: hybrid_recommendation() calls the content-based and collaborative steps
# itself, so their "ML Step 1" / "ML Step 2" output is printed a second time.
content_recs = recommender.content_based_recommendation(sample_student)
collab_recs = recommender.collaborative_filtering_recommendation(sample_student)  
hybrid_recs = recommender.hybrid_recommendation(sample_student)

# Explain ML concepts
# Prints only; the call returns None, so the cell shows no Out[] value.
recommender.explain_ml_concepts()

📊 Loaded Multiple Datasets for Career Recommendation
   - Student Records: 1000
   - Job Postings: 123849
   - Career Paths: 200

🎯 ML Step 1: Content-Based Filtering for python java sql communication problem solving
✅ Content-Based Recommendations Generated
   🎯 Data Scientist: 35.98% match
   🎯 Project Manager: 15.48% match
   🎯 Software Engineer: 12.58% match
   🎯 Data Analyst: 12.32% match
   🎯 Cloud Engineer: 0.0% match

🎯 ML Step 2: Collaborative Filtering
✅ Collaborative Filtering Recommendations
   📚 Learn: Data Science
   📚 Learn: Machine Learning

🚀 ML Step 3: Hybrid Recommendation System

🎯 ML Step 1: Content-Based Filtering for python java sql communication problem solving
✅ Content-Based Recommendations Generated
   🎯 Data Scientist: 35.98% match
   🎯 Project Manager: 15.48% match
   🎯 Software Engineer: 12.58% match

🎯 ML Step 2: Collaborative Filtering
✅ Collaborative Filtering Recommendations
   ðŸ“š Learn: Data Sc