In [6]:
import os
import json
from openai import OpenAI
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

True

In [13]:
import os
import json
from openai import OpenAI
from dotenv import load_dotenv

# Load environment variables
load_dotenv()


class CourseRecommender:
    """
    Recommends courses that cover multiple missing skills efficiently.
    Separates recommendations into free and paid courses.

    The recommendations are produced by an OpenAI chat model that is asked to
    answer with a JSON object. Every public method reports failures through
    its return value (an ``{"error": ...}`` dict or ``(None, None)``) rather
    than by raising, so results can be passed straight from
    ``recommend_courses`` to ``display_recommendations`` and
    ``save_recommendations``.
    """

    # Chat model used when the caller does not choose one.
    DEFAULT_MODEL = "gpt-3.5-turbo"

    def __init__(self, api_key=None, model=None):
        """
        Initialize the CourseRecommender with API credentials.

        Args:
            api_key (str): OpenAI API key. If None, reads from environment variable.
            model (str): Chat model name. If None, DEFAULT_MODEL is used.

        Raises:
            ValueError: If no API key is given and OPENAI_API_KEY is not set.
        """
        self.api_key = api_key or os.getenv('OPENAI_API_KEY')
        if not self.api_key:
            raise ValueError("OpenAI API key not provided. Set OPENAI_API_KEY environment variable.")

        self.client = OpenAI(api_key=self.api_key)
        self.model = model or self.DEFAULT_MODEL
        self.missing_skills = []

    def recommend_courses(self, missing_skills, num_free_courses=3, num_paid_courses=3):
        """
        Recommends courses that cover multiple skills from the missing_skills list.
        Split into free and paid course sections.

        Args:
            missing_skills (list): List of skills that need to be learned
            num_free_courses (int): Number of free courses to recommend (default: 3)
            num_paid_courses (int): Number of paid courses to recommend (default: 3)

        Returns:
            dict: The parsed recommendations on success; ``{"message": ...}``
            when no skills were given; ``{"error": ...}`` when the prompt
            could not be built, the API call failed, or the reply could not
            be parsed.
        """
        if not missing_skills:
            return {"message": "No missing skills provided"}

        # Store missing_skills for later use
        self.missing_skills = missing_skills

        # This method is the boundary to the remote service: any failure
        # (including building the prompt from malformed input) is converted
        # into an error dict instead of propagating to the caller.
        try:
            prompt = self._create_prompt(missing_skills, num_free_courses, num_paid_courses)
            response = self.client.chat.completions.create(
                model=self.model,
                messages=[
                    {"role": "system", "content": "You are a career advisor specializing in efficient skill development through comprehensive courses."},
                    {"role": "user", "content": prompt}
                ],
                temperature=0.7,
                max_tokens=2500
            )
            return self._parse_response(response.choices[0].message.content)

        except Exception as e:
            return {"error": f"Failed to generate recommendations: {str(e)}"}

    def _create_prompt(self, missing_skills, num_free_courses, num_paid_courses):
        """
        Create a detailed prompt for the LLM.

        Args:
            missing_skills (list): List of missing skills. A single string is
                treated as one skill; non-string items are converted with str().
            num_free_courses (int): Number of free courses to recommend
            num_paid_courses (int): Number of paid courses to recommend

        Returns:
            str: Formatted prompt
        """
        # A bare string would otherwise be joined character by character.
        if isinstance(missing_skills, str):
            missing_skills = [missing_skills]
        skills_str = ", ".join(str(skill) for skill in missing_skills)

        prompt = f"""
I need to learn the following skills: {skills_str}

Please recommend courses in TWO SEPARATE SECTIONS:
1. FREE COURSES: {num_free_courses} free courses
2. PAID COURSES: {num_paid_courses} paid courses

Each section should contain comprehensive courses that cover MULTIPLE skills from my list.
I want courses that are efficient and teach several related skills together.

IMPORTANT: Only recommend courses from these three platforms:
- DeepLearning.AI
- Udemy
- Coursera

For each course recommendation, provide:
1. Course Title
2. Platform (MUST be one of: DeepLearning.AI, Udemy, or Coursera)
3. Skills Covered (list which skills from my list this course teaches)
4. Additional Skills (any bonus skills not in my list that this course also covers)
5. Duration (estimated time to complete)
6. Difficulty Level (Beginner, Intermediate, Advanced)
7. Description (3-4 sentences about what the course covers)
8. Why it's efficient (explain how it covers multiple skills effectively)
9. Cost (for FREE section: must be "Free" or "Free with paid certificate option"; for PAID section: include approximate price like "$49.99" or "$199")
10. Link (if you know a specific course URL, otherwise null)

Prioritize:
- Courses that cover the MOST skills from my list
- Well-reviewed courses from reputable instructors
- Project-based and practical courses
- Up-to-date content
- Mix of free and paid options
- ONLY from DeepLearning.AI, Udemy, or Coursera

At the end, provide a skill coverage summary showing which skills are covered by which courses.

Format your response as a JSON object with this structure:
{{
    "free_courses": [
        {{
            "title": "Course Title",
            "platform": "Platform Name",
            "skills_covered": ["skill1", "skill2", "skill3"],
            "additional_skills": ["bonus_skill1", "bonus_skill2"],
            "duration": "X weeks/hours",
            "difficulty": "Beginner/Intermediate/Advanced",
            "description": "Course description",
            "why_efficient": "Explanation of multi-skill coverage",
            "cost": "Free",
            "link": "URL or null"
        }}
    ],
    "paid_courses": [
        {{
            "title": "Course Title",
            "platform": "Platform Name",
            "skills_covered": ["skill1", "skill2", "skill3"],
            "additional_skills": ["bonus_skill1", "bonus_skill2"],
            "duration": "X weeks/hours",
            "difficulty": "Beginner/Intermediate/Advanced",
            "description": "Course description",
            "why_efficient": "Explanation of multi-skill coverage",
            "cost": "$XX.XX",
            "link": "URL or null"
        }}
    ],
    "skill_coverage": {{
        "skill_name": ["Course 1", "Course 2"],
        "another_skill": ["Course 1"]
    }},
    "uncovered_skills": ["skill1", "skill2"],
    "coverage_percentage": 85
}}

Only return valid JSON, no additional text.
"""
        return prompt

    def _parse_response(self, response_text):
        """
        Parse the LLM response into structured format.

        The JSON object is taken as everything between the first '{' and the
        last '}', so surrounding text such as Markdown code fences is ignored.

        Args:
            response_text (str): Raw response from LLM

        Returns:
            dict: Parsed recommendations, or a dict with "error" and
            "raw_response" keys when no JSON object could be extracted.
        """
        unparseable = {"error": "Could not parse JSON from response", "raw_response": response_text}

        # The response may be missing altogether (e.g. None); report that
        # the same way as a reply without any JSON instead of raising.
        if not isinstance(response_text, str):
            return unparseable

        start_idx = response_text.find('{')
        end_idx = response_text.rfind('}') + 1
        if start_idx == -1 or end_idx == 0:
            return unparseable

        try:
            return json.loads(response_text[start_idx:end_idx])
        except json.JSONDecodeError as e:
            return {"error": f"JSON parsing error: {str(e)}", "raw_response": response_text}

    def save_recommendations(self, recommendations, free_filename="free_courses.json", paid_filename="paid_courses.json"):
        """
        Save recommendations to two separate JSON files - one for free courses, one for paid.

        Both files also receive the shared skill-coverage summary. Nothing is
        written when ``recommendations`` is not a dict or is an error/message
        result, so a failed run does not leave behind files that look like
        empty but successful recommendations.

        Args:
            recommendations (dict): Course recommendations
            free_filename (str): Filename for free courses (default: "free_courses.json")
            paid_filename (str): Filename for paid courses (default: "paid_courses.json")

        Returns:
            tuple: ``(free_filename, paid_filename)`` on success,
            ``(None, None)`` if nothing was saved.
        """
        if not isinstance(recommendations, dict):
            print("\n❌ Error saving recommendations: expected a dict of recommendations")
            return None, None

        # Results produced by a failed or skipped run carry one of these keys.
        for marker in ("error", "message"):
            if marker in recommendations:
                print(f"\n❌ Nothing to save: {recommendations[marker]}")
                return None, None

        # Coverage summary duplicated into both output files.
        summary = {
            "skill_coverage": recommendations.get('skill_coverage', {}),
            "uncovered_skills": recommendations.get('uncovered_skills', []),
            "coverage_percentage": recommendations.get('coverage_percentage', 0)
        }

        try:
            # Save free courses
            free_data = {"free_courses": recommendations.get('free_courses', []), **summary}
            with open(free_filename, 'w', encoding='utf-8') as f:
                json.dump(free_data, f, indent=4)
            print(f"\n✅ Free courses saved to {free_filename}")

            # Save paid courses
            paid_data = {"paid_courses": recommendations.get('paid_courses', []), **summary}
            with open(paid_filename, 'w', encoding='utf-8') as f:
                json.dump(paid_data, f, indent=4)
            print(f"✅ Paid courses saved to {paid_filename}")

            return free_filename, paid_filename

        except (OSError, TypeError, ValueError) as e:
            # OSError: file could not be written; TypeError/ValueError: the
            # data is not JSON-serializable.
            print(f"\n❌ Error saving recommendations: {str(e)}")
            return None, None

    def display_recommendations(self, recommendations):
        """
        Display recommendations in a user-friendly format with separate free and paid sections.

        Error results and informational "message" results (as returned by
        ``recommend_courses``) are printed on their own, without the course
        report.

        Args:
            recommendations (dict): Result returned by ``recommend_courses``
        """
        if "error" in recommendations:
            print(f"❌ Error: {recommendations['error']}")
            return

        # recommend_courses returns {"message": ...} when no skills were
        # given; there is no report to render in that case.
        if "message" in recommendations:
            print(f"ℹ️  {recommendations['message']}")
            return

        free_courses = recommendations.get('free_courses', [])
        paid_courses = recommendations.get('paid_courses', [])
        skill_coverage = recommendations.get('skill_coverage', {})
        uncovered = recommendations.get('uncovered_skills', [])
        coverage_pct = recommendations.get('coverage_percentage', 0)

        print("\n" + "="*70)
        print("📚 MULTI-SKILL COURSE RECOMMENDATIONS")
        print("="*70 + "\n")

        print(f"✅ Overall Skill Coverage: {coverage_pct}%\n")

        # FREE COURSES SECTION
        print("\n" + "🆓"*35)
        print("💚 FREE COURSES")
        print("🆓"*35 + "\n")

        if free_courses:
            for i, course in enumerate(free_courses, 1):
                self._display_course(i, course)
        else:
            print("   No free courses available.\n")

        # PAID COURSES SECTION
        print("\n" + "💰"*35)
        print("💳 PAID COURSES (Premium Options)")
        print("💰"*35 + "\n")

        if paid_courses:
            for i, course in enumerate(paid_courses, 1):
                self._display_course(i, course)
        else:
            print("   No paid courses available.\n")

        # Skill Coverage Summary
        print("\n\n" + "="*70)
        print("📊 SKILL COVERAGE SUMMARY")
        print("="*70 + "\n")

        for skill, course_list in skill_coverage.items():
            print(f"✓ {skill}")
            for course_name in course_list:
                print(f"    → {course_name}")
            print()

        if uncovered:
            print(f"⚠️  UNCOVERED SKILLS ({len(uncovered)}):")
            for skill in uncovered:
                print(f"   • {skill}")
            print("\n   💡 Tip: Consider additional courses or projects for these skills.\n")

    def _display_course(self, index, course):
        """
        Helper method to display a single course.

        Args:
            index (int): 1-based position of the course within its section
            course (dict): Course entry; missing fields are shown as "N/A"
        """
        print("\n" + "="*70)
        print(f"Course {index}: {course.get('title', 'N/A')}")
        print("="*70)
        print(f"🎓 Platform: {course.get('platform', 'N/A')}")
        print(f"⏱️  Duration: {course.get('duration', 'N/A')}")
        print(f"📊 Difficulty: {course.get('difficulty', 'N/A')}")
        print(f"💰 Cost: {course.get('cost', 'N/A')}")

        skills_covered = course.get('skills_covered', [])
        if skills_covered:
            print(f"\n✨ Skills Covered ({len(skills_covered)}):")
            for skill in skills_covered:
                print(f"   • {skill}")

        additional = course.get('additional_skills', [])
        if additional:
            print("\n🎁 Bonus Skills:")
            for skill in additional:
                print(f"   • {skill}")

        print("\n📝 Description:")
        print(f"   {course.get('description', 'N/A')}")

        print("\n⚡ Why It's Efficient:")
        print(f"   {course.get('why_efficient', 'N/A')}")

        # The prompt allows the model to answer with the string "null" when
        # it knows no URL; treat that the same as a missing link.
        link = course.get('link')
        if link and link != 'null':
            print(f"\n🔗 Link: {link}")


# Demo run: ask for recommendations covering a sample skill gap, then show
# and persist the result.
if __name__ == "__main__":
    # Sample skill gap used for the demonstration.
    missing_skills = [
        "Python", "Machine Learning", "Data Analysis", "SQL",
        "Data Visualization", "Statistics", "Pandas", "NumPy",
    ]

    # Built before anything is printed, so a missing API key fails up front.
    recommender = CourseRecommender()

    skills_line = ", ".join(missing_skills)
    print("🔍 Generating optimized course recommendations...")
    print(f"📋 Missing Skills: {skills_line}\n")

    # Three free and three paid suggestions.
    recommendations = recommender.recommend_courses(
        missing_skills, num_free_courses=3, num_paid_courses=3
    )

    # Render the report on stdout ...
    recommender.display_recommendations(recommendations)

    # ... and write it to the two default JSON files (free / paid).
    recommender.save_recommendations(recommendations)

🔍 Generating optimized course recommendations...
📋 Missing Skills: Python, Machine Learning, Data Analysis, SQL, Data Visualization, Statistics, Pandas, NumPy


📚 MULTI-SKILL COURSE RECOMMENDATIONS

✅ Overall Skill Coverage: 100%


🆓🆓🆓🆓🆓🆓🆓🆓🆓🆓🆓🆓🆓🆓🆓🆓🆓🆓🆓🆓🆓🆓🆓🆓🆓🆓🆓🆓🆓🆓🆓🆓🆓🆓🆓
💚 FREE COURSES
🆓🆓🆓🆓🆓🆓🆓🆓🆓🆓🆓🆓🆓🆓🆓🆓🆓🆓🆓🆓🆓🆓🆓🆓🆓🆓🆓🆓🆓🆓🆓🆓🆓🆓🆓


Course 1: Python for Everybody
🎓 Platform: Coursera
⏱️  Duration: 7 weeks
📊 Difficulty: Beginner
💰 Cost: Free

✨ Skills Covered (2):
   • Python
   • SQL

🎁 Bonus Skills:
   • Data Analysis

📝 Description:
   This course teaches Python programming from basics to advanced concepts, including data structures, databases, and SQL. It focuses on practical applications for data analysis.

⚡ Why It's Efficient:
   Efficiently covers Python and SQL fundamentals essential for data analysis tasks.

🔗 Link: https://www.coursera.org/specializations/python

Course 2: Data Science Math Skills
🎓 Platform: Coursera
⏱️  Duration: 4 weeks
📊 Difficulty: Intermediate
💰 Cost: Free

✨ Skills 