In [1]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import TruncatedSVD

def filter_courses_and_print_top_5(csv_file):
    """Interactively filter a course catalogue and print up to five matches.

    Reads ``csv_file`` into a DataFrame, builds a bag-of-words matrix from
    each course's title, level and keyword text, and reduces it with
    truncated SVD. It then prompts on stdin for a level, a study-time budget
    (whole hours) and a comma-separated list of keywords.

    A course is kept when its level equals the requested level
    (case-insensitive), its duration is a number not exceeding the budget,
    and its keyword text contains every requested keyword. Kept courses are
    ranked by the sum of their SVD components and the title and URL of the
    top five are printed.

    Args:
        csv_file: Anything ``pandas.read_csv`` accepts. The CSV must provide
            the columns "Course Title", "Level", "Keyword",
            "Duration to complete (Approx.)" and "Course Url".

    Returns:
        None. All results are written to stdout.

    Raises:
        KeyError: If one of the required columns is missing.
        ValueError: If the hours entered on stdin are not an integer.
    """
    # Step 1: Read CSV File
    df = pd.read_csv(csv_file)

    # Step 2: Select Features
    text_columns = ["Course Title", "Level", "Keyword"]
    duration_column = "Duration to complete (Approx.)"

    # Step 3: Create a column in DF which combines all selected features.
    # Only the text columns get '' for missing values; forcing them to str
    # makes the concatenation below safe even if a column was read as numeric.
    for column in text_columns:
        df[column] = df[column].fillna('').astype(str)

    # Durations stay numeric. Missing or unparseable values become NaN, which
    # never satisfies "<= hours_available", so such rows are skipped instead
    # of crashing the float conversion.
    durations = pd.to_numeric(df[duration_column], errors='coerce')

    df["combined_features"] = (
        df["Course Title"] + " " + df["Level"] + " " + df["Keyword"]
    )

    # Step 4: Apply Singular Value Decomposition (SVD)
    cv = CountVectorizer()
    count_matrix = cv.fit_transform(df["combined_features"])

    # TruncatedSVD rejects n_components larger than the vocabulary size, so
    # cap the usual 50 for small catalogues (large ones still get 50).
    n_features = count_matrix.shape[1]
    n_components = max(1, min(50, n_features - 1))
    svd = TruncatedSVD(n_components=n_components, random_state=42)
    count_matrix_reduced = svd.fit_transform(count_matrix)

    # Step 5: Get user input
    level_needed = input("Enter the level needed (e.g., Beginner, Intermediate, Advanced): ").strip().lower()
    hours_available = int(input("Enter the number of hours available for study: "))
    keywords = [keyword.strip().lower() for keyword in input("Enter keywords (separated by commas if multiple): ").split(',')]

    print("\nUser Input:")
    print("Level Needed:", level_needed)
    print("Hours Available:", hours_available)
    print("Keywords:", keywords)

    # Step 6: Filter courses based on user input
    # NOTE(review): the level comparison is an exact (case-insensitive) match,
    # so the entry must spell the level the way the CSV does - confirm the
    # prompt's examples agree with the data.
    level_matches = df['Level'].str.lower() == level_needed
    within_budget = durations <= hours_available
    has_all_keywords = pd.Series(
        [all(keyword in text for keyword in keywords)
         for text in df['Keyword'].str.lower()],
        index=df.index,
        dtype=bool,
    )
    keep = (level_matches & within_budget & has_all_keywords).to_numpy()

    # Work with row positions rather than index labels: the reduced matrix is
    # a plain array addressed by position.
    positions = keep.nonzero()[0]

    # Step 7: Recommend courses based on filtered indices using SVD
    # NOTE(review): the score is the plain sum of a course's latent
    # components and does not depend on the user's query - confirm this is
    # the intended ranking.
    scores = count_matrix_reduced[positions].sum(axis=1)

    # Step 8: Sort recommended courses based on score and print top 5
    top_courses = sorted(
        zip(positions, scores), key=lambda item: item[1], reverse=True
    )[:5]

    print("\nTop 5 Recommended Courses:")
    if not top_courses:
        print("No courses matched the given level, hours and keywords.")
    for position, _score in top_courses:
        print("Course Title:", df['Course Title'].iloc[position])
        print("URL:", df['Course Url'].iloc[position])

# Run the interactive recommender on "new.csv" (prompts on stdin, prints to stdout)
filter_courses_and_print_top_5(csv_file="new.csv")


Enter the level needed (e.g., Beginner, Intermediate, Advanced): Beginner Level
Enter the number of hours available for study: 20
Enter keywords (separated by commas if multiple): Arts and Humanities

User Input:
Level Needed: beginner level
Hours Available: 20
Keywords: ['arts and humanities']

Top 5 Recommended Courses:
Course Title: At the Origins of the Mediterranean Civilization: Archaeology of the City from the Levant to the West - 3rd-1st millennium BC
URL: https://www.coursera.org/learn/archaeology-city-levant-west
Course Title: The History of Modern Israel - Part I: From an Idea to a State
URL: https://www.coursera.org/learn/history-israel
Course Title: The Epistemic Quest for Truth: Introduction to epistemology
URL: https://www.coursera.org/learn/epistemology
Course Title: The Modern World, Part One: Global History from 1760 to 1910
URL: https://www.coursera.org/learn/modern-world
Course Title: The Changing Landscape of Ancient Rome. Archaeology and History of the Palatine Hi