In [2]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.decomposition import TruncatedSVD

# Step 1: Read CSV File
df = pd.read_csv("new.csv")

# Step 2: Select Features
features = ["Course Title", "Rating", "Level", "Duration to complete (Approx.)", "Keyword"]

# Step 3: Create a column in DF which combines all selected features
# Missing values are blanked first so the string concatenation in
# combine_features never meets NaN. The duration column is the exception: it is
# compared numerically (`<=`) against the user's available hours during
# filtering, so it is coerced to a numeric dtype instead. Filling it with ''
# would put a str into the column and make that comparison raise TypeError.
# Missing or unparseable durations become NaN, which compares False and simply
# excludes the course.
# NOTE(review): assumes the duration column holds plain numbers (hours) -
# confirm against new.csv; free-text values such as "20 hours" would all
# become NaN here.
duration_feature = "Duration to complete (Approx.)"
for feature in features:
    if feature == duration_feature:
        df[feature] = pd.to_numeric(df[feature], errors="coerce")
    else:
        df[feature] = df[feature].fillna('')

def combine_features(row):
    """Return the text used to vectorise one course.

    Joins the course title, level and keyword of *row* with single spaces.
    Each value is passed through ``str()`` first, so a non-string cell (for
    example a numeric level) is concatenated instead of raising ``TypeError``.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) providing the keys
            'Course Title', 'Level' and 'Keyword'.

    Returns:
        str: The three values in the order title, level, keyword, separated
        by single spaces.

    Raises:
        KeyError: If one of the three keys is absent from *row*.
    """
    # Deliberately no blanket try/except: printing the error and falling
    # through would return None for that row, which only defers the failure
    # to the vectoriser that consumes the combined column.
    parts = (row['Course Title'], row['Level'], row['Keyword'])
    return " ".join(str(part) for part in parts)

# Step 3 (continued): store one combined text field per course, built row by
# row with combine_features.
df["combined_features"] = df.apply(combine_features, axis=1)

# Step 4: Turn the combined text into a course-by-term count matrix
cv = CountVectorizer()
count_matrix = cv.fit_transform(df["combined_features"])

# Step 5: Pairwise cosine similarity between all courses, from the raw counts
cosine_sim = cosine_similarity(count_matrix)

# Steps 6-7: Fit a 50-component truncated SVD on the count matrix and project
# the same matrix into that reduced latent space. random_state is fixed so the
# decomposition is reproducible between runs.
# NOTE(review): the matrix factorised here is built from course text only, so
# this is a content-based reduction; no user-interaction data is visible in
# this cell.
svd = TruncatedSVD(n_components=50, random_state=42)
count_matrix_reduced = svd.fit(count_matrix).transform(count_matrix)

# Step 8: Get user input
# Free-text answers are trimmed so stray spaces do not defeat the matching
# done afterwards: "AI, Data" yields the keywords "AI" and "Data" rather than
# "AI" and " Data". Blank keywords (empty answer, trailing comma) are dropped,
# which leaves an empty list meaning "no keyword restriction".
# int() raises ValueError if the hours answer is not a whole number.
level_needed = input("Enter the level needed (e.g., Beginner, Intermediate, Advanced): ").strip()
hours_available = int(input("Enter the number of hours available for study: "))
keywords = [
    keyword.strip()
    for keyword in input("Enter keywords (separated by commas if multiple): ").split(',')
    if keyword.strip()
]

# Step 9: Filter courses based on user input
# A course is kept when all three conditions hold:
#   * its level matches the requested level,
#   * its duration is no more than the hours available,
#   * every requested keyword occurs in its 'Keyword' text.
# Text is compared case-insensitively with surrounding whitespace removed, so
# "beginner " and "Beginner" are treated alike. Durations are coerced to
# numbers; a missing or non-numeric duration becomes NaN, which compares False
# and excludes the course instead of raising TypeError on str-vs-int `<=`.
level_key = level_needed.strip().lower()
wanted_keywords = [kw.strip().lower() for kw in keywords if kw.strip()]

level_col = df['Level'].fillna('').astype(str).str.strip().str.lower()
keyword_col = df['Keyword'].fillna('').astype(str).str.lower()
duration_col = pd.to_numeric(df['Duration to complete (Approx.)'], errors='coerce')

# NOTE(review): the level is matched by prefix so that an answer like
# "Beginner" also selects a stored value such as "Beginner level" - confirm
# against the actual 'Level' values in new.csv. An empty answer keeps the
# original meaning: only courses whose level is blank.
if level_key:
    level_ok = level_col.str.startswith(level_key)
else:
    level_ok = level_col == ''

matches = level_ok & (duration_col <= hours_available)
for kw in wanted_keywords:
    # regex=False: keywords are literal text, not patterns.
    matches &= keyword_col.str.contains(kw, regex=False)

# Index labels of the surviving rows, in DataFrame order.
filtered_indices = df.index[matches.to_numpy()].tolist()

# Step 10: Report the courses selected by the filter, one per line with its
# rating, or say so when nothing was selected.
if filtered_indices:
    print("Courses matching your criteria:")
    for course_idx in filtered_indices:
        course = df.loc[course_idx]  # look the row up once per course
        print(course['Course Title'], "with rating:", course['Rating'])
else:
    print("No courses found matching your criteria.")


Enter the level needed (e.g., Beginner, Intermediate, Advanced):  Beginner
Enter the number of hours available for study:  20
Enter keywords (separated by commas if multiple):  Arts and Humanities


No courses found matching your criteria.
