In [1]:
import pandas as pd
import neattext.functions as nfx
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from fuzzywuzzy import fuzz



In [7]:
def recommend_course_with_fuzzy(df, course_descriptions, title, num_of_rec, dataset_name,
                                min_similarity=0.8):
    """Recommend courses whose titles fuzzily match ``title``.

    The search title and every entry of ``df['course_title']`` are lower-cased,
    stripped of special characters and then of stop words (neattext), and
    compared with ``fuzz.token_set_ratio``. The 0-100 ratio is rescaled to 0-1.
    Titles scoring at least ``min_similarity`` are returned best match first;
    titles with equal scores keep their original row order.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``course_title`` column. The frame is not modified.
    course_descriptions : list
        Unused. Retained only so existing callers keep working; a TF-IDF matrix
        used to be fitted on it, but its result was never consulted.
    title : str
        Search phrase (matching is case-insensitive).
    num_of_rec : int
        Maximum number of rows to return (forwarded to ``DataFrame.head``).
    dataset_name : str
        Label written to the ``dataset_name`` column of every returned row.
    min_similarity : float, default 0.8
        Minimum rescaled similarity a title needs in order to be returned.

    Returns
    -------
    pandas.DataFrame
        Columns ``course_title`` (original, unprocessed title) and
        ``dataset_name``; at most ``num_of_rec`` rows.
    """
    def _normalize(text):
        # Same cleaning pipeline for the query and for every candidate title.
        return nfx.remove_stopwords(nfx.remove_special_characters(text.lower()))

    search_title_processed = _normalize(title)

    matches = []
    for course_title in df['course_title']:
        # Missing titles are read as NaN (a float); the text cleaners cannot
        # handle them, so skip them instead of aborting the whole search.
        if not isinstance(course_title, str):
            continue

        similarity_score = fuzz.token_set_ratio(
            search_title_processed, _normalize(course_title)
        ) / 100.0

        if similarity_score >= min_similarity:
            matches.append((course_title, similarity_score, dataset_name))

    # list.sort is stable, so equally scored titles stay in DataFrame order.
    matches.sort(key=lambda match: match[1], reverse=True)

    ranked = pd.DataFrame(matches, columns=['course_title', 'similarity_score', 'dataset_name'])

    # Selecting columns yields a fresh frame, avoiding an in-place drop on a slice.
    return ranked.head(num_of_rec)[['course_title', 'dataset_name']]

In [8]:
def load_and_preprocess_data(csv_file_path):
    """Read a course CSV and add a ``clean_course_title`` column.

    The cleaned title is ``course_title`` with stop words removed first and
    special characters removed second (both via neattext).

    Parameters
    ----------
    csv_file_path : path or file-like
        Anything ``pandas.read_csv`` accepts; the data must have a
        ``course_title`` column.

    Returns
    -------
    pandas.DataFrame
        The loaded data plus the ``clean_course_title`` column.
    """
    def _clean(raw_title):
        # Order matters: stop words are dropped before punctuation is stripped.
        without_stopwords = nfx.remove_stopwords(raw_title)
        return nfx.remove_special_characters(without_stopwords)

    courses = pd.read_csv(csv_file_path)
    courses['clean_course_title'] = courses['course_title'].apply(_clean)
    return courses

In [9]:
# Locations of the two course catalogues (absolute paths on the author's machine)
UDEMY_CSV_PATH = r"C:\Users\HP\Desktop\my-mantine-app\Udemy_courses.csv"
COURSERA_CSV_PATH = r"C:\Users\HP\Desktop\my-mantine-app\coursera_courses.csv"

# Read each catalogue and attach its cleaned-title column
df1 = load_and_preprocess_data(UDEMY_CSV_PATH)
df2 = load_and_preprocess_data(COURSERA_CSV_PATH)

In [10]:
title = "autocad"  # search phrase; the matcher lower-cases it before comparing
num_of_rec = 10  # row cap applied to each dataset separately

# Fuzzy-match the search phrase against the Udemy titles
recommended_courses_df1 = recommend_course_with_fuzzy(
    df=df1,
    course_descriptions=df1['course_title'].tolist(),
    title=title,
    num_of_rec=num_of_rec,
    dataset_name="Udemy",
)

# Fuzzy-match the search phrase against the Coursera titles
recommended_courses_df2 = recommend_course_with_fuzzy(
    df=df2,
    course_descriptions=df2['course_title'].tolist(),
    title=title,
    num_of_rec=num_of_rec,
    dataset_name="Coursera",
)

# Stack both result sets under a fresh 0..n-1 index; since the cap is per
# dataset, the combined frame can hold up to twice num_of_rec rows.
all_recommendations = pd.concat(
    (recommended_courses_df1, recommended_courses_df2),
    ignore_index=True,
)

print(
    "Top 10 Recommendations with similarity score >= 0.8 (without similarity_score column):",
    all_recommendations,
    sep="\n",
)

Top 10 Recommendations with similarity score >= 0.8 (without similarity_score column):
                                         course_title dataset_name
0                                    AutoCAD Tutorial        Udemy
1            Learning Autodesk AutoCAD - Crash Course        Udemy
2               Master of AutoCAD 2D / 3D in 4 hours!        Udemy
3                              Learning AutoCAD® 2013        Udemy
4        Learning AutoCAD - 3D Modeling and Rendering        Udemy
5                      AutoCAD 2014 - 2D Fundamentals        Udemy
6                                        Autocad 2010        Udemy
7                   Architecture 101 Using AutoCAD LT        Udemy
8   Create photorealistic house from scratch to en...        Udemy
9   3DS Max, AutoCAD, Vray: Creating a Complete In...        Udemy
10  Autodesk Certified Professional: AutoCAD for D...     Coursera
