In [65]:
import pandas as pd
import neattext.functions as nfx
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from fuzzywuzzy import fuzz

In [66]:
def _clean_title(text):
    """Lower-case ``text``, then strip special characters and stopwords with neattext."""
    return nfx.remove_stopwords(nfx.remove_special_characters(text.lower()))


def recommend_course_with_fuzzy(df, course_descriptions, title, num_of_rec, dataset_name, min_similarity=0.8):
    """Return the courses in ``df`` whose titles best fuzzy-match ``title``.

    Every value of ``df['course_title']`` is cleaned the same way as the search
    phrase and compared to it with ``fuzz.token_set_ratio`` (scaled from 0-100
    down to 0-1). Courses scoring at least ``min_similarity`` are ranked by
    score, best first; ties keep their original row order.

    Parameters
    ----------
    df : pandas.DataFrame
        Course catalogue. Must contain the columns ``course_title``,
        ``rating`` and ``course_url``. It is NOT modified.
    course_descriptions : list
        Unused. Kept only so existing call sites keep working (earlier
        versions fitted a TF-IDF matrix on it but never read the result).
    title : str
        Search phrase; matching is case-insensitive.
    num_of_rec : int
        Maximum number of rows to return.
    dataset_name : str
        Label written to the ``dataset_name`` column of every returned row.
    min_similarity : float, default 0.8
        Minimum similarity (0-1) a course needs in order to be returned.

    Returns
    -------
    pandas.DataFrame
        Columns ``course_title``, ``rating``, ``dataset_name``, ``course_url``
        with a fresh 0..k-1 index, ordered by descending similarity.
    """
    query = _clean_title(title)

    # Score every title once. Missing / non-string titles cannot be cleaned,
    # so they are compared as an empty string instead of raising.
    scores = [
        fuzz.token_set_ratio(query, _clean_title(raw) if isinstance(raw, str) else "") / 100.0
        for raw in df['course_title']
    ]

    # Row positions that clear the threshold, best score first.
    # list.sort is stable even with reverse=True, so ties keep catalogue order.
    positions = [pos for pos, score in enumerate(scores) if score >= min_similarity]
    positions.sort(key=lambda pos: scores[pos], reverse=True)

    # Positional selection returns a new frame, leaving the caller's df untouched
    # and preserving the source column dtypes even when nothing matches.
    top = df.iloc[positions[:num_of_rec]]

    return pd.DataFrame({
        'course_title': top['course_title'].to_numpy(),
        'rating': top['rating'].to_numpy(),
        'dataset_name': [dataset_name] * len(top),
        'course_url': top['course_url'].to_numpy(),
    })

In [67]:
def load_and_preprocess_data(csv_file_path):
    """Read a course catalogue CSV and add a ``clean_course_title`` column.

    Parameters
    ----------
    csv_file_path : str or path-like
        Location of the CSV file; it must contain a ``course_title`` column.

    Returns
    -------
    pandas.DataFrame
        The CSV contents plus ``clean_course_title``: the title with stopwords
        removed first and special characters removed second (both via
        neattext). Rows whose title is missing or not a string get an empty
        cleaned title instead of raising inside the text helpers.
    """
    df = pd.read_csv(csv_file_path)

    def _clean(raw_title):
        # pandas represents empty CSV cells as NaN (a float), which the
        # neattext string helpers cannot process.
        if not isinstance(raw_title, str):
            return ""
        return nfx.remove_special_characters(nfx.remove_stopwords(raw_title))

    # Single pass over the column instead of two chained .apply calls
    df['clean_course_title'] = df['course_title'].map(_clean)
    return df

In [68]:
# Source CSVs, one per course platform.
# NOTE(review): these are absolute, machine-specific paths; adjust them when
# running the notebook on another machine.
UDEMY_CSV = r"C:\Users\HP\Desktop\my-mantine-app\Udemy_courses.csv"
COURSERA_CSV = r"C:\Users\HP\Desktop\my-mantine-app\coursera_courses.csv"
UDACITY_CSV = r"C:\Users\HP\Desktop\my-mantine-app\Udacity.csv"

# Load each catalogue and attach its cleaned-title column
df1 = load_and_preprocess_data(UDEMY_CSV)      # Udemy
df2 = load_and_preprocess_data(COURSERA_CSV)   # Coursera
df3 = load_and_preprocess_data(UDACITY_CSV)    # Udacity

In [69]:
# Query parameters
title = "python"  # Search phrase for the course title (matching is case-insensitive)
num_of_rec = 10  # Maximum number of recommendations taken from EACH platform

# Run the same fuzzy title search against every platform's catalogue.
# The list of titles is passed as the course_descriptions argument.
recommended_courses_df1 = recommend_course_with_fuzzy(df1, df1['course_title'].tolist(), title, num_of_rec, "Udemy")
recommended_courses_df2 = recommend_course_with_fuzzy(df2, df2['course_title'].tolist(), title, num_of_rec, "Coursera")
recommended_courses_df3 = recommend_course_with_fuzzy(df3, df3['course_title'].tolist(), title, num_of_rec, "Udacity")

# Merge the three per-platform result sets and rank them by rating, best first.
# The merged frame can therefore hold up to 3 * num_of_rec rows.
all_recommendations = pd.concat(
    [recommended_courses_df1, recommended_courses_df2, recommended_courses_df3],
    ignore_index=True,
).sort_values(by='rating', ascending=False)

# Show full course titles / URLs instead of truncating long cells
pd.set_option('display.max_colwidth', None)

# The banner is derived from num_of_rec so it stays truthful when the limit changes
print(f"Top {num_of_rec} recommendations per platform with similarity score >= 0.8 (without similarity_score column):")
print(all_recommendations)

Top 10 Recommendations with similarity score >= 0.8 (without similarity_score column):
                                                   course_title  rating  \
19                                      Get Started with Python    4.90   
18               Expressway to Data Science: Python Programming    4.80   
10                             Applied Data Science with Python    4.70   
11                     Automate Cybersecurity Tasks with Python    4.70   
17             Developing AI Applications with Python and Flask    4.70   
13                                    Data Analysis with Python    4.70   
15                               Data Visualization with Python    4.60   
12                                       Crash Course on Python    4.60   
20                                   AI Programming with Python    4.60   
5     Coding for Entrepreneurs: Learn Python, Django, and More.    4.60   
8   Python Programming: Create an Digital Marketplace in Django    4.50   
14           