In [2]:
import os
from pathlib import Path

import pandas as pd
import neattext.functions as nfx
from fuzzywuzzy import fuzz
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity




In [3]:
def recommend_course_with_fuzzy(df, course_descriptions, title, num_of_rec, dataset_name):
    """Rank the courses in ``df`` by fuzzy similarity of their title to ``title``.

    Both the search text and every course title are lower-cased, stripped of
    special characters and then of stop words before being compared with
    ``fuzz.token_set_ratio``. The ratio is divided by 100 to give the score.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``course_title`` column. The frame is only read; it is
        never modified.
    course_descriptions : list
        Unused. A TF-IDF matrix used to be fitted on it but was never
        consulted; the parameter is retained so existing callers keep working.
    title : str
        Search text to match course titles against.
    num_of_rec : int
        Number of top-scoring rows to keep (applied as a slice bound).
    dataset_name : str
        Label copied into the ``dataset_name`` column of every returned row.

    Returns
    -------
    pandas.DataFrame
        Columns ``course_title`` (original, unnormalised title),
        ``similarity_score`` and ``dataset_name``, ordered by descending
        score. Courses with equal scores keep their order from ``df``.
    """
    def _normalize(text):
        # Same pipeline for query and candidates: lower-case, strip special
        # characters, then drop stop words.
        return nfx.remove_stopwords(nfx.remove_special_characters(text.lower()))

    search_title_processed = _normalize(title)

    recommendations = []
    # Rows without a title cannot be matched (and would crash the string
    # helpers), so they are skipped rather than scored.
    for course_title in df['course_title'].dropna():
        course_title_processed = _normalize(str(course_title))
        similarity_score = fuzz.token_set_ratio(search_title_processed, course_title_processed) / 100.0
        recommendations.append((course_title, similarity_score, dataset_name))

    # list.sort is stable, so ties stay in their original DataFrame order.
    recommendations.sort(key=lambda rec: rec[1], reverse=True)

    return pd.DataFrame(
        recommendations[:num_of_rec],
        columns=['course_title', 'similarity_score', 'dataset_name'],
    )

In [4]:
def load_and_preprocess_data(csv_file_path):
    """Read a course CSV and add a cleaned copy of each course title.

    Parameters
    ----------
    csv_file_path : str, path-like or file-like
        Passed straight to ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The loaded data plus a ``clean_course_title`` column: the title with
        stop words removed first and special characters removed second.
        Rows whose title is missing get an empty string in that column; the
        original ``course_title`` column is left as loaded.

    Raises
    ------
    KeyError
        If the file has no ``course_title`` column.
    """
    df = pd.read_csv(csv_file_path)
    if 'course_title' not in df.columns:
        raise KeyError(f"'course_title' column not found in {csv_file_path!r}")

    # Missing titles load as NaN (a float); the neattext helpers work on
    # strings, so substitute an empty string before cleaning.
    titles = df['course_title'].fillna('').astype(str)
    df['clean_course_title'] = (
        titles
        .apply(nfx.remove_stopwords)
        .apply(nfx.remove_special_characters)
    )
    return df

In [5]:
# Folder that holds the course CSV exports. Set the COURSE_DATA_DIR environment
# variable to point the notebook at another location; when it is unset the
# original local folder is used, so existing runs behave as before.
DATA_DIR = Path(os.environ.get("COURSE_DATA_DIR", r"C:\Users\HP\Desktop\my-mantine-app"))

# Load and preprocess first dataset (Udemy catalogue)
df1 = load_and_preprocess_data(DATA_DIR / "Udemy_courses.csv")

# Load and preprocess second dataset (Coursera catalogue)
df2 = load_and_preprocess_data(DATA_DIR / "coursera_courses.csv")

In [7]:
# Search settings shared by both catalogues.
title = "python"   # query text; matching ignores case
num_of_rec = 5     # rows to keep per catalogue

# Score the Udemy catalogue against the query.
recommended_courses_df1 = recommend_course_with_fuzzy(
    df=df1,
    course_descriptions=df1['course_title'].tolist(),
    title=title,
    num_of_rec=num_of_rec,
    dataset_name="udemy",
)

# Score the Coursera catalogue the same way.
recommended_courses_df2 = recommend_course_with_fuzzy(
    df=df2,
    course_descriptions=df2['course_title'].tolist(),
    title=title,
    num_of_rec=num_of_rec,
    dataset_name="Coursera",
)

# Stack the two result tables (Udemy rows first) under a fresh 0..n-1 index.
all_recommendations = pd.concat(
    (recommended_courses_df1, recommended_courses_df2),
    ignore_index=True,
)

print("Recommendations:", all_recommendations, sep="\n")

Recommendations:
                                      course_title  similarity_score  \
0                      Web Programming with Python               1.0   
1               Pythonic Python Part I: The Basics               1.0   
2        The Ultimate Python Programming Tutorial                1.0   
3  Learn Python GUI programming using Qt framework               1.0   
4                 Python Programming for Beginners               1.0   
5                 Applied Data Science with Python               1.0   
6         Automate Cybersecurity Tasks with Python               1.0   
7                           Crash Course on Python               1.0   
8                        Data Analysis with Python               1.0   
9    Data Science Fundamentals with Python and SQL               1.0   

  dataset_name  
0        udemy  
1        udemy  
2        udemy  
3        udemy  
4        udemy  
5     Coursera  
6     Coursera  
7     Coursera  
8     Coursera  
9     Coursera  
