In [1]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd
import re
# Load datasets
data = pd.read_csv('Udemy_final_dataset.csv')
data_coursera = pd.read_csv("data_coursera_updated.csv")
data_edx = pd.read_csv("edx_courses.csv")


def clean_numeric_strings(value):
    if isinstance(value, str):
        cleaned_value = re.sub(r'[^\d.]', '', value)
    else:
        cleaned_value = str(value)
    return cleaned_value if cleaned_value else '0'

# Clean the 'rating' column
data['rating'] = data['rating'].apply(clean_numeric_strings)

# Convert the 'rating' column to numeric values, coercing errors to NaN, then to integers
data['rating'] = pd.to_numeric(data['rating'], errors='coerce').fillna(0).astype(int)
data_coursera['features'] = data_coursera['course'] + ' ' + data_coursera['reviewcount'].astype(str) + ' ' + \
                            data_coursera['level']
data_coursera['features'] = data_coursera['features'].fillna('')
vectorizer_coursera = TfidfVectorizer(stop_words='english')
feature_matrix_coursera = vectorizer_coursera.fit_transform(data_coursera['features'])

# Preprocess edX data
text_features = ['title', 'summary', 'instructors', 'Level', 'price', 'course_url']
data_edx['combined_text'] = data_edx[text_features].astype(str).apply(lambda x: ' '.join(x), axis=1)
vectorizer_edx = TfidfVectorizer(stop_words='english')
feature_matrix_edx = vectorizer_edx.fit_transform(data_edx['combined_text'])

# Process Udemy recommendations
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
data['description'] = data['description'].fillna('')
tfidf_matrix = tfidf_vectorizer.fit_transform(data['description'])
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)


In [2]:

def get_udemy_recommendations(title, min_rating=0):
    matches = data[data['title'].str.contains(title, case=False)]
    if not matches.empty:
        idx1 = matches.index[0]
        sim_scores = list(enumerate(cosine_sim[idx1]))
        sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
        sim_scores = sim_scores[1:11]
        course_indices = [score[0] for score in sim_scores]
        recommended_courses = data.iloc[course_indices]
        
        # Check if the 'description' column exists and has non-NaN values
        if 'description' in recommended_courses.columns:
            recommended_courses = recommended_courses[
                recommended_courses['description'].notna() &
                recommended_courses['description'].str.contains(title, case=False)
            ]
        
        # Filter based on rating
        if 'rating' in recommended_courses.columns:
            recommended_courses = recommended_courses[
                recommended_courses['rating'] >= min_rating
            ]
    else:
        recommended_courses = None
    
    return recommended_courses



def get_coursera_recommendations(title, min_rating=0, top_n=10):
    # Convert the input title into a feature vector
    input_features_vector = vectorizer_coursera.transform([title])
    
    # Compute cosine similarity between the input feature vector and the feature matrix
    similarity_scores = cosine_similarity(feature_matrix_coursera, input_features_vector)
    
    # Get indices of the top similar courses
    similar_indices = similarity_scores.argsort(axis=0)[-top_n - 1:-1][::-1]
    
    # Fetch the top recommendations from the data_coursera DataFrame
    top_recommendations = data_coursera.iloc[similar_indices.flatten()]
    
    # Check if the 'rating' column exists and has non-NaN values
    if 'rating' in top_recommendations.columns:
        top_recommendations = top_recommendations[top_recommendations['rating'].notna()]
        top_recommendations['rating'] = pd.to_numeric(top_recommendations['rating'], errors='coerce')
        top_recommendations = top_recommendations[top_recommendations['rating'] >= min_rating]
    
    # Sort by rating in descending order
    top_recommendations = top_recommendations.sort_values(by='rating', ascending=False)
    
    return top_recommendations

# Process edX recommendations

def get_edx_recommendations(title, top_n=10):
    # Convert the input title into a feature vector
    input_features_edx_vector = vectorizer_edx.transform([title])
    
    # Compute cosine similarity between the input feature vector and the feature matrix
    similarity_scores_edx = cosine_similarity(feature_matrix_edx, input_features_edx_vector)
    
    # Get indices of the top similar courses
    similar_indices_edx = similarity_scores_edx.argsort(axis=0)[-top_n - 1:-1][::-1]
    
    # Fetch the top recommendations from the data_edx DataFrame
    top_edx = data_edx.iloc[similar_indices_edx.flatten()]
    
    return top_edx

In [3]:
get_udemy_recommendations('flutter')

Unnamed: 0,title,description,instructor,rating,reviewcount,duration,lectures,level,url
2787,"Complete Flutter Guide 2023: Build Android, iO...","The Complete Flutter SDK, Flutter Framework & ...","Sagnik Bhattacharya, Paulina Knop",4,420.0,20.5 total hours,224 lectures,All Levels,https://www.udemy.com/courses/search/?q=Comple...
1997,Dart & Flutter | The Complete Flutter Developm...,2023 Complete Guide To Flutter Development - B...,"Hussain Mustafa, Codestars • over 2 million st...",4,744.0,19 total hours,165 lectures,All Levels,https://www.udemy.com/courses/search/?q=Dart &...
1384,The Complete Flutter App Development Course fo...,A Complete Guide to the Flutter Framework for ...,Smartherd Developers,4,1263.0,4.5 total hours,33 lectures,All Levels,https://www.udemy.com/courses/search/?q=The Co...
3493,The Ultimate Dart & Flutter Course 2023,A Complete Guide / Tutorial to the Flutter & D...,Rivaan Ranawat,4,293.0,44 total hours,332 lectures,All Levels,https://www.udemy.com/courses/search/?q=The Ul...
8607,Flutter News Portal App-Firestore Backend(Andr...,Build Flutter ios and Android Apps Using Fires...,Yazdani Chowdhury,4,47.0,7.5 total hours,39 lectures,All Levels,https://www.udemy.com/courses/search/?q=Flutte...
6768,Flutter 3 with Flutter Projects on Dart Progra...,Flutter Dart course from scratch. Learn Flutte...,"Oak Academy, OAK Academy Team",4,81.0,23 total hours,158 lectures,All Levels,https://www.udemy.com/courses/search/?q=Flutte...
408,Flutter & Dart - The Complete Flutter App Deve...,Build Flutter iOS and Android Apps with a Sing...,"Paulo Dichone | Software Engineer, AWS Cloud P...",4,5252.0,26 total hours,226 lectures,All Levels,https://www.udemy.com/courses/search/?q=Flutte...
4322,Flutter and Dart | Flutter Dart Programming fr...,Complete Flutter Dart with Flutter 3 projects ...,"Oak Academy, OAK Academy Team",4,200.0,30.5 total hours,225 lectures,All Levels,https://www.udemy.com/courses/search/?q=Flutte...
5302,Flutter ios&Android App With Firebase Backend ...,Build Complete ios & Android app With Flutter ...,Yazdani Chowdhury,3,136.0,1.5 total hours,11 lectures,All Levels,https://www.udemy.com/courses/search/?q=Flutte...
5391,Full Stack Mobile Application Development - Ma...,"Native Android,Native IOS,Flutter,React Native...",The Apps Firm,3,133.0,10.5 total hours,92 lectures,Beginner,https://www.udemy.com/courses/search/?q=Full S...


In [18]:
get_coursera_recommendations('python')

Unnamed: 0,partner,course,skills,rating,reviewcount,level,certificatetype,duration,crediteligibility,url,features
336,University of Michigan,Python Basics,"{"" Computer Programming"","" Python Programming""...",4.8,16.9k,Beginner,Course,1 - 4 Weeks,False,https://www.coursera.org/search?query=Python B...,Python Basics 16.9k Beginner
69,University of Michigan,Python 3 Programming,"{"" Python Programming"","" Computer Programming""...",4.7,21.3k,Beginner,Specialization,3 - 6 Months,False,https://www.coursera.org/search?query=Python 3...,Python 3 Programming 21.3k Beginner
858,Rice University,Introduction to Scripting in Python,"{"" Computer Programming"","" Programming Princip...",4.7,4.2k,Beginner,Specialization,3 - 6 Months,False,https://www.coursera.org/search?query=Introduc...,Introduction to Scripting in Python 4.2k Begin...
361,IBM,Data Analysis with Python,"{"" Python Programming"","" Data Analysis"","" Mach...",4.7,17.3k,Beginner,Course,1 - 3 Months,False,https://www.coursera.org/search?query=Data Ana...,Data Analysis with Python 17.3k Beginner
780,University of Michigan,Statistics with Python,"{"" General Statistics"","" Probability & Statist...",4.6,3.1k,Beginner,Specialization,1 - 3 Months,False,https://www.coursera.org/search?query=Statisti...,Statistics with Python 3.1k Beginner
404,IBM,Python Project for Data Science,"{"" Python Programming"","" Computer Programming""...",4.5,3.7k,Intermediate,Course,1 - 4 Weeks,False,https://www.coursera.org/search?query=Python P...,Python Project for Data Science 3.7k Intermedi...
953,Infosec,Python for Cybersecurity,"{"" Python Programming"","" Computer Programming""}",4.5,334,Intermediate,Specialization,3 - 6 Months,False,https://www.coursera.org/search?query=Python f...,Python for Cybersecurity 334 Intermediate
635,University of Pennsylvania,Introduction to Python Programming,"{"" Python Programming"","" Computer Programming""...",4.5,998,Beginner,Course,1 - 4 Weeks,False,https://www.coursera.org/search?query=Introduc...,Introduction to Python Programming 998 Beginner
815,Pontificia Universidad Católica de Chile,Introducción a la programación en Python I: Ap...,"{"" Computational Logic"","" Computer Programming...",4.4,4.2k,Beginner,Course,1 - 3 Months,False,https://www.coursera.org/search?query=Introduc...,Introducción a la programación en Python I: Ap...
201,University of Pennsylvania,Introduction to Programming with Python and Java,"{"" Computer Programming"","" Python Programming""...",4.4,1.5k,Beginner,Specialization,3 - 6 Months,False,https://www.coursera.org/search?query=Introduc...,Introduction to Programming with Python and Ja...


In [76]:
get_edx_recommendations("html")

Unnamed: 0,title,summary,n_enrolled,course_type,institution,instructors,Level,subject,language,subtitles,course_effort,course_length,price,course_description,course_syllabus,course_url,combined_text
304,Designing and Creating Skirts,Learn the construction and style adaptations o...,13954,Self-paced on your time,The Hong Kong Polytechnic University,Tanya Dove-Kristina Shin-Jalice Cheng-Marco Re...,Introductory,Design,English,English,3–5 hours per week,5 Weeks,FREE-Add a Verified Certificate for $99 USD,Designing and creating skirts will teach learn...,_Week 1 – Understand how to record body measur...,https://www.edx.org/course/designing-and-creat...,Designing and Creating Skirts Learn the constr...
332,Introducción a la ciencia de datos y sus aplic...,El arte de descubrir las percepciones y tenden...,7303,Self-paced on your time,IBM,Alex Aklson,Introductory,Data Analysis & Statistics,Español,Español,3–6 hours per week,6 Weeks,FREE-Add a Verified Certificate for $39 USD,Los antiguos egipcios aplicaron datos del cens...,,https://www.edx.org/course/Introduccion-a-la-c...,Introducción a la ciencia de datos y sus aplic...
331,Strategic Management,This course is part of a MicroMasters® Program,71085,Self-paced on your time,Indian Institute of Management Bangalore,P D Jose-Rejie George Pallathita-Sai Yayavaram,Introductory,Business & Management,English,"English, हिन्दी",3–5 hours per week,5 Weeks,FREE-Add a Verified Certificate for $150 USD,"In this business and management course, you wi...",,https://www.edx.org/course/strategic-management,Strategic Management This course is part of a ...
330,Cybersecurity and Privacy in the IoT,Learn about the security and privacy implicati...,21440,Self-paced on your time,Curtin University,Iain Murray AM-Nazanin Mohammadi-Eleanor Sandry,Introductory,Computer Science,English,English,4–6 hours per week,5 Weeks,FREE-Add a Verified Certificate for $199 USD,As the Internet of Things (IoT) continues to g...,,https://www.edx.org/course/cybersecurity-and-p...,Cybersecurity and Privacy in the IoT Learn abo...
329,How to Code: Simple Data,This course is part of a MicroMasters® Program,98113,Self-paced on your time,University of British Columbia,Gregor Kiczales,Introductory,Computer Science,English,English,2–3 hours per week,7 Weeks,FREE-Add a Verified Certificate for $125 USD,This programming course takes a unique approac...,,https://www.edx.org/course/how-to-code-simple-...,How to Code: Simple Data This course is part o...
328,Agile Leadership Principles and Practices,Accelerate and improve team decisions by learn...,31838,Self-paced on your time,"The University of Maryland, College Park-Unive...",John Johnson,Advanced,Engineering,English,English,2–3 hours per week,4 Weeks,FREE-Add a Verified Certificate for $199 USD,Version 2 of this course series delivers beyon...,Week 1: The first week of this course jumps ri...,https://www.edx.org/course/agile-leadership-pr...,Agile Leadership Principles and Practices Acce...
327,The Ancient Greek Hero,Discover the literature and heroes of ancient ...,51744,Instructor-led on a course schedule,Harvard University,Gregory Nagy-Leonard Muellner-Kevin McGrath-Ke...,Introductory,Humanities,English,English,8–12 hours per week,18 Weeks,FREE-Add a Verified Certificate for $99 USD,Explore what it means to be human today by stu...,,https://www.edx.org/course/the-ancient-greek-h...,The Ancient Greek Hero Discover the literature...
326,Android: Introducción a la Programación,"Desarrollaremos una aplicación de ejemplo, ""Mi...",85786,Self-paced on your time,Universitat Politècnica de Valencia,Jesús Tomás Gironés,Introductory,Computer Science,Español,Español,4–5 hours per week,10 Weeks,FREE-Add a Verified Certificate for $50 USD,Android es la plataforma libre desarrollada po...,,https://www.edx.org/course/android-introduccio...,Android: Introducción a la Programación Desarr...
325,Anatomy: Human Neuroanatomy,Learn about the different parts of the central...,49319,Self-paced on your time,The University of Michigan,Kelli A. Sullivan,Introductory,Biology & Life Sciences,English,English,3–4 hours per week,8 Weeks,FREE-Add a Verified Certificate for $49 USD,"In this anatomy course, part of the Anatomy XS...",,https://www.edx.org/course/anatomy-human-neuro...,Anatomy: Human Neuroanatomy Learn about the di...
324,Public Financial Management,What is the government budget cycle? How shoul...,23288,Self-paced on your time,The International Monetary Fund,Staff of the Fiscal Affairs Department,Intermediate,Economics & Finance,English,English,4–6 hours per week,8 Weeks,FREE-Add a Verified Certificate for $25 USD,This IMFx course is designed to strengthen par...,,https://www.edx.org/course/public-financial-ma...,Public Financial Management What is the govern...
