In [20]:
import os
import pandas as pd
import warnings


In [22]:
warnings.filterwarnings('ignore')

In [30]:
data = pd.read_csv('udemy_courses.csv')

In [32]:
data.head(1)

Unnamed: 0,course_id,course_title,url,is_paid,price,num_subscribers,num_reviews,num_lectures,level,content_duration,published_timestamp,subject
0,1070968,Ultimate Investment Banking Course,https://www.udemy.com/ultimate-investment-bank...,True,200,2147,23,51,All Levels,1.5,2017-01-18T20:58:58Z,Business Finance


In [34]:
data.shape

(3678, 12)

In [36]:
data.isnull().sum()

course_id              0
course_title           0
url                    0
is_paid                0
price                  0
num_subscribers        0
num_reviews            0
num_lectures           0
level                  0
content_duration       0
published_timestamp    0
subject                0
dtype: int64

In [38]:
data.duplicated().any()

True

In [40]:
data[data.duplicated()]

Unnamed: 0,course_id,course_title,url,is_paid,price,num_subscribers,num_reviews,num_lectures,level,content_duration,published_timestamp,subject
787,837322,Essentials of money value: Get a financial Life !,https://www.udemy.com/essentials-of-money-value/,True,20,0,0,20,All Levels,0.616667,2016-05-16T18:28:30Z,Business Finance
788,1157298,Introduction to Forex Trading Business For Beg...,https://www.udemy.com/introduction-to-forex-tr...,True,20,0,0,27,Beginner Level,1.5,2017-04-23T16:19:01Z,Business Finance
894,1035638,Understanding Financial Statements,https://www.udemy.com/understanding-financial-...,True,25,0,0,10,All Levels,1.0,2016-12-15T14:56:17Z,Business Finance
1100,1084454,CFA Level 2- Quantitative Methods,https://www.udemy.com/cfa-level-2-quantitative...,True,40,0,0,35,All Levels,5.5,2017-07-02T14:29:35Z,Business Finance
1473,185526,MicroStation - Células,https://www.udemy.com/microstation-celulas/,True,20,0,0,9,Beginner Level,0.616667,2014-04-15T21:48:55Z,Graphic Design
2561,28295,Learn Web Designing & HTML5/CSS3 Essentials in...,https://www.udemy.com/build-beautiful-html5-we...,True,75,43285,525,24,All Levels,4.0,2013-01-03T00:55:31Z,Web Development


In [42]:
# NOTE: drop_duplicates() keeps the surviving rows' original index labels
# (the index is not reset here), so from this point on an index label is not
# guaranteed to equal the row's position. Use .iloc / positional lookups when
# pairing rows with position-indexed arrays.
data = data.drop_duplicates()

In [44]:
data.duplicated().any()

False

In [48]:
### Popularity-Based Recommendation System

In [50]:
data.columns

Index(['course_id', 'course_title', 'url', 'is_paid', 'price',
       'num_subscribers', 'num_reviews', 'num_lectures', 'level',
       'content_duration', 'published_timestamp', 'subject'],
      dtype='object')

In [81]:
def pop_rem(data, top_n=5):
    """Rank courses by a weighted popularity score and return the top ones.

    The score is ``0.6 * num_subscribers + 0.4 * num_reviews``.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns ``course_title``, ``num_subscribers`` and
        ``num_reviews``. Side effect: a ``pop_score`` column is added to
        (or overwritten on) this frame.
    top_n : int, default 5
        Number of rows to return. The default matches the previous
        hard-coded ``.head()`` behaviour.

    Returns
    -------
    pandas.DataFrame
        The ``course_title`` and ``pop_score`` columns of the ``top_n``
        highest-scoring rows, sorted by score in descending order.
    """
    # Written onto the caller's frame on purpose: later cells display this column.
    data['pop_score'] = 0.6 * data['num_subscribers'] + 0.4 * data['num_reviews']
    ranked = data.sort_values(by='pop_score', ascending=False)
    return ranked[['course_title', 'pop_score']].head(top_n)

In [83]:
pop_rem(data)

Unnamed: 0,course_title,pop_score
2827,Learn HTML5 Programming From Scratch,164805.4
3032,Coding for Entrepreneurs Basic,96729.0
3230,The Web Developer Bootcamp,83928.4
3232,The Complete Web Developer Course 2.0,77672.0
2783,Build Your First Website in 1 Week with HTML5 ...,74544.2


In [85]:
data.head(1)

Unnamed: 0,course_id,course_title,url,is_paid,price,num_subscribers,num_reviews,num_lectures,level,content_duration,published_timestamp,subject,pop_score
0,1070968,Ultimate Investment Banking Course,https://www.udemy.com/ultimate-investment-bank...,True,200,2147,23,51,All Levels,1.5,2017-01-18T20:58:58Z,Business Finance,1297.4


In [87]:
### Content-Based Recommendation System

In [96]:
import neattext.functions as nfx

In [106]:
# Normalise the titles in place: stop words are stripped first, then special
# characters. The cleaned text replaces the original `course_title` values.
data['course_title'] = (
    data['course_title']
    .apply(nfx.remove_stopwords)
    .apply(nfx.remove_special_characters)
)

In [186]:
data['title_subject']=data['course_title'] + ' ' +data['subject']

In [188]:
from sklearn.feature_extraction.text import CountVectorizer

In [194]:
cv = CountVectorizer(max_features=3000)
vectors = cv.fit_transform(data['title_subject']).toarray()

In [198]:
vectors

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=int64)

In [200]:
data.shape

(3672, 14)

In [202]:
len(cv.get_feature_names_out())

3000

In [204]:
vectors.shape

(3672, 3000)

In [206]:
from sklearn.metrics.pairwise import cosine_similarity

In [208]:
similarity = cosine_similarity(vectors)

In [210]:
similarity

array([[1.        , 0.4330127 , 0.40824829, ..., 0.        , 0.        ,
        0.        ],
       [0.4330127 , 1.        , 0.35355339, ..., 0.        , 0.        ,
        0.        ],
       [0.40824829, 0.35355339, 1.        , ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.        , 0.        , 0.        , ..., 1.        , 0.31622777,
        0.50709255],
       [0.        , 0.        , 0.        , ..., 0.31622777, 1.        ,
        0.26726124],
       [0.        , 0.        , 0.        , ..., 0.50709255, 0.26726124,
        1.        ]])

In [212]:
similarity.shape

(3672, 3672)

In [214]:
similarity[0]

array([1.        , 0.4330127 , 0.40824829, ..., 0.        , 0.        ,
       0.        ])

In [216]:
sorted(enumerate(similarity[0]), key=lambda x: x[1], reverse=True)[1:6]

[(39, 0.7715167498104596),
 (240, 0.6666666666666669),
 (417, 0.6666666666666669),
 (418, 0.6172133998483676),
 (657, 0.6172133998483676)]

In [218]:
data.iloc[39]['course_title']

'Complete Investment Banking Course 2017'

In [220]:
def content_recomm(course):
    """Print the titles of the (up to) five courses most similar to ``course``.

    Parameters
    ----------
    course : str
        A title exactly as stored in ``data['course_title']``.

    Raises
    ------
    IndexError
        If no row of ``data`` has that title.
    """
    # Look the course up by row *position*, not by index label: `similarity`
    # is addressed positionally, while the labels of `data` are not guaranteed
    # to match positions (rows were removed by drop_duplicates without a
    # reset of the index).
    matches = (data['course_title'] == course).to_numpy().nonzero()[0]
    if len(matches) == 0:
        raise IndexError(f"Course '{course}' not found.")
    course_pos = int(matches[0])

    sim = similarity[course_pos]
    ranked = sorted(enumerate(sim), key=lambda x: x[1], reverse=True)
    # Exclude the query course explicitly instead of assuming it sorts first;
    # another row can tie with it at the maximum score.
    course_list = [pair for pair in ranked if pair[0] != course_pos][:5]
    for i in course_list:
        print(data.iloc[i[0]]['course_title'])

In [222]:
content_recomm("Ultimate Investment Banking Course")

Complete Investment Banking Course 2017
Advanced Accounting Investment Banking
Investment Banking Recruitment Series
Business Banking 101
Financial Accounting  Ultimate Beginner Course


In [224]:
course_index = data[data['course_title']=="Complete Investment Banking Course 2017"].index[0]

In [226]:
### GUI

In [None]:
import tkinter as tk
from tkinter import ttk, messagebox
import pandas as pd

# Relies on `data` (the cleaned course DataFrame) and `similarity` (the cosine-similarity matrix) built in the cells above.

# Define the popularity-based recommendation function
def popularity_based_recommendation(df, top_n=5):
    """Return the ``top_n`` courses ranked by a weighted popularity score.

    The score is ``0.6 * num_subscribers + 0.4 * num_reviews``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``course_title``, ``num_subscribers`` and
        ``num_reviews``. The frame is not modified.
    top_n : int, default 5
        Number of rows to return.

    Returns
    -------
    pandas.DataFrame
        The ``course_title`` and ``popularity_score`` columns of the
        highest-scoring rows, sorted by score in descending order.
    """
    # assign() works on a copy, so the caller's frame does not silently gain
    # a `popularity_score` column.
    scored = df.assign(
        popularity_score=0.6 * df['num_subscribers'] + 0.4 * df['num_reviews']
    )
    ranked = scored.sort_values(by='popularity_score', ascending=False)
    return ranked[['course_title', 'popularity_score']].head(top_n)

# Define the recommend function
def recommend(course):
    """Return the titles of the (up to) five courses most similar to ``course``.

    Parameters
    ----------
    course : str
        A title exactly as stored in ``data['course_title']``.

    Returns
    -------
    list of str or None
        The recommended titles, best match first. If the title is not present
        in ``data`` an error dialog is shown and ``None`` is returned.
    """
    # Look the course up by row *position*, not by index label: `similarity`
    # is addressed positionally, while the labels of `data` are not guaranteed
    # to match positions (rows were removed by drop_duplicates without a
    # reset of the index).
    matches = (data['course_title'] == course).to_numpy().nonzero()[0]
    if len(matches) == 0:
        # Only a genuinely unknown title is reported; no broad try/except that
        # could disguise an unrelated IndexError as "not found".
        messagebox.showerror("Error", f"Course '{course}' not found.")
        return None
    course_pos = int(matches[0])

    scores = similarity[course_pos]
    ranked = sorted(enumerate(scores), reverse=True, key=lambda x: x[1])
    # Exclude the query course explicitly instead of assuming it sorts first;
    # another row can tie with it at the maximum score.
    neighbours = [pos for pos, _ in ranked if pos != course_pos][:5]
    return [data.iloc[pos]['course_title'] for pos in neighbours]

# Event handler for the "Recommend" button
def recommend_button_click():
    """Handle a click on the "Recommend" button.

    Reads the selected title from ``course_var`` and asks ``recommend`` for
    similar courses. When any are returned, the popularity label is hidden
    and the titles are written to ``result_label``; otherwise the widgets
    are left untouched.
    """
    suggestions = recommend(course_var.get())
    if not suggestions:
        return

    popularity_label.pack_forget()
    message = "Recommended Courses:\n" + '\n'.join(suggestions)
    result_label.config(text=message)

# ---- Main application window ----
root = tk.Tk()
root.title("Course Recommender")
root.geometry("400x300")

# ---- Shared fonts and colours ----
font_style = ("Arial", 12)
label_color, heading_color = "blue", "red"  # heading_color is not used by any widget below
button_color, result_label_color = "green", "black"

# ---- Course picker ----
label = tk.Label(root, text="Select Course:", fg=label_color, font=font_style)
label.pack(pady=10)

# The drop-down offers every title in `data`; the first one is pre-selected.
course_titles = data['course_title'].tolist()
course_var = tk.StringVar(value=course_titles[0])
course_dropdown = ttk.Combobox(
    root,
    values=course_titles,
    textvariable=course_var,
    font=font_style,
    width=40,
)
course_dropdown.pack(pady=5)

# ---- Popularity table (recommend_button_click hides it once results are shown) ----
popularity_recommendations = popularity_based_recommendation(data, top_n=5)
popularity_text = "Popularity-based Recommendations:\n" + popularity_recommendations.to_string(index=False)
popularity_label = tk.Label(root, text=popularity_text, font=font_style, fg=label_color)
popularity_label.pack()

# ---- Action button ----
recommend_button = tk.Button(
    root,
    text="Recommend",
    command=recommend_button_click,
    font=font_style,
    fg=button_color,
    width=20,
)
recommend_button.pack(pady=10)

# ---- Result area: starts empty and is filled in by recommend_button_click ----
result_label = tk.Label(root, text="", font=font_style, fg=result_label_color, wraplength=350)
result_label.pack()

# Hand control to Tk; this call blocks until the window is closed.
root.mainloop()