In [1]:
# Load EDA Pkgs
import pandas as pd
import neattext.functions as nfx

In [2]:
# Load ML/Rc Pkgs
from sklearn.feature_extraction.text import CountVectorizer,TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity,linear_kernel

In [4]:
# Load our dataset (the Udemy course catalogue); the relative path is
# resolved against the notebook's working directory.
df = pd.read_csv("udemy_courses.csv")

In [5]:
df.head()

Unnamed: 0,course_id,course_title,url,is_paid,price,num_subscribers,num_reviews,num_lectures,level,content_duration,published_timestamp,subject
0,1070968,Ultimate Investment Banking Course,https://www.udemy.com/ultimate-investment-bank...,True,200,2147,23,51,All Levels,1.5 hours,2017-01-18T20:58:58Z,Business Finance
1,1113822,Complete GST Course & Certification - Grow You...,https://www.udemy.com/goods-and-services-tax/,True,75,2792,923,274,All Levels,39 hours,2017-03-09T16:34:20Z,Business Finance
2,1006314,Financial Modeling for Business Analysts and C...,https://www.udemy.com/financial-modeling-for-b...,True,45,2174,74,51,Intermediate Level,2.5 hours,2016-12-19T19:26:30Z,Business Finance
3,1210588,Beginner to Pro - Financial Analysis in Excel ...,https://www.udemy.com/complete-excel-finance-c...,True,95,2451,11,36,All Levels,3 hours,2017-05-30T20:07:24Z,Business Finance
4,1011058,How To Maximize Your Profits Trading Options,https://www.udemy.com/how-to-maximize-your-pro...,True,200,1276,45,26,Intermediate Level,2 hours,2016-12-13T14:57:18Z,Business Finance


In [6]:
df['course_title']

0                      Ultimate Investment Banking Course
1       Complete GST Course & Certification - Grow You...
2       Financial Modeling for Business Analysts and C...
3       Beginner to Pro - Financial Analysis in Excel ...
4            How To Maximize Your Profits Trading Options
                              ...                        
3678    Learn jQuery from Scratch - Master of JavaScri...
3679    How To Design A WordPress Website With No Codi...
3680                        Learn and Build using Polymer
3681    CSS Animations: Create Amazing Effects on Your...
3682    Using MODX CMS to Build Websites: A Beginner's...
Name: course_title, Length: 3683, dtype: object

In [7]:
dir(nfx)

['BTC_ADDRESS_REGEX',
 'CURRENCY_REGEX',
 'CURRENCY_SYMB_REGEX',
 'Counter',
 'DATE_REGEX',
 'EMAIL_REGEX',
 'EMOJI_REGEX',
 'HASTAG_REGEX',
 'MASTERCard_REGEX',
 'MD5_SHA_REGEX',
 'MOST_COMMON_PUNCT_REGEX',
 'NUMBERS_REGEX',
 'PHONE_REGEX',
 'PoBOX_REGEX',
 'SPECIAL_CHARACTERS_REGEX',
 'STOPWORDS',
 'STOPWORDS_de',
 'STOPWORDS_en',
 'STOPWORDS_es',
 'STOPWORDS_fr',
 'STOPWORDS_ru',
 'STOPWORDS_yo',
 'STREET_ADDRESS_REGEX',
 'TextFrame',
 'URL_PATTERN',
 'USER_HANDLES_REGEX',
 'VISACard_REGEX',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__generate_text',
 '__loader__',
 '__name__',
 '__numbers_dict',
 '__package__',
 '__spec__',
 '_lex_richness_herdan',
 '_lex_richness_maas_ttr',
 'clean_text',
 'defaultdict',
 'digit2words',
 'extract_btc_address',
 'extract_currencies',
 'extract_currency_symbols',
 'extract_dates',
 'extract_emails',
 'extract_emojis',
 'extract_hashtags',
 'extract_html_tags',
 'extract_mastercard_addr',
 'extract_md5sha',
 'extract_numbers',
 'extr

In [8]:
# Clean text, step 1: strip stopwords from every course title and keep the
# result in a new column so the original title stays untouched.
df['clean_course_title'] = df['course_title'].apply(nfx.remove_stopwords)

In [9]:
# Clean text, step 2: strip special characters (e.g. "&", "-", ":") from the
# stopword-free titles, overwriting the clean_course_title column in place.
df['clean_course_title'] = df['clean_course_title'].apply(nfx.remove_special_characters)

In [10]:
df[['course_title','clean_course_title']]

Unnamed: 0,course_title,clean_course_title
0,Ultimate Investment Banking Course,Ultimate Investment Banking Course
1,Complete GST Course & Certification - Grow You...,Complete GST Course Certification Grow Practice
2,Financial Modeling for Business Analysts and C...,Financial Modeling Business Analysts Consultants
3,Beginner to Pro - Financial Analysis in Excel ...,Beginner Pro Financial Analysis Excel 2017
4,How To Maximize Your Profits Trading Options,Maximize Profits Trading Options
...,...,...
3678,Learn jQuery from Scratch - Master of JavaScri...,Learn jQuery Scratch Master JavaScript library
3679,How To Design A WordPress Website With No Codi...,Design WordPress Website Coding
3680,Learn and Build using Polymer,Learn Build Polymer
3681,CSS Animations: Create Amazing Effects on Your...,CSS Animations Create Amazing Effects Website


In [11]:
# Vectorize our text: fit a bag-of-words vocabulary on the cleaned titles and
# build a sparse count matrix with one row per course and one column per token.
count_vect = CountVectorizer()
cv_mat = count_vect.fit_transform(df['clean_course_title'])

In [12]:
# Sparse
cv_mat

<Compressed Sparse Row sparse matrix of dtype 'int64'
	with 18364 stored elements and shape (3683, 3564)>

In [13]:
# Dense
cv_mat.todense()

matrix([[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]], dtype=int64)

In [16]:
# Dense word-count table labelled with the vocabulary tokens.
# toarray() yields a plain ndarray; todense() yields the legacy np.matrix
# type, which NumPy no longer recommends for new code.
df_cv_words = pd.DataFrame(cv_mat.toarray(),columns=count_vect.get_feature_names_out())

In [17]:
df_cv_words.head()

Unnamed: 0,000005,001,01,02,10,100,101,101master,102,10k,...,zend,zero,zerotohero,zf2,zinsen,zoho,zombie,zu,zuhause,zur
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [18]:
# Cosine Similarity Matrix: pairwise cosine similarity between the count
# vectors of all courses (square, one row and one column per course).
cosine_sim_mat = cosine_similarity(cv_mat)

In [19]:
cosine_sim_mat

array([[1.        , 0.20412415, 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.20412415, 1.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 1.        , ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.        , 0.        , 0.        , ..., 1.        , 0.        ,
        0.23570226],
       [0.        , 0.        , 0.        , ..., 0.        , 1.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.23570226, 0.        ,
        1.        ]])

In [20]:
# import seaborn as sns
# sns.heatmap(cosine_sim_mat[0:10],annot=True)

In [21]:
df.head()

Unnamed: 0,course_id,course_title,url,is_paid,price,num_subscribers,num_reviews,num_lectures,level,content_duration,published_timestamp,subject,clean_course_title
0,1070968,Ultimate Investment Banking Course,https://www.udemy.com/ultimate-investment-bank...,True,200,2147,23,51,All Levels,1.5 hours,2017-01-18T20:58:58Z,Business Finance,Ultimate Investment Banking Course
1,1113822,Complete GST Course & Certification - Grow You...,https://www.udemy.com/goods-and-services-tax/,True,75,2792,923,274,All Levels,39 hours,2017-03-09T16:34:20Z,Business Finance,Complete GST Course Certification Grow Practice
2,1006314,Financial Modeling for Business Analysts and C...,https://www.udemy.com/financial-modeling-for-b...,True,45,2174,74,51,Intermediate Level,2.5 hours,2016-12-19T19:26:30Z,Business Finance,Financial Modeling Business Analysts Consultants
3,1210588,Beginner to Pro - Financial Analysis in Excel ...,https://www.udemy.com/complete-excel-finance-c...,True,95,2451,11,36,All Levels,3 hours,2017-05-30T20:07:24Z,Business Finance,Beginner Pro Financial Analysis Excel 2017
4,1011058,How To Maximize Your Profits Trading Options,https://www.udemy.com/how-to-maximize-your-pro...,True,200,1276,45,26,Intermediate Level,2 hours,2016-12-13T14:57:18Z,Business Finance,Maximize Profits Trading Options


In [22]:
# Get Course ID/Index: map each course title to its row position in df.
# Series.drop_duplicates() compares the *values* (the row positions, which are
# always unique), so it never removes repeated titles; a repeated title would
# then make the lookup return a Series instead of a single position.
# De-duplicate on the index (the titles) instead, keeping the first occurrence.
course_indices = pd.Series(df.index,index=df['course_title'])
course_indices = course_indices[~course_indices.index.duplicated(keep='first')]

In [23]:
course_indices

course_title
Ultimate Investment Banking Course                                0
Complete GST Course & Certification - Grow Your CA Practice       1
Financial Modeling for Business Analysts and Consultants          2
Beginner to Pro - Financial Analysis in Excel 2017                3
How To Maximize Your Profits Trading Options                      4
                                                               ... 
Learn jQuery from Scratch - Master of JavaScript library       3678
How To Design A WordPress Website With No Coding At All        3679
Learn and Build using Polymer                                  3680
CSS Animations: Create Amazing Effects on Your Website         3681
Using MODX CMS to Build Websites: A Beginner's Guide           3682
Length: 3683, dtype: int64

In [24]:
course_indices['How To Maximize Your Profits Trading Options']

4

In [25]:
idx = course_indices['How To Maximize Your Profits Trading Options']

In [26]:
idx

4

In [27]:
# Pair each course's row position with its similarity to the selected course.
scores = list(enumerate(cosine_sim_mat[idx]))

scores

In [29]:
# Sort our scores per cosine score (highest first).
# The selected course (row idx) is pinned to position 0 so that slicing with
# [1:] always drops exactly that course. A plain descending sort only puts it
# first when no earlier row ties at the top score (e.g. an identical cleaned
# title), because sorted() is stable and keeps tied entries in input order.
# Ordering of all other entries is unchanged: descending score, ties by row.
sorted_scores = sorted(scores,key=lambda x:(x[0] != idx, -x[1]))

# Omit the First Value/itself
sorted_scores[1:]

In [31]:
# Row positions of the recommended courses, best match first
# (the leading entry of sorted_scores is skipped).
selected_course_indices = [position for position, _score in sorted_scores[1:]]

selected_course_indices

In [33]:
# Similarity score of each recommended course, best match first
# (the leading entry of sorted_scores is skipped).
selected_course_scores = [score for _position, score in sorted_scores[1:]]

In [34]:
recommended_result = df['course_title'].iloc[selected_course_indices]

In [35]:
rec_df = pd.DataFrame(recommended_result)

In [36]:
rec_df.head()

Unnamed: 0,course_title
410,Trading Options Basics
43,Options Trading - How to Win with Weekly Options
96,Intermediate Options trading concepts for Stoc...
138,Forex Trading with Fixed 'Risk through Options...
195,Trading Options For Consistent Returns: Option...


In [37]:
# Attach the scores by position: the titles in rec_df and the score list were
# both derived from sorted_scores[1:], so they share the same order.
rec_df['similarity_scores'] = selected_course_scores

In [38]:
rec_df

Unnamed: 0,course_title,similarity_scores
410,Trading Options Basics,0.577350
43,Options Trading - How to Win with Weekly Options,0.566947
96,Intermediate Options trading concepts for Stoc...,0.530330
138,Forex Trading with Fixed 'Risk through Options...,0.530330
195,Trading Options For Consistent Returns: Option...,0.530330
...,...,...
3678,Learn jQuery from Scratch - Master of JavaScri...,0.000000
3679,How To Design A WordPress Website With No Codi...,0.000000
3680,Learn and Build using Polymer,0.000000
3681,CSS Animations: Create Amazing Effects on Your...,0.000000


In [1]:
import pandas as pd
import random
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load the enrollment workbook; later code reads its 'Name' and 'CourseName'
# columns. The relative path is resolved against the working directory.
file_path = "ENROLLMENT_NPTEL.xlsx"
data = pd.read_excel(file_path)

# Load the Udemy courses dataset
udemy_courses_path = 'udemy_courses.csv'
udemy_courses = pd.read_csv(udemy_courses_path)

def create_cosine_sim_matrix(courses):
    """Build a title-based cosine similarity matrix for a course table.

    Parameters
    ----------
    courses : table with a 'course_title' column (e.g. a pandas DataFrame).

    Returns
    -------
    tuple
        ``(similarity, titles, vectorizer)`` where ``similarity`` is the
        pairwise cosine similarity of the title count vectors, ``titles`` is
        the 'course_title' column, and ``vectorizer`` is the fitted
        CountVectorizer (English stop words removed).
    """
    titles = courses['course_title']
    vectorizer = CountVectorizer(stop_words='english')
    similarity = cosine_similarity(vectorizer.fit_transform(titles))
    return similarity, titles, vectorizer

cosine_sim_mat, course_titles, count_vect = create_cosine_sim_matrix(udemy_courses)

def get_course_by_user_name(user_name, data):
    """Look up enrollment rows whose 'Name' contains ``user_name``.

    The match is a case-insensitive *literal* substring search, so characters
    such as ``(`` or ``.`` in a name are not interpreted as regex syntax.
    Leading/trailing whitespace in ``user_name`` is ignored.

    Parameters
    ----------
    user_name : str
        Full or partial name to search for.
    data : pandas.DataFrame
        Table with at least the columns 'Name' and 'CourseName'.

    Returns
    -------
    pandas.DataFrame or str
        The 'Name' and 'CourseName' columns of every matching row, or the
        message ``"No records found for user: <user_name>"`` when nothing
        matches (including when the query is empty or not a string).
    """
    # Whitespace typed around the name at a prompt should not defeat the match.
    query = user_name.strip() if isinstance(user_name, str) else ''
    # An empty query is a substring of every name; treat it as "no match"
    # rather than silently returning the whole table.
    if query:
        is_match = data['Name'].str.contains(query, case=False, na=False, regex=False)
        user_record = data[is_match]
        if not user_record.empty:
            return user_record[['Name', 'CourseName']]
    return f"No records found for user: {user_name}"

def recommend_similar_course(title):
    """Return the catalogue course whose title is most similar to ``title``.

    Uses the module-level ``count_vect`` (already fitted), ``course_titles``
    and ``udemy_courses``.

    Parameters
    ----------
    title : str
        Course title to find a match for.

    Returns
    -------
    pandas.Series or pandas.DataFrame
        A Series with 'course_title' and 'similarity_score' for the best
        match, or a one-row DataFrame containing 'No similar course found'
        when the title is empty/not a string, the catalogue is empty, or no
        catalogue title shares any vocabulary with ``title``.
    """
    no_match = pd.DataFrame({'course_title': ['No similar course found']})
    if not isinstance(title, str) or not title.strip() or len(course_titles) == 0:
        return no_match

    # Compare the query against the catalogue only (a 1 x N result) instead of
    # building the full (N+1) x (N+1) matrix just to read its last row.
    catalogue_vectors = count_vect.transform(course_titles)
    query_vector = count_vect.transform([title])
    sim_scores = cosine_similarity(query_vector, catalogue_vectors).ravel()

    # Get the index of the course with the highest similarity score
    highest_sim_index = sim_scores.argmax()
    # With no shared vocabulary every score is 0 and argmax would just pick
    # row 0; report "no match" instead of an unrelated course.
    if sim_scores[highest_sim_index] <= 0:
        return no_match

    # Select the most similar course
    selected_course = udemy_courses.iloc[highest_sim_index].copy()
    selected_course['similarity_score'] = sim_scores[highest_sim_index]
    return selected_course[['course_title', 'similarity_score']]

# Ask for a user, look up their enrolled course, and recommend the closest
# Udemy course by title similarity.
user_name = input('Enter the Name: ').strip()
course_info = get_course_by_user_name(user_name, data)
# Printed once here for both outcomes: the matching rows, or the
# "No records found" message (so no second print is needed below).
print(course_info)

if isinstance(course_info, pd.DataFrame) and not course_info.empty:
    # Only the first matching enrollment drives the recommendation.
    course_title = course_info.iloc[0]['CourseName']
    print(f"Recommendations based on the user's course ({course_title}):")

    # Recommend the most similar course from the Udemy dataset
    similar_recommendation = recommend_similar_course(course_title)
    print("Most similar course from the Udemy dataset:")
    print(similar_recommendation)


Enter the Name:  Aubry Mewe


          Name                                         CourseName
34  Aubry Mewe  Practical Cyber Security for Cyber Security Pr...
Recommendations based on the user's course (Practical Cyber Security for Cyber Security Practitioners):
Most similar course from the Udemy dataset:
course_title        WordPress Security - The Ultimate Security Guide
similarity_score                                            0.676123
Name: 3462, dtype: object
