In [None]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import nltk
from nltk.stem.porter import PorterStemmer
import zipfile

Unzipping the dataset

In [None]:
# Unpack the uploaded archive next to itself so the CSV can be read directly.
with zipfile.ZipFile(file="/content/MOOC.csv (1).zip", mode='r') as zip_ref:
    zip_ref.extractall(path="/content/")

Reading the CSV file with specified encoding

In [None]:
# ISO-8859-1 maps every byte to a character, so decoding cannot fail here.
# NOTE(review): presumably the file is not valid UTF-8 - confirm the real encoding.
data = pd.read_csv(filepath_or_buffer="/content/MOOC.csv", encoding='ISO-8859-1')

Selecting the necessary columns

In [None]:
# Keep only the columns that feed the recommender.
columns_to_keep = [
    'Course Name',
    'Difficulty Level',
    'Course Rating',
    'Course Description',
    'all_skill',
]
data = data[columns_to_keep]

Function to clean text

In [None]:
def clean_text(text):
    """Normalise a free-text field into a comma-separated token string.

    Spaces become commas, one pass of doubled commas is collapsed, and the
    characters ``:``, ``(``, ``)`` and ``_`` are removed. Anything that is
    not a ``str`` (e.g. NaN) is returned unchanged.
    """
    if not isinstance(text, str):
        return text

    # Order matters: spaces must become commas before doubled commas collapse.
    substitutions = (
        (' ', ','),
        (',,', ','),
        (':', ''),
        ('(', ''),
        (')', ''),
        ('_', ''),
    )
    for old, new in substitutions:
        text = text.replace(old, new)
    return text

Applying the clean_text function and handling NaN values

In [None]:
# Normalise the text columns. clean_text passes non-strings through untouched,
# so only the description needs an explicit fillna before cleaning.
data['Course Name'] = data['Course Name'].apply(clean_text)
data['Course Description'] = data['Course Description'].fillna('').apply(clean_text)
# regex=False: '(' and ')' are regex metacharacters and the default of `regex`
# differs between pandas versions; request a literal replacement explicitly.
data['all_skill'] = (
    data['all_skill']
    .fillna('')
    .str.replace('(', '', regex=False)
    .str.replace(')', '', regex=False)
)

Creating the 'tags' column

In [None]:
# Build one text blob per course from the descriptive columns.
# The parts are joined with a space so the last word of one column and the
# first word of the next stay separate tokens, and missing values are treated
# as empty text so one NaN does not turn the whole tag into NaN.
data['tags'] = (
    data['Course Name'].fillna('').astype(str)
    .str.cat(
        [
            data[column].fillna('').astype(str)
            for column in ('Difficulty Level', 'Course Description', 'all_skill')
        ],
        sep=' ',
    )
)

Preparing the dataframe for vectorization

In [None]:
# Build an independent frame (rename/assign return new objects) instead of
# mutating a slice of `data`, which is what raises SettingWithCopyWarning.
# Commas inserted during cleaning are turned back into spaces, tags are
# lower-cased, and any non-string/missing tag ends up as an empty string.
new_df = (
    data[['Course Name', 'tags']]
    .rename(columns={'Course Name': 'course_name'})
    .assign(
        course_name=lambda df: df['course_name'].str.replace(',', ' ', regex=False),
        tags=lambda df: (
            df['tags']
            .str.replace(',', ' ', regex=False)
            .str.lower()
            .fillna('')
        ),
    )
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['tags'] = new_df['tags'].str.replace(',', ' ')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['Course Name'] = new_df['Course Name'].str.replace(',', ' ')
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df.rename(columns={'Course Name': 'course_name'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try u

Vectorizing the text

In [None]:
# Bag-of-words encoding of the tags: English stop words are dropped and the
# vocabulary is capped at 5000 terms. The sparse result is densified.
cv = CountVectorizer(stop_words='english', max_features=5000)
vectors = cv.fit_transform(new_df['tags']).toarray()

Downloading NLTK data and creating the Porter stemmer

In [None]:
# Fetch the NLTK 'punkt' resource (network access; cached after the first run).
# NOTE(review): the stemming below tokenizes with str.split, so 'punkt' may not
# actually be required - confirm before removing.
nltk.download('punkt')
# Shared Porter stemmer instance used by stem().
ps = PorterStemmer()

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


Function to stem the text

In [None]:
def stem(text):
    """Stem each whitespace-separated word of ``text`` with the shared stemmer.

    The words are passed through ``ps.stem`` one by one and re-joined with
    single spaces, so runs of whitespace in the input are collapsed.
    """
    return " ".join(map(ps.stem, text.split()))

Applying the stem function to the 'tags' column

In [None]:
# Replace every tag string with its stemmed form (element-wise).
new_df['tags'] = new_df['tags'].map(stem)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['tags'] = new_df['tags'].apply(stem)


Calculating cosine similarity

In [None]:
# `vectors` was first built before the tags were stemmed, so using it as-is
# would ignore the stemming step entirely. Re-fit the vectorizer on the
# current (stemmed) tags, then compute the pairwise cosine similarity.
vectors = cv.fit_transform(new_df['tags']).toarray()
similarity = cosine_similarity(vectors)

Function to recommend courses

In [None]:
def recommend(course, top_n=6):
    """Print the names of the courses most similar to ``course``.

    Parameters
    ----------
    course : str
        Exact value to look up in ``new_df['course_name']``. If several rows
        match, the first one is used.
    top_n : int, default 6
        Number of recommendations to print.

    Raises
    ------
    IndexError
        If ``course`` does not occur in ``new_df['course_name']``.
    """
    # Work with the row *position*, not the index label: `similarity` and
    # `.iloc` are positional, and the two only coincide for a default index.
    matches = (new_df['course_name'] == course).to_numpy().nonzero()[0]
    if len(matches) == 0:
        raise IndexError(f"course {course!r} not found in new_df['course_name']")
    course_index = int(matches[0])

    distances = similarity[course_index]
    ranked = sorted(enumerate(distances), key=lambda pair: pair[1], reverse=True)

    # Drop the query course explicitly rather than assuming it sorts first;
    # a duplicate or tied course can otherwise take its place at the top.
    picks = [idx for idx, _ in ranked if idx != course_index][:top_n]

    for idx in picks:
        print(new_df['course_name'].iloc[idx])

Testing the recommend function

In [None]:
# Spot-check: recommendations for a business-strategy project.
recommend(course='Business Strategy Business Model Canvas Analysis with Miro')

Product Development Customer Persona Development with Miro
Product and Service Development Empathy Mapping with Miro
Product Development Customer Journey Mapping with Miro
Analyzing Macro-Environmental Factors Using Creately
Business Strategy in Practice Project-centered Course
Innovating with the Business Model Canvas


In [None]:
# Spot-check: recommendations for a backend development project.
recommend(course='Build a Twitter Clone Backend')

Introduction to Front-end Development with ReactJS
Getting Started with Blazor WASM
Creating a Personal Site with Gatsby
Getting Started with ASP.NET Core Razor Pages
Build local development environments using Docker containers
Project Creating Your First C++ Application


In [None]:
# Spot-check: recommendations for an introductory programming course.
recommend(course='Introduction to Python')

Python Basics Create a Guessing Number Game from Scratch
Create Your First Python Program
Create Your First Game with Python
Build a Guessing Game Application using Java
Create an interactive fiction adventure game with Python
Create Your First Web App with Python and Flask


Saving the similarity matrix

In [None]:
import pickle

# Persist the pairwise similarity matrix. Only the matrix is written; the
# course names in new_df are not part of this file.
with open('course_recommender.pkl', mode='wb') as file:
    pickle.Pickler(file).dump(similarity)