# **1. Import Library**

Pada tahap ini, Anda perlu mengimpor beberapa pustaka (library) Python yang dibutuhkan untuk analisis data dan pembangunan model machine learning atau deep learning.

In [14]:
# Notebook-wide setup: imports, global NumPy seed, warning filter and pandas display options.
import numpy as np
np.random.seed(0)  # Seed NumPy's global RNG before any random draw (only NumPy is seeded, not `random`)

# GENERAL
import kagglehub
import pandas as pd
import random
import time
import os
from collections import Counter
from sklearn.metrics.pairwise import cosine_similarity
from scipy.sparse import csr_matrix

# Visualization
import matplotlib.pyplot as plt
import plotly.express as px
import seaborn as sns

# SCALER & TRANSFORMATION
from sklearn.preprocessing import StandardScaler, LabelEncoder

# IGNORING WARNINGS
# NOTE(review): this silences every warning for the whole session, deprecation notices included.
import warnings
warnings.filterwarnings("ignore")

# Configure Defaults
pd.set_option('display.max_columns', None)  # no limit on the number of columns shown
# pd.set_option('display.max_rows', None)  # (disabled) would also lift the row limit
pd.set_option("display.float_format", lambda x: "%.4f" % x)  # format floats with 4 decimal places

In [15]:
# Mount Google Drive at /content/drive so the dataset folder is reachable as a file path.
# `google.colab` is a Colab-specific module, so this cell is expected to run inside Colab.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# **2. Load Dataset**

In [16]:
# Dataset folder on the mounted Drive: the raw CSV is read from here and the exports are written here.
dataPath = '/content/drive/MyDrive/Colab Notebooks/Capstone - Course Recommender Systems/Dataset/' # Trailing slash required: file names are appended directly

In [17]:
# Load the raw Udemy course catalogue from the dataset folder on Drive (a file path, not a URL)
df_courses = pd.read_csv(dataPath + 'udemy_courses_raw.csv')

In [18]:
# Count rows whose course_id repeats an earlier row (the first occurrence is not counted).
print(df_courses['course_id'].duplicated().sum())

6


In [19]:
# Keep only the first row seen for each course_id; the original index labels are retained.
df_courses = df_courses.drop_duplicates(subset=["course_id"], keep="first")

In [20]:
# Preview the first five rows of the course table.
df_courses.head()

Unnamed: 0,course_id,course_title,url,is_paid,price,num_subscribers,num_reviews,num_lectures,level,content_duration,published_timestamp,subject
0,1070968,Ultimate Investment Banking Course,https://www.udemy.com/ultimate-investment-bank...,True,200,2147,23,51,All Levels,1.5,2017-01-18T20:58:58Z,Business Finance
1,1113822,Complete GST Course & Certification - Grow You...,https://www.udemy.com/goods-and-services-tax/,True,75,2792,923,274,All Levels,39.0,2017-03-09T16:34:20Z,Business Finance
2,1006314,Financial Modeling for Business Analysts and C...,https://www.udemy.com/financial-modeling-for-b...,True,45,2174,74,51,Intermediate Level,2.5,2016-12-19T19:26:30Z,Business Finance
3,1210588,Beginner to Pro - Financial Analysis in Excel ...,https://www.udemy.com/complete-excel-finance-c...,True,95,2451,11,36,All Levels,3.0,2017-05-30T20:07:24Z,Business Finance
4,1011058,How To Maximize Your Profits Trading Options,https://www.udemy.com/how-to-maximize-your-pro...,True,200,1276,45,26,Intermediate Level,2.0,2016-12-13T14:57:18Z,Business Finance


# **3. Generate Synthetic User Interactions with Interaction Types**

In [21]:
# Build a synthetic interaction log: every course receives between 10 and 50
# interactions, each attributed to a randomly drawn user and interaction type.

# Re-seed NumPy's global RNG in this cell so that re-running the cell on its own
# regenerates the same log. On a fresh "Run All" this matches the seed set in the
# setup cell, assuming nothing drew from the global RNG in between.
np.random.seed(0)

num_users = 500
num_courses = len(df_courses)

# The probabilities are positionally aligned with interaction_types and sum to 1.
interaction_types = ['view', 'buy', 'enrolled', 'complete']
interaction_probs = [0.5, 0.2, 0.2, 0.1]

# Numeric user ids 1..num_users, built once instead of on every loop iteration.
user_id_pool = np.arange(1, num_users + 1)

user_ids_list = []
course_ids_list = []
interaction_types_list = []

# Only course_id is needed, so iterate over that column rather than iterrows().
# The order of the three random draws per course is kept as-is so the generated
# sequence stays stable.
for course_id in df_courses['course_id']:
    # Number of interactions for this course: 10 to 50 inclusive (upper bound is exclusive).
    n_interactions = np.random.randint(10, 51)

    # Draw users with replacement, so the same user may appear several times per course.
    chosen_user_ids_num = np.random.choice(user_id_pool, size=n_interactions, replace=True)
    # Prefix the numeric id with 'user_' to form the final user_id string.
    chosen_user_ids = ['user_' + str(uid) for uid in chosen_user_ids_num]

    # Draw one interaction type per interaction according to interaction_probs.
    interaction_values = np.random.choice(interaction_types, size=n_interactions, p=interaction_probs)

    user_ids_list.extend(chosen_user_ids)
    course_ids_list.extend([course_id] * n_interactions)
    interaction_types_list.extend(interaction_values)

df_interactions = pd.DataFrame({
    'user_id': user_ids_list,
    'course_id': course_ids_list,
    'interaction_type': interaction_types_list
})

# Constant flag (1 = an interaction happened), convenient for summing per group.
df_interactions['interaction'] = 1

In [22]:
# Preview the first five rows of the synthetic interaction log.
df_interactions.head()

Unnamed: 0,user_id,course_id,interaction_type,interaction
0,user_324,1070968,enrolled,1
1,user_252,1070968,complete,1
2,user_196,1070968,view,1
3,user_360,1070968,enrolled,1
4,user_10,1070968,buy,1


# **4. Aggregate Interaction Data and Merge Back to Courses**

In [23]:
# Per-course summary: number of interaction rows and number of distinct users.
df_interactions_aggregated = (
    df_interactions
    .groupby('course_id')
    .agg(
        total_interactions=('interaction', 'sum'),
        total_users=('user_id', 'nunique'),
    )
    .reset_index()
)

# Drop aggregate columns left behind by a previous run of this cell before merging;
# otherwise a re-run would produce suffixed duplicates (total_users_x / total_users_y).
# On the first run the columns do not exist yet and errors='ignore' makes this a no-op.
aggregate_columns = ['total_interactions', 'total_users']
df_courses = pd.merge(
    df_courses.drop(columns=aggregate_columns, errors='ignore'),
    df_interactions_aggregated,
    on='course_id',
    how='left',  # keep every course row, whether or not it has a summary row
)

# **5. Export Synthetic Interactions and Updated Courses**

In [24]:
# Write the synthetic interaction log to the dataset folder, without the index column.
interactions_csv_path = dataPath + 'synthetic_user_interactions.csv'
df_interactions.to_csv(path_or_buf=interactions_csv_path, index=False)
print(f"File '{dataPath}synthetic_user_interactions.csv' berhasil disimpan.")

File '/content/drive/MyDrive/Colab Notebooks/Capstone - Course Recommender Systems/Dataset/synthetic_user_interactions.csv' berhasil disimpan.


In [25]:
# Write the course table (including total_interactions / total_users) to the dataset folder.
courses_csv_path = dataPath + 'udemy_courses_new.csv'
df_courses.to_csv(courses_csv_path, index=False)
# Report the path that was actually written.
print(f"File '{courses_csv_path}' berhasil disimpan.")

File '/content/drive/MyDrive/Colab Notebooks/Capstone - Course Recommender Systems/Dataset/synthetic_user_interactions.csv' berhasil disimpan.


In [26]:
# Show the merged course table. The row limit is left at the pandas default, so long frames render truncated.
df_courses

Unnamed: 0,course_id,course_title,url,is_paid,price,num_subscribers,num_reviews,num_lectures,level,content_duration,published_timestamp,subject,total_interactions,total_users
0,1070968,Ultimate Investment Banking Course,https://www.udemy.com/ultimate-investment-bank...,True,200,2147,23,51,All Levels,1.5000,2017-01-18T20:58:58Z,Business Finance,10,10
1,1113822,Complete GST Course & Certification - Grow You...,https://www.udemy.com/goods-and-services-tax/,True,75,2792,923,274,All Levels,39.0000,2017-03-09T16:34:20Z,Business Finance,15,14
2,1006314,Financial Modeling for Business Analysts and C...,https://www.udemy.com/financial-modeling-for-b...,True,45,2174,74,51,Intermediate Level,2.5000,2016-12-19T19:26:30Z,Business Finance,11,11
3,1210588,Beginner to Pro - Financial Analysis in Excel ...,https://www.udemy.com/complete-excel-finance-c...,True,95,2451,11,36,All Levels,3.0000,2017-05-30T20:07:24Z,Business Finance,30,30
4,1011058,How To Maximize Your Profits Trading Options,https://www.udemy.com/how-to-maximize-your-pro...,True,200,1276,45,26,Intermediate Level,2.0000,2016-12-13T14:57:18Z,Business Finance,50,46
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3667,775618,Learn jQuery from Scratch - Master of JavaScri...,https://www.udemy.com/easy-jquery-for-beginner...,True,100,1040,14,21,All Levels,2.0000,2016-06-14T17:36:46Z,Web Development,33,31
3668,1088178,How To Design A WordPress Website With No Codi...,https://www.udemy.com/how-to-make-a-wordpress-...,True,25,306,3,42,Beginner Level,3.5000,2017-03-10T22:24:30Z,Web Development,14,14
3669,635248,Learn and Build using Polymer,https://www.udemy.com/learn-and-build-using-po...,True,40,513,169,48,All Levels,3.5000,2015-12-30T16:41:42Z,Web Development,39,37
3670,905096,CSS Animations: Create Amazing Effects on Your...,https://www.udemy.com/css-animations-create-am...,True,50,300,31,38,All Levels,3.0000,2016-08-11T19:06:15Z,Web Development,31,27


In [27]:
# Final look at the results: the five users with the most interaction rows,
# then previews of the interaction log and of the course table.
display(
    df_interactions['user_id'].value_counts().head(),
    df_interactions.head(),
    df_courses.head(),
)

Unnamed: 0_level_0,count
user_id,Unnamed: 1_level_1
user_174,261
user_386,256
user_91,253
user_235,253
user_19,251


Unnamed: 0,user_id,course_id,interaction_type,interaction
0,user_324,1070968,enrolled,1
1,user_252,1070968,complete,1
2,user_196,1070968,view,1
3,user_360,1070968,enrolled,1
4,user_10,1070968,buy,1


Unnamed: 0,course_id,course_title,url,is_paid,price,num_subscribers,num_reviews,num_lectures,level,content_duration,published_timestamp,subject,total_interactions,total_users
0,1070968,Ultimate Investment Banking Course,https://www.udemy.com/ultimate-investment-bank...,True,200,2147,23,51,All Levels,1.5,2017-01-18T20:58:58Z,Business Finance,10,10
1,1113822,Complete GST Course & Certification - Grow You...,https://www.udemy.com/goods-and-services-tax/,True,75,2792,923,274,All Levels,39.0,2017-03-09T16:34:20Z,Business Finance,15,14
2,1006314,Financial Modeling for Business Analysts and C...,https://www.udemy.com/financial-modeling-for-b...,True,45,2174,74,51,Intermediate Level,2.5,2016-12-19T19:26:30Z,Business Finance,11,11
3,1210588,Beginner to Pro - Financial Analysis in Excel ...,https://www.udemy.com/complete-excel-finance-c...,True,95,2451,11,36,All Levels,3.0,2017-05-30T20:07:24Z,Business Finance,30,30
4,1011058,How To Maximize Your Profits Trading Options,https://www.udemy.com/how-to-maximize-your-pro...,True,200,1276,45,26,Intermediate Level,2.0,2016-12-13T14:57:18Z,Business Finance,50,46
