In [7]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, LeakyReLU, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import nltk
from nltk.corpus import stopwords

# Load data
data = pd.read_csv("data/updatemergedata.csv")
data.head()

# Encode categorical variables
label_encoder = LabelEncoder()
data['Level'] = label_encoder.fit_transform(data['Level']) 
data['Price'] = data['Price'].map({'Berbayar': 1, 'Gratis': 0})

# 1. Gabungkan fitur teks menjadi satu kolom
indonesian_stopwords = stopwords.words('indonesian')
numerical_features = data[['Level', 'Price']].values
data['Combined Summary'] = data['Learning Path'] + ' ' + data['Learning Path Summary'] + ' ' + data['Course Name_x'] + ' ' + data['Course Summary']
# 2. TF-IDF untuk fitur teks
tfidf = TfidfVectorizer(stop_words=indonesian_stopwords)
tfidf_matrix = tfidf.fit_transform(data['Combined Summary']).toarray()

# 3. Scaling data untuk fitur numerik jika diperlukan
scaler = StandardScaler()
X_scaled = scaler.fit_transform(tfidf_matrix)
X_combined = np.hstack((X_scaled, numerical_features))



In [10]:
print("Shape of TF-IDF matrix:", tfidf_matrix.shape)
print("Shape of numerical features:", numerical_features.shape)
print("Shape of combined data:", X_combined.shape)

Shape of TF-IDF matrix: (6039, 409)
Shape of numerical features: (6039, 2)
Shape of combined data: (6039, 411)


In [20]:
# 1. Tentukan dimensi input
input_dim = X_combined.shape[1]

# 2. Definisikan model Autoencoder
input_layer = Input(shape=(input_dim,))
encoded = Dense(128)(input_layer)
encoded = LeakyReLU(negative_slope=0.01)(encoded)
encoded = Dropout(0.2)(encoded)
encoded = Dense(64)(encoded)
encoded = LeakyReLU(negative_slope=0.01)(encoded)
encoded = Dropout(0.2)(encoded)

decoded = Dense(64, kernel_regularizer=tf.keras.regularizers.l2(0.01))(encoded)
decoded = LeakyReLU(negative_slope=0.01)(decoded)
decoded = Dense(128, kernel_regularizer=tf.keras.regularizers.l2(0.01))(decoded)
decoded = LeakyReLU(negative_slope=0.01)(decoded)
decoded = Dense(input_dim, activation='sigmoid')(decoded)

autoencoder = Model(input_layer, decoded)

# 3. Compile model
optimizer = Adam(learning_rate=0.001)
autoencoder.compile(optimizer=optimizer, loss='mean_squared_error',  metrics=['accuracy'])

# 4. Early stopping untuk menghindari overfitting
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# 5. Train model
history = autoencoder.fit(
    X_combined, X_combined,  # Gunakan data gabungan
    epochs=50,
    batch_size=64,
    shuffle=True,
    validation_split=0.2,
    callbacks=[early_stopping]
)

# 6. Simpan model
autoencoder.save("coba.keras")



Epoch 1/50
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.0000e+00 - loss: 2.1020 - val_accuracy: 0.0000e+00 - val_loss: 2.1260
Epoch 2/50
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.0000e+00 - loss: 1.0855 - val_accuracy: 0.0000e+00 - val_loss: 1.8383
Epoch 3/50
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.0000e+00 - loss: 0.7863 - val_accuracy: 0.0000e+00 - val_loss: 1.7602
Epoch 4/50
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.0000e+00 - loss: 0.7048 - val_accuracy: 0.0000e+00 - val_loss: 1.7387
Epoch 5/50
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.0000e+00 - loss: 0.6758 - val_accuracy: 0.0000e+00 - val_loss: 1.7250
Epoch 6/50
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.0000e+00 - loss: 0.6620 - val_accuracy: 0.0000e+00 - val_loss:

In [24]:
from sklearn.metrics.pairwise import cosine_similarity

def recommend_learning_paths_with_encoder(data, encoder, user_preferences, programming_languages, github_username=None):
    # Ambil skill dari GitHub jika tersedia
    github_languages = get_github_skills(github_username) if github_username else []
    combined_preferences = user_preferences + programming_languages + github_languages
    
    # Vectorize user preferences
    user_vector = " ".join(combined_preferences)
    vectorizer = TfidfVectorizer(stop_words=indonesian_stopwords)
    
    # Gabungkan data learning path dan user preferences
    all_vectors = vectorizer.fit_transform(data['Combined Summary'].tolist() + [user_vector])
    user_vector_tfidf = all_vectors[-1].toarray()  # Make sure it's a 2D array
    learning_paths_vectors = all_vectors[:-1].toarray()  # Make sure it's a 2D array
    
    # Check if the input shapes are correct
    print(f"Shape of user vector: {user_vector_tfidf.shape}")
    print(f"Shape of learning paths vectors: {learning_paths_vectors.shape}")
    
    # Representasi latent menggunakan encoder
    latent_learning_paths = encoder.predict(learning_paths_vectors)
    latent_user_vector = encoder.predict(user_vector_tfidf)
    
    # Hitung kemiripan antara representasi latent pengguna dan learning paths
    similarity_scores = cosine_similarity(latent_user_vector, latent_learning_paths)
    data['similarity'] = similarity_scores.flatten()

    # Filter rekomendasi berdasarkan nilai kemiripan minimal 0.4
    filtered_recommendations = data[data['similarity'] >= 0.4].sort_values(by='similarity', ascending=False)
    
    # Jika ada kurang dari 5 yang memenuhi kriteria, ambil 5 teratas meskipun kurang dari 0.4
    if len(filtered_recommendations) < 5:
        recommendations = data.sort_values(by='similarity', ascending=False).drop_duplicates(subset=['Course Name_x']).head(5)
    else:
        recommendations = filtered_recommendations.drop_duplicates(subset=['Course Name_x']).head(5)

    # Jelaskan alasan rekomendasi
    explanations = []
    for _, row in recommendations.iterrows():
        explanations.append(
            f"Rekomendasi '{row['Course Name_x']}' memiliki kemiripan tinggi dengan preferensi Anda "
            f"berdasarkan skill '{user_preferences}', bahasa pemrograman '{programming_languages}', dan aktivitas GitHub '{github_languages}'.")

    return recommendations[['Course Name_x', 'similarity']], explanations


In [25]:
# Contoh Penggunaan
user_preferences = ['Backend Developer', 'cloud computing', 'API Processing']  # Skill yang ingin dikembangkan
programming_languages = ['JavaScript', 'AWS', 'Java']  # Bahasa pemrograman yang diminati
github_username = 'ricomessi'

# Mendapatkan rekomendasi
recommendations, explanations = recommend_learning_paths_with_encoder(data, encoder, user_preferences, programming_languages, github_username)

print("\nTop Recommended Learning Paths:")
print(recommendations)
print("\nExplanation for Recommendations:")
for explanation in explanations:
    print(explanation)




Shape of user vector: (1, 414)
Shape of learning paths vectors: (6039, 414)


InvalidArgumentError: Graph execution error:

Detected at node functional_11_1/leaky_re_lu_28_1/LeakyRelu defined at (most recent call last):
  File "C:\Users\Rico Mesias\AppData\Local\Programs\Python\Python310\lib\runpy.py", line 196, in _run_module_as_main

  File "C:\Users\Rico Mesias\AppData\Local\Programs\Python\Python310\lib\runpy.py", line 86, in _run_code

  File "C:\Users\Rico Mesias\OneDrive\Documents\VsCode\DicodingCompany\venv\lib\site-packages\ipykernel_launcher.py", line 18, in <module>

  File "C:\Users\Rico Mesias\OneDrive\Documents\VsCode\DicodingCompany\venv\lib\site-packages\traitlets\config\application.py", line 1075, in launch_instance

  File "C:\Users\Rico Mesias\OneDrive\Documents\VsCode\DicodingCompany\venv\lib\site-packages\ipykernel\kernelapp.py", line 739, in start

  File "C:\Users\Rico Mesias\OneDrive\Documents\VsCode\DicodingCompany\venv\lib\site-packages\tornado\platform\asyncio.py", line 205, in start

  File "C:\Users\Rico Mesias\AppData\Local\Programs\Python\Python310\lib\asyncio\base_events.py", line 595, in run_forever

  File "C:\Users\Rico Mesias\AppData\Local\Programs\Python\Python310\lib\asyncio\base_events.py", line 1881, in _run_once

  File "C:\Users\Rico Mesias\AppData\Local\Programs\Python\Python310\lib\asyncio\events.py", line 80, in _run

  File "C:\Users\Rico Mesias\OneDrive\Documents\VsCode\DicodingCompany\venv\lib\site-packages\ipykernel\kernelbase.py", line 545, in dispatch_queue

  File "C:\Users\Rico Mesias\OneDrive\Documents\VsCode\DicodingCompany\venv\lib\site-packages\ipykernel\kernelbase.py", line 534, in process_one

  File "C:\Users\Rico Mesias\OneDrive\Documents\VsCode\DicodingCompany\venv\lib\site-packages\ipykernel\kernelbase.py", line 437, in dispatch_shell

  File "C:\Users\Rico Mesias\OneDrive\Documents\VsCode\DicodingCompany\venv\lib\site-packages\ipykernel\ipkernel.py", line 362, in execute_request

  File "C:\Users\Rico Mesias\OneDrive\Documents\VsCode\DicodingCompany\venv\lib\site-packages\ipykernel\kernelbase.py", line 778, in execute_request

  File "C:\Users\Rico Mesias\OneDrive\Documents\VsCode\DicodingCompany\venv\lib\site-packages\ipykernel\ipkernel.py", line 449, in do_execute

  File "C:\Users\Rico Mesias\OneDrive\Documents\VsCode\DicodingCompany\venv\lib\site-packages\ipykernel\zmqshell.py", line 549, in run_cell

  File "C:\Users\Rico Mesias\OneDrive\Documents\VsCode\DicodingCompany\venv\lib\site-packages\IPython\core\interactiveshell.py", line 3075, in run_cell

  File "C:\Users\Rico Mesias\OneDrive\Documents\VsCode\DicodingCompany\venv\lib\site-packages\IPython\core\interactiveshell.py", line 3130, in _run_cell

  File "C:\Users\Rico Mesias\OneDrive\Documents\VsCode\DicodingCompany\venv\lib\site-packages\IPython\core\async_helpers.py", line 128, in _pseudo_sync_runner

  File "C:\Users\Rico Mesias\OneDrive\Documents\VsCode\DicodingCompany\venv\lib\site-packages\IPython\core\interactiveshell.py", line 3334, in run_cell_async

  File "C:\Users\Rico Mesias\OneDrive\Documents\VsCode\DicodingCompany\venv\lib\site-packages\IPython\core\interactiveshell.py", line 3517, in run_ast_nodes

  File "C:\Users\Rico Mesias\OneDrive\Documents\VsCode\DicodingCompany\venv\lib\site-packages\IPython\core\interactiveshell.py", line 3577, in run_code

  File "C:\Users\Rico Mesias\AppData\Local\Temp\ipykernel_4512\1551639828.py", line 7, in <module>

  File "C:\Users\Rico Mesias\AppData\Local\Temp\ipykernel_4512\1807422840.py", line 22, in recommend_learning_paths_with_encoder

  File "C:\Users\Rico Mesias\OneDrive\Documents\VsCode\DicodingCompany\venv\lib\site-packages\keras\src\utils\traceback_utils.py", line 117, in error_handler

  File "C:\Users\Rico Mesias\OneDrive\Documents\VsCode\DicodingCompany\venv\lib\site-packages\keras\src\backend\tensorflow\trainer.py", line 510, in predict

  File "C:\Users\Rico Mesias\OneDrive\Documents\VsCode\DicodingCompany\venv\lib\site-packages\keras\src\backend\tensorflow\trainer.py", line 208, in one_step_on_data_distributed

  File "C:\Users\Rico Mesias\OneDrive\Documents\VsCode\DicodingCompany\venv\lib\site-packages\keras\src\backend\tensorflow\trainer.py", line 198, in one_step_on_data

  File "C:\Users\Rico Mesias\OneDrive\Documents\VsCode\DicodingCompany\venv\lib\site-packages\keras\src\backend\tensorflow\trainer.py", line 96, in predict_step

  File "C:\Users\Rico Mesias\OneDrive\Documents\VsCode\DicodingCompany\venv\lib\site-packages\keras\src\utils\traceback_utils.py", line 117, in error_handler

  File "C:\Users\Rico Mesias\OneDrive\Documents\VsCode\DicodingCompany\venv\lib\site-packages\keras\src\layers\layer.py", line 899, in __call__

  File "C:\Users\Rico Mesias\OneDrive\Documents\VsCode\DicodingCompany\venv\lib\site-packages\keras\src\utils\traceback_utils.py", line 117, in error_handler

  File "C:\Users\Rico Mesias\OneDrive\Documents\VsCode\DicodingCompany\venv\lib\site-packages\keras\src\ops\operation.py", line 46, in __call__

  File "C:\Users\Rico Mesias\OneDrive\Documents\VsCode\DicodingCompany\venv\lib\site-packages\keras\src\utils\traceback_utils.py", line 156, in error_handler

  File "C:\Users\Rico Mesias\OneDrive\Documents\VsCode\DicodingCompany\venv\lib\site-packages\keras\src\models\functional.py", line 182, in call

  File "C:\Users\Rico Mesias\OneDrive\Documents\VsCode\DicodingCompany\venv\lib\site-packages\keras\src\ops\function.py", line 171, in _run_through_graph

  File "C:\Users\Rico Mesias\OneDrive\Documents\VsCode\DicodingCompany\venv\lib\site-packages\keras\src\models\functional.py", line 584, in call

  File "C:\Users\Rico Mesias\OneDrive\Documents\VsCode\DicodingCompany\venv\lib\site-packages\keras\src\utils\traceback_utils.py", line 117, in error_handler

  File "C:\Users\Rico Mesias\OneDrive\Documents\VsCode\DicodingCompany\venv\lib\site-packages\keras\src\layers\layer.py", line 899, in __call__

  File "C:\Users\Rico Mesias\OneDrive\Documents\VsCode\DicodingCompany\venv\lib\site-packages\keras\src\utils\traceback_utils.py", line 117, in error_handler

  File "C:\Users\Rico Mesias\OneDrive\Documents\VsCode\DicodingCompany\venv\lib\site-packages\keras\src\ops\operation.py", line 46, in __call__

  File "C:\Users\Rico Mesias\OneDrive\Documents\VsCode\DicodingCompany\venv\lib\site-packages\keras\src\utils\traceback_utils.py", line 156, in error_handler

  File "C:\Users\Rico Mesias\OneDrive\Documents\VsCode\DicodingCompany\venv\lib\site-packages\keras\src\layers\activations\leaky_relu.py", line 57, in call

  File "C:\Users\Rico Mesias\OneDrive\Documents\VsCode\DicodingCompany\venv\lib\site-packages\keras\src\activations\activations.py", line 120, in leaky_relu

  File "C:\Users\Rico Mesias\OneDrive\Documents\VsCode\DicodingCompany\venv\lib\site-packages\keras\src\ops\nn.py", line 294, in leaky_relu

  File "C:\Users\Rico Mesias\OneDrive\Documents\VsCode\DicodingCompany\venv\lib\site-packages\keras\src\backend\tensorflow\nn.py", line 50, in leaky_relu

Matrix size-incompatible: In[0]: [32,414], In[1]: [411,128]
	 [[{{node functional_11_1/leaky_re_lu_28_1/LeakyRelu}}]] [Op:__inference_one_step_on_data_distributed_74040]

In [6]:
import requests

def get_github_skills(username):
    # Ambil bahasa pemrograman dominan dari profil GitHub
    url = f"https://api.github.com/users/ricomessi/repos"
    response = requests.get(url)
    repos = response.json() if response.status_code == 200 else []

    languages = []
    for repo in repos:
        language_url = repo.get('languages_url')
        if language_url:
            lang_response = requests.get(language_url)
            lang_data = lang_response.json() if lang_response.status_code == 200 else {}
            languages.extend(lang_data.keys())

    return list(set(languages))  # Mengambil bahasa unik

def recommend_learning_paths(data, user_preferences, programming_languages, github_username=None):
    # Ambil skill dari GitHub jika tersedia
    github_languages = get_github_skills(github_username) if github_username else []
    combined_preferences = user_preferences + programming_languages + github_languages
    
    # Buat vector user preferences
    user_vector = " ".join(combined_preferences)
    vectorizer = TfidfVectorizer(stop_words=indonesian_stopwords)
    
    # Gabungkan data learning path dan user preferences
    all_vectors = vectorizer.fit_transform(data['Combined Summary'].tolist() + [user_vector])
    user_vector_tfidf = all_vectors[-1]
    learning_paths_vectors = all_vectors[:-1]
    
    # Hitung kemiripan antara user preferences dan learning paths
    similarity_scores = cosine_similarity(user_vector_tfidf, learning_paths_vectors)
    data['similarity'] = similarity_scores.flatten()

    # Filter rekomendasi berdasarkan nilai kemiripan minimal 0.4
    filtered_recommendations = data[data['similarity'] >= 0.4].sort_values(by='similarity', ascending=False)
    
    # Jika ada kurang dari 5 yang memenuhi kriteria, ambil 5 teratas meskipun kurang dari 0.4
    if len(filtered_recommendations) < 5:
        recommendations = data.sort_values(by='similarity', ascending=False).drop_duplicates(subset=['Course Name_x']).head(5)
    else:
        recommendations = filtered_recommendations.drop_duplicates(subset=['Course Name_x']).head(5)

    # Jelaskan alasan rekomendasi
    explanations = []
    for _, row in recommendations.iterrows():
        explanations.append(
            f"Rekomendasi '{row['Course Name_x']}' memiliki kemiripan tinggi dengan preferensi Anda "
            f"berdasarkan skill '{user_preferences}', bahasa pemrograman '{programming_languages}', dan aktivitas GitHub '{github_languages}'.")

    return recommendations[['Course Name_x', 'similarity']], explanations

# Contoh Penggunaan
user_preferences = ['Backend Developer', 'cloud computing', 'API Processing']  # Skill yang ingin dikembangkan
programming_languages = ['JavaScript', 'AWS', 'Java']  # Bahasa pemrograman yang diminati
github_username = 'username_github'

# Mendapatkan rekomendasi
recommendations, explanations = recommend_learning_paths(data, user_preferences, programming_languages, github_username)

print("\nTop Recommended Learning Paths:")
print(recommendations)
print("\nExplanation for Recommendations:")
for explanation in explanations:
    print(explanation)



Top Recommended Learning Paths:
                                          Course Name_x  similarity
497   Cloud Practitioner Essentials (Belajar Dasar A...    0.167497
799   Architecting on AWS (Membangun Arsitektur Clou...    0.125424
3015               Belajar Dasar Pemrograman JavaScript    0.125308
3224                      Menjadi Google Cloud Engineer    0.108695
1112     Belajar Fundamental Back-End dengan JavaScript    0.108047

Explanation for Recommendations:
Rekomendasi 'Cloud Practitioner Essentials (Belajar Dasar AWS Cloud)' memiliki kemiripan tinggi dengan preferensi Anda berdasarkan skill '['Backend Developer', 'cloud computing', 'API Processing']', bahasa pemrograman '['JavaScript', 'AWS', 'Java']', dan aktivitas GitHub '['HTML', 'PHP', 'CSS', 'Python', 'Java', 'JavaScript', 'Hack']'.
Rekomendasi 'Architecting on AWS (Membangun Arsitektur Cloud di AWS)' memiliki kemiripan tinggi dengan preferensi Anda berdasarkan skill '['Backend Developer', 'cloud computing', 'API Pro



In [4]:
def explain_recommendation(recommendations, data):
    explanations = []
    for idx, row in recommendations.iterrows():
        course_name = row['Course Name_x']
        similarity = row['similarity']
        explanation = f"Course '{course_name}' is recommended because it has a high similarity score of {similarity:.2f} with your preferences."
        explanations.append(explanation)
    return explanations

# Menampilkan penjelasan
explanations = explain_recommendation(recommendations, data)
print("\nExplanation for Recommendations:")
for explanation in explanations:
    print(explanation)
 


Explanation for Recommendations:
Course 'Memulai Pemrograman dengan Python' is recommended because it has a high similarity score of 0.16 with your preferences.
Course 'Belajar Pemrograman Prosedural dengan Python' is recommended because it has a high similarity score of 0.11 with your preferences.


In [11]:
from tensorflow.keras.models import load_model

# Load model yang telah disimpan
model = load_model("learning_path_recommendation_model.h5")

# Contoh input pengguna
user_preferences = ["PHP", 'CodeIgniter', 'Java']  # Skill dan minat
recommendations = recommend_learning_paths(data, user_preferences)

print("\nTop 3 Recommended Learning Paths:")
print(recommendations)

# Penjelasan mengapa course ini direkomendasikan
explanations = explain_recommendation(recommendations, data)
print("\nExplanation for Recommendations:")
for explanation in explanations:
    print(explanation)





Top 3 Recommended Learning Paths:
                           Course Name_x  similarity
6038  Belajar Strategi Pengembangan Diri         0.0
0      Memulai Pemrograman dengan Kotlin         0.0
1      Memulai Pemrograman dengan Kotlin         0.0

Explanation for Recommendations:
Course 'Belajar Strategi Pengembangan Diri' is recommended because it has a high similarity score of 0.00 with your preferences.
Course 'Memulai Pemrograman dengan Kotlin' is recommended because it has a high similarity score of 0.00 with your preferences.
Course 'Memulai Pemrograman dengan Kotlin' is recommended because it has a high similarity score of 0.00 with your preferences.


