In [50]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from nltk.tokenize import word_tokenize
from sklearn.metrics.pairwise import cosine_similarity
from Sastrawi.StopWordRemover.StopWordRemoverFactory import StopWordRemoverFactory
import pickle
import requests

class ContentBasedRecommender:
    """Content-based course recommender using TF-IDF and cosine similarity.

    The course table is downloaded from ``api_url`` into ``self.df``; the text
    column ``content_col`` is vectorised with TF-IDF (``self.vectorizer``) and
    the resulting document matrix is kept in ``self.bank``. Row ``i`` of
    ``self.bank`` corresponds to row ``i`` (by position) of ``self.df``.

    Typical use::

        recommender = ContentBasedRecommender()
        recommender.fit()
        recommender.recommend(13)
    """

    # Seconds to wait for the course API. Also used as a fallback for
    # instances unpickled from files saved before ``timeout`` existed.
    DEFAULT_TIMEOUT = 30

    def __init__(self, api_url='http://127.0.0.1:8082/get-all-active-course', content_col='deskripsi',
                 timeout=DEFAULT_TIMEOUT):
        """Store the configuration and download the course table once.

        Args:
            api_url: Endpoint that returns the course records as JSON.
            content_col: Name of the text column the model is built on.
            timeout: Seconds to wait for the API before giving up.

        Raises:
            requests.RequestException: If the API cannot be reached in time.
        """
        self.api_url = api_url
        self.content_col = content_col
        self.timeout = timeout
        self.vectorizer = None
        self.bank = None
        self.df = None
        self.latest_data()

    def latest_data(self):
        """Refresh ``self.df`` from the API.

        On a non-200 response the error is printed and the previously loaded
        table (if any) is kept. ``self.bank`` is NOT rebuilt here; call
        ``fit()`` (or use ``train()``) after refreshing.
        """
        timeout = getattr(self, 'timeout', self.DEFAULT_TIMEOUT)
        # Without a timeout ``requests`` may block forever on a stalled server.
        response = requests.post(self.api_url, timeout=timeout)

        if response.status_code == 200:
            data = response.json()  # adjust here if the API wraps the records
            self.df = pd.DataFrame(data)
        else:
            print(f"Error {response.status_code}: {response.text}")

    def fit(self):
        """Build the TF-IDF vectoriser and document matrix from ``self.df``.

        Raises:
            RuntimeError: If no course data has been loaded.
        """
        self._require_data()
        stopwords = StopWordRemoverFactory().get_stop_words()

        self.vectorizer = TfidfVectorizer(stop_words=stopwords)
        self.bank = self.vectorizer.fit_transform(self._documents())

    def train(self):
        """Download the latest course table and refit the model on it."""
        self.latest_data()
        self.fit()

    def recommend(self, course_id, min_similarity=0, topk=10):
        """Return the courses most similar to the course ``course_id``.

        Args:
            course_id: Value looked up in the ``id_course`` column.
            min_similarity: Only courses scoring strictly above this are kept.
            topk: Maximum number of courses considered (negative counts as 0).

        Returns:
            A list of course records (dicts), best match first, each with an
            added ``similarity`` key. Empty if ``course_id`` is unknown.

        Raises:
            RuntimeError: If no data is loaded or ``fit()`` has not been run.
        """
        self._require_data()
        matches = (self.df['id_course'] == course_id).to_numpy().nonzero()[0]

        if len(matches) == 0:
            print(f"ID course {course_id} tidak ditemukan.")
            return []

        position = int(matches[0])
        content = self._documents().iloc[position]
        # Exclude the query course by position instead of dropping rank 0:
        # a course with an identical description ties at similarity 1.0 and
        # may sort ahead of the query course itself.
        return self._rank(content, min_similarity, topk, exclude=position)

    def recommend_by_interest(self, user_interest, min_similarity=0, topk=10):
        """Return the courses most similar to the free-text ``user_interest``.

        Args:
            user_interest: Query text describing what the user wants to learn.
            min_similarity: Only courses scoring strictly above this are kept.
            topk: Maximum number of courses considered (negative counts as 0).

        Returns:
            A list of course records (dicts), best match first, each with an
            added ``similarity`` key.

        Raises:
            RuntimeError: If no data is loaded or ``fit()`` has not been run.
        """
        return self._rank(user_interest, min_similarity, topk)

    def save_model(self, file_path):
        """Pickle this recommender (data, vectoriser and matrix) to ``file_path``."""
        with open(file_path, 'wb') as file:
            pickle.dump(self, file)
        print("Model berhasil diexport dengan nama: ", file_path)

    @classmethod
    def load_model(cls, file_path):
        """Load a recommender previously written by ``save_model``.

        SECURITY: ``pickle.load`` can execute arbitrary code; only load files
        that come from a trusted source.
        """
        with open(file_path, 'rb') as file:
            return pickle.load(file)

    def _documents(self):
        """Return the content column as strings, with missing values as ''."""
        # TfidfVectorizer rejects NaN/None documents, so normalise them here;
        # the row count is unchanged, keeping the matrix aligned with self.df.
        return self.df[self.content_col].fillna('').astype(str)

    def _require_data(self):
        """Raise a clear error when the course table was never loaded."""
        if self.df is None:
            raise RuntimeError(
                "No course data loaded; latest_data() did not receive a successful response."
            )

    def _require_fitted(self):
        """Raise a clear error when the model is missing or out of date."""
        self._require_data()
        if self.vectorizer is None or self.bank is None:
            raise RuntimeError("Model is not fitted; call fit() or train() first.")
        if self.bank.shape[0] != len(self.df):
            # The table was refreshed without refitting, so matrix rows no
            # longer line up with table rows.
            raise RuntimeError("Course data changed since the last fit; call fit() again.")

    def _rank(self, text, min_similarity, topk, exclude=None):
        """Score ``text`` against every course and return the best records.

        ``exclude`` is an optional row position that must never be returned.
        """
        self._require_fitted()
        query = self.vectorizer.transform([text])
        scores = cosine_similarity(query, self.bank)[0]

        order = scores.argsort()[::-1]  # row positions, best score first
        if exclude is not None:
            order = order[order != exclude]
        order = order[:max(topk, 0)]
        kept = [int(i) for i in order if scores[i] > min_similarity]

        # ``kept`` holds row positions, so select with iloc (not label-based loc).
        result = self.df.iloc[kept].copy()
        result['similarity'] = [float(scores[i]) for i in kept]
        return result.to_dict(orient='records')


In [45]:
# Build the recommender (the constructor downloads the course table from the
# API) and fit the TF-IDF model on the course description column.
recommender = ContentBasedRecommender()
recommender.fit()

In [46]:
# Courses most similar to the course whose id_course is 13 (at most 10 by default).
recommender.recommend(13)

[{'id_course': 17,
  'nama_course': 'z',
  'kategori': 'React.js',
  'kesulitan': 'Dasar',
  'deskripsi': 'Mempelajari algoritma dasar dengan alur logika seperti analisis kasus',
  'total_modul': 0,
  'followed': 1,
  'rating': None,
  'tgl_dibuat': '2023-12-20T17:00:00.000Z',
  'similarity': 0.9443728111886356},
 {'id_course': 15,
  'nama_course': 'tes',
  'kategori': 'Kotlin',
  'kesulitan': 'Menengah',
  'deskripsi': 'algoritma dasar dengan menggunakan alur seperti menganalisa kasus',
  'total_modul': 0,
  'followed': 0,
  'rating': None,
  'tgl_dibuat': '2023-09-08T17:00:00.000Z',
  'similarity': 0.5752716283822946},
 {'id_course': 14,
  'nama_course': 'Flowchart',
  'kategori': 'React.js',
  'kesulitan': 'Dasar',
  'deskripsi': 'Flowchart berperan penting dalam memutuskan sebuah langkah atau fungsionalitas dari sebuah proyek pembuatan program yang melibatkan banyak orang sekaligus. Selain itu dengan menggunakan bagan alur proses dari sebuah program akan lebih jelas, ringkas, dan '

In [49]:
# Free-text query; an empty list means none of the top-ranked courses scored
# above min_similarity (0 by default).
recommender.recommend_by_interest("Web")

[]