In [None]:
# Install dan import library yang dibutuhkan
!pip install pandas scikit-learn numpy openpyxl
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score



In [None]:
# Load the expert dataset from the .xlsx file and reshape it so that every
# (expert, expertise) pair sits on its own row.
experts_df = (
    pd.read_excel('expert.xlsx')
    # 'expertise' is read as one ', '-separated string per expert; turn it into a list ...
    .assign(expertise=lambda frame: frame['expertise'].str.split(', '))
    # ... and emit one row per list element (the original row label is repeated).
    .explode('expertise')
)
experts_df.head()

Unnamed: 0,expert_id,name,expertise
0,101,Fajar,Aritmatika
0,101,Fajar,Himpunan
1,102,Budi,Statistika
1,102,Budi,Geometri
2,103,Tegar,Trigonometri


In [None]:
# Split the data into a train set and a test set (20% of the rows go to test,
# fixed seed for reproducibility). experts_df holds one row per
# (expert, expertise) pair, so the split is over those rows, not over experts.
train_df, test_df = train_test_split(
    experts_df,
    random_state=42,
    test_size=0.2,
)

In [None]:
# Build the expert-by-expertise matrix from the train set: one row per expert_id,
# one column per expertise, each cell holding the number of matching train rows
# (0 where the pair does not occur).
train_expert_topic_df = (
    train_df
    .groupby(['expert_id', 'expertise'])
    .size()
    .unstack(fill_value=0)
)
train_expert_topic_df.head()

expertise,Aljabar,Aritmatika,Fungsi,Geometri,Himpunan,Kalkulus,Probabilitas,Statistika,Trigonometri
expert_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
101,0,1,0,0,1,0,0,0,0
102,0,0,0,1,0,0,0,1,0
103,0,0,1,0,0,0,0,0,1
104,0,0,0,0,0,1,0,1,0
105,0,0,0,0,0,1,0,0,0


In [None]:
# Compute the pairwise cosine similarity between experts on the train set and
# label both axes of the resulting square matrix with the expert ids.
train_expert_similarity = cosine_similarity(train_expert_topic_df)
train_expert_similarity_df = pd.DataFrame(
    data=train_expert_similarity,
    columns=train_expert_topic_df.index,
    index=train_expert_topic_df.index,
)
train_expert_similarity_df.head()

expert_id,101,102,103,104,105,106,107,108,109,110,...,191,192,193,194,195,196,197,198,199,200
expert_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
101,1.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.707107,...,0.0,0.0,0.0,0.0,0.5,0.5,0.0,0.5,0.0,0.5
102,0.0,1.0,0.0,0.5,0.0,0.5,0.5,0.0,0.707107,0.0,...,0.5,0.707107,0.0,0.707107,0.0,0.0,0.0,0.0,0.5,0.0
103,0.0,0.0,1.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,...,0.0,0.0,0.5,0.0,0.5,0.0,0.5,0.0,0.0,0.5
104,0.0,0.5,0.0,1.0,0.707107,0.0,0.5,0.0,0.707107,0.0,...,0.0,0.707107,0.5,0.0,0.0,0.0,0.0,0.5,0.0,0.0
105,0.0,0.0,0.0,0.707107,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.707107,0.0,0.0,0.0,0.0,0.707107,0.0,0.0


In [None]:
# Recommendation function backed by the similarity matrix built from the train set
def get_expert_recommendations_train(expert_id, top_n=3, similarity_df=None):
    """Return the ids of the experts most similar to ``expert_id``.

    Parameters
    ----------
    expert_id
        Label of the query expert; must be a row/column label of the
        similarity matrix.
    top_n : int, default 3
        Maximum number of expert ids to return.
    similarity_df : pandas.DataFrame, optional
        Square expert-by-expert similarity matrix. When omitted, the
        notebook-level ``train_expert_similarity_df`` is used.

    Returns
    -------
    list
        Up to ``top_n`` expert ids ordered from most to least similar.
        The query expert itself is never part of the result.

    Raises
    ------
    KeyError
        If ``expert_id`` is not a label of the similarity matrix.
    """
    if similarity_df is None:
        similarity_df = train_expert_similarity_df

    expert_scores = similarity_df.loc[expert_id]

    # Remove the query expert by label instead of assuming it sorts first:
    # any other expert with an identical expertise profile also scores 1.0,
    # so slicing off "the first row" could drop a real match and keep the
    # query expert in the recommendations.
    candidate_scores = expert_scores.drop(expert_id)

    # A stable sort keeps tied experts in matrix order, so repeated runs
    # return the same recommendations.
    ranked = candidate_scores.sort_values(ascending=False, kind='mergesort')
    return ranked.head(top_n).index.tolist()

In [None]:
# Evaluate the recommender on the test set
def evaluate_recommendations(test_df, top_n=3, train_data=None, recommend_fn=None):
    """Score the recommender against the held-out expertise rows.

    For every expert that appears in both ``test_df`` and the train data, the
    expertise values of the recommended experts (looked up in the train data)
    are compared with that expert's held-out expertise values.

    Parameters
    ----------
    test_df : pandas.DataFrame
        Held-out rows with ``expert_id`` and ``expertise`` columns.
    top_n : int, default 3
        Number of experts requested from the recommender.
    train_data : pandas.DataFrame, optional
        Train rows with ``expert_id`` and ``expertise`` columns. Defaults to
        the notebook-level ``train_df``.
    recommend_fn : callable, optional
        Called as ``recommend_fn(expert_id, top_n)`` and expected to return an
        iterable of expert ids. Defaults to
        ``get_expert_recommendations_train``.

    Returns
    -------
    tuple of float
        ``(precision, recall, f1)`` averaged over the evaluated experts, or
        ``(0.0, 0.0, 0.0)`` when no test expert is present in the train data.
    """
    if train_data is None:
        train_data = train_df
    if recommend_fn is None:
        recommend_fn = get_expert_recommendations_train

    precision_scores = []
    recall_scores = []
    f1_scores = []

    train_experts = set(train_data['expert_id'].unique())

    for expert_id in test_df['expert_id'].unique():
        # Experts without any train row have no similarity scores to rank by.
        if expert_id not in train_experts:
            continue

        actual_topics = set(test_df.loc[test_df['expert_id'] == expert_id, 'expertise'])
        recommended_expert_ids = recommend_fn(expert_id, top_n)
        recommended_mask = train_data['expert_id'].isin(recommended_expert_ids)
        # Compare distinct topics on both sides: the hit count is a set
        # intersection, so the denominators must be set sizes as well.
        # Dividing by the raw row count (which repeats topics shared by several
        # recommended experts) would understate precision.
        recommended_topics = set(train_data.loc[recommended_mask, 'expertise'])

        hits = len(actual_topics & recommended_topics)
        # Guard the divisions: the recommender may return nothing.
        precision = hits / len(recommended_topics) if recommended_topics else 0.0
        recall = hits / len(actual_topics) if actual_topics else 0.0
        f1 = 2 * (precision * recall) / (precision + recall) if precision + recall > 0 else 0.0

        precision_scores.append(precision)
        recall_scores.append(recall)
        f1_scores.append(f1)

    # np.mean of an empty list is NaN (with a RuntimeWarning); report zeros instead.
    if not precision_scores:
        return 0.0, 0.0, 0.0

    return (
        float(np.mean(precision_scores)),
        float(np.mean(recall_scores)),
        float(np.mean(f1_scores)),
    )

In [None]:
# Run the evaluation on the held-out rows and report the averaged metrics
precision, recall, f1 = evaluate_recommendations(test_df)
print(
    f"Precision: {precision:.2f}",
    f"Recall: {recall:.2f}",
    f"F1 Score: {f1:.2f}",
    sep="\n",
)

Precision: 0.02
Recall: 0.09
F1 Score: 0.03


In [None]:
# Recommend experts using the full dataset
def get_expert_recommendations(expert_id, top_n=3, similarity_df=None, experts=None):
    """Return the experts most similar to ``expert_id`` with their names.

    Parameters
    ----------
    expert_id
        Label of the query expert; must be a row/column label of the
        similarity matrix.
    top_n : int, default 3
        Maximum number of experts to return.
    similarity_df : pandas.DataFrame, optional
        Square expert-by-expert similarity matrix. When omitted, it is derived
        from ``experts`` using the same recipe the notebook applies to the
        train set (expert-by-expertise counts, then cosine similarity). Pass a
        precomputed matrix to avoid rebuilding it on every call.
    experts : pandas.DataFrame, optional
        Rows with ``expert_id``, ``name`` and ``expertise`` columns. Defaults
        to the notebook-level ``experts_df``.

    Returns
    -------
    pandas.DataFrame
        Columns ``expert_id`` and ``name``, one row per recommended expert,
        ordered from most to least similar. Row labels are those of the first
        matching row in ``experts``. The query expert is never included.

    Raises
    ------
    KeyError
        If ``expert_id`` is not a label of the similarity matrix.
    """
    if experts is None:
        experts = experts_df

    if similarity_df is None:
        # No cell visible in this notebook defines a full-data similarity
        # matrix, so relying on a global here fails on a fresh kernel.
        # Build it from the expert table instead.
        topic_matrix = experts.groupby(['expert_id', 'expertise']).size().unstack(fill_value=0)
        similarity_df = pd.DataFrame(
            cosine_similarity(topic_matrix),
            index=topic_matrix.index,
            columns=topic_matrix.index,
        )

    # Drop the query expert by label: experts with an identical profile also
    # score 1.0, so the query is not guaranteed to sort into first place.
    candidate_scores = similarity_df.loc[expert_id].drop(expert_id)
    # Stable sort => deterministic order among tied experts.
    top_ids = candidate_scores.sort_values(ascending=False, kind='mergesort').head(top_n).index

    # One row per expert (the table repeats an expert once per expertise).
    profiles = experts.drop_duplicates('expert_id')
    profiles = profiles[profiles['expert_id'].isin(top_ids)]

    # Reorder the matching rows by similarity rank rather than table order.
    rank_of = pd.Series(range(len(top_ids)), index=top_ids)
    order = profiles['expert_id'].map(rank_of).to_numpy().argsort(kind='stable')
    return profiles.iloc[order][['expert_id', 'name']]

In [None]:
# Example usage: look up the experts recommended for one expert id
expert_id = 101
recommended_experts = get_expert_recommendations(expert_id)

print(
    f"Recommended experts based on expert {expert_id}:",
    recommended_experts,
    sep="\n",
)

Recommended experts based on expert 101:
    expert_id     name
9         110     Yuni
58        159  Lestari
80        181    Tegar
