In [4]:
import pandas as pd
import numpy as np

# Build learning_history.csv: tracking rows enriched with the journey name,
# difficulty, and a status label derived from the two timestamp columns.

# --- Step 1: read the raw inputs ---
tracking = pd.read_csv("tracking_clean.csv")
journeys = pd.read_csv("developer_journeys.csv")

# --- Step 2: keep only the tracking columns used below ---
tracking_columns = ['developer_id', 'journey_id', 'first_opened_at', 'completed_at']
tracking = tracking[tracking_columns]

# --- Step 3: attach journey name and difficulty ---
# Left join: tracking rows whose journey_id has no match in journeys are kept,
# with name/difficulty left as NaN.
journey_lookup = journeys[['id', 'name', 'difficulty']]
df = tracking.merge(journey_lookup, how='left', left_on='journey_id', right_on='id')

# --- Step 4: derive the status label ---
# np.select takes the first condition that matches, so the rules are listed
# in priority order; 'unknown' is only a safety-net default.
has_opened = df['first_opened_at'].notna()
has_completed = df['completed_at'].notna()
status_rules = [
    (has_completed, 'completed'),                  # already finished
    (has_opened & ~has_completed, 'in_progress'),  # opened but not finished
    (~has_opened, 'not_started'),                  # never opened
]
df['status_label'] = np.select(
    [rule for rule, _ in status_rules],
    [label for _, label in status_rules],
    default='unknown',
).astype(str)

# --- Step 5: select and order the output columns ---
# NOTE(review): the preview printed by this cell shows repeated
# (developer_id, journey_id) pairs, so the output is not one row per journey.
# Confirm whether that grain is intended or whether rows should be deduplicated.
output_columns = [
    'developer_id', 'journey_id', 'name', 'difficulty',
    'status_label', 'first_opened_at', 'completed_at',
]
df_final = df[output_columns]

# --- Step 6: persist and show a preview ---
df_final.to_csv("learning_history.csv", index=False)

print("✅ learning_history.csv berhasil dibuat!")
print(df_final.head(10))


✅ learning_history.csv berhasil dibuat!
   developer_id  journey_id  \
0         96989          26   
1         96989          26   
2         96989          32   
3         96989          32   
4         96989          32   
5         96989          72   
6         96989          32   
7         96989          32   
8         96989          72   
9         96989          72   

                                                name  difficulty status_label  \
0          Belajar Membangun Aplikasi Android Native         0.0  not_started   
1          Belajar Membangun Aplikasi Android Native         0.0  not_started   
2                     Belajar Membangun LINE Chatbot         1.0  in_progress   
3                     Belajar Membangun LINE Chatbot         1.0  in_progress   
4                     Belajar Membangun LINE Chatbot         1.0  in_progress   
5  Belajar Membangun Prototype Chatbot Dengan LIN...         0.0  in_progress   
6                     Belajar Membangun LINE Chatbo

In [5]:
import pandas as pd

# Build dashboard_data_final.csv: the dashboard table with a learner_type
# column attached from the clustering output.

# === 1. Load the main dashboard dataset and the model output ===
dashboard = pd.read_csv("dashboard_data.csv")
clustered = pd.read_csv("clustered_learners.csv")

# === 2. Join on developer_id ===
merged_dashboard = pd.merge(
    dashboard,
    clustered[['developer_id', 'learner_type']],
    on='developer_id',
    how='left',   # left join so every mentee in the dashboard is kept
    # Guard against row fan-out: if developer_id is duplicated in the
    # clustering output, pandas raises MergeError instead of silently
    # duplicating mentee rows in the saved file.
    validate='many_to_one',
)

# === 3. Fill missing learner_type with 'Unclassified' ===
# Rows without a match in the clustering output (e.g. new users) get NaN
# from the left join.
merged_dashboard['learner_type'] = merged_dashboard['learner_type'].fillna('Unclassified')

# === 4. Save the final result ===
merged_dashboard.to_csv("dashboard_data_final.csv", index=False)

print("✅ dashboard_data_final.csv berhasil dibuat!")
print("Shape:", merged_dashboard.shape)
print("Kolom:", list(merged_dashboard.columns))


✅ dashboard_data_final.csv berhasil dibuat!
Shape: (31, 14)
Kolom: ['developer_id', 'materials_completed', 'active_days', 'avg_rating', 'avg_score', 'consistency_score', 'fast_learner_flag', 'reflective_learner_flag', 'study_duration_total', 'display_name', 'email', 'user_role', 'city_id', 'learner_type']


In [11]:
import pandas as pd

# Build dashboard_learning_detail.csv: every learning-history row enriched
# with a few per-developer dashboard fields.

# Load both datasets written by the earlier cells
dashboard = pd.read_csv("dashboard_data_final.csv")
history = pd.read_csv("learning_history.csv")

# Join on developer_id, keeping every history row
merged_full = pd.merge(
    history,
    dashboard[['developer_id', 'display_name', 'learner_type', 'avg_rating', 'consistency_score']],
    on='developer_id',
    how='left',
    # The dashboard side is used as a per-developer lookup. If developer_id is
    # duplicated there, pandas raises MergeError instead of silently
    # multiplying history rows.
    validate='many_to_one',
)

# Save the combined result
merged_full.to_csv("dashboard_learning_detail.csv", index=False)

print("✅ dashboard_learning_detail.csv berhasil dibuat!")
print("Ukuran data:", merged_full.shape)


✅ dashboard_learning_detail.csv berhasil dibuat!
Ukuran data: (101736, 11)
