# Load Data


In [1]:
import pandas as pd
import numpy as np

# Read the two input tables from CSV files, in the same order as before:
# the tracking records first, then the journey catalogue.
# Original author's note: developer_journeys.csv is taken from the
# "dataset used" folder.
tracking, journeys = (
    pd.read_csv(csv_name)
    for csv_name in ("clean_tracking.csv", "developer_journeys.csv")
)

# Mengambil Kolom Penting

In [2]:
# Keep only the tracking columns used downstream: the developer/journey keys
# and the two activity timestamps.
tracking = tracking[['developer_id', 'journey_id', 'first_opened_at', 'completed_at']]

# Attach each journey's name and difficulty to its tracking rows.
# how='left' keeps every tracking row, so rows whose journey_id has no match
# in journeys end up with missing name/difficulty.
# validate='many_to_one' makes pandas raise MergeError if journeys['id']
# contains duplicates, instead of silently duplicating tracking rows (which
# would inflate every count computed from df afterwards).
df = pd.merge(
    tracking,
    journeys[['id', 'name', 'difficulty']],
    left_on='journey_id',
    right_on='id',
    how='left',
    validate='many_to_one',
)

# Memberi Label Aktivitas pada Modul

In [3]:
# Derive one activity label per row from the two timestamp columns.
has_completed = df['completed_at'].notna()
has_opened = df['first_opened_at'].notna()

# Label -> condition, in priority order: np.select uses the first condition
# that matches a row, and falls back to 'unknown' when none does.
label_rules = {
    'completed': has_completed,                   # finished
    'in_progress': has_opened & ~has_completed,   # opened but not finished
    'not_started': ~has_opened,                   # never opened
}

df['status_label'] = np.select(
    list(label_rules.values()),
    list(label_rules.keys()),
    default='unknown',
).astype(str)


# Menyimpan Histori Aktivitas Modul

In [4]:
# Select the columns of the exported history table, in their output order.
history_columns = [
    'developer_id',
    'journey_id',
    'name',
    'difficulty',
    'status_label',
    'first_opened_at',
    'completed_at',
]
df_final = df[history_columns]

# Write the table to CSV without the row index.
df_final.to_csv("learning_history.csv", index=False)

# Confirm the export and show the first ten rows as plain text.
print("✅ learning_history.csv berhasil dibuat!")
print(df_final.head(10))

✅ learning_history.csv berhasil dibuat!
   developer_id  journey_id  \
0         96989          26   
1         96989          26   
2         96989          32   
3         96989          32   
4         96989          32   
5         96989          72   
6         96989          32   
7         96989          32   
8         96989          72   
9         96989          72   

                                                name  difficulty status_label  \
0          Belajar Membangun Aplikasi Android Native         0.0  not_started   
1          Belajar Membangun Aplikasi Android Native         0.0  not_started   
2                     Belajar Membangun LINE Chatbot         1.0  in_progress   
3                     Belajar Membangun LINE Chatbot         1.0  in_progress   
4                     Belajar Membangun LINE Chatbot         1.0  in_progress   
5  Belajar Membangun Prototype Chatbot Dengan LIN...         0.0  in_progress   
6                     Belajar Membangun LINE Chatbo

In [5]:
# Preview the first rows of the history table.
df_final.head()

Unnamed: 0,developer_id,journey_id,name,difficulty,status_label,first_opened_at,completed_at
0,96989,26,Belajar Membangun Aplikasi Android Native,0.0,not_started,,
1,96989,26,Belajar Membangun Aplikasi Android Native,0.0,not_started,,
2,96989,32,Belajar Membangun LINE Chatbot,1.0,in_progress,2018-08-29 11:19:00,
3,96989,32,Belajar Membangun LINE Chatbot,1.0,in_progress,2018-08-29 11:32:00,
4,96989,32,Belajar Membangun LINE Chatbot,1.0,in_progress,2018-08-29 12:23:00,


In [6]:
# Print the column dtypes and non-null counts of the history table.
df_final.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 101736 entries, 0 to 101735
Data columns (total 7 columns):
 #   Column           Non-Null Count   Dtype  
---  ------           --------------   -----  
 0   developer_id     101736 non-null  int64  
 1   journey_id       101736 non-null  int64  
 2   name             101716 non-null  object 
 3   difficulty       101716 non-null  float64
 4   status_label     101736 non-null  object 
 5   first_opened_at  74348 non-null   object 
 6   completed_at     8746 non-null    object 
dtypes: float64(1), int64(2), object(4)
memory usage: 5.4+ MB


In [7]:
# Count rows per developer and status: one row per developer_id, one column
# per status_label, 0 where a developer has no rows with that status.
status_counts = (
    df_final
    .groupby(['developer_id', 'status_label'])
    .size()
    .unstack(fill_value=0)
)

# NOTE: .size() counts rows of df_final, so 'total_journeys' is the row total
# across all status columns, not a count of distinct journey_id values.
learning_progress_summary = status_counts.assign(
    total_journeys=status_counts.sum(axis=1)
)
display(learning_progress_summary.head())

status_label,completed,in_progress,not_started,total_journeys
developer_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
3390,296,3283,1947,5526
5774,258,1656,714,2628
11836,477,1265,1530,3272
17833,244,2597,972,3813
32258,321,2895,2727,5943


In [8]:
# Export the per-developer summary; index=True writes the frame's index as
# the first CSV column.
summary_csv = "average_learning_history.csv"
learning_progress_summary.to_csv(summary_csv, index=True)
print("✅ average_learning_history.csv berhasil dibuat!")

✅ average_learning_history.csv berhasil dibuat!
