In [None]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
import shap

In [None]:
# Project data simulation
# Seed NumPy's global (legacy) random generator so the simulated data is the
# same on every fresh run of the notebook.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

def generate_project_data(n_samples=1000, rng=None):
    """Simulate task durations and the resulting total duration of projects.

    Every simulated project draws three task durations from normal
    distributions and a delay fraction from a uniform distribution. The total
    duration is the sum of the three tasks scaled by ``1 + delay``.

    Parameters
    ----------
    n_samples : int, default 1000
        Number of projects (rows) to simulate.
    rng : numpy.random.Generator or numpy.random.RandomState, optional
        Source of randomness; it only needs ``normal`` and ``uniform``
        methods. When omitted, NumPy's global legacy generator (the one
        controlled by ``np.random.seed``) is used, which keeps the output
        identical to calling the function without this argument before it
        existed. Passing a dedicated generator makes the result independent
        of whatever else has consumed the global random state.

    Returns
    -------
    pandas.DataFrame
        One row per project with the columns ``Task_A_Duration``,
        ``Task_B_Duration``, ``Task_C_Duration``, ``Delay_Percentage`` and
        ``Total_Project_Duration``. Despite its name, ``Delay_Percentage``
        holds a fraction drawn from ``[0, 0.2)``, not a value on a 0-20 scale.

    Notes
    -----
    The normal draws are not clipped, so a negative duration is possible in
    principle (it is just very unlikely with these means and deviations).
    """
    source = np.random if rng is None else rng

    columns = ['Task_A_Duration', 'Task_B_Duration', 'Task_C_Duration', 'Delay_Percentage', 'Total_Project_Duration']

    rows = []
    for _ in range(n_samples):
        # The draw order (A, B, C, delay) is part of the contract: changing it
        # would change which values a given seed produces.
        task_a = source.normal(10, 2)    # Task A, mean 10 days
        task_b = source.normal(20, 5)    # Task B, mean 20 days
        task_c = source.normal(15, 3)    # Task C, mean 15 days
        delay = source.uniform(0, 0.2)   # Random delay between 0% and 20%

        # Total project duration: tasks run back to back, then inflated by the delay
        total_duration = (task_a + task_b + task_c) * (1 + delay)

        rows.append([task_a, task_b, task_c, delay, total_duration])

    return pd.DataFrame(rows, columns=columns)

# Build the dataset with the default number of samples, then print its first
# rows as a quick sanity check of the columns and value ranges.
df = generate_project_data()
head_rows = df.head()
print(head_rows)

In [None]:
# Machine learning model
#
# Features are the three task durations plus the delay fraction; the target is
# the total project duration that generate_project_data derives from them.

X = df[['Task_A_Duration', 'Task_B_Duration', 'Task_C_Duration', 'Delay_Percentage']]
y = df['Total_Project_Duration']

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split identical between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Pin the forest's seed too. Without it the bootstrap samples and feature
# choices come from NumPy's global random state, so re-running this cell on
# its own would train a different model and print different scores.
model = RandomForestRegressor(random_state=42)
model.fit(X_train, y_train)

# For a regressor, score() reports the coefficient of determination (R^2).
print(f"Training Score: {model.score(X_train, y_train):.2f}")
print(f"Testing Score: {model.score(X_test, y_test):.2f}")


In [None]:
# SHAP explainer
# Wrap the fitted random forest in SHAP's tree-specific explainer and compute
# SHAP values for the held-out test features.

explainer = shap.TreeExplainer(model)
shap_values = explainer.shap_values(X_test)

# Plot the SHAP summary for the test set, using X_test for the feature values
# and column names.
# NOTE(review): presumably one row per feature, ordered by overall impact —
# confirm against the installed SHAP version.
shap.summary_plot(shap_values, X_test)