In [1]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, root_mean_squared_error, mean_absolute_percentage_error

import plotly.express as px
import plotly.graph_objects as go
from datetime import timedelta

In [2]:
def load_and_merge(coin):
    if coin == 'BTC':
        price = pd.read_csv('btc-usd-max.csv', parse_dates=['snapped_at'])
        github = pd.read_csv('bitcoin_bitcoin_github_social_signals.csv')
    elif coin == 'ETH':
        price = pd.read_csv('eth-usd-max.csv', parse_dates=['snapped_at'])
        github = pd.read_csv('ethereum_go-ethereum_github_social_signals.csv')
    else:
        print('Salah nama koin')
        return

    # Normalize & remove timezone
    price['snapped_at'] = pd.to_datetime(price['snapped_at']).dt.tz_localize(None).dt.normalize()
    github['date'] = pd.to_datetime(github['date'], errors='coerce').dt.tz_localize(None).dt.normalize()

    price = price.dropna(subset=['snapped_at'])
    github = github.dropna(subset=['date'])

    min_date = price['snapped_at'].min()
    max_date = price['snapped_at'].max()

    github = github[(github['date'] >= min_date) & (github['date'] <= max_date)]

    df = pd.merge(price, github, left_on='snapped_at', right_on='date', how='left')
    df = df.drop(columns=['date'])
    return df

In [3]:
def preprocess(df):
    # Pastikan harga numerik
    df['price'] = pd.to_numeric(df['price'], errors='coerce')

    # Urutkan berdasarkan waktu
    df = df.sort_values('snapped_at')

    # Isi market cap yang hilang dengan forward fill
    df['market_cap'] = df['market_cap'].ffill()

    # Gabungkan issue GitHub
    issue_cols = [col for col in ['issues_opened', 'issues_closed', 'issue_comments'] if col in df.columns]
    if issue_cols:
        df['issue_activity'] = df[issue_cols].sum(axis=1)
        df.drop(columns=issue_cols, inplace=True)

    # Gabungkan pulls Github
    pull_cols = [col for col in ['pulls_opened', 'pulls_merged', 'pulls_closed'] if col in df.columns]
    if pull_cols:
        df['pull_activity'] = df[pull_cols].sum(axis=1)
        df.drop(columns=pull_cols, inplace=True)

    # Hapus stars dan forks
    df.drop(columns=[col for col in ['stars', 'forks'] if col in df.columns], inplace=True)

    return df

In [4]:
def calculate_metrics(y_true, y_pred, model_name="Model"):
    # Konversi input ke array NumPy untuk konsistensi
    y_true_np = np.array(y_true)
    y_pred_np = np.array(y_pred)

    # Pemeriksaan awal untuk panjang dan kekosongan
    if len(y_true_np) == 0:
        print(f"  Peringatan: y_true kosong untuk {model_name}. Metrik tidak dihitung.")
        return {'rmse': np.nan, 'mae': np.nan, 'mape': np.nan, 'r2': np.nan}
    if len(y_true_np) != len(y_pred_np):
        print(f"  Peringatan: Panjang y_true ({len(y_true_np)}) dan y_pred ({len(y_pred_np)}) tidak cocok untuk {model_name}. Metrik tidak dihitung.")
        min_len = min(len(y_true_np), len(y_pred_np))
        y_true_np = y_true_np[:min_len]
        y_pred_np = y_pred_np[:min_len]
        if min_len == 0:
            print(f"  Peringatan: Setelah penyesuaian panjang, data untuk {model_name} kosong. Metrik tidak dihitung.")
            return {'rmse': np.nan, 'mae': np.nan, 'mape': np.nan, 'r2': np.nan}


    rmse_val = root_mean_squared_error(y_true_np, y_pred_np)
    mae_val = mean_absolute_error(y_true_np, y_pred_np)
    r2_val = r2_score(y_true_np, y_pred_np)
    mape_val = mean_absolute_percentage_error(y_true_np, y_pred_np) * 100
    
    print(f"\n--- Metrik Evaluasi untuk {model_name} ---")
    print(f"RMSE: {rmse_val:.4f}")
    print(f"MAE: {mae_val:.4f}")
    if not np.isnan(mape_val): 
        print(f"MAPE: {mape_val:.2f}%")
    else: 
        print("MAPE: Tidak dapat dihitung")
    print(f"R2 Score: {r2_val:.4f}")
    
    return {'rmse': rmse_val, 'mae': mae_val, 'mape': mape_val, 'r2': r2_val}

In [5]:
# Fungsi untuk memuat dan memproses data aktual dari CSV untuk evaluasi akhir
def load_actual_data_for_evaluation(csv_file_path, n_periods, expected_start_date):
    try:
        df_actual = pd.read_csv(csv_file_path).iloc[-n_periods:]
        
        date_col_actual = None
        if 'snapped_at' in df_actual.columns: date_col_actual = 'snapped_at'
        elif 'Date' in df_actual.columns: date_col_actual = 'Date'
        else: raise KeyError("Kolom tanggal ('snapped_at' atau 'Date') tidak ditemukan di CSV aktual.")
        
        df_actual['date_norm'] = pd.to_datetime(df_actual[date_col_actual]).dt.tz_localize(None).dt.normalize()
        
        price_col_actual = None
        possible_price_cols = ['Close', 'price', 'Price', 'close']
        for col in possible_price_cols:
            if col in df_actual.columns: price_col_actual = col; break
        if price_col_actual is None: raise KeyError(f"Kolom harga (e.g., 'Close', 'price') tidak ditemukan. Kolom: {df_actual.columns.tolist()}")
            
        df_actual = df_actual.set_index('date_norm')[[price_col_actual]].rename(columns={price_col_actual: 'Actual_Price'})
        df_actual = df_actual.sort_index()
        
        # Ambil data yang relevan dengan periode prediksi
        # Pastikan data aktual mencakup periode prediksi
        expected_end_date = expected_start_date + timedelta(days=n_periods - 1)
        df_actual_eval = df_actual[(df_actual.index >= expected_start_date) & (df_actual.index <= expected_end_date)]
        
        if len(df_actual_eval) < n_periods:
            print(f"Peringatan: Data aktual hanya memiliki {len(df_actual_eval)} poin untuk periode {n_periods} hari yang diharapkan.")
            print(f"Periode yang diharapkan: {expected_start_date.strftime('%Y-%m-%d')} hingga {expected_end_date.strftime('%Y-%m-%d')}")
            print(f"Data aktual tersedia dari {df_actual.index.min().strftime('%Y-%m-%d')} hingga {df_actual.index.max().strftime('%Y-%m-%d')}")


        return df_actual_eval
    except FileNotFoundError:
        print(f"ERROR: File '{csv_file_path}' tidak ditemukan.")
        return pd.DataFrame()
    except Exception as e:
        print(f"ERROR saat memuat data aktual: {e}")
        return pd.DataFrame()

In [6]:
# Plot Matriks Korelasi
def plot_corr(df, title):
    corr = df.select_dtypes(include=[np.number]).corr()
    fig = px.imshow(
        corr,
        text_auto=True,
        color_continuous_scale='RdBu',
        zmin=-1, zmax=1,
        aspect="auto",
        labels=dict(x="Fitur", y="Fitur", color="Korelasi")
    )
    fig.update_layout(title=title, width=800, height=700)
    fig.show()

In [7]:
def evaluate_and_plot_prediction(model_name, df_predictions, target_column, actual_data_path, n_days, start_date, coin_name):
    """
    Evaluasi hasil prediksi terhadap data aktual dan tampilkan plot perbandingan.

    Parameters:
    - model_name: str, nama model untuk ditampilkan di grafik dan metrik.
    - df_predictions: DataFrame, berisi hasil prediksi dengan index berupa tanggal.
    - target_column: str, nama kolom hasil prediksi di df_predictions.
    - actual_data_path: str, path ke file CSV berisi data aktual.
    - n_days: int, jumlah hari ke depan untuk evaluasi.
    - start_date: datetime, tanggal awal prediksi.
    """
    print(f"\n--- Prediksi {n_days} Hari ke Depan ({model_name}) ---")
    print(df_predictions[[target_column]])

    print(f"\n--- Evaluasi Prediksi {model_name} dengan Data Aktual ---")
    actual_data = load_actual_data_for_evaluation(actual_data_path, n_days, start_date)

    if actual_data.empty or df_predictions[target_column].isnull().all():
        print(f"Data aktual tidak tersedia atau semua prediksi {model_name} adalah NaN.")
        return

    comparison = df_predictions.join(actual_data, how='inner')

    if comparison.empty or 'Actual_Price' not in comparison.columns or target_column not in comparison.columns:
        print(f"Gagal menggabungkan prediksi {model_name} dengan data aktual atau kolom tidak ditemukan.")
        return

    comparison.dropna(subset=['Actual_Price', target_column], inplace=True)

    if comparison.empty:
        print(f"Tidak ada data yang cocok untuk evaluasi {model_name} setelah dropna.")
        return

    # Gunakan fungsi yang menyimpan metrik
    if 'calculate_metrics_with_storage' in globals():
        calculate_metrics_with_storage(comparison['Actual_Price'], comparison[target_column], 
                                     f"{model_name} Pred vs Actual", coin_name, "Multivariate Time Series")
    else:
        calculate_metrics(comparison['Actual_Price'], comparison[target_column], f"{model_name} Pred vs Actual")

    fig = go.Figure()

    # Harga Aktual
    fig.add_trace(go.Scatter(
        x=comparison.index,
        y=comparison['Actual_Price'],
        mode='lines+markers',
        name='Harga Aktual',
        marker=dict(symbol='circle', size=8),
        line=dict(color='blue')
    ))

    # Harga Prediksi
    fig.add_trace(go.Scatter(
        x=comparison.index,
        y=comparison[target_column],
        mode='lines+markers',
        name=f'Prediksi {model_name}',
        marker=dict(symbol='x', size=8),
        line=dict(color='red', dash='dash')
    ))

    fig.update_layout(
        title_text=f'Perbandingan Harga Prediksi {model_name} dengan Harga Aktual {coin_name} dalam {n_days} Hari ke Depan',
        xaxis_title='Tanggal',
        yaxis_title='Harga',
        legend_title_text='Legenda',
        hovermode="x unified"
    )

    fig.show()


# BTC

In [8]:
btc = load_and_merge('BTC')
btc

Unnamed: 0,snapped_at,price,market_cap,total_volume,stars,forks,issues_opened,issues_closed,pulls_opened,pulls_merged,pulls_closed,commits,issue_comments
0,2013-04-28,135.300000,1.500518e+09,0.000000e+00,0,0,1,1,3,1,0,4,4
1,2013-04-29,141.960000,1.575032e+09,0.000000e+00,0,0,1,4,2,5,1,9,4
2,2013-04-30,135.300000,1.501657e+09,0.000000e+00,0,0,3,3,3,1,0,5,2
3,2013-05-01,117.000000,1.298952e+09,0.000000e+00,0,0,0,0,5,0,0,4,5
4,2013-05-02,103.430000,1.148668e+09,0.000000e+00,0,0,1,1,5,1,2,5,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4386,2025-05-03,96855.568134,1.923252e+12,2.327638e+10,0,0,2,3,5,0,0,1,1
4387,2025-05-04,95922.868424,1.904895e+12,1.379755e+10,0,0,0,0,2,0,1,1,1
4388,2025-05-05,94326.620485,1.872812e+12,1.490312e+10,0,0,0,0,4,3,4,3,1
4389,2025-05-06,94758.823711,1.882511e+12,2.408646e+10,0,0,2,16,6,5,0,7,1


In [9]:
# btc_pre = preprocess(btc).drop(columns=['issue_activity', 'pull_activity'])
btc_pre = btc.copy()
btc_pre

Unnamed: 0,snapped_at,price,market_cap,total_volume,stars,forks,issues_opened,issues_closed,pulls_opened,pulls_merged,pulls_closed,commits,issue_comments
0,2013-04-28,135.300000,1.500518e+09,0.000000e+00,0,0,1,1,3,1,0,4,4
1,2013-04-29,141.960000,1.575032e+09,0.000000e+00,0,0,1,4,2,5,1,9,4
2,2013-04-30,135.300000,1.501657e+09,0.000000e+00,0,0,3,3,3,1,0,5,2
3,2013-05-01,117.000000,1.298952e+09,0.000000e+00,0,0,0,0,5,0,0,4,5
4,2013-05-02,103.430000,1.148668e+09,0.000000e+00,0,0,1,1,5,1,2,5,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4386,2025-05-03,96855.568134,1.923252e+12,2.327638e+10,0,0,2,3,5,0,0,1,1
4387,2025-05-04,95922.868424,1.904895e+12,1.379755e+10,0,0,0,0,2,0,1,1,1
4388,2025-05-05,94326.620485,1.872812e+12,1.490312e+10,0,0,0,0,4,3,4,3,1
4389,2025-05-06,94758.823711,1.882511e+12,2.408646e+10,0,0,2,16,6,5,0,7,1


In [10]:
# Persiapan DataFrame utama
df_full = btc_pre.copy()
df_full['snapped_at'] = pd.to_datetime(df_full['snapped_at'])
df_full = df_full.set_index('snapped_at')
df_full.sort_index(inplace=True) # Pastikan data terurut waktu

# Konfigurasi prediksi
TARGET_COL = 'price'
N_FUTURE_PERIODS = 21
LAST_HISTORICAL_DATE = df_full.index.max()
start_date = pd.to_datetime(LAST_HISTORICAL_DATE + timedelta(days=1))

print(f"Data historis terakhir pada: {LAST_HISTORICAL_DATE.strftime('%Y-%m-%d')}")
print(f"Jumlah baris data historis: {len(df_full)}")
print(f"Prediksi dimulai dari: {start_date.strftime('%Y-%m-%d')}")

Data historis terakhir pada: 2025-05-07
Jumlah baris data historis: 4391
Prediksi dimulai dari: 2025-05-08


In [11]:
df_full.columns

Index(['price', 'market_cap', 'total_volume', 'stars', 'forks',
       'issues_opened', 'issues_closed', 'pulls_opened', 'pulls_merged',
       'pulls_closed', 'commits', 'issue_comments'],
      dtype='object')

## Multivariate Time Series

In [12]:
# Gunakan semua fitur kecuali target (price)
X_features = [col for col in df_full.columns if col != 'price']

### XGBoost

In [13]:
# 1. Konfigurasi
best_params = {
    'n_estimators': 1000,
    'learning_rate': 0.7,
    'objective': 'reg:squarederror',
    'max_depth': 11,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'gamma': 3,
    'lambda': 3,
    'alpha': 3,
    'min_child_weight': 10
}

# 2. Model price berdasarkan fitur exogenous (tanpa prediksi fitur exogenous)
df_price = df_full.copy()

X = df_price[X_features]
y = df_price['price']

model_price = xgb.XGBRegressor(**best_params)
model_price.fit(X, y)

# 3. Untuk prediksi masa depan, hitung trend dari data historis
def calculate_trend(series, window=30):
    """Hitung trend linear dari data historis"""
    recent_data = series.tail(window)
    if len(recent_data) < 2:
        return 0
    
    x = np.arange(len(recent_data))
    y = recent_data.values
    
    # Linear regression sederhana
    slope = np.polyfit(x, y, 1)[0]
    return slope

# Hitung trend untuk setiap fitur exogenous
trends = {}
for feat in X_features:
    trends[feat] = calculate_trend(df_full[feat])

print(f"\nTrend yang dihitung:")
for feat, trend in trends.items():
    print(f"  {feat}: {trend:.6f}")

# Nilai awal (terakhir) dari fitur exogenous
initial_values = df_full[X_features].iloc[-1].copy()
print(f"\nNilai awal fitur:")
for feat in X_features:
    print(f"  {feat}: {initial_values[feat]:.2f}")

# 4. Prediksi harga untuk 21 hari ke depan dengan trend
future_dates = pd.date_range(start=start_date, periods=N_FUTURE_PERIODS)
future_predictions = []
predicted_features = []

for i in range(N_FUTURE_PERIODS):
    # Update fitur berdasarkan trend
    current_features = initial_values.copy()
    for feat in X_features:
        current_features[feat] = initial_values[feat] + (trends[feat] * (i + 1))
    
    # Prediksi harga
    pred_price = model_price.predict([current_features.values])[0]
    future_predictions.append(pred_price)
    predicted_features.append(current_features.copy())

# 5. Buat DataFrame hasil prediksi
predicted_future_X = pd.DataFrame({
    'price_pred_xgb': future_predictions
}, index=future_dates)

# Tambahkan fitur yang diprediksi untuk referensi
for feat in X_features:
    predicted_future_X[feat] = [pf[feat] for pf in predicted_features]

predicted_future_X


Trend yang dihitung:
  market_cap: 13167165580.770658
  total_volume: -1075236863.141995
  stars: 0.000000
  forks: 0.000000
  issues_opened: -0.000445
  issues_closed: 0.107453
  pulls_opened: 0.098776
  pulls_merged: -0.022024
  pulls_closed: 0.013348
  commits: 0.071190
  issue_comments: -0.033370

Nilai awal fitur:
  market_cap: 1923643840814.60
  total_volume: 23348052607.07
  stars: 0.00
  forks: 0.00
  issues_opened: 2.00
  issues_closed: 3.00
  pulls_opened: 10.00
  pulls_merged: 4.00
  pulls_closed: 1.00
  commits: 17.00
  issue_comments: 1.00


Unnamed: 0,price_pred_xgb,market_cap,total_volume,stars,forks,issues_opened,issues_closed,pulls_opened,pulls_merged,pulls_closed,commits,issue_comments
2025-05-08,98015.835938,1936811000000.0,22272820000.0,0.0,0.0,1.999555,3.107453,10.098776,3.977976,1.013348,17.07119,0.96663
2025-05-09,97964.898438,1949978000000.0,21197580000.0,0.0,0.0,1.99911,3.214905,10.197553,3.955951,1.026696,17.14238,0.933259
2025-05-10,100641.265625,1963145000000.0,20122340000.0,0.0,0.0,1.998665,3.322358,10.296329,3.933927,1.040044,17.213571,0.899889
2025-05-11,100608.34375,1976313000000.0,19047110000.0,0.0,0.0,1.99822,3.429811,10.395106,3.911902,1.053393,17.284761,0.866518
2025-05-12,100573.226562,1989480000000.0,17971870000.0,0.0,0.0,1.997775,3.537264,10.493882,3.889878,1.066741,17.355951,0.833148
2025-05-13,100498.890625,2002647000000.0,16896630000.0,0.0,0.0,1.99733,3.644716,10.592659,3.867853,1.080089,17.427141,0.799778
2025-05-14,103587.976562,2015814000000.0,15821390000.0,0.0,0.0,1.996885,3.752169,10.691435,3.845829,1.093437,17.498331,0.766407
2025-05-15,103589.617188,2028981000000.0,14746160000.0,0.0,0.0,1.99644,3.859622,10.790211,3.823804,1.106785,17.569522,0.733037
2025-05-16,103796.1875,2042148000000.0,13670920000.0,0.0,0.0,1.995996,3.967075,10.888988,3.80178,1.120133,17.640712,0.699666
2025-05-17,103637.484375,2055315000000.0,12595680000.0,0.0,0.0,1.995551,4.074527,10.987764,3.779755,1.133482,17.711902,0.666296


In [14]:
# predicted_future_X.to_csv('Prediction/btc-xgb-21d-price.csv', index=True, sep=',')

In [15]:
# df_future_dates = pd.read_csv('Prediction/btc-xgb-21d-price.csv')
# df_future_dates['date'] = pd.date_range(start='2025-05-08', periods=len(df_future_dates))
# df_future_dates.set_index('date', inplace=True)

df_future_dates = predicted_future_X.copy()

start_date = df_future_dates.index[0]
evaluate_and_plot_prediction('XGBoost', df_future_dates, 'price_pred_xgb', 'btc-usd-max_21days.csv', N_FUTURE_PERIODS, start_date, coin_name='BTC')


--- Prediksi 21 Hari ke Depan (XGBoost) ---
            price_pred_xgb
2025-05-08    98015.835938
2025-05-09    97964.898438
2025-05-10   100641.265625
2025-05-11   100608.343750
2025-05-12   100573.226562
2025-05-13   100498.890625
2025-05-14   103587.976562
2025-05-15   103589.617188
2025-05-16   103796.187500
2025-05-17   103637.484375
2025-05-18   103617.757812
2025-05-19   103617.609375
2025-05-20   103607.625000
2025-05-21   103623.750000
2025-05-22   103631.843750
2025-05-23   103632.656250
2025-05-24   103660.789062
2025-05-25   103482.179688
2025-05-26   103488.578125
2025-05-27   103485.421875
2025-05-28   103479.726562

--- Evaluasi Prediksi XGBoost dengan Data Aktual ---

--- Metrik Evaluasi untuk XGBoost Pred vs Actual ---
RMSE: 3855.2508
MAE: 3130.3636
MAPE: 2.92%
R2 Score: -0.4513


In [16]:
# Visualisasi feature importance XGBoost
importances = model_price.feature_importances_
fig = px.bar(x=X_features, y=importances, labels={'x': 'Fitur', 'y': 'Importance'},
             title='Feature Importance - XGBoost')
fig.show()
for feat, imp in zip(X_features, importances):
    print(f"{feat}: {imp:.4f}")


market_cap: 0.9990
total_volume: 0.0002
stars: 0.0000
forks: 0.0000
issues_opened: 0.0001
issues_closed: 0.0001
pulls_opened: 0.0001
pulls_merged: 0.0001
pulls_closed: 0.0001
commits: 0.0001
issue_comments: 0.0001


### Random Forest

In [17]:
# 1. Konfigurasi
rf_params = {
    'ccp_alpha': True,
    'oob_score': True,
    'random_state': 42
}

# 2. Model price berdasarkan fitur exogenous (tanpa prediksi fitur exogenous)
df_price = df_full.copy()
df_price.dropna(subset=X_features + ['price'], inplace=True)

X = df_price[X_features]
y = df_price['price']

model_price = RandomForestRegressor(**rf_params)
model_price.fit(X, y)

# 3. Untuk prediksi masa depan, hitung trend dari data historis
def calculate_trend_rf(series, window=30):
    """Hitung trend linear dari data historis"""
    recent_data = series.tail(window)
    if len(recent_data) < 2:
        return 0
    
    x = np.arange(len(recent_data))
    y = recent_data.values
    
    # Linear regression sederhana
    slope = np.polyfit(x, y, 1)[0]
    return slope

# Hitung trend untuk setiap fitur exogenous
trends = {}
for feat in X_features:
    trends[feat] = calculate_trend_rf(df_full[feat])

# Nilai awal (terakhir) dari fitur exogenous
initial_values = df_full[X_features].iloc[-1].copy()

# 4. Prediksi harga untuk 21 hari ke depan dengan trend
future_dates = pd.date_range(start=start_date, periods=N_FUTURE_PERIODS)
future_predictions = []

for i in range(N_FUTURE_PERIODS):
    # Update fitur berdasarkan trend
    current_features = initial_values.copy()
    for feat in X_features:
        current_features[feat] = initial_values[feat] + (trends[feat] * (i + 1))
    
    pred_price = model_price.predict(pd.DataFrame([current_features], columns=X_features))[0]
    future_predictions.append(pred_price)

# 5. Buat DataFrame hasil prediksi
predicted_future_X = pd.DataFrame({
    'price_pred_rf': future_predictions
}, index=future_dates)

predicted_future_X

Unnamed: 0,price_pred_rf
2025-05-08,97823.138944
2025-05-09,98454.701365
2025-05-10,99257.637798
2025-05-11,99957.524179
2025-05-12,100335.836767
2025-05-13,101434.995204
2025-05-14,101650.570202
2025-05-15,102204.737554
2025-05-16,102749.19353
2025-05-17,104015.122435


In [18]:
# predicted_future_X.to_csv('Prediction/btc-rf-21d-price.csv', index=True, sep=',')

In [19]:
# df_future_dates = pd.read_csv('Prediction/btc-rf-21d-price.csv')
# df_future_dates['date'] = pd.date_range(start='2025-05-08', periods=len(df_future_dates))
# df_future_dates.set_index('date', inplace=True)

df_future_dates = predicted_future_X.copy()

start_date = df_future_dates.index[0]
evaluate_and_plot_prediction('Random Forest', df_future_dates, 'price_pred_rf', 'btc-usd-max_21days.csv', N_FUTURE_PERIODS, start_date, 'BTC')


--- Prediksi 21 Hari ke Depan (Random Forest) ---
            price_pred_rf
2025-05-08   97823.138944
2025-05-09   98454.701365
2025-05-10   99257.637798
2025-05-11   99957.524179
2025-05-12  100335.836767
2025-05-13  101434.995204
2025-05-14  101650.570202
2025-05-15  102204.737554
2025-05-16  102749.193530
2025-05-17  104015.122435
2025-05-18  104285.033002
2025-05-19  104757.517053
2025-05-20  105656.575967
2025-05-21  105657.657263
2025-05-22  105696.184854
2025-05-23  105720.760287
2025-05-24  105720.760287
2025-05-25  105720.760287
2025-05-26  105720.760287
2025-05-27  105720.760287
2025-05-28  105720.760287

--- Evaluasi Prediksi Random Forest dengan Data Aktual ---

--- Metrik Evaluasi untuk Random Forest Pred vs Actual ---
RMSE: 2907.7116
MAE: 2442.9710
MAPE: 2.30%
R2 Score: 0.1745


In [20]:
# Visualisasi feature importance Random Forest
importances = model_price.feature_importances_
fig = px.bar(x=X_features, y=importances, labels={'x': 'Fitur', 'y': 'Importance'},
             title='Feature Importance - Random Forest')
fig.show()
for feat, imp in zip(X_features, importances):
    print(f"{feat}: {imp:.4f}")


market_cap: 0.9998
total_volume: 0.0001
stars: 0.0000
forks: 0.0000
issues_opened: 0.0000
issues_closed: 0.0000
pulls_opened: 0.0000
pulls_merged: 0.0000
pulls_closed: 0.0000
commits: 0.0000
issue_comments: 0.0001


### Decision Tree

In [21]:
# 1. Konfigurasi
dt_params = {
    'random_state': 42
}

# 2. Model price berdasarkan fitur exogenous (tanpa prediksi fitur exogenous)
df_price = df_full.copy()
df_price.dropna(subset=X_features + ['price'], inplace=True)

X = df_price[X_features]
y = df_price['price']

model_price = DecisionTreeRegressor(**dt_params)
model_price.fit(X, y)

# 3. Untuk prediksi masa depan, hitung trend dari data historis
def calculate_trend_dt(series, window=30):
    """Hitung trend linear dari data historis"""
    recent_data = series.tail(window)
    if len(recent_data) < 2:
        return 0
    
    x = np.arange(len(recent_data))
    y = recent_data.values
    
    # Linear regression sederhana
    slope = np.polyfit(x, y, 1)[0]
    return slope

# Hitung trend untuk setiap fitur exogenous
trends = {}
for feat in X_features:
    trends[feat] = calculate_trend_dt(df_full[feat])

# Nilai awal (terakhir) dari fitur exogenous
initial_values = df_full[X_features].iloc[-1].copy()

# 4. Prediksi harga untuk 21 hari ke depan dengan trend
future_dates = pd.date_range(start=start_date, periods=N_FUTURE_PERIODS)
future_predictions = []

for i in range(N_FUTURE_PERIODS):
    # Update fitur berdasarkan trend
    current_features = initial_values.copy()
    for feat in X_features:
        current_features[feat] = initial_values[feat] + (trends[feat] * (i + 1))
    
    # Prediksi harga
    pred_price = model_price.predict(pd.DataFrame([current_features], columns=X_features))[0]
    future_predictions.append(pred_price)

# 5. Buat DataFrame hasil prediksi
predicted_future_X = pd.DataFrame({
    'price_pred_dtree': future_predictions
}, index=future_dates)

predicted_future_X

Unnamed: 0,price_pred_dtree
2025-05-08,97851.353771
2025-05-09,98384.317382
2025-05-10,99781.829992
2025-05-11,99781.829992
2025-05-12,100313.152398
2025-05-13,101235.371703
2025-05-14,101466.860666
2025-05-15,102552.248743
2025-05-16,102552.248743
2025-05-17,103673.535095


In [22]:
# predicted_future_X.to_csv('Prediction/btc-dtree-21d-price.csv', index=True, sep=',')

In [23]:
# df_future_dates = pd.read_csv('Prediction/btc-dtree-21d-price.csv')
# df_future_dates['date'] = pd.date_range(start='2025-05-08', periods=len(df_future_dates))
# df_future_dates.set_index('date', inplace=True)

df_future_dates = predicted_future_X.copy()

start_date = df_future_dates.index[0]
evaluate_and_plot_prediction('Decision Tree', df_future_dates, 'price_pred_dtree', 'btc-usd-max_21days.csv', N_FUTURE_PERIODS, start_date, 'BTC')


--- Prediksi 21 Hari ke Depan (Decision Tree) ---
            price_pred_dtree
2025-05-08      97851.353771
2025-05-09      98384.317382
2025-05-10      99781.829992
2025-05-11      99781.829992
2025-05-12     100313.152398
2025-05-13     101235.371703
2025-05-14     101466.860666
2025-05-15     102552.248743
2025-05-16     102552.248743
2025-05-17     103673.535095
2025-05-18     103673.535095
2025-05-19     104796.040777
2025-05-20     106182.236820
2025-05-21     106182.236820
2025-05-22     106182.236820
2025-05-23     106182.236820
2025-05-24     106182.236820
2025-05-25     106182.236820
2025-05-26     106182.236820
2025-05-27     106182.236820
2025-05-28     106182.236820

--- Evaluasi Prediksi Decision Tree dengan Data Aktual ---

--- Metrik Evaluasi untuk Decision Tree Pred vs Actual ---
RMSE: 2720.0368
MAE: 2241.0716
MAPE: 2.11%
R2 Score: 0.2776

--- Metrik Evaluasi untuk Decision Tree Pred vs Actual ---
RMSE: 2720.0368
MAE: 2241.0716
MAPE: 2.11%
R2 Score: 0.2776


In [24]:
# Visualisasi feature importance Decision Tree
importances = model_price.feature_importances_
fig = px.bar(x=X_features, y=importances, labels={'x': 'Fitur', 'y': 'Importance'},
             title='Feature Importance - Decision Tree')
fig.show()
for feat, imp in zip(X_features, importances):
    print(f"{feat}: {imp:.4f}")


market_cap: 0.9998
total_volume: 0.0001
stars: 0.0000
forks: 0.0000
issues_opened: 0.0000
issues_closed: 0.0000
pulls_opened: 0.0000
pulls_merged: 0.0000
pulls_closed: 0.0000
commits: 0.0000
issue_comments: 0.0001


# ETH

In [25]:
eth = load_and_merge('ETH')
eth

Unnamed: 0,snapped_at,price,market_cap,total_volume,stars,forks,issues_opened,issues_closed,pulls_opened,pulls_merged,pulls_closed,commits,issue_comments
0,2015-08-07,2.831620,0.000000e+00,9.062200e+04,0,0,9,2,5,6,0,12,2
1,2015-08-08,1.330750,8.033948e+07,3.680700e+05,0,0,3,0,2,1,0,2,2
2,2015-08-10,0.687586,4.155631e+07,4.004641e+05,0,0,3,3,2,0,0,2,2
3,2015-08-11,1.067379,6.453901e+07,1.518998e+06,0,0,1,2,4,3,0,6,2
4,2015-08-12,1.256613,7.601326e+07,2.073893e+06,0,0,4,1,3,2,0,5,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3556,2025-05-03,1841.427591,2.222690e+11,1.168630e+10,0,0,0,1,0,1,1,1,0
3557,2025-05-04,1834.501912,2.214950e+11,6.704990e+09,0,0,0,0,3,2,1,2,0
3558,2025-05-05,1808.156094,2.182163e+11,7.479846e+09,0,0,1,3,6,6,5,8,0
3559,2025-05-06,1820.004460,2.197699e+11,1.054523e+10,0,0,1,3,5,2,4,2,0


In [26]:
# eth_pre = preprocess(eth).drop(columns=['issue_activity', 'commits'])
eth_pre = eth.copy()
eth_pre

Unnamed: 0,snapped_at,price,market_cap,total_volume,stars,forks,issues_opened,issues_closed,pulls_opened,pulls_merged,pulls_closed,commits,issue_comments
0,2015-08-07,2.831620,0.000000e+00,9.062200e+04,0,0,9,2,5,6,0,12,2
1,2015-08-08,1.330750,8.033948e+07,3.680700e+05,0,0,3,0,2,1,0,2,2
2,2015-08-10,0.687586,4.155631e+07,4.004641e+05,0,0,3,3,2,0,0,2,2
3,2015-08-11,1.067379,6.453901e+07,1.518998e+06,0,0,1,2,4,3,0,6,2
4,2015-08-12,1.256613,7.601326e+07,2.073893e+06,0,0,4,1,3,2,0,5,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3556,2025-05-03,1841.427591,2.222690e+11,1.168630e+10,0,0,0,1,0,1,1,1,0
3557,2025-05-04,1834.501912,2.214950e+11,6.704990e+09,0,0,0,0,3,2,1,2,0
3558,2025-05-05,1808.156094,2.182163e+11,7.479846e+09,0,0,1,3,6,6,5,8,0
3559,2025-05-06,1820.004460,2.197699e+11,1.054523e+10,0,0,1,3,5,2,4,2,0


In [27]:
# Persiapan DataFrame utama
df_full = eth_pre.copy()
df_full['snapped_at'] = pd.to_datetime(df_full['snapped_at'])
df_full = df_full.set_index('snapped_at')
df_full.sort_index(inplace=True) # Pastikan data terurut waktu

print(f"Data historis terakhir pada: {LAST_HISTORICAL_DATE.strftime('%Y-%m-%d')}")
print(f"Jumlah baris data historis: {len(df_full)}")

Data historis terakhir pada: 2025-05-07
Jumlah baris data historis: 3561


In [28]:
df_full

Unnamed: 0_level_0,price,market_cap,total_volume,stars,forks,issues_opened,issues_closed,pulls_opened,pulls_merged,pulls_closed,commits,issue_comments
snapped_at,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2015-08-07,2.831620,0.000000e+00,9.062200e+04,0,0,9,2,5,6,0,12,2
2015-08-08,1.330750,8.033948e+07,3.680700e+05,0,0,3,0,2,1,0,2,2
2015-08-10,0.687586,4.155631e+07,4.004641e+05,0,0,3,3,2,0,0,2,2
2015-08-11,1.067379,6.453901e+07,1.518998e+06,0,0,1,2,4,3,0,6,2
2015-08-12,1.256613,7.601326e+07,2.073893e+06,0,0,4,1,3,2,0,5,2
...,...,...,...,...,...,...,...,...,...,...,...,...
2025-05-03,1841.427591,2.222690e+11,1.168630e+10,0,0,0,1,0,1,1,1,0
2025-05-04,1834.501912,2.214950e+11,6.704990e+09,0,0,0,0,3,2,1,2,0
2025-05-05,1808.156094,2.182163e+11,7.479846e+09,0,0,1,3,6,6,5,8,0
2025-05-06,1820.004460,2.197699e+11,1.054523e+10,0,0,1,3,5,2,4,2,0


In [29]:
# Gunakan semua fitur kecuali target (price)
X_features = [col for col in df_full.columns if col != 'price']

### XGBoost

In [30]:
# 1. Konfigurasi
best_params = {
    'n_estimators': 1000,
    'learning_rate': 0.6,
    'objective': 'reg:squarederror'
}

# 2. Model price berdasarkan fitur exogenous (tanpa prediksi fitur exogenous)
df_price = df_full.copy()

X = df_price[X_features]
y = df_price['price']

# Debug info
print(f"Rentang harga historis: ${y.min():.2f} - ${y.max():.2f}")
print(f"Harga rata-rata: ${y.mean():.2f}")
print(f"Harga terakhir: ${y.iloc[-1]:.2f}")
print(f"Jumlah data: {len(y)}")

model_price = xgb.XGBRegressor(**best_params)
model_price.fit(X, y)

# 3. Untuk prediksi masa depan, hitung trend dari data historis
def calculate_trend_eth(series, window=30):
    """Hitung trend linear dari data historis"""
    recent_data = series.tail(window)
    if len(recent_data) < 2:
        return 0
    
    x = np.arange(len(recent_data))
    y = recent_data.values
    
    # Linear regression sederhana
    slope = np.polyfit(x, y, 1)[0]
    return slope

# Hitung trend untuk setiap fitur exogenous
trends = {}
for feat in X_features:
    trends[feat] = calculate_trend_eth(df_full[feat])

# Nilai awal (terakhir) dari fitur exogenous
initial_values = df_full[X_features].iloc[-1].copy()

# 4. Prediksi harga untuk 21 hari ke depan dengan trend
future_dates = pd.date_range(start=start_date, periods=N_FUTURE_PERIODS)
future_predictions = []
predicted_features = []

for i in range(N_FUTURE_PERIODS):
    # Update fitur berdasarkan trend
    current_features = initial_values.copy()
    for feat in X_features:
        current_features[feat] = initial_values[feat] + (trends[feat] * (i + 1))
    
    # Prediksi harga
    pred_price = model_price.predict([current_features.values])[0]
    future_predictions.append(pred_price)
    predicted_features.append(current_features.copy())

# 5. Buat DataFrame hasil prediksi
predicted_future_X = pd.DataFrame({
    'price_pred_xgb': future_predictions
}, index=future_dates)

# Tambahkan fitur yang diprediksi untuk referensi
for feat in X_features:
    predicted_future_X[feat] = [pf[feat] for pf in predicted_features]

predicted_future_X

Rentang harga historis: $0.43 - $4815.00
Harga rata-rata: $1194.77
Harga terakhir: $1816.17
Jumlah data: 3561


Unnamed: 0,price_pred_xgb,market_cap,total_volume,stars,forks,issues_opened,issues_closed,pulls_opened,pulls_merged,pulls_closed,commits,issue_comments
2025-05-08,1817.785522,220737400000.0,11415520000.0,0.0,0.0,-0.027586,-0.05673,5.951502,2.00356,4.096107,1.999778,0.0
2025-05-09,1840.890259,222168900000.0,10880640000.0,0.0,0.0,-0.055172,-0.113459,5.903003,2.007119,4.192214,1.999555,0.0
2025-05-10,1851.906616,223600500000.0,10345770000.0,0.0,0.0,-0.082759,-0.170189,5.854505,2.010679,4.28832,1.999333,0.0
2025-05-11,1863.785889,225032000000.0,9810893000.0,0.0,0.0,-0.110345,-0.226919,5.806007,2.014238,4.384427,1.99911,0.0
2025-05-12,1876.863159,226463500000.0,9276018000.0,0.0,0.0,-0.137931,-0.283648,5.757508,2.017798,4.480534,1.998888,0.0
2025-05-13,1876.483154,227895000000.0,8741143000.0,0.0,0.0,-0.165517,-0.340378,5.70901,2.021357,4.576641,1.998665,0.0
2025-05-14,1902.951416,229326600000.0,8206268000.0,0.0,0.0,-0.193103,-0.397108,5.660512,2.024917,4.672747,1.998443,0.0
2025-05-15,1906.70874,230758100000.0,7671393000.0,0.0,0.0,-0.22069,-0.453838,5.612013,2.028476,4.768854,1.99822,0.0
2025-05-16,1905.888672,232189600000.0,7136518000.0,0.0,0.0,-0.248276,-0.510567,5.563515,2.032036,4.864961,1.997998,0.0
2025-05-17,1929.171631,233621200000.0,6601643000.0,0.0,0.0,-0.275862,-0.567297,5.515017,2.035595,4.961068,1.997775,0.0


In [31]:
# predicted_future_X.to_csv('Prediction/eth-xgb-21d-price.csv', index=True, sep=',')

In [32]:
# df_future_dates = pd.read_csv('Prediction/eth-xgb-21d-price.csv')
# df_future_dates['date'] = pd.date_range(start='2025-05-08', periods=len(df_future_dates))
# df_future_dates.set_index('date', inplace=True)

df_future_dates = predicted_future_X.copy()

start_date = df_future_dates.index[0]

evaluate_and_plot_prediction('XGBoost', df_future_dates, 'price_pred_xgb', 'eth-usd-max_21days.csv', N_FUTURE_PERIODS, start_date, 'ETH')


--- Prediksi 21 Hari ke Depan (XGBoost) ---
            price_pred_xgb
2025-05-08     1817.785522
2025-05-09     1840.890259
2025-05-10     1851.906616
2025-05-11     1863.785889
2025-05-12     1876.863159
2025-05-13     1876.483154
2025-05-14     1902.951416
2025-05-15     1906.708740
2025-05-16     1905.888672
2025-05-17     1929.171631
2025-05-18     1930.656982
2025-05-19     1950.958740
2025-05-20     1950.175049
2025-05-21     1988.877686
2025-05-22     1989.035767
2025-05-23     1989.817749
2025-05-24     2036.271118
2025-05-25     2035.476440
2025-05-26     2040.004272
2025-05-27     2052.946289
2025-05-28     2074.223877

--- Evaluasi Prediksi XGBoost dengan Data Aktual ---

--- Metrik Evaluasi untuk XGBoost Pred vs Actual ---
RMSE: 569.8773
MAE: 549.1374
MAPE: 21.67%
R2 Score: -8.6246


In [33]:
# Visualisasi feature importance XGBoost (ETH)
importances = model_price.feature_importances_
fig = px.bar(x=X_features, y=importances, labels={'x': 'Fitur', 'y': 'Importance'},
             title='Feature Importance - XGBoost (ETH)')
fig.show()
for feat, imp in zip(X_features, importances):
    print(f"{feat}: {imp:.4f}")


market_cap: 0.9988
total_volume: 0.0002
stars: 0.0000
forks: 0.0000
issues_opened: 0.0001
issues_closed: 0.0001
pulls_opened: 0.0001
pulls_merged: 0.0001
pulls_closed: 0.0001
commits: 0.0000
issue_comments: 0.0004


### Random Forest

In [34]:
# 1. Konfigurasi
rf_params = {
    'random_state': 42
}

# 2. Model price berdasarkan fitur exogenous (tanpa prediksi fitur exogenous)
df_price = df_full.copy()
df_price.dropna(subset=X_features + ['price'], inplace=True)

X = df_price[X_features]
y = df_price['price']

model_price = RandomForestRegressor(**rf_params)
model_price.fit(X, y)

# 3. Untuk prediksi masa depan, hitung trend dari data historis
def calculate_trend_rf(series, window=30):
    """Hitung trend linear dari data historis"""
    recent_data = series.tail(window)
    if len(recent_data) < 2:
        return 0
    
    x = np.arange(len(recent_data))
    y = recent_data.values
    
    # Linear regression sederhana
    slope = np.polyfit(x, y, 1)[0]
    return slope

# Hitung trend untuk setiap fitur exogenous
trends = {}
for feat in X_features:
    trends[feat] = calculate_trend_rf(df_full[feat])

# Nilai awal (terakhir) dari fitur exogenous
initial_values = df_full[X_features].iloc[-1].copy()

# 4. Prediksi harga untuk 21 hari ke depan dengan trend
future_dates = pd.date_range(start=start_date, periods=N_FUTURE_PERIODS)
future_predictions = []

for i in range(N_FUTURE_PERIODS):
    # Update fitur berdasarkan trend
    current_features = initial_values.copy()
    for feat in X_features:
        current_features[feat] = initial_values[feat] + (trends[feat] * (i + 1))
    
    # Prediksi harga
    pred_price = model_price.predict(pd.DataFrame([current_features], columns=X_features))[0]
    future_predictions.append(pred_price)

# 5. Buat DataFrame hasil prediksi
predicted_future_X = pd.DataFrame({
    'price_pred_rf': future_predictions
}, index=future_dates)

predicted_future_X

Unnamed: 0,price_pred_rf
2025-05-08,1832.886812
2025-05-09,1842.255585
2025-05-10,1862.794496
2025-05-11,1870.406483
2025-05-12,1880.314956
2025-05-13,1894.784254
2025-05-14,1910.864522
2025-05-15,1919.464332
2025-05-16,1932.2959
2025-05-17,1945.080461


In [35]:
# predicted_future_X.to_csv('Prediction/eth-rf-21d-price.csv', index=True, sep=',')

In [36]:
# df_future_dates = pd.read_csv('Prediction/eth-xgb-21d-price.csv')
# df_future_dates['date'] = pd.date_range(start='2025-05-08', periods=len(df_future_dates))
# df_future_dates.set_index('date', inplace=True)

df_future_dates = predicted_future_X.copy()

start_date = df_future_dates.index[0]

evaluate_and_plot_prediction('Random Forest', df_future_dates, 'price_pred_rf', 'eth-usd-max_21days.csv', N_FUTURE_PERIODS, start_date, 'ETH')


--- Prediksi 21 Hari ke Depan (Random Forest) ---
            price_pred_rf
2025-05-08    1832.886812
2025-05-09    1842.255585
2025-05-10    1862.794496
2025-05-11    1870.406483
2025-05-12    1880.314956
2025-05-13    1894.784254
2025-05-14    1910.864522
2025-05-15    1919.464332
2025-05-16    1932.295900
2025-05-17    1945.080461
2025-05-18    1947.251292
2025-05-19    1965.117942
2025-05-20    1972.209692
2025-05-21    1978.427004
2025-05-22    1998.162975
2025-05-23    2010.137838
2025-05-24    2021.552904
2025-05-25    2032.058765
2025-05-26    2054.544179
2025-05-27    2068.346633
2025-05-28    2070.862601

--- Evaluasi Prediksi Random Forest dengan Data Aktual ---

--- Metrik Evaluasi untuk Random Forest Pred vs Actual ---
RMSE: 560.6051
MAE: 541.1022
MAPE: 21.36%
R2 Score: -8.3140


In [37]:
# Visualisasi feature importance Random Forest (ETH)
importances = model_price.feature_importances_
fig = px.bar(x=X_features, y=importances, labels={'x': 'Fitur', 'y': 'Importance'},
             title='Feature Importance - Random Forest (ETH)')
fig.show()
for feat, imp in zip(X_features, importances):
    print(f"{feat}: {imp:.4f}")


market_cap: 0.9997
total_volume: 0.0002
stars: 0.0000
forks: 0.0000
issues_opened: 0.0000
issues_closed: 0.0000
pulls_opened: 0.0000
pulls_merged: 0.0000
pulls_closed: 0.0000
commits: 0.0000
issue_comments: 0.0001


### Decision Tree

In [38]:
# 1. Konfigurasi
dt_params = {
    'random_state': 42
}

# 2. Model price berdasarkan fitur exogenous (tanpa prediksi fitur exogenous)
df_price = df_full.copy()
df_price.dropna(subset=X_features + ['price'], inplace=True)

X = df_price[X_features]
y = df_price['price']

model_price = DecisionTreeRegressor(**dt_params)
model_price.fit(X, y)

# 3. Untuk prediksi masa depan, hitung trend dari data historis
def calculate_trend_dt_eth(series, window=30):
    """Hitung trend linear dari data historis"""
    recent_data = series.tail(window)
    if len(recent_data) < 2:
        return 0
    
    x = np.arange(len(recent_data))
    y = recent_data.values
    
    # Linear regression sederhana
    slope = np.polyfit(x, y, 1)[0]
    return slope

# Hitung trend untuk setiap fitur exogenous
trends = {}
for feat in X_features:
    trends[feat] = calculate_trend_dt_eth(df_full[feat])

# Nilai awal (terakhir) dari fitur exogenous
initial_values = df_full[X_features].iloc[-1].copy()

# 4. Prediksi harga untuk 21 hari ke depan dengan trend
future_dates = pd.date_range(start=start_date, periods=N_FUTURE_PERIODS)
future_predictions = []

for i in range(N_FUTURE_PERIODS):
    # Update fitur berdasarkan trend
    current_features = initial_values.copy()
    for feat in X_features:
        current_features[feat] = initial_values[feat] + (trends[feat] * (i + 1))
    
    # Prediksi harga
    pred_price = model_price.predict(pd.DataFrame([current_features], columns=X_features))[0]
    future_predictions.append(pred_price)

# 5. Buat DataFrame hasil prediksi
predicted_future_X = pd.DataFrame({
    'price_pred_dtree': future_predictions
}, index=future_dates)

predicted_future_X

Unnamed: 0,price_pred_dtree
2025-05-08,1834.826453
2025-05-09,1846.064264
2025-05-10,1859.80242
2025-05-11,1871.533106
2025-05-12,1877.541328
2025-05-13,1895.230148
2025-05-14,1907.212322
2025-05-15,1923.03599
2025-05-16,1936.701164
2025-05-17,1940.44481


In [39]:
# predicted_future_X.to_csv('Prediction/eth-dtree-21d-price.csv', index=True, sep=',')

In [40]:
# df_future_dates = pd.read_csv('Prediction/eth-dtree-21d-price.csv')
# df_future_dates['date'] = pd.date_range(start='2025-05-08', periods=len(df_future_dates))
# df_future_dates.set_index('date', inplace=True)

df_future_dates = predicted_future_X.copy()

start_date = df_future_dates.index[0]
evaluate_and_plot_prediction('Decision Tree', df_future_dates, 'price_pred_dtree', 'eth-usd-max_21days.csv', N_FUTURE_PERIODS, start_date, 'ETH')


--- Prediksi 21 Hari ke Depan (Decision Tree) ---
            price_pred_dtree
2025-05-08       1834.826453
2025-05-09       1846.064264
2025-05-10       1859.802420
2025-05-11       1871.533106
2025-05-12       1877.541328
2025-05-13       1895.230148
2025-05-14       1907.212322
2025-05-15       1923.035990
2025-05-16       1936.701164
2025-05-17       1940.444810
2025-05-18       1939.785688
2025-05-19       1964.529383
2025-05-20       1974.581194
2025-05-21       1974.581194
2025-05-22       1993.612966
2025-05-23       2009.883543
2025-05-24       2025.937328
2025-05-25       2025.937328
2025-05-26       2051.756718
2025-05-27       2068.598435
2025-05-28       2068.598435

--- Evaluasi Prediksi Decision Tree dengan Data Aktual ---

--- Metrik Evaluasi untuk Decision Tree Pred vs Actual ---
RMSE: 561.6702
MAE: 542.2215
MAPE: 21.41%
R2 Score: -8.3494


In [41]:
# Visualisasi feature importance Decision Tree (ETH)
importances = model_price.feature_importances_
fig = px.bar(x=X_features, y=importances, labels={'x': 'Fitur', 'y': 'Importance'},
             title='Feature Importance - Decision Tree (ETH)')
fig.show()
for feat, imp in zip(X_features, importances):
    print(f"{feat}: {imp:.4f}")


market_cap: 0.9996
total_volume: 0.0002
stars: 0.0000
forks: 0.0000
issues_opened: 0.0000
issues_closed: 0.0000
pulls_opened: 0.0000
pulls_merged: 0.0000
pulls_closed: 0.0000
commits: 0.0000
issue_comments: 0.0001
