In [None]:
# =============================================================================
# LANGKAH 0: INSTALASI DAN UNDUH DATA OTOMATIS
# =============================================================================
# Menginstal library yfinance untuk mengunduh data saham
!pip install yfinance

# Mengimpor library yang dibutuhkan
import yfinance as yf
import pandas as pd

# Download BBCA share prices from Yahoo Finance.
# On any failure (exception OR empty result) fall back to synthetic data so
# that the rest of the notebook can still run.
try:
    ticker_saham = "BBCA.JK"
    tanggal_mulai = "2010-01-01"
    # Today's date as the end bound of the download window.
    tanggal_akhir = pd.to_datetime('today').strftime('%Y-%m-%d')

    data = yf.download(ticker_saham, start=tanggal_mulai, end=tanggal_akhir)

    # NOTE(review): yfinance usually reports network/ticker problems by
    # logging and returning an empty DataFrame rather than raising, so an
    # empty result is treated as a failure here to reach the fallback below.
    if data is None or data.empty:
        raise ValueError(f"Data kosong diterima untuk {ticker_saham}")

    # Recent yfinance versions may return MultiIndex columns
    # (field, ticker) even for a single ticker. Flatten to the level that
    # holds the field names so data['Close'] is a plain Series downstream.
    if isinstance(data.columns, pd.MultiIndex):
        for level in range(data.columns.nlevels):
            level_values = data.columns.get_level_values(level)
            if 'Close' in level_values:
                data.columns = level_values
                break

    # Persist the data as CSV so app.py can use it later.
    data.to_csv('data_saham_BBCA.csv')

    print(f"Data saham {ticker_saham} berhasil diunduh.")

except Exception as e:
    print(f"Gagal mengunduh data. Error: {e}")
    print("Membuat data dummy agar sisa kode bisa berjalan.")
    # numpy is imported further down in the file; import it locally so the
    # fallback does not fail with NameError.
    import numpy as np

    # Build dummy data: random integer closing prices on a daily index.
    date_rng = pd.date_range(start='2010-01-01', end='2023-01-01', freq='D')
    dummy_data = {'Close': np.random.randint(20000, 35000, size=(len(date_rng)))}
    data = pd.DataFrame(dummy_data, index=date_rng)


# =============================================================================
# LANGKAH 1: IMPORT LIBRARY UNTUK ANALISIS
# =============================================================================
import numpy as np 
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")
import seaborn as sns
import pickle

# Library untuk analisis time series
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

# Library untuk peramalan
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error

# =============================================================================
# STEP 2: DATA PRE-PROCESSING
# =============================================================================
# Only the 'Close' column is used for forecasting.
# dropna() is applied as a non-mutating call: using inplace=True on the column
# selection would modify a derived object and can raise pandas'
# SettingWithCopyWarning.
df_close = data[['Close']].dropna()

# Plot the historical closing prices.
plt.figure(figsize=(12, 6))
plt.title('Harga Penutupan Saham BBCA Historis')
plt.xlabel('Tahun')
plt.ylabel('Harga Penutupan (IDR)')
plt.plot(df_close['Close'])
plt.grid(True)
plt.show()


# =============================================================================
# STEP 3: STATIONARITY TEST
# =============================================================================
def adf_test(timeseries):
    """Run the Augmented Dickey-Fuller test on *timeseries* and print a report.

    Prints the test statistic, p-value, number of lags used, number of
    observations used and the critical values, followed by a verdict line:
    the series is reported as stationary when the p-value is <= 0.05.

    Args:
        timeseries: The series passed to ``adfuller`` (lag order is selected
            with ``autolag='AIC'``).

    Returns:
        None. The results are only printed.
    """
    print('Hasil Uji Dickey-Fuller:')

    # The first five entries of the adfuller result are used; any further
    # entries are ignored.
    stat, p_value, used_lags, n_obs, critical_values, *_ = adfuller(
        timeseries, autolag='AIC'
    )

    report = {
        'Test Statistic': stat,
        'p-value': p_value,
        '#Lags Used': used_lags,
        'Number of Observations Used': n_obs,
    }
    report.update(
        {f'Critical Value ({level})': threshold
         for level, threshold in critical_values.items()}
    )
    print(pd.Series(report))

    verdict = "=> Data stasioner" if p_value <= 0.05 else "=> Data tidak stasioner"
    print(verdict)

# Share prices are usually non-stationary, so a first difference is taken and
# the ADF test is run on both the original and the differenced series.
df_diff = df_close.diff().dropna()

for heading, series in (
    ("--- Uji pada data asli ---", df_close['Close']),
    ("\n--- Uji pada data setelah differencing ---", df_diff['Close']),
):
    print(heading)
    adf_test(series)


# =============================================================================
# STEP 4: CHOOSE THE ARIMA PARAMETERS (p, d, q)
# =============================================================================
# d = 1 because the series was differenced once;
# p and q are read off the PACF and ACF plots of the differenced series.
fig, ax = plt.subplots(1, 2, figsize=(16, 4))
panels = (
    (plot_pacf, 'Partial Autocorrelation Function (PACF)'),
    (plot_acf, 'Autocorrelation Function (ACF)'),
)
for axis, (draw, title) in zip(ax, panels):
    draw(df_diff, lags=20, ax=axis)
    axis.set_title(title)
plt.show()
# From the plots, p=5 (PACF) and q=5 (ACF) are tried as a starting point.

# Chronological 80/20 split into training and test data (no shuffling).
train_size = int(len(df_close) * 0.8)
train_data = df_close.iloc[:train_size]
test_data = df_close.iloc[train_size:]


# =============================================================================
# STEP 5: BUILD AND FIT THE ARIMA MODEL
# =============================================================================
# Try order (p, d, q) = (5, 1, 5).
# Fitting may take a little while.
model_arima = ARIMA(train_data['Close'], order=(5, 1, 5))
model_fit = model_arima.fit()

# Show the model summary.
print(model_fit.summary())


# =============================================================================
# STEP 6: PREDICTION AND EVALUATION
# =============================================================================
# Predict the positions that correspond to the test set
# (first row after the training data up to the last row of df_close).
start_index = len(train_data)
end_index = len(df_close) - 1
# `typ='levels'` belonged to the legacy ARIMA API; the results object of
# statsmodels.tsa.arima.model.ARIMA has no such parameter and treats it as an
# unknown keyword (FutureWarning today, announced to become a TypeError).
# Predictions from this model are already on the original price scale.
predictions = model_fit.predict(start=start_index, end=end_index).rename('ARIMA Predictions')

# When the training index carries no frequency information, the predictions
# come back on a positional integer index. Re-use the test dates so the two
# curves share the same x-axis; the lengths match by construction.
predictions.index = test_data.index

# Plot predictions against the actual data.
plt.figure(figsize=(12, 6))
plt.plot(test_data['Close'], label='Data Aktual')
plt.plot(predictions, label='Prediksi ARIMA', color='red')
plt.title('Prediksi Harga Saham vs Data Aktual')
plt.xlabel('Tanggal')
plt.ylabel('Harga Penutupan (IDR)')
plt.legend()
plt.grid(True)
plt.show()

# Evaluate the model on the test set.
rmse = np.sqrt(mean_squared_error(test_data['Close'], predictions))
mape = mean_absolute_percentage_error(test_data['Close'], predictions)
print(f"Root Mean Squared Error (RMSE): {rmse:.2f}")
print(f"Mean Absolute Percentage Error (MAPE): {mape:.2%}")


# =============================================================================
# STEP 7: REFIT THE MODEL ON ALL DATA AND SAVE IT
# =============================================================================
final_model = ARIMA(df_close['Close'], order=(5, 1, 5))
final_fit = final_model.fit()

# Forecast the next 30 steps.
forecast = final_fit.forecast(steps=30)

# If the historical index is date-based but the forecast came back on a
# positional integer index (which happens when the date index has no
# frequency), give the forecast a business-day date index that continues
# after the last observation. Otherwise the forecast cannot be drawn on the
# same axis as the history.
# NOTE(review): business days ignore exchange holidays, so these dates are an
# approximation of the actual trading calendar.
if isinstance(df_close.index, pd.DatetimeIndex) and not isinstance(
    forecast.index, pd.DatetimeIndex
):
    first_forecast_day = df_close.index[-1] + pd.offsets.BDay(1)
    forecast.index = pd.bdate_range(start=first_forecast_day, periods=len(forecast))

# Plot the forecast after the historical data.
plt.figure(figsize=(12, 6))
plt.plot(df_close['Close'], label='Data Historis')
plt.plot(forecast, label='Forecast 30 Hari', color='orange')
plt.title('Forecast Harga Saham BBCA')
plt.xlabel('Tanggal')
plt.ylabel('Harga Penutupan (IDR)')
plt.legend()
plt.grid(True)
plt.show()

print("\n--- Forecast 30 hari ke depan ---")
print(forecast)

# Save the final fitted model. The context manager guarantees the file is
# flushed and closed even if pickling fails.
# NOTE: only unpickle this file from a trusted source; pickle can execute
# arbitrary code on load.
with open('forecast_saham.sav', 'wb') as model_file:
    pickle.dump(final_fit, model_file)
print("\nModel berhasil disimpan sebagai 'forecast_saham.sav'")
