In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import optuna
import pandas as pd
import seaborn as sns
from hmmlearn.hmm import GaussianHMM
from scipy.special import logsumexp
from scipy.stats import multivariate_normal
from sklearn.preprocessing import StandardScaler

In [None]:
# Location of the boston.csv dataset. The BOSTON_CSV_PATH environment variable
# overrides the default so the notebook can run on machines that do not share
# the original author's directory layout.
DATA_PATH = os.environ.get(
    "BOSTON_CSV_PATH",
    r"C:\Users\Robyi\Documents\Data Science Dataset\boston.csv",
)

# Load the file, then discard rows containing any missing value and exact
# duplicate rows. Chained (non in-place) calls keep this cell idempotent.
df = pd.read_csv(DATA_PATH).dropna().drop_duplicates()
df.head()

In [None]:
# Name of the single column that is modelled.
target_col = "price"

# Reduce the frame to that one column and discard rows where it is missing.
price_only = df.loc[:, [target_col]]
df = price_only.dropna()

In [None]:
# Standardise only the target column. Selecting it explicitly (instead of
# passing the whole frame) keeps X_scaled limited to the price feature even
# when this cell is re-run after later cells have added columns such as
# "State" or "Anomaly" to df.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(df[[target_col]])

In [None]:
# Gaussian-emission HMM with five hidden states and full covariance matrices;
# random_state is pinned to 42.
hmm_model = GaussianHMM(
    n_components=5,
    covariance_type="full",
    random_state=42,
)

In [None]:
# Fit the HMM on the standardised series.
hmm_model.fit(X=X_scaled)

In [None]:
# Decode one hidden-state label per observation of the scaled series.
hidden_states = hmm_model.predict(X=X_scaled)

In [None]:
# Attach the decoded state labels to the frame as the "State" column.
df = df.assign(State=hidden_states)

In [None]:
# Price series as a black line, with every observation overlaid as a point
# coloured by its value in the "State" column.
fig, ax = plt.subplots(figsize=(12, 6))
price_series = df[target_col]
ax.plot(price_series, label="Price", color="black", alpha=0.7)
ax.scatter(df.index, price_series, c=df["State"], cmap="coolwarm", label="Hidden State")
ax.set_xlabel("Time")
ax.set_ylabel("Price")
ax.set_title("Bayesian Hidden Markov Model - Time Series Clustering")
ax.legend()
plt.show()

In [None]:
# Per-observation log-likelihood, used downstream as the anomaly score.
#
# hmm_model.score_samples(X) returns (total_log_prob, posteriors): the first
# element is ONE float for the whole sequence, so thresholding it against its
# own percentile can never flag any row. Instead, combine the per-row state
# posteriors with each state's Gaussian emission density:
#     log_likelihood[t] = log sum_k posterior[t, k] * N(x_t | mean_k, cov_k)
_, state_posteriors = hmm_model.score_samples(X_scaled)

# Column k holds log N(x_t | mean_k, cov_k) for every row t.
# np.atleast_1d guards the single-row case, where logpdf returns a scalar.
emission_log_density = np.column_stack([
    np.atleast_1d(
        multivariate_normal.logpdf(
            X_scaled, mean=state_mean, cov=state_cov, allow_singular=True
        )
    )
    for state_mean, state_cov in zip(hmm_model.means_, hmm_model.covars_)
])

# Posterior-weighted log-sum-exp across states -> one value per observation.
log_likelihood = logsumexp(emission_log_density, axis=1, b=state_posteriors)

In [None]:
# Cut-off at the 5th percentile of the log-likelihood values; anything
# strictly below the cut-off is marked as anomalous.
ANOMALY_PERCENTILE = 5
threshold = np.percentile(log_likelihood, ANOMALY_PERCENTILE)
anomalies = np.less(log_likelihood, threshold)

In [None]:
# Store the anomaly flag as 0/1 so it can be summed and used as a colour value.
df["Anomaly"] = anomalies.astype(int)

# The leading bullet was mis-encoded (UTF-8 bytes read as cp1252); restored.
print(f"🔹 Jumlah Anomali yang Ditemukan: {df['Anomaly'].sum()}")

In [None]:
# Price series as a black line, with every observation overlaid as a point
# coloured by its 0/1 value in the "Anomaly" column.
fig, ax = plt.subplots(figsize=(12, 6))
price_series = df[target_col]
ax.plot(price_series, label="Price", color="black", alpha=0.7)
ax.scatter(df.index, price_series, c=df["Anomaly"], cmap="coolwarm", label="Anomaly")
ax.set_xlabel("Time")
ax.set_ylabel("Price")
ax.set_title("Bayesian HMM - Anomaly Detection")
ax.legend()
plt.show()

In [None]:
def objective(trial):
    """Optuna objective: BIC-penalised log-likelihood of a GaussianHMM.

    The raw training log-likelihood rewards extra hidden states, so maximising
    it biases the search toward the upper bound of the range. This objective
    subtracts the BIC complexity penalty and returns ``-BIC / 2``:

        log_likelihood - 0.5 * n_free_params * log(n_samples)

    Higher is still better, so it remains valid for a study created with
    ``direction="maximize"``.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample ``n_components`` from the integer range [2, 10].

    Returns
    -------
    float
        Penalised log-likelihood of the model fitted on the module-level
        ``X_scaled`` array.
    """
    n_states = trial.suggest_int("n_components", 2, 10)

    model = GaussianHMM(n_components=n_states, covariance_type="full", random_state=42)
    model.fit(X_scaled)

    n_samples, n_features = X_scaled.shape
    # Free parameters of an HMM with full-covariance Gaussian emissions.
    n_free_params = (
        (n_states - 1)                                     # initial-state probabilities
        + n_states * (n_states - 1)                        # transition-matrix rows
        + n_states * n_features                            # state means
        + n_states * n_features * (n_features + 1) // 2    # symmetric covariances
    )

    penalty = 0.5 * n_free_params * np.log(n_samples)
    return float(model.score(X_scaled) - penalty)

In [None]:
# Seed the sampler so the sequence of suggested n_components values, and
# therefore the selected model, is reproducible across kernel restarts.
sampler = optuna.samplers.TPESampler(seed=42)
study = optuna.create_study(direction="maximize", sampler=sampler)
study.optimize(objective, n_trials=30)

In [None]:
# Number of hidden states chosen by the best trial of the study.
best_n_states = study.best_params["n_components"]
# The leading bullet was mis-encoded (UTF-8 bytes read as cp1252); restored.
print(f"🔹 Best Number of States: {best_n_states}")

In [None]:
# Refit an HMM with the selected number of states, then record the decoded
# state of every observation in the "Best State" column.
best_model = GaussianHMM(
    n_components=best_n_states,
    covariance_type="full",
    random_state=42,
)
best_model.fit(X_scaled)
best_state_labels = best_model.predict(X_scaled)
df = df.assign(**{"Best State": best_state_labels})