In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import arviz as az
import optuna
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.linear_model import LogisticRegression
from scipy.stats import norm

In [None]:
# Load the raw table and discard incomplete rows and exact duplicate rows.
# NOTE(review): the path is absolute and machine-specific; the notebook will
# only run where this file exists.
df = (
    pd.read_csv(r"C:\Users\Robyi\Documents\Data Science Dataset\cancer.csv")
    .dropna()
    .drop_duplicates()
)
df.head()

In [None]:
# Separate the label column ("target") from the feature columns.
y = df["target"]
X = df.drop(columns="target")

In [None]:
# Split BEFORE scaling: the scaler's mean/std must be estimated from the
# training rows only, otherwise statistics of the held-out rows leak into the
# features the model is trained on.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)  # fit on the training rows only
X_test = scaler.transform(X_test_raw)        # reuse the training statistics

# Kept so that any cell still referring to X_scaled keeps working; it is the
# full feature matrix standardised with the training-set statistics.
X_scaled = scaler.transform(X)

In [None]:
class BayesianLogisticRegression:
    """Linear binary classifier with a per-feature Gaussian weight summary.

    `fit` computes, independently for every feature, a weight mean and a
    weight standard deviation from closed-form expressions (no iterative
    optimisation or sampling). `predict_proba` pushes the ratio of the linear
    score to its spread through the standard normal CDF.

    Attributes set by `fit`:
        w_mean: array of shape (n_features,), per-feature weight mean.
        w_std:  array of shape (n_features,), per-feature weight std.
    """

    def __init__(self, mu=0, sigma=1):
        """Store the prior parameters.

        Args:
            mu: Prior mean. Stored on the instance; `fit` does not read it.
            sigma: Prior standard deviation of each weight; must be > 0.

        Raises:
            ValueError: If `sigma` is not strictly positive (NaN included).
        """
        # `not sigma > 0` also rejects NaN, which `sigma <= 0` would let through.
        if not sigma > 0:
            raise ValueError(f"sigma must be > 0, got {sigma!r}")
        self.mu = mu
        self.sigma = sigma

    def fit(self, X, y):
        """Estimate `w_mean` and `w_std` from the data.

        Args:
            X: Array-like of shape (n_samples, n_features).
            y: Array-like of length n_samples with numeric labels.

        Returns:
            self, so calls can be chained.

        Raises:
            ValueError: If `X` is not 2-D or `y` has a different number of rows.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float).ravel()
        if X.ndim != 2:
            raise ValueError(f"X must be 2-D, got shape {X.shape}")
        if y.shape[0] != X.shape[0]:
            raise ValueError(
                f"X has {X.shape[0]} rows but y has {y.shape[0]} entries"
            )

        prior_var = self.sigma ** 2
        feature_var = X.var(axis=0)  # population variance, same as np.var per column

        # Per-feature mean of x*y, shrunk by (prior variance + feature variance).
        self.w_mean = (X * y[:, None]).mean(axis=0) / (prior_var + feature_var)
        # Equal to sqrt(1 / (1/prior_var + 1/feature_var)), written so that a
        # constant column (feature_var == 0) yields 0 instead of dividing by zero.
        self.w_std = np.sqrt(prior_var * feature_var / (prior_var + feature_var))
        return self

    def predict_proba(self, X):
        """Return the predicted probability of the positive class.

        Args:
            X: Array-like of shape (n_samples, n_features).

        Returns:
            1-D array of length n_samples with values in [0, 1].
        """
        X = np.asarray(X, dtype=float)
        mean_pred = X @ self.w_mean
        std_pred = np.sqrt(np.sum((X * self.w_std) ** 2, axis=1))
        # The small constant keeps the ratio finite when the spread is zero.
        return norm.cdf(mean_pred / (std_pred + 1e-9))

    def predict(self, X, threshold=0.5):
        """Return 0/1 labels: 1 where `predict_proba(X) >= threshold`."""
        return (self.predict_proba(X) >= threshold).astype(int)

In [None]:
def objective(trial):
    """Optuna objective: validation accuracy for a sampled prior width.

    The score is computed on a validation split carved out of the training
    data, not on the test set, so the test set stays untouched for the final
    evaluation and the reported test metrics are not biased by tuning.

    Args:
        trial: Optuna trial used to sample `sigma` in [0.1, 5.0].

    Returns:
        Accuracy of the fitted model on the validation split.
    """
    sigma = trial.suggest_float("sigma", 0.1, 5.0)

    # Fixed random_state: every trial is scored on the same validation rows,
    # so trial scores are directly comparable.
    X_fit, X_val, y_fit, y_val = train_test_split(
        X_train, y_train, test_size=0.25, random_state=42, stratify=y_train
    )

    model = BayesianLogisticRegression(sigma=sigma)
    model.fit(X_fit, y_fit)

    return accuracy_score(y_val, model.predict(X_val))

In [None]:
# Seed the sampler so that a fresh Restart & Run All visits the same sequence
# of sigma values and reports the same best trial.
study = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=30)

In [None]:
best_sigma = study.best_params["sigma"]
# The marker is U+1F539 (small blue diamond), written as an escape so the
# source stays ASCII and cannot be mis-decoded again.
print(f"\U0001F539 Best Sigma: {best_sigma:.3f}")

In [None]:
# Refit on the training split using the sigma selected by the study.
best_model = BayesianLogisticRegression(sigma=best_sigma)
best_model.fit(X=X_train, y=y_train)

In [None]:
# Held-out predictions: positive-class probabilities and the hard 0/1 labels
# obtained with the default threshold.
y_proba = best_model.predict_proba(X_test)
y_pred = best_model.predict(X_test)

In [None]:
# The marker is U+1F539 (small blue diamond), written as an escape so the
# source stays ASCII and cannot be mis-decoded again.
print("\U0001F539 Classification Report:\n", classification_report(y_test, y_pred))

In [None]:
# One histogram (with KDE overlay) of the predicted probability per true class.
# stat="density" makes the bars match the "Density" axis label; the default
# would draw raw counts.
for cls, colour in ((0, "blue"), (1, "red")):
    sns.histplot(
        y_proba[np.asarray(y_test) == cls],  # positional boolean mask
        label=f"Class {cls}",
        color=colour,
        kde=True,
        alpha=0.6,
        stat="density",
    )
plt.xlabel("Predicted Probability")
plt.ylabel("Density")
plt.title("Probability Distribution of Bayesian Logistic Regression")
plt.legend()
plt.show()