In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import (
    train_test_split, KFold, StratifiedKFold, LeaveOneOut,
    ShuffleSplit, GroupKFold, GridSearchCV, RandomizedSearchCV,
    cross_val_score,
)
from sklearn.preprocessing import StandardScaler

# Load the housing table and discard every row that has a missing value.
data = pd.read_csv("boston.csv")
data.dropna(inplace=True)

# Turn the regression target into a binary label:
# 1 when MEDV is at or above the median of the loaded rows, 0 otherwise.
median_price = np.median(data["MEDV"])
data["MEDV_Class"] = np.where(data["MEDV"] >= median_price, 1, 0)

features = data.drop(columns=["MEDV", "MEDV_Class"])
y = data["MEDV_Class"].values

# Synthetic group labels (0..4) used only to exercise GroupKFold.
# Drawn from a seeded generator so repeated runs produce the same folds.
groups = np.random.default_rng(42).integers(0, 5, size=len(y))

print("\n--- Відкладена вибірка ---")
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    features, y, test_size=0.2, random_state=42
)

# Fit the scaler on the training rows ONLY and reuse its statistics for the
# test rows. Fitting on the full table before splitting would leak the test
# set's mean/variance into the training features.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full feature matrix scaled with the train-fitted statistics, kept so the
# name `X` remains available to any later code.
X = scaler.transform(features)

# Leave-one-out cross-validation on the training split: refit the classifier
# once per sample, score it on the single held-out row, and report the mean.
loo = LeaveOneOut()
log_reg = LogisticRegression(max_iter=1000)
scores_loo = []
for fit_rows, held_out in loo.split(X_train):
    log_reg.fit(X_train[fit_rows], y_train[fit_rows])
    sample = X_train[held_out].reshape(1, -1)
    label = y_train[held_out]
    scores_loo.append(log_reg.score(sample, label))
loo_accuracy = np.mean(scores_loo)
print(f"Leave-One-Out CV (середня точність): {loo_accuracy:.4f}")

print("\n--- Відкладена + Валідаційна вибірка ---")
# Compare 3-fold CV with and without shuffling. A seed is only passed when
# shuffling is enabled (KFold rejects a random_state with shuffle=False).
for shuffle, seed in ((True, 42), (False, None)):
    cv = KFold(n_splits=3, shuffle=shuffle, random_state=seed)
    # fit() returns the estimator itself, so fit and score can be chained.
    scores_kfold = [
        log_reg.fit(X_train[fit_rows], y_train[fit_rows]).score(
            X_train[val_rows], y_train[val_rows]
        )
        for fit_rows, val_rows in cv.split(X_train)
    ]
    print(f"3-Fold CV (shuffle={shuffle}): {np.mean(scores_kfold):.4f}")

print("\n--- Сітковий пошук ---")
# Exhaustive search space: three regularisation strengths x two penalties,
# all with the liblinear solver.
param_grid = {
    "C": [0.1, 1, 10],
    "penalty": ["l1", "l2"],
    "solver": ["liblinear"],
}
# Cross-validation strategies to compare, keyed by display name.
cv_methods = {
    "KFold": KFold(n_splits=5, shuffle=True, random_state=42),
    "StratifiedKFold": StratifiedKFold(n_splits=5, shuffle=True, random_state=42),
    "ShuffleSplit": ShuffleSplit(n_splits=5, test_size=0.2, random_state=42),
    "GroupKFold": GroupKFold(n_splits=3),
}
# Best refitted estimator per strategy.
best_models = {}

for name, cv in cv_methods.items():
    grid_search = GridSearchCV(LogisticRegression(max_iter=1000), param_grid, cv=cv)
    # Only the group-aware splitter is given group labels; the other
    # strategies are fitted without any extra arguments.
    fit_kwargs = {"groups": groups[:len(y_train)]} if name == "GroupKFold" else {}
    grid_search.fit(X_train, y_train, **fit_kwargs)

    best_models[name] = grid_search.best_estimator_
    print(f"{name}: Найкращі параметри: {grid_search.best_params_}")

print("\n--- Випадковий пошук ---")
# Sampling space: ten log-spaced regularisation strengths from 1e-2 to 1e2,
# two penalties, liblinear solver.
param_dist = {"C": np.logspace(-2, 2, 10), "penalty": ["l1", "l2"], "solver": ["liblinear"]}

for name, cv in cv_methods.items():
    random_search = RandomizedSearchCV(
        LogisticRegression(max_iter=1000),
        param_distributions=param_dist,
        cv=cv,
        n_iter=5,
        random_state=42,  # fixed seed so the five sampled candidates are reproducible
    )
    # Only the group-aware splitter is given group labels.
    fit_kwargs = {"groups": groups[:len(y_train)]} if name == "GroupKFold" else {}
    random_search.fit(X_train, y_train, **fit_kwargs)

    # Store under a key distinct from the bare splitter name. Reusing `name`
    # would overwrite the grid-search winner already stored there and silently
    # drop it from the final model comparison.
    best_models[f"{name} (random search)"] = random_search.best_estimator_
    print(f"{name}: Найкращі параметри: {random_search.best_params_}")

print("\n--- Оцінка найкращої моделі ---")
# Pick the winner by cross-validated accuracy on the TRAINING split.
# Choosing it by its score on X_test and then reporting that same score would
# make the reported accuracy optimistically biased. The test set is used below
# strictly for the final report.
selection_cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
best_model = max(
    best_models.values(),
    # cross_val_score fits clones, so the stored (already refitted) estimators
    # are left untouched.
    key=lambda model: cross_val_score(model, X_train, y_train, cv=selection_cv).mean(),
)
y_pred = best_model.predict(X_test)

print(f"Точність найкращої моделі: {accuracy_score(y_test, y_pred):.4f}")
print("\n Classification Report:\n", classification_report(y_test, y_pred))

# Confusion-matrix heatmap: rows are true labels, columns are predictions.
plt.figure(figsize=(6, 5))
sns.heatmap(confusion_matrix(y_test, y_pred), annot=True, cmap='Blues', fmt='d')
plt.xlabel("Прогноз")
plt.ylabel("Реальне значення")
plt.title("Матриця помилок для найкращої моделі")
plt.show()
