In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import (
    mean_squared_error, mean_absolute_error, r2_score, explained_variance_score
)

# Load the dataset and drop, in place, every row that contains a missing value.
df = pd.read_csv("boston.csv")
df.dropna(inplace=True)

# The MEDV column is the regression target; all remaining columns are predictors.
y = df["MEDV"]
X = df.drop(columns=["MEDV"])

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Training feature subsets to compare, keyed by the label shown in reports/plots.
half_width = X_train.shape[1] // 2
models = {
    "Модель A (всі ознаки)": X_train,
    "Модель B (половина ознак)": X_train.iloc[:, :half_width],
    "Модель C (3 найважливіші ознаки)": X_train.loc[:, ["RM", "LSTAT", "PTRATIO"]],
}

# Fit one linear regression per feature subset, score it on the held-out split
# and plot predicted against actual target values.
results = []
predictions = {}  # model label -> test-set predictions, reused for the residual plots
for name, X_train_subset in models.items():
    # Evaluate on exactly the columns the model was trained on.
    X_test_subset = X_test[X_train_subset.columns]
    model = LinearRegression()
    model.fit(X_train_subset, y_train)
    y_pred = model.predict(X_test_subset)
    predictions[name] = y_pred

    n_obs = len(y_test)
    n_features = X_train_subset.shape[1]
    dof = n_obs - n_features - 1

    r2 = r2_score(y_test, y_pred)
    # Adjusted R² penalises R² for the number of predictors; it is undefined
    # when there are not more test observations than predictors + 1.
    adj_r2 = 1 - (1 - r2) * (n_obs - 1) / dof if dof > 0 else float("nan")
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_test, y_pred)
    explained_var = explained_variance_score(y_test, y_pred)

    results.append([name, r2, adj_r2, mse, rmse, mae, explained_var])

    plt.figure(figsize=(5, 5))
    plt.scatter(y_test, y_pred, alpha=0.7)
    plt.xlabel("Фактичні значення")
    plt.ylabel("Прогнозовані значення")
    plt.title(f"Прогнози vs Фактичні: {name}")
    plt.show()

# One row of metrics per model.
results_df = pd.DataFrame(
    results, columns=["Модель", "R²", "Adj R²", "MSE", "RMSE", "MAE", "Пояснена дисперсія"]
)
print(results_df)

# Residual histograms. The predictions cached above are reused instead of
# re-fitting every model a second time (the fit is deterministic, so the
# residuals are the same).
for name, y_pred in predictions.items():
    residuals = y_test - y_pred

    plt.figure(figsize=(5, 5))
    sns.histplot(residuals, bins=20, kde=True)
    plt.xlabel("Залишки")
    plt.title(f"Графік залишків: {name}")
    plt.show()


from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, roc_curve, auc, precision_recall_curve
)
from sklearn.preprocessing import StandardScaler

# Data preparation for classification: binarise the target at its median
# (1 = MEDV at or above the median, 0 = below).
df["MEDV_Class"] = np.where(df["MEDV"] >= np.median(df["MEDV"]), 1, 0)
X_clf = df.drop(columns=["MEDV", "MEDV_Class"])
y_clf = df["MEDV_Class"]

# Split BEFORE scaling: fitting the scaler on the full dataset would leak
# test-set means/variances into the training features.
X_train, X_test, y_train, y_test = train_test_split(X_clf, y_clf, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)  # statistics learned from training rows only
X_test = scaler.transform(X_test)        # test rows reuse the training statistics

# Keep X_clf as a scaled array (as before), now using the training-only statistics.
X_clf = scaler.transform(X_clf)

# Model definitions. The tree-based estimators are seeded so repeated runs
# produce the same metrics, matching the fixed seed used for the split.
models_clf = {
    "Логістична регресія": LogisticRegression(max_iter=1000),
    "Дерево рішень": DecisionTreeClassifier(random_state=42),
    "Випадковий ліс": RandomForestClassifier(n_estimators=100, random_state=42),
}

# Model evaluation: fit each classifier, collect its test-set metrics, and
# draw its confusion matrix and ROC curve.
results_clf = []
for name, model in models_clf.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    # Second predict_proba column: score for the class with label 1.
    y_prob = model.predict_proba(X_test)[:, 1]

    # Accuracy, precision, recall and F1, in the column order of the summary table.
    scores = [
        metric(y_test, y_pred)
        for metric in (accuracy_score, precision_score, recall_score, f1_score)
    ]
    results_clf.append([name, *scores])

    # Confusion matrix
    plt.figure(figsize=(5, 5))
    cm = confusion_matrix(y_test, y_pred)
    sns.heatmap(cm, annot=True, cmap="Blues", fmt="d")
    plt.xlabel("Прогноз")
    plt.ylabel("Реальне значення")
    plt.title(f"Матриця помилок: {name}")
    plt.show()

    # ROC curve (thresholds returned by roc_curve are not needed)
    fpr, tpr = roc_curve(y_test, y_prob)[:2]
    roc_auc = auc(fpr, tpr)

    plt.figure(figsize=(6, 5))
    plt.plot(fpr, tpr, label=f"{name} (AUC = {roc_auc:.2f})")
    # Diagonal reference line from (0, 0) to (1, 1).
    plt.plot([0, 1], [0, 1], linestyle="--", color="gray")
    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate")
    plt.title("ROC-криві")
    plt.legend()
    plt.show()

# One row of metrics per classifier.
metric_columns = ["Модель", "Accuracy", "Precision", "Recall", "F1"]
results_clf_df = pd.DataFrame(results_clf, columns=metric_columns)
print(results_clf_df)
