In [None]:
import numpy as np
import pandas as pd

# Load the Boston housing data into `boston`, an object exposing `.data`,
# `.target` and `.feature_names`.
#
# `sklearn.datasets.load_boston` was removed in scikit-learn 1.2, so importing
# it raises ImportError on current installs. The original loader is used when
# it is still available; otherwise the data is rebuilt from the original
# source, following the snippet in scikit-learn's removal notice.
# NOTE(review): scikit-learn removed this dataset over ethical concerns about
# the `B` feature; consider `fetch_california_housing` for new work.
try:
    from sklearn.datasets import load_boston
except ImportError:
    from sklearn.utils import Bunch

    # The raw file has 22 header lines, then each record is split across two
    # physical lines: 11 values on the first, 3 on the second (the last of
    # which is the MEDV target).
    raw_df = pd.read_csv(
        "http://lib.stat.cmu.edu/datasets/boston",
        sep=r"\s+",
        skiprows=22,
        header=None,
    )
    boston = Bunch(
        data=np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]]),
        target=raw_df.values[1::2, 2],
        feature_names=np.array([
            'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE',
            'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT',
        ]),
    )
else:
    boston = load_boston()

# Predictors as a labelled frame, target as a Series named 'MEDV', and both
# joined column-wise into a single frame for exploration.
X = pd.DataFrame(boston.data, columns=boston.feature_names)
y = pd.Series(boston.target, name='MEDV')
df = pd.concat([X, y], axis=1)

print("데이터 크기:", df.shape)
df.head()

In [None]:
# A notebook cell renders only its final expression, so the summary statistics
# are shown explicitly with the notebook's built-in `display`; otherwise the
# result of `describe()` would be computed and silently dropped.
display(df.describe())

# Per-column count of missing values (rendered as the cell's output).
df.isnull().sum()

In [None]:
import matplotlib.pyplot as plt
# Histogram of the MEDV target column, drawn on the current Axes in 30 bins.
ax = plt.gca()
ax.hist(df['MEDV'], bins=30, color='skyblue', edgecolor='black')
ax.set(
    title='Target (MEDV) Distribution',
    xlabel='MEDV ($1000s)',
    ylabel='Frequency',
)
plt.show()

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Separate the target column from the predictor columns.
y = df['MEDV']
X = df.drop(columns=['MEDV'])

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit the scaler on the training rows only, then apply the same
# transformation to both the training and the test predictors.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np

from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor

# Regressors to compare, keyed by the label used in the printed report.
models = dict([
    ('Linear Regression', LinearRegression()),
    ('KNN', KNeighborsRegressor(n_neighbors=5)),
    ('Decision Tree', DecisionTreeRegressor(random_state=42)),
    ('Random Forest', RandomForestRegressor(random_state=42)),
])

# Train each regressor on the scaled training data, predict on the scaled test
# data, and print MSE, RMSE and R^2 against the held-out targets.
for name, model in models.items():
    y_pred = model.fit(X_train_scaled, y_train).predict(X_test_scaled)
    mse, r2 = mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)
    rmse = np.sqrt(mse)
    print(f"{name} - MSE: {mse:.2f}, RMSE: {rmse:.2f}, R^2: {r2:.2f}")