<a href="https://colab.research.google.com/github/Jakelinecs/Tareas-Machine-Learning/blob/main/N21.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error

try:
    df = pd.read_csv('train.csv')
except FileNotFoundError:
    print("train.csvが見つかりません。ファイルをダウンロードしてください。")
    df = pd.DataFrame({
        'SalePrice': np.random.rand(100) * 100000 + 150000,
        'GrLivArea': np.random.rand(100) * 1500 + 1000,
        'YearBuilt': np.random.randint(1900, 2010, 100)
    })

X = df[['GrLivArea', 'YearBuilt']].values
y = df['SalePrice'].values

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train, X_val, y_train, y_val = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42
)

In [7]:
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor

lr = LinearRegression()
svr = SVR(kernel='rbf')
dt = DecisionTreeRegressor(max_depth=5, random_state=42)

In [8]:
lr.fit(X_train, y_train)
svr.fit(X_train, y_train)
dt.fit(X_train, y_train)

pred_lr = lr.predict(X_val)
pred_svr = svr.predict(X_val)
pred_dt = dt.predict(X_val)

pred_blend = (pred_lr + pred_svr + pred_dt) / 3

mse_lr = mean_squared_error(y_val, pred_lr)
mse_blend = mean_squared_error(y_val, pred_blend)

print(f"MSE (Linear Regression): {mse_lr:.2f}")
print(f"MSE (Blended Average):   {mse_blend:.2f}")

MSE (Linear Regression): 2495554898.67
MSE (Blended Average):   2820579658.44


In [9]:
from sklearn.utils import resample

n_estimators = 5
base_estimator = DecisionTreeRegressor(max_depth=5, random_state=42)

predictions = []

for i in range(n_estimators):
    X_sample, y_sample = resample(X_train, y_train, random_state=i)

    estimator = base_estimator.__class__(**base_estimator.get_params())
    estimator.fit(X_sample, y_sample)

    pred_i = estimator.predict(X_val)
    predictions.append(pred_i)

pred_bagging = np.mean(predictions, axis=0)

mse_bagging = mean_squared_error(y_val, pred_bagging)
print(f"MSE (Bagging - Scratch Style): {mse_bagging:.2f}")

MSE (Bagging - Scratch Style): 1673137888.68


In [10]:
estimators = [
    ('lr', LinearRegression()),
    ('svr', SVR(kernel='rbf')),
    ('dt', DecisionTreeRegressor(max_depth=5, random_state=42))
]

meta_learner = LinearRegression()

stage0_predictions_train = []
stage0_predictions_val = []

for name, model in estimators:
    model.fit(X_train, y_train)

    pred_train = model.predict(X_train)
    stage0_predictions_train.append(pred_train)

    pred_val = model.predict(X_val)
    stage0_predictions_val.append(pred_val)

X_meta = np.column_stack(stage0_predictions_train)
X_meta_val = np.column_stack(stage0_predictions_val)

meta_learner.fit(X_meta, y_train)

pred_stacking = meta_learner.predict(X_meta_val)

mse_stacking = mean_squared_error(y_val, pred_stacking)
print(f"MSE (Stacking - Scratch Style): {mse_stacking:.2f}")

MSE (Stacking - Scratch Style): 1792498145.76
