In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Load the dataset
data = pd.read_csv(r"C:\Users\DoBUY\Downloads\train.csv")

# Select features and target
X = data[['GrLivArea', 'YearBuilt']].values
y = data['SalePrice'].values

# Split the data into train and validation sets (80% train, 20% validation)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)



In [2]:
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error

# Define models
models = [
    LinearRegression(),
    DecisionTreeRegressor(),
    SVR()
]

# Train models
for model in models:
    model.fit(X_train, y_train)

# Generate predictions
preds = []
for model in models:
    preds.append(model.predict(X_val))

# Blend predictions (simple averaging)
blend_pred = sum(preds) / len(preds)

# Evaluate performance
mse_blend = mean_squared_error(y_val, blend_pred)
print("Blending MSE:", mse_blend)


Blending MSE: 2696836742.6874914


In [3]:
import numpy as np
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error

# Number of models (bags)
n_models = 5

# List to store models
models = []

# Train n_models on different bootstrap samples
for _ in range(n_models):
    # Create bootstrap sample
    indices = np.random.choice(len(X_train), size=len(X_train), replace=True)
    X_bag = X_train[indices]
    y_bag = y_train[indices]
    
    # Train decision tree model
    model = DecisionTreeRegressor()
    model.fit(X_bag, y_bag)
    
    # Add trained model to list
    models.append(model)

# Generate predictions
bagging_preds = np.zeros_like(y_val, dtype=float)
for model in models:
    bagging_preds += model.predict(X_val)

# Average predictions
bagging_preds /= n_models

# Calculate MSE
mse_bagging = mean_squared_error(y_val, bagging_preds)
print("Bagging MSE:", mse_bagging)


Bagging MSE: 1935217631.3602672


In [36]:

import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

class Stacking:
    def __init__(self, base_models, meta_model):
        self.base_models = base_models
        self.meta_model = meta_model

    def fit_predict(self, X_train, y_train, X_test, K0=3, M0=2):
        # Stage 0
        models_stage0 = []
        blend_train_stage0 = np.zeros((len(X_train), len(self.base_models)))

        for model in self.base_models:
            blend_data = np.zeros(len(X_train))
            for k in range(K0):
                X_tr, X_val, y_tr, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=k)
                model.fit(X_tr, y_tr)
                blend_data += model.predict(X_val)
                blend_train_stage0[:, len(models_stage0)] = model.predict(X_val)
            blend_data /= K0
            models_stage0.append(blend_data)

        # Stage n
        for i in range(1, len(self.base_models)):
            blend_data = np.zeros((len(X_train), M0 * i))
            for j in range(M0 * i):
                base_model_index = j % i
                base_model = self.base_models[base_model_index]
                X_blend_train = np.hstack([blend_train_stage0[:, k] for k in range(base_model_index, len(self.base_models), i)])
                for k in range(K0):
                    X_tr, X_val, y_tr, y_val = train_test_split(X_blend_train, y_train, test_size=0.2, random_state=k)
                    base_model.fit(X_tr, y_tr)
                    blend_data[:, j] = base_model.predict(X_val)
            blend_train_stage0 = blend_data

        # Final Stage
        self.meta_model.fit(blend_train_stage0, y_train)
        blend_test = np.zeros((len(X_test), len(self.base_models)))
        for i, model in enumerate(self.base_models):
            blend_test[:, i] = model.predict(X_test)
        final_pred = self.meta_model.predict(blend_test)
        return final_pred


# Example usage:
# Instantiate base models
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression

base_model1 = DecisionTreeRegressor()
base_model2 = LinearRegression()

# Instantiate meta model
meta_model = LinearRegression()

# Instantiate stacking model
stacking_model = Stacking(base_models=[base_model1, base_model2], meta_model=meta_model)

import numpy as np

# Generate sample dataset
np.random.seed(0)
X = np.random.rand(100, 2)  # 100 samples, 2 features
y = 2 * X[:, 0] - 3 * X[:, 1] + np.random.randn(100)  # Target variable

# Split the dataset into training and test sets
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

final_pred = stacking_model.fit_predict(X_train, y_train, X_test)



ValueError: operands could not be broadcast together with shapes (80,) (16,) (80,) 

In [40]:
X_val.shape

y_val.shape

(292,)