In [13]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error


df=pd.read_csv('train.csv')
x=df[['GrLivArea','YearBuilt']]
y=df['SalePrice']

x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=42)
model1=LinearRegression()
model2=SVR()
model3=DecisionTreeRegressor()
model1.fit(x_train,y_train)
model2.fit(x_train,y_train)
model3.fit(x_train,y_train)

model1_pred=model1.predict(x_test)
model2_pred=model2.predict(x_test)
model3_pred=model3.predict(x_test)

sse1=mean_squared_error(y_test,model1_pred)
sse2=mean_squared_error(y_test,model2_pred)
sse2=mean_squared_error(y_test,model3_pred)


#[Problem 1] Blending scratch mounting
blend_pred=(model1_pred+model2_pred+model3_pred)/3
sse_blend=mean_squared_error(y_test,blend_pred)
print(sse_blend)



2710913415.047034


In [24]:
#[Problem 2] Scratch mounting of bagging

class BaggingScratch:
    def __init__(self, models):
        self.models = models
        self.predictions = None

    def fit(self, x_train, y_train):
        for model in self.models:
            model.fit(x_train, y_train)

    def predict(self, x_test):
        preds = []
        for model in self.models:
            preds.append(model.predict(x_test))
        self.predictions = np.mean(preds, axis=0)
        return self.predictions

    def score(self, x_test, y_test):
        if self.predictions is None:
            self.predict(x_test)
        return mean_squared_error(y_test, self.predictions)

In [26]:
bagging = BaggingScratch([model1, model2, model3])
bagging.fit(x_train, y_train)
y_pred = bagging.predict(x_test)
mse = bagging.score(x_test, y_test)

print("MSE do Bagging:", mse)

MSE do Bagging: 2769574524.6610875


In [30]:
#[Problem 3] Stacking scratch mounting
from sklearn.base import clone

class stackingScratch:
    def __init__(self, base_models, meta_model):
        self.base_models = base_models
        self.meta_model = meta_model
        self.fitted_base_models = []
        self.fitted_meta_model = None

    def fit(self, X, y):
        base_predictions = []
        self.fitted_base_models = []
        for model in self.base_models:
            cloned_model = clone(model)
            cloned_model.fit(X, y)
            self.fitted_base_models.append(cloned_model)
            base_predictions.append(cloned_model.predict(X))
        X_meta = np.vstack(base_predictions).T

        self.fitted_meta_model = clone(self.meta_model)
        self.fitted_meta_model.fit(X_meta, y)

    def predict(self, X):
        base_preds = [model.predict(X) for model in self.fitted_base_models]
        X_meta = np.vstack(base_preds).T
        return self.fitted_meta_model.predict(X_meta)

    def score(self, X, y):
        y_pred = self.predict(X)
        return mean_squared_error(y, y_pred)

In [31]:
base_models = [LinearRegression(), DecisionTreeRegressor()]
meta_model = DecisionTreeRegressor(max_depth=3)
stacking = stackingScratch(base_models, meta_model)
stacking.fit(x_train, y_train)

y_pred = stacking.predict(x_test)
mse_stacking = stacking.score(x_test, y_test)

print( mse_stacking)

2427861821.649006
