<a href="https://colab.research.google.com/github/OmarMachuca851/Task/blob/main/Emsemble_learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Aprendizaje por conjunto

In [1]:
# Loading and Preparing Regresión Dataset to be esed

import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error

# Loading train.csv
#from google.colab import files
#uploaded = files.upload()

dfTrain = pd.read_csv('train.csv')
X_train = dfTrain[['GrLivArea', 'YearBuilt']].values
y_train = dfTrain['SalePrice'].values

dfTest = pd.read_csv('test.csv')
X_test = dfTrain[['GrLivArea', 'YearBuilt']].values
y_test = dfTrain['SalePrice'].values


print(f'Training size: {X_train.shape}, Validation size: {X_test.shape}')


Training size: (1460, 2), Validation size: (1460, 2)


## Problema 1: Impletación de scratch de Blending

In [2]:
# Blending scratch mounting

# Fitting different models
modelLR = LinearRegression()
modelSVM = SVR(kernel='rbf', C=1e3, gamma=0.1)
modelDT = DecisionTreeRegressor(max_depth=5, random_state=0)

modelLR.fit(X_train, y_train)
modelSVM.fit(X_train, y_train)
modelDT.fit(X_train, y_train)

# Predicting on validation set
predLR = modelLR.predict(X_test)
predSVM = modelSVM.predict(X_test)
predDT = modelDT.predict(X_test)

# Blending: simple average
blended_pred = (predLR + predSVM + predDT) / 3

# Evaluating
mseLR = mean_squared_error(y_test, predLR)
mseSVM = mean_squared_error(y_test, predSVM)
mseDT = mean_squared_error(y_test, predDT)
mse_blended = mean_squared_error(y_test, blended_pred)

print('[Blending Results]')
print(f'{mseLR=:.2f}\n{mseSVM=:.2f}\n{mseDT=:.2f}\n{mse_blended=:.2f}')

[Blending Results]
mseLR=2182210639.22
mseSVM=6457075183.24
mseDT=1396971332.19
mse_blended=2186798134.00


## Problema 2: Implementación de scratch de Bagging

In [3]:
# Problem 2: Scratch mounting of bagging

np.random.seed(0)
n_models = 5
bagged_preds = []

for i in range(n_models):
    # Bootstrap sample
    indices = np.random.choice(len(X_train), size=len(X_train), replace=True)
    X_sample = X_train[indices]
    y_sample = y_train[indices]

    # Using simple regressor for bagging (Decision Tree)
    model = DecisionTreeRegressor(max_depth=5, random_state=0)
    model.fit(X_sample, y_sample)
    pred = model.predict(X_test)
    bagged_preds.append(pred)

# Average predictions
bagged_pred = np.mean(bagged_preds, axis=0)

# Evaluating
mse_single_tree = mean_squared_error(y_test, modelDT.predict(X_test))
mse_bagged = mean_squared_error(y_test, bagged_pred)

print('[Bagging Results]')
print(f'{mse_single_tree=:2f}\n{mse_bagged=:.2f}')

[Bagging Results]
mse_single_tree=1396971332.187403
mse_bagged=1289477012.52


## Problema 3: Implementación scratch de Stacking

In [4]:
# Problem 3: Stacking scratch mounting

# stage 0: Trainning base models
baseLR = LinearRegression()
baseDT = DecisionTreeRegressor(max_depth=5, random_state=1)

baseLR.fit(X_train, y_train)
baseDT.fit(X_train, y_train)

# Blended data: base model prectitions (level 1 features)
baseLR_train_pred = baseLR.predict(X_train)
baseDT_train_pred = baseDT.predict(X_train)

stacked_X_train = np.vstack((baseLR_train_pred, baseDT_train_pred)).T

# stage 1: Trainnig meta model (simple Linear Regression)
meta = LinearRegression()
meta.fit(stacked_X_train, y_train)

# Applying to validation
baseLR_val_pred = baseLR.predict(X_test)
baseDT_val_pred = baseDT.predict(X_test)

stacked_X_val = np.vstack((baseLR_val_pred, baseDT_val_pred)).T
stacked_pred = meta.predict(stacked_X_val)

# Evaluating
mse_base_LinearRegression = mean_squared_error(y_test, baseLR_val_pred)
mse_base_DecisionTree = mean_squared_error(y_test, baseDT_val_pred)
mse_stacked = mean_squared_error(y_test, stacked_pred)

print('[Stacking Results]')
print(f'{mse_base_LinearRegression=:.2f}\n{mse_base_DecisionTree=:.2f}\n{mse_stacked=:.2f}')

[Stacking Results]
mse_base_LinearRegression=2182210639.22
mse_base_DecisionTree=1396971332.19
mse_stacked=1376567016.76
