In [3]:
from former import Scaling, Tab_Former, Encoder, log_rmse, run_model

In [4]:
import torch
import numpy as np
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.base import BaseEstimator, TransformerMixin

In [5]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.datasets import make_regression

# Dummy data for demonstration
X, y = make_regression(n_samples=100, n_features=10, random_state=42)

encoder = Encoder(d_model = 10)
scaling = Scaling().min_max_scaling

pipeline = Pipeline([
    ('scaling', scaling),
    ('encoder', encoder),
    ('model', RandomForestRegressor())
])


In [6]:
param_grid = {
    'encoder__num_layers': [1, 2, 4, 8, 12, 50, 100],
    'encoder__d_ff': [50, 128, 256, 512, 768, 1024, 2048],
    'encoder__dropout': [0.1, 0.2, 0.4, 0.5, 0.8, 0.9],
    'model__n_estimators': [50, 100, 200],
    'model__max_depth': [None, 10, 20]
}


In [7]:
grid_search = GridSearchCV(pipeline, param_grid, cv=3, scoring='neg_mean_squared_error', n_jobs=-1)
grid_search.fit(X, y)

# Using RandomizedSearchCV for comparison
random_search = RandomizedSearchCV(pipeline, param_distributions=param_grid, n_iter=10, cv=3, scoring='neg_mean_squared_error', n_jobs=-1)
random_search.fit(X, y)

AttributeError: 'Encoder' object has no attribute 'num_layers'

In [8]:
import torch
import numpy as np
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.base import BaseEstimator, TransformerMixin

# Define Encoder and Scaling as sklearn compatible transformers
class Encoder(BaseEstimator, TransformerMixin):
    def __init__(self, d_model, num_layers, d_ff, dropout):
        self.d_model = d_model
        self.num_layers = num_layers
        self.d_ff = d_ff
        self.dropout = dropout
        self.model = self.build_model()

    def build_model(self):
        # Create a dummy model for demonstration purposes
        return torch.nn.Sequential(
            torch.nn.Linear(self.d_model, self.d_ff),
            torch.nn.ReLU(),
            torch.nn.Dropout(self.dropout),
            *[torch.nn.Sequential(
                torch.nn.Linear(self.d_ff, self.d_ff),
                torch.nn.ReLU(),
                torch.nn.Dropout(self.dropout)) for _ in range(self.num_layers)],
            torch.nn.Linear(self.d_ff, self.d_model)
        )

    def fit(self, X, y=None):
        return self

    def transform(self, X):
        with torch.no_grad():
            X = torch.tensor(X, dtype=torch.float32)
            return self.model(X).numpy()

class Scaling(BaseEstimator, TransformerMixin):
    def __init__(self, method='z_score'):
        self.method = method
        self.scaler = StandardScaler()

    def fit(self, X, y=None):
        if self.method == 'z_score':
            self.scaler.fit(X)
        return self

    def transform(self, X):
        if self.method == 'z_score':
            return self.scaler.transform(X)
        return X

# Dummy data
X = np.random.rand(100, 10)
y = np.random.rand(100)


In [38]:
param_grid = {
    'scaling__method': ['z_score'],
    'encoder__d_model': [64, 128, 256],
    'encoder__num_layers': [2, 5, 10],
    'encoder__d_ff': [128, 256, 512],
    'encoder__dropout': [0.1, 0.5, 0.9],
    'model__n_estimators': [50, 100, 200],
    'model__max_depth': [None, 10, 20]
}



In [22]:
import pandas as pd

In [23]:
train_df = pd.read_csv('train (2).csv')

In [24]:
train_df.head()

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,...,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
0,1,60,RL,65.0,8450,Pave,,Reg,Lvl,AllPub,...,0,,,,0,2,2008,WD,Normal,208500
1,2,20,RL,80.0,9600,Pave,,Reg,Lvl,AllPub,...,0,,,,0,5,2007,WD,Normal,181500
2,3,60,RL,68.0,11250,Pave,,IR1,Lvl,AllPub,...,0,,,,0,9,2008,WD,Normal,223500
3,4,70,RL,60.0,9550,Pave,,IR1,Lvl,AllPub,...,0,,,,0,2,2006,WD,Abnorml,140000
4,5,60,RL,84.0,14260,Pave,,IR1,Lvl,AllPub,...,0,,,,0,12,2008,WD,Normal,250000


In [25]:
y = train_df['SalePrice']

In [26]:
# Feature Engineering
def feature_engineering(df):
    # Total square footage of the house
    df['TotalSF'] = df['TotalBsmtSF'] + df['1stFlrSF'] + df['2ndFlrSF']

    # Total number of bathrooms
    df['TotalBath'] = df['FullBath'] + df['HalfBath'] + df['BsmtFullBath'] + df['BsmtHalfBath']

    # Age of the house at the time of sale
    df['Age'] = df['YrSold'] - df['YearBuilt']

    return df

train_df = feature_engineering(train_df)
# test_df = feature_engineering(test_df)

In [27]:
from sklearn.model_selection import train_test_split

In [28]:
train_df.head()

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,...,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice,TotalSF,TotalBath,Age
0,1,60,RL,65.0,8450,Pave,,Reg,Lvl,AllPub,...,,0,2,2008,WD,Normal,208500,2566,4,5
1,2,20,RL,80.0,9600,Pave,,Reg,Lvl,AllPub,...,,0,5,2007,WD,Normal,181500,2524,3,31
2,3,60,RL,68.0,11250,Pave,,IR1,Lvl,AllPub,...,,0,9,2008,WD,Normal,223500,2706,4,7
3,4,70,RL,60.0,9550,Pave,,IR1,Lvl,AllPub,...,,0,2,2006,WD,Abnorml,140000,2473,2,91
4,5,60,RL,84.0,14260,Pave,,IR1,Lvl,AllPub,...,,0,12,2008,WD,Normal,250000,3343,4,8


In [33]:
X = train_df
X_train,X_test, y_train, y_test = train_test_split(X,y,test_size=0.25)

In [40]:

encoder = Encoder(d_model=128, num_layers=5, d_ff=512, dropout=0.5)
scaling = Scaling(method='z_score')

from sklearn.ensemble import RandomForestRegressor
from sklearn.datasets import make_regression

# Dummy data for demonstration
X, y = make_regression(n_samples=100, n_features=10, random_state=42)

# encoder = Encoder()
# scaling = Scaling()

pipeline = Pipeline([
    ('scaling', scaling),
    ('encoder', encoder),
    ('model', RandomForestRegressor())
])

# Using GridSearchCV
grid_search = GridSearchCV(pipeline, param_grid, cv=3, scoring='neg_mean_squared_error', n_jobs=-1)
grid_search.fit(X,y)

# Using RandomizedSearchCV
random_search = RandomizedSearchCV(pipeline, param_distributions=param_grid, n_iter=10, cv=3, scoring='neg_mean_squared_error', n_jobs=-1)
random_search.fit(X_train, y_train)

# Best parameters and score
print("Best parameters (GridSearchCV):", grid_search.best_params_)
print("Best score (GridSearchCV):", grid_search.best_score_)

print("Best parameters (RandomizedSearchCV):", random_search.best_params_)
print("Best score (RandomizedSearchCV):", random_search.best_score_)


ValueError: 
All the 2187 fits failed.
It is very likely that your model is misconfigured.
You can try to debug the error by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
729 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\USER\anaconda3\Lib\site-packages\sklearn\model_selection\_validation.py", line 732, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\USER\anaconda3\Lib\site-packages\sklearn\base.py", line 1151, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\USER\anaconda3\Lib\site-packages\sklearn\pipeline.py", line 416, in fit
    Xt = self._fit(X, y, **fit_params_steps)
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\USER\anaconda3\Lib\site-packages\sklearn\pipeline.py", line 370, in _fit
    X, fitted_transformer = fit_transform_one_cached(
                            ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\USER\anaconda3\Lib\site-packages\joblib\memory.py", line 349, in __call__
    return self.func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\USER\anaconda3\Lib\site-packages\sklearn\pipeline.py", line 950, in _fit_transform_one
    res = transformer.fit_transform(X, y, **fit_params)
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\USER\anaconda3\Lib\site-packages\sklearn\utils\_set_output.py", line 140, in wrapped
    data_to_wrap = f(self, X, *args, **kwargs)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\USER\anaconda3\Lib\site-packages\sklearn\base.py", line 918, in fit_transform
    return self.fit(X, y, **fit_params).transform(X)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\USER\anaconda3\Lib\site-packages\sklearn\utils\_set_output.py", line 140, in wrapped
    data_to_wrap = f(self, X, *args, **kwargs)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\USER\AppData\Local\Temp\ipykernel_11796\1151761980.py", line 36, in transform
  File "c:\Users\USER\anaconda3\Lib\site-packages\torch\nn\modules\module.py", line 1518, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\USER\anaconda3\Lib\site-packages\torch\nn\modules\module.py", line 1527, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\USER\anaconda3\Lib\site-packages\torch\nn\modules\container.py", line 215, in forward
    input = module(input)
            ^^^^^^^^^^^^^
  File "c:\Users\USER\anaconda3\Lib\site-packages\torch\nn\modules\module.py", line 1518, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\USER\anaconda3\Lib\site-packages\torch\nn\modules\module.py", line 1527, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\USER\anaconda3\Lib\site-packages\torch\nn\modules\linear.py", line 114, in forward
    return F.linear(input, self.weight, self.bias)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
RuntimeError: mat1 and mat2 shapes cannot be multiplied (66x10 and 128x512)

--------------------------------------------------------------------------------
1458 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\USER\anaconda3\Lib\site-packages\sklearn\model_selection\_validation.py", line 732, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\USER\anaconda3\Lib\site-packages\sklearn\base.py", line 1151, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\USER\anaconda3\Lib\site-packages\sklearn\pipeline.py", line 416, in fit
    Xt = self._fit(X, y, **fit_params_steps)
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\USER\anaconda3\Lib\site-packages\sklearn\pipeline.py", line 370, in _fit
    X, fitted_transformer = fit_transform_one_cached(
                            ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\USER\anaconda3\Lib\site-packages\joblib\memory.py", line 349, in __call__
    return self.func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\USER\anaconda3\Lib\site-packages\sklearn\pipeline.py", line 950, in _fit_transform_one
    res = transformer.fit_transform(X, y, **fit_params)
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\USER\anaconda3\Lib\site-packages\sklearn\utils\_set_output.py", line 140, in wrapped
    data_to_wrap = f(self, X, *args, **kwargs)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\USER\anaconda3\Lib\site-packages\sklearn\base.py", line 918, in fit_transform
    return self.fit(X, y, **fit_params).transform(X)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\USER\anaconda3\Lib\site-packages\sklearn\utils\_set_output.py", line 140, in wrapped
    data_to_wrap = f(self, X, *args, **kwargs)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\USER\AppData\Local\Temp\ipykernel_11796\1151761980.py", line 36, in transform
  File "c:\Users\USER\anaconda3\Lib\site-packages\torch\nn\modules\module.py", line 1518, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\USER\anaconda3\Lib\site-packages\torch\nn\modules\module.py", line 1527, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\USER\anaconda3\Lib\site-packages\torch\nn\modules\container.py", line 215, in forward
    input = module(input)
            ^^^^^^^^^^^^^
  File "c:\Users\USER\anaconda3\Lib\site-packages\torch\nn\modules\module.py", line 1518, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\USER\anaconda3\Lib\site-packages\torch\nn\modules\module.py", line 1527, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\USER\anaconda3\Lib\site-packages\torch\nn\modules\linear.py", line 114, in forward
    return F.linear(input, self.weight, self.bias)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
RuntimeError: mat1 and mat2 shapes cannot be multiplied (67x10 and 128x512)
