In [1]:
import numpy as np 
import pandas as pd 

pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV,RandomizedSearchCV
from sklearn.preprocessing import OrdinalEncoder, OneHotEncoder, RobustScaler
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer, make_column_transformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression,ElasticNet, Lasso
from DataTransformer import columnAdderTransformer,columnDropperTransformer
from time import time

In [2]:
train = pd.read_csv('data/train.csv')
test = pd.read_csv('data/test.csv')

In [3]:
# Numeric features
numeric_features = [ item for item in train._get_numeric_data().columns.tolist() if item not in['Id','PID','SalePrice','Year Built', 'Year Remod/Add','Yr Sold']]
numeric_features = numeric_features + ['Years of Construction','Years of Remodel']

# Non ordinal categorical features
non_ordinal_categ = ['MS Zoning', 'Street', 'Lot Shape','Land Contour','Utilities',\
                     'Lot Config','Neighborhood','Bldg Type','House Style','Roof Style', 'Roof Matl',\
                     'Mas Vnr Type','Foundation','Heating','Electrical','Garage Type','Sale Type',\
                    'Condition 1','Condition 2','Exterior 1st','Exterior 2nd']

# Ordinal categorical features
ordinal_categ_features = ['Land Slope','Exter Qual','Exter Cond','Bsmt Qual','Bsmt Cond',\
                          'Bsmt Exposure','BsmtFin Type 1','BsmtFin Type 2','Heating QC','Central Air',\
                          'Kitchen Qual','Functional','Garage Finish','Garage Qual','Garage Cond','Paved Drive']

# Labels
Land_Slope =[['NA','Gtl', 'Mod', 'Sev']]
Exter_Qual = [['NA','Po', 'Fa','TA', 'Gd', 'Ex']]
Exter_Cond = [['NA','Po', 'Fa', 'TA', 'Gd', 'Ex']]
Bsmt_Qual = [['NA', 'Po', 'Fa','TA', 'Gd', 'Ex']]
Bsmt_Cond = [['NA', 'Po', 'Fa','TA', 'Gd', 'Ex']]
Bsmt_Exposure = [['NA', 'No', 'Mn', 'Av', 'Gd']]
BsmtFinType1 = [['NA', 'Unf', 'LwQ','Rec','BLQ','ALQ','GLQ']]
BsmtFinType2 = [['NA', 'Unf', 'LwQ','Rec','BLQ','ALQ','GLQ']]
Heating_QC = [['NA','Po', 'Fa', 'TA', 'Gd', 'Ex']]
Central_Air = [['NA','N','Y']]
Kitchen_Qual = [['NA','Po', 'Fa', 'TA', 'Gd', 'Ex']]
Functional = [['NA','Sal','Sev','Maj2','Maj1','Mod','Min2','Min1','Typ']]
Garage_Finish = [['NA','Unf','RFn','Fin']]
Garage_Qual = [['NA', 'Po', 'Fa','TA', 'Gd', 'Ex']]
Garage_Cond = [['NA', 'Po', 'Fa','TA', 'Gd', 'Ex']]
Paved_Drive = [['NA','N','P','Y']]

In [4]:
X = train.drop('SalePrice', axis=1)
y = train['SalePrice']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=2)


In [6]:
col_transformer = Pipeline(steps=[ 
    ('Adder',columnAdderTransformer()),
    ('dropper',columnDropperTransformer(['Fireplace Qu','Fence','Alley','Misc Feature','Pool QC','Year Built', 'Year Remod/Add','Yr Sold']))
    ]) 


numeric_transformer = Pipeline(steps=[('imputer', SimpleImputer(strategy='median')) ]) 


categorical_features = non_ordinal_categ
categorical_transformer = Pipeline(steps=[ 
    ('imputer', SimpleImputer(strategy='constant', fill_value='missing')), 
    ('onehot', OneHotEncoder(handle_unknown='ignore'))]) 

In [7]:
u = col_transformer.fit_transform(X,y)

# position dict for ordinal_categ_features
l_cols = {v:k for k,v in zip(range(len(u.columns)),u.columns)} 

ocpos =[]
for i in ordinal_categ_features:
    ocpos.append(l_cols[i])
    
# positions for ordinal category
ocpos

[11, 25, 26, 28, 29, 30, 31, 33, 38, 39, 51, 53, 57, 60, 61, 62]

In [8]:
preprocessor = ColumnTransformer( transformers=[ 
    ('o1',OrdinalEncoder(categories= Land_Slope,handle_unknown='use_encoded_value', unknown_value=-1), [11]),
    ('o2',OrdinalEncoder(categories= Exter_Qual,handle_unknown='use_encoded_value', unknown_value=-1), [25]),
    ('o3',OrdinalEncoder(categories= Exter_Cond,handle_unknown='use_encoded_value', unknown_value=-1), [26]),
    ('o4',OrdinalEncoder(categories= Bsmt_Qual,handle_unknown='use_encoded_value', unknown_value=-1), [28]),
    ('o5',OrdinalEncoder(categories= Bsmt_Cond,handle_unknown='use_encoded_value', unknown_value=-1), [29]),
    ('o6',OrdinalEncoder(categories= Bsmt_Exposure,handle_unknown='use_encoded_value', unknown_value=-1), [30]),
    ('o7',OrdinalEncoder(categories= BsmtFinType1,handle_unknown='use_encoded_value', unknown_value=-1), [31]),
    ('o8',OrdinalEncoder(categories= BsmtFinType2,handle_unknown='use_encoded_value', unknown_value=-1), [33]),
    ('o9',OrdinalEncoder(categories= Heating_QC,handle_unknown='use_encoded_value', unknown_value=-1), [38]),
    ('o10',OrdinalEncoder(categories= Central_Air,handle_unknown='use_encoded_value', unknown_value=-1), [39]),
    ('o11',OrdinalEncoder(categories= Kitchen_Qual,handle_unknown='use_encoded_value', unknown_value=-1), [51]),
    ('o12',OrdinalEncoder(categories= Functional,handle_unknown='use_encoded_value', unknown_value=-1), [53]),
    ('o13',OrdinalEncoder(categories= Garage_Finish,handle_unknown='use_encoded_value', unknown_value=-1), [57]),
    ('o14',OrdinalEncoder(categories= Garage_Qual,handle_unknown='use_encoded_value', unknown_value=-1), [60]),
    ('o15',OrdinalEncoder(categories= Garage_Cond,handle_unknown='use_encoded_value', unknown_value=-1), [61]),
    ('o16',OrdinalEncoder(categories= Paved_Drive,handle_unknown='use_encoded_value', unknown_value=-1), [62]), 
    ('num', numeric_transformer, numeric_features), 
    ('cat', categorical_transformer, categorical_features)])


In [9]:
pipeline = Pipeline(steps=[
                      ('transformer', col_transformer),
                      ('preprocessor', preprocessor),
                      ('scaler', RobustScaler()),
                      ('regressor', LinearRegression())
])


In [10]:
pipe = pipeline.fit(X_train,y_train)
pipe

In [11]:
print('Train score:',pipe.score(X_train, y_train))
print('Test score:',pipe.score(X_test,y_test))

Train score: 0.9171606677404956
Test score: 0.9146401169403724


In [16]:
pipeline_0 = Pipeline(steps=[
                      ('transformer', col_transformer),
                      ('preprocessor', preprocessor),
                      ('scaler', RobustScaler()),
                      ('EN',ElasticNet())
])

params0 = { 'EN__alpha':[0.25,0.4,0.5,1.5,3,6],
            'EN__l1_ratio':[0.05,0.2,0.5,0.75,1.0]}
                
t0 = time()
print("Fitting started...")
search0 = RandomizedSearchCV(pipeline_0, param_distributions=params0, verbose=8)
search0.fit(X_train,y_train)
print(f"Fitting took {time() - t0:0.3f}s.")

Fitting started...
Fitting 5 folds for each of 10 candidates, totalling 50 fits
[CV 1/5] END ..EN__alpha=0.4, EN__l1_ratio=0.75;, score=0.486 total time=   0.1s
[CV 2/5] END ..EN__alpha=0.4, EN__l1_ratio=0.75;, score=0.897 total time=   0.2s
[CV 3/5] END ..EN__alpha=0.4, EN__l1_ratio=0.75;, score=0.864 total time=   0.1s
[CV 4/5] END ..EN__alpha=0.4, EN__l1_ratio=0.75;, score=0.859 total time=   0.1s
[CV 5/5] END ..EN__alpha=0.4, EN__l1_ratio=0.75;, score=0.880 total time=   0.1s
[CV 1/5] END ...EN__alpha=0.5, EN__l1_ratio=0.2;, score=0.567 total time=   0.1s
[CV 2/5] END ...EN__alpha=0.5, EN__l1_ratio=0.2;, score=0.874 total time=   0.1s
[CV 3/5] END ...EN__alpha=0.5, EN__l1_ratio=0.2;, score=0.842 total time=   0.1s
[CV 4/5] END ...EN__alpha=0.5, EN__l1_ratio=0.2;, score=0.836 total time=   0.1s
[CV 5/5] END ...EN__alpha=0.5, EN__l1_ratio=0.2;, score=0.855 total time=   0.1s
[CV 1/5] END ..EN__alpha=0.25, EN__l1_ratio=0.5;, score=0.497 total time=   0.1s
[CV 2/5] END ..EN__alpha=0.25

  model = cd_fast.enet_coordinate_descent(


[CV 1/5] END .....EN__alpha=6, EN__l1_ratio=1.0;, score=0.437 total time=   0.3s


  model = cd_fast.enet_coordinate_descent(


[CV 2/5] END .....EN__alpha=6, EN__l1_ratio=1.0;, score=0.078 total time=   0.3s


  model = cd_fast.enet_coordinate_descent(


[CV 3/5] END .....EN__alpha=6, EN__l1_ratio=1.0;, score=0.880 total time=   0.3s


  model = cd_fast.enet_coordinate_descent(


[CV 4/5] END .....EN__alpha=6, EN__l1_ratio=1.0;, score=0.891 total time=   0.3s


  model = cd_fast.enet_coordinate_descent(


[CV 5/5] END .....EN__alpha=6, EN__l1_ratio=1.0;, score=0.899 total time=   0.3s
[CV 1/5] END ..EN__alpha=0.25, EN__l1_ratio=0.2;, score=0.523 total time=   0.1s
[CV 2/5] END ..EN__alpha=0.25, EN__l1_ratio=0.2;, score=0.889 total time=   0.1s
[CV 3/5] END ..EN__alpha=0.25, EN__l1_ratio=0.2;, score=0.856 total time=   0.1s
[CV 4/5] END ..EN__alpha=0.25, EN__l1_ratio=0.2;, score=0.850 total time=   0.1s
[CV 5/5] END ..EN__alpha=0.25, EN__l1_ratio=0.2;, score=0.871 total time=   0.1s
[CV 1/5] END ..EN__alpha=0.5, EN__l1_ratio=0.05;, score=0.578 total time=   0.1s
[CV 2/5] END ..EN__alpha=0.5, EN__l1_ratio=0.05;, score=0.869 total time=   0.1s
[CV 3/5] END ..EN__alpha=0.5, EN__l1_ratio=0.05;, score=0.837 total time=   0.1s
[CV 4/5] END ..EN__alpha=0.5, EN__l1_ratio=0.05;, score=0.832 total time=   0.1s
[CV 5/5] END ..EN__alpha=0.5, EN__l1_ratio=0.05;, score=0.850 total time=   0.1s
[CV 1/5] END .....EN__alpha=6, EN__l1_ratio=0.5;, score=0.639 total time=   0.1s
[CV 2/5] END .....EN__alpha=

In [17]:
print('Best CV score:',search0.best_score_)
print('Best Model parameters:',search0.best_params_)
print('Test score:',search0.score(X_test,y_test))

Best CV score: 0.7978993326977072
Best Model parameters: {'EN__l1_ratio': 0.2, 'EN__alpha': 0.25}
Test score: 0.893933369720262


In [12]:
pipeline_1 = Pipeline(steps=[
                      ('transformer', col_transformer),
                      ('preprocessor', preprocessor),
                      ('scaler', RobustScaler()),
                      ('poly',PolynomialFeatures()),
                      ('EN',ElasticNet())
])

paramsPolyEN = {'poly__degree':[1,2],
                'EN__alpha':[0.25,0.5,1.5,3,5],
                'EN__l1_ratio':[0.25,0.5,0.75,1.0]}
                
t0 = time()
print("Fitting started...")
search1 = RandomizedSearchCV(pipeline_1, param_distributions=paramsPolyEN, verbose=8)
search1.fit(X_train,y_train)
print(f"Fitting took {time() - t0:0.3f}s.")

Fitting started...
Fitting 5 folds for each of 10 candidates, totalling 50 fits


  model = cd_fast.enet_coordinate_descent(


[CV 1/5] END EN__alpha=3, EN__l1_ratio=0.75, poly__degree=2;, score=-13.492 total time=  10.3s


  model = cd_fast.enet_coordinate_descent(


[CV 2/5] END EN__alpha=3, EN__l1_ratio=0.75, poly__degree=2;, score=0.513 total time=  10.8s


  model = cd_fast.enet_coordinate_descent(


[CV 3/5] END EN__alpha=3, EN__l1_ratio=0.75, poly__degree=2;, score=0.778 total time=  10.8s


  model = cd_fast.enet_coordinate_descent(


[CV 4/5] END EN__alpha=3, EN__l1_ratio=0.75, poly__degree=2;, score=-0.560 total time=  10.9s


  model = cd_fast.enet_coordinate_descent(


[CV 5/5] END EN__alpha=3, EN__l1_ratio=0.75, poly__degree=2;, score=0.776 total time=  11.0s
[CV 1/5] END EN__alpha=0.5, EN__l1_ratio=0.5, poly__degree=1;, score=0.537 total time=   0.1s
[CV 2/5] END EN__alpha=0.5, EN__l1_ratio=0.5, poly__degree=1;, score=0.885 total time=   0.1s
[CV 3/5] END EN__alpha=0.5, EN__l1_ratio=0.5, poly__degree=1;, score=0.852 total time=   0.1s
[CV 4/5] END EN__alpha=0.5, EN__l1_ratio=0.5, poly__degree=1;, score=0.846 total time=   0.1s
[CV 5/5] END EN__alpha=0.5, EN__l1_ratio=0.5, poly__degree=1;, score=0.867 total time=   0.1s


  model = cd_fast.enet_coordinate_descent(


[CV 1/5] END EN__alpha=3, EN__l1_ratio=0.25, poly__degree=2;, score=-12.696 total time=  11.8s


  model = cd_fast.enet_coordinate_descent(


[CV 2/5] END EN__alpha=3, EN__l1_ratio=0.25, poly__degree=2;, score=0.566 total time=  11.4s


  model = cd_fast.enet_coordinate_descent(


[CV 3/5] END EN__alpha=3, EN__l1_ratio=0.25, poly__degree=2;, score=0.796 total time=  12.6s


  model = cd_fast.enet_coordinate_descent(


[CV 4/5] END EN__alpha=3, EN__l1_ratio=0.25, poly__degree=2;, score=0.248 total time=  12.5s


  model = cd_fast.enet_coordinate_descent(


[CV 5/5] END EN__alpha=3, EN__l1_ratio=0.25, poly__degree=2;, score=0.778 total time=  11.7s


  model = cd_fast.enet_coordinate_descent(


[CV 1/5] END EN__alpha=0.25, EN__l1_ratio=1.0, poly__degree=2;, score=-1.455 total time=   7.9s


  model = cd_fast.enet_coordinate_descent(


[CV 2/5] END EN__alpha=0.25, EN__l1_ratio=1.0, poly__degree=2;, score=0.440 total time=   7.9s


  model = cd_fast.enet_coordinate_descent(


[CV 3/5] END EN__alpha=0.25, EN__l1_ratio=1.0, poly__degree=2;, score=0.545 total time=   8.0s


  model = cd_fast.enet_coordinate_descent(


[CV 4/5] END EN__alpha=0.25, EN__l1_ratio=1.0, poly__degree=2;, score=0.522 total time=   7.7s


  model = cd_fast.enet_coordinate_descent(


[CV 5/5] END EN__alpha=0.25, EN__l1_ratio=1.0, poly__degree=2;, score=0.688 total time=   7.6s


  model = cd_fast.enet_coordinate_descent(


[CV 1/5] END EN__alpha=3, EN__l1_ratio=1.0, poly__degree=2;, score=-4.107 total time=   6.4s


  model = cd_fast.enet_coordinate_descent(


[CV 2/5] END EN__alpha=3, EN__l1_ratio=1.0, poly__degree=2;, score=0.180 total time=   6.7s


  model = cd_fast.enet_coordinate_descent(


[CV 3/5] END EN__alpha=3, EN__l1_ratio=1.0, poly__degree=2;, score=0.433 total time=   6.6s


  model = cd_fast.enet_coordinate_descent(


[CV 4/5] END EN__alpha=3, EN__l1_ratio=1.0, poly__degree=2;, score=0.149 total time=   6.6s


  model = cd_fast.enet_coordinate_descent(


[CV 5/5] END EN__alpha=3, EN__l1_ratio=1.0, poly__degree=2;, score=0.695 total time=   6.5s


  model = cd_fast.enet_coordinate_descent(


[CV 1/5] END EN__alpha=5, EN__l1_ratio=1.0, poly__degree=1;, score=0.436 total time=   0.3s


  model = cd_fast.enet_coordinate_descent(


[CV 2/5] END EN__alpha=5, EN__l1_ratio=1.0, poly__degree=1;, score=0.066 total time=   0.3s


  model = cd_fast.enet_coordinate_descent(


[CV 3/5] END EN__alpha=5, EN__l1_ratio=1.0, poly__degree=1;, score=0.879 total time=   0.3s


  model = cd_fast.enet_coordinate_descent(


[CV 4/5] END EN__alpha=5, EN__l1_ratio=1.0, poly__degree=1;, score=0.890 total time=   0.3s


  model = cd_fast.enet_coordinate_descent(


[CV 5/5] END EN__alpha=5, EN__l1_ratio=1.0, poly__degree=1;, score=0.898 total time=   0.3s


  model = cd_fast.enet_coordinate_descent(


[CV 1/5] END EN__alpha=0.5, EN__l1_ratio=0.5, poly__degree=2;, score=-22.280 total time=  11.4s


  model = cd_fast.enet_coordinate_descent(


[CV 2/5] END EN__alpha=0.5, EN__l1_ratio=0.5, poly__degree=2;, score=0.340 total time=  11.7s


  model = cd_fast.enet_coordinate_descent(


[CV 3/5] END EN__alpha=0.5, EN__l1_ratio=0.5, poly__degree=2;, score=0.749 total time=  11.3s


  model = cd_fast.enet_coordinate_descent(


[CV 4/5] END EN__alpha=0.5, EN__l1_ratio=0.5, poly__degree=2;, score=-1.713 total time=  11.2s


  model = cd_fast.enet_coordinate_descent(


[CV 5/5] END EN__alpha=0.5, EN__l1_ratio=0.5, poly__degree=2;, score=0.747 total time=  11.1s


  model = cd_fast.enet_coordinate_descent(


[CV 1/5] END EN__alpha=5, EN__l1_ratio=1.0, poly__degree=2;, score=-6.251 total time=   6.7s


  model = cd_fast.enet_coordinate_descent(


[CV 2/5] END EN__alpha=5, EN__l1_ratio=1.0, poly__degree=2;, score=0.069 total time=   6.3s


  model = cd_fast.enet_coordinate_descent(


[CV 3/5] END EN__alpha=5, EN__l1_ratio=1.0, poly__degree=2;, score=0.429 total time=   6.6s


  model = cd_fast.enet_coordinate_descent(


[CV 4/5] END EN__alpha=5, EN__l1_ratio=1.0, poly__degree=2;, score=0.053 total time=   6.5s


  model = cd_fast.enet_coordinate_descent(


[CV 5/5] END EN__alpha=5, EN__l1_ratio=1.0, poly__degree=2;, score=0.660 total time=   6.3s


  model = cd_fast.enet_coordinate_descent(


[CV 1/5] END EN__alpha=0.5, EN__l1_ratio=1.0, poly__degree=1;, score=0.429 total time=   0.4s


  model = cd_fast.enet_coordinate_descent(


[CV 2/5] END EN__alpha=0.5, EN__l1_ratio=1.0, poly__degree=1;, score=0.008 total time=   0.4s


  model = cd_fast.enet_coordinate_descent(


[CV 3/5] END EN__alpha=0.5, EN__l1_ratio=1.0, poly__degree=1;, score=0.875 total time=   0.4s


  model = cd_fast.enet_coordinate_descent(


[CV 4/5] END EN__alpha=0.5, EN__l1_ratio=1.0, poly__degree=1;, score=0.888 total time=   0.4s


  model = cd_fast.enet_coordinate_descent(


[CV 5/5] END EN__alpha=0.5, EN__l1_ratio=1.0, poly__degree=1;, score=0.897 total time=   0.3s


  model = cd_fast.enet_coordinate_descent(


[CV 1/5] END EN__alpha=3, EN__l1_ratio=0.5, poly__degree=2;, score=-12.280 total time=  12.2s


  model = cd_fast.enet_coordinate_descent(


[CV 2/5] END EN__alpha=3, EN__l1_ratio=0.5, poly__degree=2;, score=0.548 total time=  12.1s


  model = cd_fast.enet_coordinate_descent(


[CV 3/5] END EN__alpha=3, EN__l1_ratio=0.5, poly__degree=2;, score=0.792 total time=  13.0s


  model = cd_fast.enet_coordinate_descent(


[CV 4/5] END EN__alpha=3, EN__l1_ratio=0.5, poly__degree=2;, score=0.015 total time=  12.4s


  model = cd_fast.enet_coordinate_descent(


[CV 5/5] END EN__alpha=3, EN__l1_ratio=0.5, poly__degree=2;, score=0.779 total time=  12.8s
Fitting took 341.491s.


In [13]:
print('Best CV score:',search1.best_score_)
print('Best Model parameters:',search1.best_params_)
print('Test score:',search1.score(X_test,y_test))

Best CV score: 0.797493977845869
Best Model parameters: {'poly__degree': 1, 'EN__l1_ratio': 0.5, 'EN__alpha': 0.5}
Test score: 0.8909253654999716


## Conclusion

After comparing a few models, I found the linear model without any feature selection or regularization performs the best

In [18]:
names = pipeline.named_steps['preprocessor'].transformers_[0][1].get_feature_names_out()
for i in range(1,18):
    names = np.concatenate([names, pipeline.named_steps['preprocessor'].transformers_[i][1].get_feature_names_out()])

len(names)

209

In [21]:
trsf = Pipeline(steps=[
                      ('transformer', col_transformer),
                      ('preprocessor', preprocessor),])
df = trsf.transform(X_train)
print(df)

[[1. 4. 3. ... 0. 0. 1.]
 [1. 3. 3. ... 0. 1. 0.]
 [1. 3. 3. ... 0. 0. 0.]
 ...
 [1. 3. 3. ... 1. 0. 0.]
 [1. 4. 3. ... 1. 0. 0.]
 [1. 4. 3. ... 1. 0. 0.]]


In [22]:
## specify feature names as columns
all_features = pd.DataFrame(df, columns=names)
all_features.head()

Unnamed: 0,Land Slope,Exter Qual,Exter Cond,Bsmt Qual,Bsmt Cond,Bsmt Exposure,BsmtFin Type 1,BsmtFin Type 2,Heating QC,Central Air,Kitchen Qual,Functional,Garage Finish,Garage Qual,Garage Cond,Paved Drive,MS SubClass,Lot Frontage,Lot Area,Overall Qual,Overall Cond,Mas Vnr Area,BsmtFin SF 1,BsmtFin SF 2,Bsmt Unf SF,Total Bsmt SF,1st Flr SF,2nd Flr SF,Low Qual Fin SF,Gr Liv Area,Bsmt Full Bath,Bsmt Half Bath,Full Bath,Half Bath,Bedroom AbvGr,Kitchen AbvGr,TotRms AbvGrd,Fireplaces,Garage Yr Blt,Garage Cars,Garage Area,Wood Deck SF,Open Porch SF,Enclosed Porch,3Ssn Porch,Screen Porch,Pool Area,Misc Val,Mo Sold,Years of Construction,Years of Remodel,MS Zoning_A (agr),MS Zoning_C (all),MS Zoning_FV,MS Zoning_RH,MS Zoning_RL,MS Zoning_RM,Street_Grvl,Street_Pave,Lot Shape_IR1,Lot Shape_IR2,Lot Shape_IR3,Lot Shape_Reg,Land Contour_Bnk,Land Contour_HLS,Land Contour_Low,Land Contour_Lvl,Utilities_AllPub,Utilities_NoSeWa,Lot Config_Corner,Lot Config_CulDSac,Lot Config_FR2,Lot Config_FR3,Lot Config_Inside,Neighborhood_Blmngtn,Neighborhood_Blueste,Neighborhood_BrDale,Neighborhood_BrkSide,Neighborhood_ClearCr,Neighborhood_CollgCr,Neighborhood_Crawfor,Neighborhood_Edwards,Neighborhood_Gilbert,Neighborhood_Greens,Neighborhood_GrnHill,Neighborhood_IDOTRR,Neighborhood_Landmrk,Neighborhood_MeadowV,Neighborhood_Mitchel,Neighborhood_NAmes,Neighborhood_NPkVill,Neighborhood_NWAmes,Neighborhood_NoRidge,Neighborhood_NridgHt,Neighborhood_OldTown,Neighborhood_SWISU,Neighborhood_Sawyer,Neighborhood_SawyerW,Neighborhood_Somerst,Neighborhood_StoneBr,Neighborhood_Timber,Neighborhood_Veenker,Bldg Type_1Fam,Bldg Type_2fmCon,Bldg Type_Duplex,Bldg Type_Twnhs,Bldg Type_TwnhsE,House Style_1.5Fin,House Style_1.5Unf,House Style_1Story,House Style_2.5Fin,House Style_2.5Unf,House Style_2Story,House Style_SFoyer,House Style_SLvl,Roof Style_Flat,Roof Style_Gable,Roof Style_Gambrel,Roof Style_Hip,Roof Style_Mansard,Roof Style_Shed,Roof Matl_ClyTile,Roof Matl_CompShg,Roof Matl_Membran,Roof Matl_Tar&Grv,Roof Matl_WdShake,Roof Matl_WdShngl,Mas Vnr Type_BrkCmn,Mas Vnr Type_BrkFace,Mas Vnr Type_None,Mas Vnr Type_Stone,Mas Vnr Type_missing,Foundation_BrkTil,Foundation_CBlock,Foundation_PConc,Foundation_Slab,Foundation_Stone,Foundation_Wood,Heating_GasA,Heating_GasW,Heating_Grav,Heating_OthW,Heating_Wall,Electrical_FuseA,Electrical_FuseF,Electrical_FuseP,Electrical_Mix,Electrical_SBrkr,Garage Type_2Types,Garage Type_Attchd,Garage Type_Basment,Garage Type_BuiltIn,Garage Type_CarPort,Garage Type_Detchd,Garage Type_missing,Sale Type_COD,Sale Type_CWD,Sale Type_Con,Sale Type_ConLD,Sale Type_ConLI,Sale Type_ConLw,Sale Type_New,Sale Type_Oth,Sale Type_WD,Condition 1_Artery,Condition 1_Feedr,Condition 1_Norm,Condition 1_PosA,Condition 1_PosN,Condition 1_RRAe,Condition 1_RRAn,Condition 1_RRNe,Condition 1_RRNn,Condition 2_Artery,Condition 2_Feedr,Condition 2_Norm,Condition 2_PosA,Condition 2_PosN,Condition 2_RRAe,Condition 2_RRNn,Exterior 1st_AsbShng,Exterior 1st_BrkComm,Exterior 1st_BrkFace,Exterior 1st_CBlock,Exterior 1st_CemntBd,Exterior 1st_HdBoard,Exterior 1st_ImStucc,Exterior 1st_MetalSd,Exterior 1st_Plywood,Exterior 1st_Stone,Exterior 1st_Stucco,Exterior 1st_VinylSd,Exterior 1st_Wd Sdng,Exterior 1st_WdShing,Exterior 2nd_AsbShng,Exterior 2nd_AsphShn,Exterior 2nd_Brk Cmn,Exterior 2nd_BrkFace,Exterior 2nd_CBlock,Exterior 2nd_CmentBd,Exterior 2nd_HdBoard,Exterior 2nd_ImStucc,Exterior 2nd_MetalSd,Exterior 2nd_Plywood,Exterior 2nd_Stone,Exterior 2nd_Stucco,Exterior 2nd_VinylSd,Exterior 2nd_Wd Sdng,Exterior 2nd_Wd Shng
0,1.0,4.0,3.0,5.0,3.0,4.0,6.0,1.0,3.0,2.0,3.0,8.0,2.0,3.0,3.0,3.0,85.0,68.0,9927.0,7.0,5.0,252.0,1005.0,0.0,42.0,1047.0,1083.0,0.0,0.0,1083.0,1.0,0.0,1.0,0.0,2.0,1.0,5.0,1.0,1976.0,2.0,596.0,444.0,0.0,40.0,0.0,0.0,0.0,0.0,7.0,30.0,30.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1,1.0,3.0,3.0,3.0,3.0,1.0,5.0,2.0,3.0,2.0,3.0,8.0,2.0,3.0,3.0,3.0,20.0,68.0,14357.0,5.0,5.0,0.0,311.0,167.0,386.0,864.0,1187.0,0.0,0.0,1187.0,1.0,0.0,1.0,0.0,2.0,1.0,6.0,1.0,1961.0,2.0,440.0,128.0,0.0,0.0,0.0,0.0,0.0,0.0,8.0,46.0,46.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2,1.0,3.0,3.0,3.0,3.0,1.0,5.0,4.0,3.0,2.0,4.0,8.0,2.0,3.0,3.0,3.0,20.0,70.0,8050.0,5.0,6.0,0.0,474.0,38.0,437.0,949.0,1107.0,0.0,0.0,1107.0,1.0,0.0,1.0,0.0,3.0,1.0,5.0,0.0,1967.0,1.0,308.0,88.0,64.0,0.0,0.0,0.0,0.0,0.0,3.0,40.0,14.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1.0,4.0,3.0,4.0,3.0,3.0,6.0,1.0,5.0,2.0,4.0,8.0,3.0,3.0,3.0,3.0,50.0,56.0,14720.0,8.0,5.0,579.0,816.0,0.0,1217.0,2033.0,2053.0,1185.0,0.0,3238.0,1.0,0.0,2.0,1.0,4.0,1.0,9.0,1.0,1996.0,3.0,666.0,283.0,86.0,0.0,0.0,0.0,0.0,0.0,3.0,15.0,14.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
4,1.0,4.0,3.0,4.0,3.0,1.0,1.0,1.0,5.0,2.0,4.0,8.0,3.0,3.0,3.0,3.0,60.0,65.0,10616.0,7.0,5.0,0.0,0.0,0.0,628.0,628.0,628.0,728.0,0.0,1356.0,0.0,0.0,2.0,1.0,3.0,1.0,6.0,1.0,2007.0,2.0,484.0,100.0,24.0,0.0,0.0,0.0,0.0,0.0,3.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0


In [24]:
coefs = pipeline.named_steps["regressor"].coef_.flatten()
coef = pd.DataFrame(zip(all_features.columns, coefs), columns=["feature", "coef"])
coef

Unnamed: 0,feature,coef
0,Land Slope,-3316.595801
1,Exter Qual,9340.565347
2,Exter Cond,-1610.904433
3,Bsmt Qual,2897.275115
4,Bsmt Cond,-3975.109726
5,Bsmt Exposure,4686.358766
6,BsmtFin Type 1,2385.981162
7,BsmtFin Type 2,-398.97776
8,Heating QC,2387.573432
9,Central Air,-7016.334572
