<div class="alert alert-block alert-info">
    Import <b>Libraries </b> 
</div>

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler, FunctionTransformer
from sklearn.feature_selection import SelectPercentile, mutual_info_regression
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, root_mean_squared_error, r2_score

<div class="alert alert-block alert-info">
    Load <b>Data </b> 
</div>

In [2]:
train_df = pd.read_csv('train.csv')
test_df = pd.read_csv('test.csv')

In [3]:
id_col = test_df['Id']
print(len(id_col))

1459


In [4]:
train_df.drop('Id', axis=1, inplace=True)
test_df.drop('Id', axis=1, inplace=True)

<div class="alert alert-block alert-info">
    <b>Understand<b> and <b>Explore </b>data
</div>

In [5]:
train_df.head(4)

Unnamed: 0,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,LotConfig,...,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
0,60,RL,65.0,8450,Pave,,Reg,Lvl,AllPub,Inside,...,0,,,,0,2,2008,WD,Normal,208500
1,20,RL,80.0,9600,Pave,,Reg,Lvl,AllPub,FR2,...,0,,,,0,5,2007,WD,Normal,181500
2,60,RL,68.0,11250,Pave,,IR1,Lvl,AllPub,Inside,...,0,,,,0,9,2008,WD,Normal,223500
3,70,RL,60.0,9550,Pave,,IR1,Lvl,AllPub,Corner,...,0,,,,0,2,2006,WD,Abnorml,140000


In [6]:
test_df.head(4)

Unnamed: 0,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,LotConfig,...,ScreenPorch,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition
0,20,RH,80.0,11622,Pave,,Reg,Lvl,AllPub,Inside,...,120,0,,MnPrv,,0,6,2010,WD,Normal
1,20,RL,81.0,14267,Pave,,IR1,Lvl,AllPub,Corner,...,0,0,,,Gar2,12500,6,2010,WD,Normal
2,60,RL,74.0,13830,Pave,,IR1,Lvl,AllPub,Inside,...,0,0,,MnPrv,,0,3,2010,WD,Normal
3,60,RL,78.0,9978,Pave,,IR1,Lvl,AllPub,Inside,...,0,0,,,,0,6,2010,WD,Normal


In [7]:
train_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1460 entries, 0 to 1459
Data columns (total 80 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   MSSubClass     1460 non-null   int64  
 1   MSZoning       1460 non-null   object 
 2   LotFrontage    1201 non-null   float64
 3   LotArea        1460 non-null   int64  
 4   Street         1460 non-null   object 
 5   Alley          91 non-null     object 
 6   LotShape       1460 non-null   object 
 7   LandContour    1460 non-null   object 
 8   Utilities      1460 non-null   object 
 9   LotConfig      1460 non-null   object 
 10  LandSlope      1460 non-null   object 
 11  Neighborhood   1460 non-null   object 
 12  Condition1     1460 non-null   object 
 13  Condition2     1460 non-null   object 
 14  BldgType       1460 non-null   object 
 15  HouseStyle     1460 non-null   object 
 16  OverallQual    1460 non-null   int64  
 17  OverallCond    1460 non-null   int64  
 18  YearBuil

In [8]:
test_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1459 entries, 0 to 1458
Data columns (total 79 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   MSSubClass     1459 non-null   int64  
 1   MSZoning       1455 non-null   object 
 2   LotFrontage    1232 non-null   float64
 3   LotArea        1459 non-null   int64  
 4   Street         1459 non-null   object 
 5   Alley          107 non-null    object 
 6   LotShape       1459 non-null   object 
 7   LandContour    1459 non-null   object 
 8   Utilities      1457 non-null   object 
 9   LotConfig      1459 non-null   object 
 10  LandSlope      1459 non-null   object 
 11  Neighborhood   1459 non-null   object 
 12  Condition1     1459 non-null   object 
 13  Condition2     1459 non-null   object 
 14  BldgType       1459 non-null   object 
 15  HouseStyle     1459 non-null   object 
 16  OverallQual    1459 non-null   int64  
 17  OverallCond    1459 non-null   int64  
 18  YearBuil

In [9]:
train_df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
MSSubClass,1460.0,56.89726,42.300571,20.0,20.0,50.0,70.0,190.0
LotFrontage,1201.0,70.049958,24.284752,21.0,59.0,69.0,80.0,313.0
LotArea,1460.0,10516.828082,9981.264932,1300.0,7553.5,9478.5,11601.5,215245.0
OverallQual,1460.0,6.099315,1.382997,1.0,5.0,6.0,7.0,10.0
OverallCond,1460.0,5.575342,1.112799,1.0,5.0,5.0,6.0,9.0
YearBuilt,1460.0,1971.267808,30.202904,1872.0,1954.0,1973.0,2000.0,2010.0
YearRemodAdd,1460.0,1984.865753,20.645407,1950.0,1967.0,1994.0,2004.0,2010.0
MasVnrArea,1452.0,103.685262,181.066207,0.0,0.0,0.0,166.0,1600.0
BsmtFinSF1,1460.0,443.639726,456.098091,0.0,0.0,383.5,712.25,5644.0
BsmtFinSF2,1460.0,46.549315,161.319273,0.0,0.0,0.0,0.0,1474.0


In [10]:
test_df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
MSSubClass,1459.0,57.378341,42.74688,20.0,20.0,50.0,70.0,190.0
LotFrontage,1232.0,68.580357,22.376841,21.0,58.0,67.0,80.0,200.0
LotArea,1459.0,9819.161069,4955.517327,1470.0,7391.0,9399.0,11517.5,56600.0
OverallQual,1459.0,6.078821,1.436812,1.0,5.0,6.0,7.0,10.0
OverallCond,1459.0,5.553804,1.11374,1.0,5.0,5.0,6.0,9.0
YearBuilt,1459.0,1971.357779,30.390071,1879.0,1953.0,1973.0,2001.0,2010.0
YearRemodAdd,1459.0,1983.662783,21.130467,1950.0,1963.0,1992.0,2004.0,2010.0
MasVnrArea,1444.0,100.709141,177.6259,0.0,0.0,0.0,164.0,1290.0
BsmtFinSF1,1458.0,439.203704,455.268042,0.0,0.0,350.5,753.5,4010.0
BsmtFinSF2,1458.0,52.619342,176.753926,0.0,0.0,0.0,0.0,1526.0


<div class="alert alert-block alert-info">
    Handle <b>Missing<b> Values<b>
</div>

In [11]:
def find_missing_values(df):
  global missing_values, missing_values_ratio
  missing_values = df.isnull().sum()
  missing_values_ratio = (df.isnull().sum() / df.shape[0] * 100).round(2)
  missing_values_df = pd.DataFrame({
      'Missing Values': missing_values[missing_values>0],
      'Missing Values %': missing_values_ratio[missing_values_ratio>0]
  })
  print(missing_values_df)


  Drop_Col = missing_values_ratio[missing_values_ratio>50].index.tolist()
  print(f"Columns names that contain 50 percent or above 50 percent null values:\n{Drop_Col}")
  df.drop(Drop_Col, axis=1, inplace=True)

  cat_columns = [col for col in df.columns if df[col].dtype == 'object']
  num_columns = [col for col in df.columns if df[col].dtype != 'object']

  if cat_columns:
    df[cat_columns] = SimpleImputer(strategy='most_frequent').fit_transform(df[cat_columns])

  if num_columns:
    df[num_columns] = SimpleImputer(strategy='mean').fit_transform(df[num_columns])


  print('----After Imputation----')
  print(df.isnull().sum())





train_missing = find_missing_values(train_df)
train_missing

              Missing Values  Missing Values %
LotFrontage              259             17.74
Alley                   1369             93.77
MasVnrType               872             59.73
MasVnrArea                 8              0.55
BsmtQual                  37              2.53
BsmtCond                  37              2.53
BsmtExposure              38              2.60
BsmtFinType1              37              2.53
BsmtFinType2              38              2.60
Electrical                 1              0.07
FireplaceQu              690             47.26
GarageType                81              5.55
GarageYrBlt               81              5.55
GarageFinish              81              5.55
GarageQual                81              5.55
GarageCond                81              5.55
PoolQC                  1453             99.52
Fence                   1179             80.75
MiscFeature             1406             96.30
Columns names that contain 50 percent or above 50 percent nu

In [12]:
test_missing = find_missing_values(test_df)
test_missing

              Missing Values  Missing Values %
MSZoning                   4              0.27
LotFrontage              227             15.56
Alley                   1352             92.67
Utilities                  2              0.14
Exterior1st                1              0.07
Exterior2nd                1              0.07
MasVnrType               894             61.27
MasVnrArea                15              1.03
BsmtQual                  44              3.02
BsmtCond                  45              3.08
BsmtExposure              44              3.02
BsmtFinType1              42              2.88
BsmtFinSF1                 1              0.07
BsmtFinType2              42              2.88
BsmtFinSF2                 1              0.07
BsmtUnfSF                  1              0.07
TotalBsmtSF                1              0.07
BsmtFullBath               2              0.14
BsmtHalfBath               2              0.14
KitchenQual                1              0.07
Functional   

----After Imputation----
MSSubClass       0
MSZoning         0
LotFrontage      0
LotArea          0
Street           0
                ..
MiscVal          0
MoSold           0
YrSold           0
SaleType         0
SaleCondition    0
Length: 73, dtype: int64


In [13]:
print(len(train_df.columns))
print(len(test_df.columns))


75
73


<div class="alert alert-block alert-info">
    Identifying <b>Numerical<b> and <b>Categorical<b> Features
</div>

In [14]:
def get_object_col_and_category(df):
  object_col = df.select_dtypes(include='object').columns.tolist()
  int_float_col = df.select_dtypes(include=['int64', 'float64']).columns.tolist()
  print(f"Object Columns: {object_col}\nInt and Float Columns: {int_float_col}\n")

  for i,col in enumerate(df.select_dtypes(include='object').columns, start=1):
    print(f"{i}.{col} : \t {df[col].unique()}")

get_object_col_and_category(train_df)

Object Columns: ['MSZoning', 'Street', 'LotShape', 'LandContour', 'Utilities', 'LotConfig', 'LandSlope', 'Neighborhood', 'Condition1', 'Condition2', 'BldgType', 'HouseStyle', 'RoofStyle', 'RoofMatl', 'Exterior1st', 'Exterior2nd', 'ExterQual', 'ExterCond', 'Foundation', 'BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2', 'Heating', 'HeatingQC', 'CentralAir', 'Electrical', 'KitchenQual', 'Functional', 'FireplaceQu', 'GarageType', 'GarageFinish', 'GarageQual', 'GarageCond', 'PavedDrive', 'SaleType', 'SaleCondition']
Int and Float Columns: ['MSSubClass', 'LotFrontage', 'LotArea', 'OverallQual', 'OverallCond', 'YearBuilt', 'YearRemodAdd', 'MasVnrArea', 'BsmtFinSF1', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF', '1stFlrSF', '2ndFlrSF', 'LowQualFinSF', 'GrLivArea', 'BsmtFullBath', 'BsmtHalfBath', 'FullBath', 'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr', 'TotRmsAbvGrd', 'Fireplaces', 'GarageYrBlt', 'GarageCars', 'GarageArea', 'WoodDeckSF', 'OpenPorchSF', 'EnclosedPorch', '3SsnPor

In [15]:
get_object_col_and_category(test_df)

Object Columns: ['MSZoning', 'Street', 'LotShape', 'LandContour', 'Utilities', 'LotConfig', 'LandSlope', 'Neighborhood', 'Condition1', 'Condition2', 'BldgType', 'HouseStyle', 'RoofStyle', 'RoofMatl', 'Exterior1st', 'Exterior2nd', 'ExterQual', 'ExterCond', 'Foundation', 'BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2', 'Heating', 'HeatingQC', 'CentralAir', 'Electrical', 'KitchenQual', 'Functional', 'GarageType', 'GarageFinish', 'GarageQual', 'GarageCond', 'PavedDrive', 'SaleType', 'SaleCondition']
Int and Float Columns: ['MSSubClass', 'LotFrontage', 'LotArea', 'OverallQual', 'OverallCond', 'YearBuilt', 'YearRemodAdd', 'MasVnrArea', 'BsmtFinSF1', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF', '1stFlrSF', '2ndFlrSF', 'LowQualFinSF', 'GrLivArea', 'BsmtFullBath', 'BsmtHalfBath', 'FullBath', 'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr', 'TotRmsAbvGrd', 'Fireplaces', 'GarageYrBlt', 'GarageCars', 'GarageArea', 'WoodDeckSF', 'OpenPorchSF', 'EnclosedPorch', '3SsnPorch', 'ScreenPor

In [16]:
train_df['Utilities'].value_counts()

Utilities
AllPub    1459
NoSeWa       1
Name: count, dtype: int64

<div class="alert alert-block alert-info">
    Columns for <b>Encoding<b> and <b>Scaling<b>
</div>

In [17]:
le_col = ['LotShape', 'LandSlope', 'ExterQual', 'ExterCond', 'BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2', 'HeatingQC', 
'KitchenQual', 'Functional', 'FireplaceQu', 'GarageFinish', 'GarageQual', 'GarageCond', 'PavedDrive']

ohe_col = ['MSZoning', 'LandContour', 'LotConfig', 'Condition1', 'Condition2', 'BldgType', 'HouseStyle', 'RoofStyle', 'RoofMatl', 'Foundation', 'Heating', 
'Electrical', 'GarageType', 'SaleType', 'SaleCondition']

be_col = ['Street', 'Utilities', 'CentralAir']

te_col = ['Neighborhood', 'Exterior1st', 'Exterior2nd']

target_col = 'SalePrice'

In [18]:
var = train_df['SalePrice'].value_counts()
print(len(var))

663


<div class="alert alert-block alert-info">
    Encoding <b> of <b>Categorical<b> Features
</div>

In [19]:
def encode_with_labelencoding(df, le_col_list):
  for col in le_col_list:
    if col in df.columns:
        encoder = LabelEncoder()
        df[col] = encoder.fit_transform(df[col])
  print("Label Encoding apply successfully!")
       
  return df

       
lablel_encoder_transform = FunctionTransformer(encode_with_labelencoding, kw_args={"le_col_list": le_col})
train_df = lablel_encoder_transform.fit_transform(train_df)


Label Encoding apply successfully!


In [20]:
test_df = lablel_encoder_transform.transform(test_df)

Label Encoding apply successfully!


In [21]:
def encode_with_onehotencoding(df, ohe_col_list):
  for col in ohe_col_list:
    if col in df.columns:
      encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
      train_encoder = encoder.fit_transform(df[[col]])
      encoded_col_names = [f"{col}_{category}" for category in encoder.categories_[0]]
      train_encoder_df = pd.DataFrame(train_encoder, columns=encoded_col_names, index=df.index)
      df = df.drop(col, axis=1)
      df = pd.concat([df, train_encoder_df], axis=1)
      
  print("One hot Encoding apply successfully!")
      
  return df

one_hot_encoder_transform = FunctionTransformer(encode_with_onehotencoding, kw_args={'ohe_col_list': ohe_col})
train_df = one_hot_encoder_transform.fit_transform(train_df)

One hot Encoding apply successfully!


In [22]:
test_df = one_hot_encoder_transform.transform(test_df)

One hot Encoding apply successfully!


In [23]:
def encode_with_binaryencoding(df, be_col_list):
  for col in be_col_list:
    if col in df.columns:
      unique_category_train = df[col].unique()
      binary_mapping_train = {category: index for index, category in enumerate(unique_category_train)}
      df[col] = df[col].map(binary_mapping_train)
     


  print("Binary Encoding apply successfully!")
  return df


binary_encoder_transform = FunctionTransformer(encode_with_binaryencoding, kw_args={'be_col_list':be_col})
train_df = binary_encoder_transform.fit_transform(train_df)

Binary Encoding apply successfully!


In [24]:
test_df = binary_encoder_transform.transform(test_df)

Binary Encoding apply successfully!


In [25]:
def encode_with_targetencoding(df, tar_col_list, target_col, train_mapping=None):
    if train_mapping is None:
        train_mapping = {col: df.groupby(col)[target_col].mean().to_dict() for col in tar_col_list}
    
    for col in tar_col_list:
        if col in df.columns:
            df[col] = df[col].map(train_mapping[col])
            df[col].fillna(df[col].mean(), inplace=True) 
    print("Target encoding applied successfully using FunctionTransformer!")
    return df, train_mapping  

target_encoder = FunctionTransformer(
    encode_with_targetencoding, kw_args={"tar_col_list": te_col, "target_col": target_col}
)

train_df, encoding_mappings = target_encoder.fit_transform(train_df)


Target encoding applied successfully using FunctionTransformer!


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[col].fillna(df[col].mean(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[col].fillna(df[col].mean(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values alw

In [26]:
test_df,_ = encode_with_targetencoding(test_df, te_col, target_col, train_mapping=encoding_mappings)

Target encoding applied successfully using FunctionTransformer!


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[col].fillna(df[col].mean(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[col].fillna(df[col].mean(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values alw

<div class="alert alert-block alert-info">
    Feature <b>Selection </b> 
</div>

In [27]:
X = train_df.drop(columns = 'SalePrice', axis=1)
Y = train_df['SalePrice']

selecter = SelectPercentile(score_func=mutual_info_regression, percentile=20)
X_train_selected = selecter.fit_transform(X, Y)

selecter_indices = selecter.get_support(indices=True)
selected_feature_name = X.columns[selecter_indices].tolist()

print("Total features before selection",X.shape[1])
print('Total features after selection',X_train_selected.shape[1])

print('Selected feature name: ',selected_feature_name, end='\n')

Total features before selection 155
Total features after selection 31
Selected feature name:  ['MSSubClass', 'LotFrontage', 'LotArea', 'Neighborhood', 'OverallQual', 'OverallCond', 'YearBuilt', 'YearRemodAdd', 'Exterior1st', 'Exterior2nd', 'ExterQual', 'BsmtQual', 'BsmtFinType1', 'BsmtFinSF1', 'BsmtUnfSF', 'TotalBsmtSF', 'HeatingQC', '1stFlrSF', '2ndFlrSF', 'GrLivArea', 'FullBath', 'KitchenQual', 'TotRmsAbvGrd', 'Fireplaces', 'GarageYrBlt', 'GarageFinish', 'GarageCars', 'GarageArea', 'OpenPorchSF', 'Foundation_PConc', 'GarageType_Detchd']


In [28]:
X_test_selected = test_df[selected_feature_name]
print("Total features before selection",X.shape[1])
print('Total features after selection',X_test_selected.shape[1])

print('Selected feature name: ',list(X_test_selected))

Total features before selection 155
Total features after selection 31
Selected feature name:  ['MSSubClass', 'LotFrontage', 'LotArea', 'Neighborhood', 'OverallQual', 'OverallCond', 'YearBuilt', 'YearRemodAdd', 'Exterior1st', 'Exterior2nd', 'ExterQual', 'BsmtQual', 'BsmtFinType1', 'BsmtFinSF1', 'BsmtUnfSF', 'TotalBsmtSF', 'HeatingQC', '1stFlrSF', '2ndFlrSF', 'GrLivArea', 'FullBath', 'KitchenQual', 'TotRmsAbvGrd', 'Fireplaces', 'GarageYrBlt', 'GarageFinish', 'GarageCars', 'GarageArea', 'OpenPorchSF', 'Foundation_PConc', 'GarageType_Detchd']


<div class="alert alert-block alert-info">
   <b> Split Data </b> 
</div>

In [29]:
print(X_train_selected.shape)
print(Y.shape)

(1460, 31)
(1460,)


In [30]:
X_train_final, X_val, y_train_final, y_val = train_test_split(X_train_selected, Y, test_size=0.2, random_state=42)

print(f"X_train_final shape : {X_train_final.shape}")
print(f"X_val shape         : {X_val.shape}")
print(f"y_train_final shape : {y_train_final.shape}")
print(f"y_val shape         : {y_val.shape}")

X_train_final shape : (1168, 31)
X_val shape         : (292, 31)
y_train_final shape : (1168,)
y_val shape         : (292,)


<div class="alert alert-block alert-info">
   <b> Scale Data </b> 
</div>

In [32]:
scaler = StandardScaler()

X_train_scaled = scaler.fit_transform(X_train_final) 
X_val_scaled   = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test_selected)

print(f"X_train_scaled shape: {X_train_scaled.shape}")
print(f"X_val_scaled  shape: {X_val_scaled.shape}")
print(f"X_test_scaled sahpe:  {X_test_scaled.shape}")

X_train_scaled shape: (1168, 31)
X_val_scaled  shape: (292, 31)
X_test_scaled sahpe:  (1459, 31)




<div class="alert alert-block alert-info">
   <b> Train Model </b> 
</div>

In [33]:
model = RandomForestRegressor(n_estimators=100, max_depth=10, n_jobs=-1, random_state=42 )
model.fit(X_train_scaled, y_train_final)

<div class="alert alert-block alert-info">
   <b> Save the Model </b> 
</div>

In [35]:
with open('house_price_prediction.pickle', 'wb') as file:
  save_model = pickle.dump(model, file)

<div class="alert alert-block alert-info">
   <b> Load the Model </b> 
</div>

In [36]:
with open('house_price_prediction.pickle', 'rb') as file:
  load_model = pickle.load(file)

<div class="alert alert-block alert-info">
   <b> Evaluate the Model </b> 
</div>

In [45]:
y_train_pred = load_model.predict(X_train_scaled).round(2)

In [46]:
y_val_pred = load_model.predict(X_val_scaled).round(2)

In [47]:
X_test_pred = load_model.predict(X_test_scaled).round(2)

<div class="alert alert-block alert-info">
   <b> Accuracy Matrix </b> 
</div>

In [41]:
r2 = r2_score(y_train_final, y_train_pred)
mae = mean_absolute_error(y_train_final, y_train_pred)
mse = mean_squared_error(y_train_final, y_train_pred)
rmse = root_mean_squared_error(y_train_final, y_train_pred)

In [42]:
print(f"mean absolute error (mae): {mae*100:.2f}")
print(f"mean squared error (mse): {mse*100:.2f}")
print(f"root mean squared error (rmse): {rmse*100:.2f}")
print(f"r2 Score: {r2*100:.2f}")

mean absolute error (mae): 819973.11
mean squared error (mse): 15117892907.37
root mean squared error (rmse): 1229548.41
r2 Score: 97.47


In [55]:
df = pd.DataFrame({
  "Id": id_col,
  'SalePrice': X_test_pred

})

print(df.head())
print("CSV File Created Successfully!")

     Id  SalePrice
0  1461  123334.51
1  1462  152446.52
2  1463  187025.63
3  1464  190495.43
4  1465  204747.98
CSV File Created Successfully!


<div class="alert alert-block alert-info">
   <b> Convert to csv format </b> 
</div>

In [44]:
df.to_csv("HPrPred.csv",index=False)