In [1]:
# to IO/manipulate/calculate dataframes
import pandas as pd
import numpy as np

# to do math/statisctics
import statistics as stat
import math

# to vitualize data
import matplotlib.pyplot as plt
import seaborn as sns


# to do modeling with sklrean
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge, RidgeCV
from sklearn.linear_model import Lasso, LassoCV


from sklearn.preprocessing import PolynomialFeatures
from sklearn.preprocessing import StandardScaler

from sklearn.model_selection import train_test_split
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn import metrics

# other packages
import warnings
warnings.filterwarnings(action='ignore')

##  FUNCTION:  Replace the Null elements with the column's mean

In [2]:
def replace_null_with_mean(data, feature_list):
    """
    Calculate the mean of each column in a dataset and replace nulls with the column's mean
    """
    [ data[feature].fillna(data[feature].mean(),inplace=True) for feature in feature_list ]

## FUNCTION: Replace the Null elements with the column's mean

In [3]:
def replace_null_with_mode(data, feature_list):
    """
    1. To find the most frequent element in the column with object type
    2. Replace the np.nan with the mode
    """
    [data[feature].fillna(stat.mode(data[feature]), inplace=True) for feature in feature_list]

In [4]:
def print_dataset(data):
    with pd.option_context('display.max_rows', 7, 'display.max_columns', None): 
        display(data)    

## FUNCTION: plot_boxplot_category

# FUNCTION: plot_scatter_values

In [5]:
def plot_scatter_values(data, category_features=None, remove_zeros=False):
    if category_features==None:
        category_features = [col for col in dataset.columns if data[col].dtype==np.float64]
    size_title=26
    size_subtitle=14
    size_label_xy=15
    sns.set(style="white")

    fig, axes = plt.subplots(nrows=int(len(category_features)/3), ncols=3,
                             figsize=(12, int(2*len(category_features))))
    # fig.subplots_adjust(hspace=4)
#     fig.suptitle('Train Dataset\n\n\n', fontsize=size_title,  fontweight='bold')

    for ax, feature in zip(axes.flatten(), data[category_features]):
#         if remove_zeros==True:
#             dataset=dataset[dataset[feature]>0]
        sns.scatterplot(ax=ax, y=data[feature], x=data['SalePrice']/1000)
#         sns.kdeplot(ax=ax, y=dataset[feature], x=dataset['sale_price']/1000,
#                     n_levels=3, cmap='PiYG', thresh=0.5)
        
        ax.tick_params(axis='both', labelsize=size_label_xy)
        ax.set_title(feature.upper().replace('_',' '), fontsize=18,  fontweight='bold')
        ax.set_ylabel(' ',fontsize=size_label_xy)
        ax.set_xlabel('Sale Price k[$]',fontsize=size_label_xy)
        ax.set(xlim=(0, 600), ylim=None)
        ax.grid(b=True, which='major', axis='both')

    plt.rcParams['xtick.labelsize'] = 15
    plt.rcParams['ytick.labelsize'] = 15
    plt.rcParams['legend.fontsize'] = 10
    plt.rcParams['figure.titlesize'] = 12

    plt.tight_layout();
    # plt.savefig('seaborn-style.svg', bbox_inches='tight')

In [6]:
def plot_boxplot_category(data, category_features=None):
    if category_features==None:
        category_features = [col for col in data.columns if data[col].dtype==np.object_]
    size_title=26
    size_subtitle=14
    size_label_xy=15
    sns.set(style="white")

    fig, axes = plt.subplots(nrows=int(1+len(category_features)/3), ncols=3,
                             figsize=(12, int(1.5*len(category_features))))
    # fig.subplots_adjust(hspace=4)
#     fig.suptitle('Train Dataset\n\n\n', fontsize=size_title,  fontweight='bold')

    for ax, feature in zip(axes.flatten(), data[category_features]):
        my_order = data.groupby(by=[feature])['SalePrice'].mean().sort_values().index
        sns.boxplot(ax=ax, y=data[feature], x=data['SalePrice']/1000, order=list(my_order[::-1]))
        ax.tick_params(axis='both', labelsize=size_label_xy)
        ax.set_title(feature.upper().replace('_',' '), fontsize=18,  fontweight='bold')
        ax.set_ylabel(' ',fontsize=size_label_xy)
        ax.set_xlabel('Sale Price k[$]',fontsize=size_label_xy)
        ax.set(xlim=(0, 600), ylim=None)
        ax.grid(b=True, which='major', axis='both')

    plt.rcParams['xtick.labelsize'] = 15
    plt.rcParams['ytick.labelsize'] = 15
    plt.rcParams['legend.fontsize'] = 10
    plt.rcParams['figure.titlesize'] = 12

    plt.tight_layout();
    # plt.savefig('seaborn-style.svg', bbox_inches='tight')

In [7]:
def check_nulls(data):
    """
    This function read the dataset and report the following:
    1. column names with null
    2. null count 
    3. null count % 
    4. the first 10 element of each
    """
    xlist=[]
    for col in data.columns:
        number_of_nulls = data[col].isnull().sum()
#         print(col,number_of_nulls)
        if number_of_nulls!=0:
            xlist.append((col,number_of_nulls,
                          round(number_of_nulls*100/len(data),1) ,data[col][0:10].values))
    null_data = pd.DataFrame(data=xlist,columns=['column name','null count',
                                            'null count %','values example'])
    return null_data

## FUNCTION: print_dataset_Type

In [8]:
def print_dataset_type(data):
    with pd.option_context('display.max_rows', None, 'display.max_columns', None):
        print(data.dtypes)

# Read Train dataset ===========================================================

In [9]:
dataset = pd.read_csv('../datasets/test.csv')

In [10]:
print_dataset(dataset)

Unnamed: 0,Id,PID,MS SubClass,MS Zoning,Lot Frontage,Lot Area,Street,Alley,Lot Shape,Land Contour,Utilities,Lot Config,Land Slope,Neighborhood,Condition 1,Condition 2,Bldg Type,House Style,Overall Qual,Overall Cond,Year Built,Year Remod/Add,Roof Style,Roof Matl,Exterior 1st,Exterior 2nd,Mas Vnr Type,Mas Vnr Area,Exter Qual,Exter Cond,Foundation,Bsmt Qual,Bsmt Cond,Bsmt Exposure,BsmtFin Type 1,BsmtFin SF 1,BsmtFin Type 2,BsmtFin SF 2,Bsmt Unf SF,Total Bsmt SF,Heating,Heating QC,Central Air,Electrical,1st Flr SF,2nd Flr SF,Low Qual Fin SF,Gr Liv Area,Bsmt Full Bath,Bsmt Half Bath,Full Bath,Half Bath,Bedroom AbvGr,Kitchen AbvGr,Kitchen Qual,TotRms AbvGrd,Functional,Fireplaces,Fireplace Qu,Garage Type,Garage Yr Blt,Garage Finish,Garage Cars,Garage Area,Garage Qual,Garage Cond,Paved Drive,Wood Deck SF,Open Porch SF,Enclosed Porch,3Ssn Porch,Screen Porch,Pool Area,Pool QC,Fence,Misc Feature,Misc Val,Mo Sold,Yr Sold,Sale Type
0,2658,902301120,190,RM,69.0,9142,Pave,Grvl,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Norm,Norm,2fmCon,2Story,6,8,1910,1950,Gable,CompShg,AsbShng,AsbShng,,0.0,TA,Fa,Stone,Fa,TA,No,Unf,0,Unf,0,1020,1020,GasA,Gd,N,FuseP,908,1020,0,1928,0,0,2,0,4,2,Fa,9,Typ,0,,Detchd,1910.0,Unf,1,440,Po,Po,Y,0,60,112,0,0,0,,,,0,4,2006,WD
1,2718,905108090,90,RL,,9662,Pave,,IR1,Lvl,AllPub,Inside,Gtl,Sawyer,Norm,Norm,Duplex,1Story,5,4,1977,1977,Gable,CompShg,Plywood,Plywood,,0.0,TA,TA,CBlock,Gd,TA,No,Unf,0,Unf,0,1967,1967,GasA,TA,Y,SBrkr,1967,0,0,1967,0,0,2,0,6,2,TA,10,Typ,0,,Attchd,1977.0,Fin,2,580,TA,TA,Y,170,0,0,0,0,0,,,,0,8,2006,WD
2,2414,528218130,60,RL,58.0,17104,Pave,,IR1,Lvl,AllPub,Inside,Gtl,Gilbert,Norm,Norm,1Fam,2Story,7,5,2006,2006,Gable,CompShg,VinylSd,VinylSd,,0.0,Gd,TA,PConc,Gd,Gd,Av,GLQ,554,Unf,0,100,654,GasA,Ex,Y,SBrkr,664,832,0,1496,1,0,2,1,3,1,Gd,7,Typ,1,Gd,Attchd,2006.0,RFn,2,426,TA,TA,Y,100,24,0,0,0,0,,,,0,9,2006,New
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
875,1373,904100040,20,RL,55.0,8250,Pave,,Reg,Lvl,AllPub,Inside,Gtl,Sawyer,Feedr,Norm,1Fam,1Story,5,5,1968,1968,Hip,CompShg,HdBoard,HdBoard,,0.0,TA,TA,CBlock,TA,TA,No,BLQ,250,LwQ,492,210,952,GasA,Ex,Y,SBrkr,1211,0,0,1211,0,0,1,0,3,1,TA,5,Typ,1,TA,Attchd,1968.0,Unf,1,322,TA,TA,Y,0,63,0,0,0,0,,,,0,8,2008,WD
876,1672,527425140,20,RL,60.0,9000,Pave,,Reg,Lvl,AllPub,FR2,Gtl,NAmes,Norm,Norm,1Fam,1Story,4,6,1971,1971,Gable,CompShg,HdBoard,HdBoard,,0.0,TA,TA,PConc,TA,TA,No,ALQ,616,Unf,0,248,864,GasA,TA,Y,SBrkr,864,0,0,864,0,0,1,0,3,1,TA,5,Typ,0,,Detchd,1974.0,Unf,2,528,TA,TA,Y,0,0,0,0,0,0,,GdWo,,0,5,2007,WD
877,1939,535327160,20,RL,70.0,8400,Pave,,Reg,Lvl,AllPub,Corner,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,5,1955,1955,Gable,CompShg,MetalSd,MetalSd,,0.0,TA,Gd,CBlock,TA,TA,No,BLQ,793,Unf,0,130,923,GasA,TA,Y,SBrkr,925,0,0,925,1,0,1,0,3,1,TA,5,Typ,2,TA,Attchd,1955.0,Unf,1,390,TA,TA,Y,81,0,0,0,0,0,,GdWo,,0,3,2007,WD


In [11]:
# check_nulls(dataset)

# Make category features ===========================================================

# Features: Continous / numeric ===========================================================

In [12]:
features_continous_numeric = [
    'Lot Frontage',
    'Lot Area',
    'Mas Vnr Area',
    'BsmtFin SF 1', 'BsmtFin SF 2', 'Bsmt Unf SF', 'Total Bsmt SF',
    '1st Flr SF',
    '2nd Flr SF',
    'Low Qual Fin SF',
    'Gr Liv Area', 
    'Wood Deck SF', 'Open Porch SF', 'Enclosed Porch',
    '3Ssn Porch', 'Screen Porch',
    'Pool Area', 
    'Misc Val',
    'Garage Yr Blt',
    
]


In [13]:
check_nulls(dataset[features_continous_numeric])

Unnamed: 0,column name,null count,null count %,values example
0,Lot Frontage,160,18.2,"[69.0, nan, 58.0, 60.0, nan, 21.0, 52.0, nan, ..."
1,Mas Vnr Area,1,0.1,"[0.0, 0.0, 0.0, 0.0, 247.0, 0.0, 0.0, 0.0, 0.0..."
2,Garage Yr Blt,45,5.1,"[1910.0, 1977.0, 2006.0, 1935.0, 1963.0, 1972...."


In [14]:
# replace_null_with_mean(dataset_dum, ['Garage Yr Blt'])


In [15]:
replace_null_with_mean(dataset , ['Lot Frontage', 'Mas Vnr Area', 'Garage Yr Blt'])

In [16]:
null = check_nulls(dataset[features_continous_numeric])
null

Unnamed: 0,column name,null count,null count %,values example


In [17]:
# drop nulls < 30 
# dataset.dropna(subset=null[null['null count'] < 30]['column name'], inplace = True)

In [18]:
check_nulls(dataset[features_continous_numeric])

Unnamed: 0,column name,null count,null count %,values example


In [19]:
# plot features_continous_numeric

In [20]:
# plot_scatter_values(dataset , features_continous_numeric[0:3])

In [21]:
check_nulls( dataset[features_continous_numeric] )

Unnamed: 0,column name,null count,null count %,values example


In [22]:
# drop '3Ssn Porch'
dataset.drop(columns=['Pool Area'], inplace= True)

In [23]:
# drop '3Ssn Porch' feature
dataset.drop(columns=['3Ssn Porch'], inplace= True)

In [24]:
# drop 'Low Qual Fin SF' feature
dataset.drop(columns=['Low Qual Fin SF'], inplace= True)

In [25]:
#save
dataset.to_csv('../datasets/test_df_cleaned.csv')

# Features: features_string_to_dum ===========================================================

In [26]:
features_category_string_to_dum = [   
    'MS Zoning',
    'Street',
    'Street',
    'Lot Shape',
    'Land Contour',
    'Utilities',
    'Neighborhood',
    'Bldg Type',
    'House Style',
    'Roof Style',
    'Roof Matl',
    'Exterior 1st' , 'Exterior 2nd',
    'Condition 1', 'Condition 2',
    'Mas Vnr Type', 
    'Heating',
    'Central Air', # Y, N
    'Electrical',   # SBrkr...
    'Functional',   # Typ, Min1, ...
    'Garage Type', 
    'Sale Type', # WD, ...
    'Lot Config',
#     'Misc Feature',
    
#     'Sale Condition',
    
    
]

In [27]:
print_dataset(dataset[features_category_string_to_dum])

Unnamed: 0,MS Zoning,Street,Street.1,Lot Shape,Land Contour,Utilities,Neighborhood,Bldg Type,House Style,Roof Style,Roof Matl,Exterior 1st,Exterior 2nd,Condition 1,Condition 2,Mas Vnr Type,Heating,Central Air,Electrical,Functional,Garage Type,Sale Type,Lot Config
0,RM,Pave,Pave,Reg,Lvl,AllPub,OldTown,2fmCon,2Story,Gable,CompShg,AsbShng,AsbShng,Norm,Norm,,GasA,N,FuseP,Typ,Detchd,WD,Inside
1,RL,Pave,Pave,IR1,Lvl,AllPub,Sawyer,Duplex,1Story,Gable,CompShg,Plywood,Plywood,Norm,Norm,,GasA,Y,SBrkr,Typ,Attchd,WD,Inside
2,RL,Pave,Pave,IR1,Lvl,AllPub,Gilbert,1Fam,2Story,Gable,CompShg,VinylSd,VinylSd,Norm,Norm,,GasA,Y,SBrkr,Typ,Attchd,New,Inside
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
875,RL,Pave,Pave,Reg,Lvl,AllPub,Sawyer,1Fam,1Story,Hip,CompShg,HdBoard,HdBoard,Feedr,Norm,,GasA,Y,SBrkr,Typ,Attchd,WD,Inside
876,RL,Pave,Pave,Reg,Lvl,AllPub,NAmes,1Fam,1Story,Gable,CompShg,HdBoard,HdBoard,Norm,Norm,,GasA,Y,SBrkr,Typ,Detchd,WD,FR2
877,RL,Pave,Pave,Reg,Lvl,AllPub,NAmes,1Fam,1Story,Gable,CompShg,MetalSd,MetalSd,Norm,Norm,,GasA,Y,SBrkr,Typ,Attchd,WD,Corner


In [28]:
dataset[features_category_string_to_dum].isnull().sum()

MS Zoning        0
Street           0
Street           0
Lot Shape        0
Land Contour     0
Utilities        0
Neighborhood     0
Bldg Type        0
House Style      0
Roof Style       0
Roof Matl        0
Exterior 1st     0
Exterior 2nd     0
Condition 1      0
Condition 2      0
Mas Vnr Type     1
Heating          0
Central Air      0
Electrical       1
Functional       0
Garage Type     44
Sale Type        0
Lot Config       0
dtype: int64

In [29]:
dataset[['Mas Vnr Type','Garage Type','Misc Feature']] = dataset[['Mas Vnr Type','Garage Type','Misc Feature']].fillna(value='NA') #NA : Not Availible

In [30]:
# plot_boxplot_category(dataset,features_category_string_to_dum)

In [31]:
dataset_dum = pd.get_dummies(data = dataset, 
                             columns = features_category_string_to_dum,  
                             drop_first = True)

In [32]:
# drop 'Lot Config'
dataset['Lot Config'].drop(columns=['Lot Config'], inplace= True)

In [33]:
print_dataset(dataset_dum)

Unnamed: 0,Id,PID,MS SubClass,Lot Frontage,Lot Area,Alley,Land Slope,Overall Qual,Overall Cond,Year Built,Year Remod/Add,Mas Vnr Area,Exter Qual,Exter Cond,Foundation,Bsmt Qual,Bsmt Cond,Bsmt Exposure,BsmtFin Type 1,BsmtFin SF 1,BsmtFin Type 2,BsmtFin SF 2,Bsmt Unf SF,Total Bsmt SF,Heating QC,1st Flr SF,2nd Flr SF,Gr Liv Area,Bsmt Full Bath,Bsmt Half Bath,Full Bath,Half Bath,Bedroom AbvGr,Kitchen AbvGr,Kitchen Qual,TotRms AbvGrd,Fireplaces,Fireplace Qu,Garage Yr Blt,Garage Finish,Garage Cars,Garage Area,Garage Qual,Garage Cond,Paved Drive,Wood Deck SF,Open Porch SF,Enclosed Porch,Screen Porch,Pool QC,Fence,Misc Feature,Misc Val,Mo Sold,Yr Sold,MS Zoning_FV,MS Zoning_I (all),MS Zoning_RH,MS Zoning_RL,MS Zoning_RM,Street_Pave,Street_Pave.1,Lot Shape_IR2,Lot Shape_IR3,Lot Shape_Reg,Land Contour_HLS,Land Contour_Low,Land Contour_Lvl,Utilities_NoSewr,Neighborhood_Blueste,Neighborhood_BrDale,Neighborhood_BrkSide,Neighborhood_ClearCr,Neighborhood_CollgCr,Neighborhood_Crawfor,Neighborhood_Edwards,Neighborhood_Gilbert,Neighborhood_Greens,Neighborhood_IDOTRR,Neighborhood_MeadowV,Neighborhood_Mitchel,Neighborhood_NAmes,Neighborhood_NPkVill,Neighborhood_NWAmes,Neighborhood_NoRidge,Neighborhood_NridgHt,Neighborhood_OldTown,Neighborhood_SWISU,Neighborhood_Sawyer,Neighborhood_SawyerW,Neighborhood_Somerst,Neighborhood_StoneBr,Neighborhood_Timber,Neighborhood_Veenker,Bldg Type_2fmCon,Bldg Type_Duplex,Bldg Type_Twnhs,Bldg Type_TwnhsE,House Style_1.5Unf,House Style_1Story,House Style_2.5Fin,House Style_2.5Unf,House Style_2Story,House Style_SFoyer,House Style_SLvl,Roof Style_Gable,Roof Style_Gambrel,Roof Style_Hip,Roof Style_Mansard,Roof Style_Shed,Roof Matl_Metal,Roof Matl_Roll,Roof Matl_Tar&Grv,Roof Matl_WdShake,Roof Matl_WdShngl,Exterior 1st_AsphShn,Exterior 1st_BrkComm,Exterior 1st_BrkFace,Exterior 1st_CemntBd,Exterior 1st_HdBoard,Exterior 1st_MetalSd,Exterior 1st_Plywood,Exterior 1st_PreCast,Exterior 1st_Stucco,Exterior 1st_VinylSd,Exterior 1st_Wd Sdng,Exterior 1st_WdShing,Exterior 2nd_AsphShn,Exterior 2nd_Brk Cmn,Exterior 2nd_BrkFace,Exterior 2nd_CBlock,Exterior 2nd_CmentBd,Exterior 2nd_HdBoard,Exterior 2nd_ImStucc,Exterior 2nd_MetalSd,Exterior 2nd_Other,Exterior 2nd_Plywood,Exterior 2nd_PreCast,Exterior 2nd_Stucco,Exterior 2nd_VinylSd,Exterior 2nd_Wd Sdng,Exterior 2nd_Wd Shng,Condition 1_Feedr,Condition 1_Norm,Condition 1_PosA,Condition 1_PosN,Condition 1_RRAe,Condition 1_RRAn,Condition 1_RRNe,Condition 1_RRNn,Condition 2_Norm,Condition 2_PosA,Mas Vnr Type_BrkFace,Mas Vnr Type_CBlock,Mas Vnr Type_NA,Mas Vnr Type_None,Mas Vnr Type_Stone,Heating_GasA,Heating_GasW,Heating_Grav,Central Air_Y,Electrical_FuseF,Electrical_FuseP,Electrical_SBrkr,Functional_Maj2,Functional_Min1,Functional_Min2,Functional_Mod,Functional_Typ,Garage Type_Attchd,Garage Type_Basment,Garage Type_BuiltIn,Garage Type_CarPort,Garage Type_Detchd,Garage Type_NA,Sale Type_CWD,Sale Type_Con,Sale Type_ConLD,Sale Type_ConLI,Sale Type_ConLw,Sale Type_New,Sale Type_Oth,Sale Type_VWD,Sale Type_WD,Lot Config_CulDSac,Lot Config_FR2,Lot Config_FR3,Lot Config_Inside
0,2658,902301120,190,69.000000,9142,Grvl,Gtl,6,8,1910,1950,0.0,TA,Fa,Stone,Fa,TA,No,Unf,0,Unf,0,1020,1020,Gd,908,1020,1928,0,0,2,0,4,2,Fa,9,0,,1910.0,Unf,1,440,Po,Po,Y,0,60,112,0,,,,0,4,2006,0,0,0,0,1,1,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1
1,2718,905108090,90,69.545961,9662,,Gtl,5,4,1977,1977,0.0,TA,TA,CBlock,Gd,TA,No,Unf,0,Unf,0,1967,1967,TA,1967,0,1967,0,0,2,0,6,2,TA,10,0,,1977.0,Fin,2,580,TA,TA,Y,170,0,0,0,,,,0,8,2006,0,0,0,1,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1
2,2414,528218130,60,58.000000,17104,,Gtl,7,5,2006,2006,0.0,Gd,TA,PConc,Gd,Gd,Av,GLQ,554,Unf,0,100,654,Ex,664,832,1496,1,0,2,1,3,1,Gd,7,1,Gd,2006.0,RFn,2,426,TA,TA,Y,100,24,0,0,,,,0,9,2006,0,0,0,1,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
875,1373,904100040,20,55.000000,8250,,Gtl,5,5,1968,1968,0.0,TA,TA,CBlock,TA,TA,No,BLQ,250,LwQ,492,210,952,Ex,1211,0,1211,0,0,1,0,3,1,TA,5,1,TA,1968.0,Unf,1,322,TA,TA,Y,0,63,0,0,,,,0,8,2008,0,0,0,1,0,1,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1
876,1672,527425140,20,60.000000,9000,,Gtl,4,6,1971,1971,0.0,TA,TA,PConc,TA,TA,No,ALQ,616,Unf,0,248,864,TA,864,0,864,0,0,1,0,3,1,TA,5,0,,1974.0,Unf,2,528,TA,TA,Y,0,0,0,0,,GdWo,,0,5,2007,0,0,0,1,0,1,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0
877,1939,535327160,20,70.000000,8400,,Gtl,5,5,1955,1955,0.0,TA,Gd,CBlock,TA,TA,No,BLQ,793,Unf,0,130,923,TA,925,0,925,1,0,1,0,3,1,TA,5,2,TA,1955.0,Unf,1,390,TA,TA,Y,81,0,0,0,,GdWo,,0,3,2007,0,0,0,1,0,1,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0


# Features: features_categorical_numeric_values ===========================================================

In [34]:
features_categorical_numeric_values = [
    'Overall Qual', 'Overall Cond', # 10, 9, 8, ..., 1

#     'Alley'
]

In [35]:
check_nulls(dataset_dum[features_categorical_numeric_values])

Unnamed: 0,column name,null count,null count %,values example


In [36]:
# print_dataset_type(dataset_dum[features_categorical_numeric_values])

In [37]:
# plt.plot(dataset_dum['SalePrice'], dataset_dum[features_categorical_numeric_values],'.');

In [38]:
# dataset['Overall Qual Cond combined'] = 
#FIND ME!! COMBINE TWO ARRAYS!!!!

In [39]:
dataset_dum = pd.get_dummies(data = dataset_dum , 
                             columns = features_categorical_numeric_values,  
                             drop_first = True)

In [40]:
print_dataset(dataset_dum)

Unnamed: 0,Id,PID,MS SubClass,Lot Frontage,Lot Area,Alley,Land Slope,Year Built,Year Remod/Add,Mas Vnr Area,Exter Qual,Exter Cond,Foundation,Bsmt Qual,Bsmt Cond,Bsmt Exposure,BsmtFin Type 1,BsmtFin SF 1,BsmtFin Type 2,BsmtFin SF 2,Bsmt Unf SF,Total Bsmt SF,Heating QC,1st Flr SF,2nd Flr SF,Gr Liv Area,Bsmt Full Bath,Bsmt Half Bath,Full Bath,Half Bath,Bedroom AbvGr,Kitchen AbvGr,Kitchen Qual,TotRms AbvGrd,Fireplaces,Fireplace Qu,Garage Yr Blt,Garage Finish,Garage Cars,Garage Area,Garage Qual,Garage Cond,Paved Drive,Wood Deck SF,Open Porch SF,Enclosed Porch,Screen Porch,Pool QC,Fence,Misc Feature,Misc Val,Mo Sold,Yr Sold,MS Zoning_FV,MS Zoning_I (all),MS Zoning_RH,MS Zoning_RL,MS Zoning_RM,Street_Pave,Street_Pave.1,Lot Shape_IR2,Lot Shape_IR3,Lot Shape_Reg,Land Contour_HLS,Land Contour_Low,Land Contour_Lvl,Utilities_NoSewr,Neighborhood_Blueste,Neighborhood_BrDale,Neighborhood_BrkSide,Neighborhood_ClearCr,Neighborhood_CollgCr,Neighborhood_Crawfor,Neighborhood_Edwards,Neighborhood_Gilbert,Neighborhood_Greens,Neighborhood_IDOTRR,Neighborhood_MeadowV,Neighborhood_Mitchel,Neighborhood_NAmes,Neighborhood_NPkVill,Neighborhood_NWAmes,Neighborhood_NoRidge,Neighborhood_NridgHt,Neighborhood_OldTown,Neighborhood_SWISU,Neighborhood_Sawyer,Neighborhood_SawyerW,Neighborhood_Somerst,Neighborhood_StoneBr,Neighborhood_Timber,Neighborhood_Veenker,Bldg Type_2fmCon,Bldg Type_Duplex,Bldg Type_Twnhs,Bldg Type_TwnhsE,House Style_1.5Unf,House Style_1Story,House Style_2.5Fin,House Style_2.5Unf,House Style_2Story,House Style_SFoyer,House Style_SLvl,Roof Style_Gable,Roof Style_Gambrel,Roof Style_Hip,Roof Style_Mansard,Roof Style_Shed,Roof Matl_Metal,Roof Matl_Roll,Roof Matl_Tar&Grv,Roof Matl_WdShake,Roof Matl_WdShngl,Exterior 1st_AsphShn,Exterior 1st_BrkComm,Exterior 1st_BrkFace,Exterior 1st_CemntBd,Exterior 1st_HdBoard,Exterior 1st_MetalSd,Exterior 1st_Plywood,Exterior 1st_PreCast,Exterior 1st_Stucco,Exterior 1st_VinylSd,Exterior 1st_Wd Sdng,Exterior 1st_WdShing,Exterior 2nd_AsphShn,Exterior 2nd_Brk Cmn,Exterior 2nd_BrkFace,Exterior 2nd_CBlock,Exterior 2nd_CmentBd,Exterior 2nd_HdBoard,Exterior 2nd_ImStucc,Exterior 2nd_MetalSd,Exterior 2nd_Other,Exterior 2nd_Plywood,Exterior 2nd_PreCast,Exterior 2nd_Stucco,Exterior 2nd_VinylSd,Exterior 2nd_Wd Sdng,Exterior 2nd_Wd Shng,Condition 1_Feedr,Condition 1_Norm,Condition 1_PosA,Condition 1_PosN,Condition 1_RRAe,Condition 1_RRAn,Condition 1_RRNe,Condition 1_RRNn,Condition 2_Norm,Condition 2_PosA,Mas Vnr Type_BrkFace,Mas Vnr Type_CBlock,Mas Vnr Type_NA,Mas Vnr Type_None,Mas Vnr Type_Stone,Heating_GasA,Heating_GasW,Heating_Grav,Central Air_Y,Electrical_FuseF,Electrical_FuseP,Electrical_SBrkr,Functional_Maj2,Functional_Min1,Functional_Min2,Functional_Mod,Functional_Typ,Garage Type_Attchd,Garage Type_Basment,Garage Type_BuiltIn,Garage Type_CarPort,Garage Type_Detchd,Garage Type_NA,Sale Type_CWD,Sale Type_Con,Sale Type_ConLD,Sale Type_ConLI,Sale Type_ConLw,Sale Type_New,Sale Type_Oth,Sale Type_VWD,Sale Type_WD,Lot Config_CulDSac,Lot Config_FR2,Lot Config_FR3,Lot Config_Inside,Overall Qual_3,Overall Qual_4,Overall Qual_5,Overall Qual_6,Overall Qual_7,Overall Qual_8,Overall Qual_9,Overall Qual_10,Overall Cond_2,Overall Cond_3,Overall Cond_4,Overall Cond_5,Overall Cond_6,Overall Cond_7,Overall Cond_8,Overall Cond_9
0,2658,902301120,190,69.000000,9142,Grvl,Gtl,1910,1950,0.0,TA,Fa,Stone,Fa,TA,No,Unf,0,Unf,0,1020,1020,Gd,908,1020,1928,0,0,2,0,4,2,Fa,9,0,,1910.0,Unf,1,440,Po,Po,Y,0,60,112,0,,,,0,4,2006,0,0,0,0,1,1,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0
1,2718,905108090,90,69.545961,9662,,Gtl,1977,1977,0.0,TA,TA,CBlock,Gd,TA,No,Unf,0,Unf,0,1967,1967,TA,1967,0,1967,0,0,2,0,6,2,TA,10,0,,1977.0,Fin,2,580,TA,TA,Y,170,0,0,0,,,,0,8,2006,0,0,0,1,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0
2,2414,528218130,60,58.000000,17104,,Gtl,2006,2006,0.0,Gd,TA,PConc,Gd,Gd,Av,GLQ,554,Unf,0,100,654,Ex,664,832,1496,1,0,2,1,3,1,Gd,7,1,Gd,2006.0,RFn,2,426,TA,TA,Y,100,24,0,0,,,,0,9,2006,0,0,0,1,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
875,1373,904100040,20,55.000000,8250,,Gtl,1968,1968,0.0,TA,TA,CBlock,TA,TA,No,BLQ,250,LwQ,492,210,952,Ex,1211,0,1211,0,0,1,0,3,1,TA,5,1,TA,1968.0,Unf,1,322,TA,TA,Y,0,63,0,0,,,,0,8,2008,0,0,0,1,0,1,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0
876,1672,527425140,20,60.000000,9000,,Gtl,1971,1971,0.0,TA,TA,PConc,TA,TA,No,ALQ,616,Unf,0,248,864,TA,864,0,864,0,0,1,0,3,1,TA,5,0,,1974.0,Unf,2,528,TA,TA,Y,0,0,0,0,,GdWo,,0,5,2007,0,0,0,1,0,1,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0
877,1939,535327160,20,70.000000,8400,,Gtl,1955,1955,0.0,TA,Gd,CBlock,TA,TA,No,BLQ,793,Unf,0,130,923,TA,925,0,925,1,0,1,0,3,1,TA,5,2,TA,1955.0,Unf,1,390,TA,TA,Y,81,0,0,0,,GdWo,,0,3,2007,0,0,0,1,0,1,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0


In [41]:
dataset_dum.to_csv('../datasets/test_df_cleaned.csv')

# Features: Categorical with quality in numeric ===========================================================

In [42]:
features_category_numeric_to_dum = [
    'MS SubClass',
    'Mo Sold',
    
]

In [43]:
check_nulls( dataset_dum[features_category_numeric_to_dum] )

Unnamed: 0,column name,null count,null count %,values example


In [44]:
# plot_scatter_values(dataset, features_category_numeric_to_dum)

In [45]:
dataset_dum = pd.get_dummies(data = dataset_dum , 
                             columns = features_category_numeric_to_dum,  
                             drop_first = True)

In [46]:
print_dataset(dataset_dum)

Unnamed: 0,Id,PID,Lot Frontage,Lot Area,Alley,Land Slope,Year Built,Year Remod/Add,Mas Vnr Area,Exter Qual,Exter Cond,Foundation,Bsmt Qual,Bsmt Cond,Bsmt Exposure,BsmtFin Type 1,BsmtFin SF 1,BsmtFin Type 2,BsmtFin SF 2,Bsmt Unf SF,Total Bsmt SF,Heating QC,1st Flr SF,2nd Flr SF,Gr Liv Area,Bsmt Full Bath,Bsmt Half Bath,Full Bath,Half Bath,Bedroom AbvGr,Kitchen AbvGr,Kitchen Qual,TotRms AbvGrd,Fireplaces,Fireplace Qu,Garage Yr Blt,Garage Finish,Garage Cars,Garage Area,Garage Qual,Garage Cond,Paved Drive,Wood Deck SF,Open Porch SF,Enclosed Porch,Screen Porch,Pool QC,Fence,Misc Feature,Misc Val,Yr Sold,MS Zoning_FV,MS Zoning_I (all),MS Zoning_RH,MS Zoning_RL,MS Zoning_RM,Street_Pave,Street_Pave.1,Lot Shape_IR2,Lot Shape_IR3,Lot Shape_Reg,Land Contour_HLS,Land Contour_Low,Land Contour_Lvl,Utilities_NoSewr,Neighborhood_Blueste,Neighborhood_BrDale,Neighborhood_BrkSide,Neighborhood_ClearCr,Neighborhood_CollgCr,Neighborhood_Crawfor,Neighborhood_Edwards,Neighborhood_Gilbert,Neighborhood_Greens,Neighborhood_IDOTRR,Neighborhood_MeadowV,Neighborhood_Mitchel,Neighborhood_NAmes,Neighborhood_NPkVill,Neighborhood_NWAmes,Neighborhood_NoRidge,Neighborhood_NridgHt,Neighborhood_OldTown,Neighborhood_SWISU,Neighborhood_Sawyer,Neighborhood_SawyerW,Neighborhood_Somerst,Neighborhood_StoneBr,Neighborhood_Timber,Neighborhood_Veenker,Bldg Type_2fmCon,Bldg Type_Duplex,Bldg Type_Twnhs,Bldg Type_TwnhsE,House Style_1.5Unf,House Style_1Story,House Style_2.5Fin,House Style_2.5Unf,House Style_2Story,House Style_SFoyer,House Style_SLvl,Roof Style_Gable,Roof Style_Gambrel,Roof Style_Hip,Roof Style_Mansard,Roof Style_Shed,Roof Matl_Metal,Roof Matl_Roll,Roof Matl_Tar&Grv,Roof Matl_WdShake,Roof Matl_WdShngl,Exterior 1st_AsphShn,Exterior 1st_BrkComm,Exterior 1st_BrkFace,Exterior 1st_CemntBd,Exterior 1st_HdBoard,Exterior 1st_MetalSd,Exterior 1st_Plywood,Exterior 1st_PreCast,Exterior 1st_Stucco,Exterior 1st_VinylSd,Exterior 1st_Wd Sdng,Exterior 1st_WdShing,Exterior 2nd_AsphShn,Exterior 2nd_Brk Cmn,Exterior 2nd_BrkFace,Exterior 2nd_CBlock,Exterior 2nd_CmentBd,Exterior 2nd_HdBoard,Exterior 2nd_ImStucc,Exterior 2nd_MetalSd,Exterior 2nd_Other,Exterior 2nd_Plywood,Exterior 2nd_PreCast,Exterior 2nd_Stucco,Exterior 2nd_VinylSd,Exterior 2nd_Wd Sdng,Exterior 2nd_Wd Shng,Condition 1_Feedr,Condition 1_Norm,Condition 1_PosA,Condition 1_PosN,Condition 1_RRAe,Condition 1_RRAn,Condition 1_RRNe,Condition 1_RRNn,Condition 2_Norm,Condition 2_PosA,Mas Vnr Type_BrkFace,Mas Vnr Type_CBlock,Mas Vnr Type_NA,Mas Vnr Type_None,Mas Vnr Type_Stone,Heating_GasA,Heating_GasW,Heating_Grav,Central Air_Y,Electrical_FuseF,Electrical_FuseP,Electrical_SBrkr,Functional_Maj2,Functional_Min1,Functional_Min2,Functional_Mod,Functional_Typ,Garage Type_Attchd,Garage Type_Basment,Garage Type_BuiltIn,Garage Type_CarPort,Garage Type_Detchd,Garage Type_NA,Sale Type_CWD,Sale Type_Con,Sale Type_ConLD,Sale Type_ConLI,Sale Type_ConLw,Sale Type_New,Sale Type_Oth,Sale Type_VWD,Sale Type_WD,Lot Config_CulDSac,Lot Config_FR2,Lot Config_FR3,Lot Config_Inside,Overall Qual_3,Overall Qual_4,Overall Qual_5,Overall Qual_6,Overall Qual_7,Overall Qual_8,Overall Qual_9,Overall Qual_10,Overall Cond_2,Overall Cond_3,Overall Cond_4,Overall Cond_5,Overall Cond_6,Overall Cond_7,Overall Cond_8,Overall Cond_9,MS SubClass_30,MS SubClass_40,MS SubClass_45,MS SubClass_50,MS SubClass_60,MS SubClass_70,MS SubClass_75,MS SubClass_80,MS SubClass_85,MS SubClass_90,MS SubClass_120,MS SubClass_160,MS SubClass_180,MS SubClass_190,Mo Sold_2,Mo Sold_3,Mo Sold_4,Mo Sold_5,Mo Sold_6,Mo Sold_7,Mo Sold_8,Mo Sold_9,Mo Sold_10,Mo Sold_11,Mo Sold_12
0,2658,902301120,69.000000,9142,Grvl,Gtl,1910,1950,0.0,TA,Fa,Stone,Fa,TA,No,Unf,0,Unf,0,1020,1020,Gd,908,1020,1928,0,0,2,0,4,2,Fa,9,0,,1910.0,Unf,1,440,Po,Po,Y,0,60,112,0,,,,0,2006,0,0,0,0,1,1,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0
1,2718,905108090,69.545961,9662,,Gtl,1977,1977,0.0,TA,TA,CBlock,Gd,TA,No,Unf,0,Unf,0,1967,1967,TA,1967,0,1967,0,0,2,0,6,2,TA,10,0,,1977.0,Fin,2,580,TA,TA,Y,170,0,0,0,,,,0,2006,0,0,0,1,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
2,2414,528218130,58.000000,17104,,Gtl,2006,2006,0.0,Gd,TA,PConc,Gd,Gd,Av,GLQ,554,Unf,0,100,654,Ex,664,832,1496,1,0,2,1,3,1,Gd,7,1,Gd,2006.0,RFn,2,426,TA,TA,Y,100,24,0,0,,,,0,2006,0,0,0,1,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
875,1373,904100040,55.000000,8250,,Gtl,1968,1968,0.0,TA,TA,CBlock,TA,TA,No,BLQ,250,LwQ,492,210,952,Ex,1211,0,1211,0,0,1,0,3,1,TA,5,1,TA,1968.0,Unf,1,322,TA,TA,Y,0,63,0,0,,,,0,2008,0,0,0,1,0,1,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
876,1672,527425140,60.000000,9000,,Gtl,1971,1971,0.0,TA,TA,PConc,TA,TA,No,ALQ,616,Unf,0,248,864,TA,864,0,864,0,0,1,0,3,1,TA,5,0,,1974.0,Unf,2,528,TA,TA,Y,0,0,0,0,,GdWo,,0,2007,0,0,0,1,0,1,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
877,1939,535327160,70.000000,8400,,Gtl,1955,1955,0.0,TA,Gd,CBlock,TA,TA,No,BLQ,793,Unf,0,130,923,TA,925,0,925,1,0,1,0,3,1,TA,5,2,TA,1955.0,Unf,1,390,TA,TA,Y,81,0,0,0,,GdWo,,0,2007,0,0,0,1,0,1,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0


In [47]:
dataset_dum.to_csv('../datasets/test_df_cleaned.csv')

# Features: quality features in a range of Excellent to poor ===========================================================

In [48]:
string_quality_to_quantity_features =[ #rate
    'Land Slope',
    'Exter Qual' , 'Exter Cond',   # Ex, Gd, TA, Fa, Po
    'Bsmt Qual', 'Bsmt Cond',      # Ex, Gd, TA, Fa, Po, NA
    'Bsmt Exposure',
    'BsmtFin Type 1', 'BsmtFin Type 2', # GLQ, ALQ, BLQ, Rec, LwQ, Unf, NA
    'Heating QC',  # Ex, Gd, TA, Fa, Po
    'Kitchen Qual', # Ex, Gd, TA, Fa, Po
    'Fireplace Qu',     # Ex, Gd, TA, Fa, Po, NA
    'Garage Finish',   # Fin, 
    'Garage Qual',     # Ex, Gd, TA, Fa, Po, NA
    'Paved Drive',     # Y, P, N
    'Pool QC', # Ex, Gd, TA, Fa, NA
    'Fence', # GdPrv,...
    'Garage Cond',
]

In [49]:
# plot_boxplot_category(dataset_dum, string_quality_to_quantity_features)

In [50]:
check_nulls(dataset_dum[string_quality_to_quantity_features])

Unnamed: 0,column name,null count,null count %,values example
0,Bsmt Qual,25,2.8,"[Fa, Gd, Gd, TA, Gd, TA, TA, Gd, Gd, TA]"
1,Bsmt Cond,25,2.8,"[TA, TA, Gd, TA, TA, TA, TA, Gd, TA, TA]"
2,Bsmt Exposure,25,2.8,"[No, No, Av, No, No, No, No, Av, No, No]"
3,BsmtFin Type 1,25,2.8,"[Unf, Unf, GLQ, Unf, BLQ, Rec, Unf, ALQ, Unf, ..."
4,BsmtFin Type 2,25,2.8,"[Unf, Unf, Unf, Unf, Unf, Unf, Unf, Unf, Unf, ..."
5,Fireplace Qu,422,48.1,"[nan, nan, Gd, nan, Gd, nan, nan, nan, nan, Fa]"
6,Garage Finish,45,5.1,"[Unf, Fin, RFn, Unf, RFn, Unf, Unf, Unf, RFn, ..."
7,Garage Qual,45,5.1,"[Po, TA, TA, Fa, TA, TA, TA, TA, TA, TA]"
8,Pool QC,874,99.5,"[nan, nan, nan, nan, nan, nan, nan, nan, nan, ..."
9,Fence,706,80.4,"[nan, nan, nan, nan, nan, nan, nan, nan, nan, ..."


In [51]:
dataset_dum[string_quality_to_quantity_features] = dataset_dum[string_quality_to_quantity_features].fillna(value='NA') #NA : Not Availible

In [52]:
check_nulls(dataset_dum[string_quality_to_quantity_features])

Unnamed: 0,column name,null count,null count %,values example


In [53]:
#check categories
# dataset_dum[string_quality_to_quantity_features[16]].value_counts()

In [54]:
def replace_quantity_to_quality(df , x):
    [df.replace(x[i][0],x[i][1], inplace = True) for i in range(len(x)) ]
    return df

In [55]:
list_cat=['Fence']
x = [ ['NA',0] , ['MnWw',1], ['GdWo',2], ['MnPrv', 3], ['GdPrv', 4]]
dataset_dum[list_cat] = replace_quantity_to_quality(dataset_dum[list_cat], x )

In [56]:
list_cat=['Pool QC']
x = [ ['NA',0] , ['Fa',1], ['TA',2], ['Gd', 3]]
dataset_dum[list_cat] = replace_quantity_to_quality(dataset_dum[list_cat], x )

In [57]:
list_cat=['Land Slope']
x = [ ['Gtl',1] , ['Mod',2], ['Sev',3]]
dataset_dum[list_cat] = replace_quantity_to_quality(dataset_dum[list_cat], x )

In [58]:
list_cat=['Exter Qual','Pool QC']
x = [ ['NA',0] ,['Fa',1] , ['TA',2], ['Gd',3],  ['Ex',4]]
dataset_dum[list_cat] = replace_quantity_to_quality(dataset_dum[list_cat], x )

In [59]:
list_cat=['Exter Cond','Bsmt Qual', 'Bsmt Cond', 'Heating QC', 'Fireplace Qu', 'Garage Qual', 'Garage Cond', 'Kitchen Qual']
x = [ ['NA',0] , ['Po',0] ,['Fa',1] , ['TA',2], ['Gd',3], ['Ex',4]]
dataset_dum[list_cat] = replace_quantity_to_quality(dataset_dum[list_cat], x )

In [60]:
list_cat=['Bsmt Exposure']
x = [ ['NA',0] , ['No',0] ,['Mn',1] , ['Av',2], ['Gd',3]]
dataset_dum[list_cat] = replace_quantity_to_quality(dataset_dum[list_cat], x )

In [61]:
# find me combine 'BsmtFin Type 1','BsmtFin Type 2'

list_cat=['BsmtFin Type 1','BsmtFin Type 2']
x = [ ['NA',0] , ['Unf',1] , ['LwQ',2], ['Rec',3], ['BLQ', 4], ['ALQ', 5], ['GLQ', 6]]
dataset_dum[list_cat] = replace_quantity_to_quality(dataset_dum[list_cat], x )

In [62]:
# find me combine 'BsmtFin Type 1','BsmtFin Type 2'

list_cat=['Garage Finish']
x = [ ['NA',0] , ['Unf',1] , ['RFn',2], ['Fin',3] ]
dataset_dum[list_cat] = replace_quantity_to_quality(dataset_dum[list_cat], x )

In [63]:
# find me combine 'BsmtFin Type 1','BsmtFin Type 2'

list_cat=['Paved Drive']
x = [ ['NA',0] , ['N',1] , ['P',2], ['Y',3] ]
dataset_dum[list_cat] = replace_quantity_to_quality(dataset_dum[list_cat], x )

In [64]:
check_nulls(dataset_dum[string_quality_to_quantity_features])

Unnamed: 0,column name,null count,null count %,values example


In [65]:
print_dataset ( dataset_dum[string_quality_to_quantity_features] )

Unnamed: 0,Land Slope,Exter Qual,Exter Cond,Bsmt Qual,Bsmt Cond,Bsmt Exposure,BsmtFin Type 1,BsmtFin Type 2,Heating QC,Kitchen Qual,Fireplace Qu,Garage Finish,Garage Qual,Paved Drive,Pool QC,Fence,Garage Cond
0,1,2,1,1,2,0,1,1,3,1,0,1,0,3,0,0,0
1,1,2,2,3,2,0,1,1,2,2,0,3,2,3,0,0,2
2,1,3,2,3,3,2,6,1,4,3,3,2,2,3,0,0,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
875,1,2,2,2,2,0,4,2,4,2,2,1,2,3,0,0,2
876,1,2,2,2,2,0,5,1,2,2,0,1,2,3,0,2,2
877,1,2,3,2,2,0,4,1,2,2,2,1,2,3,0,2,2


In [66]:
check_nulls(dataset_dum[string_quality_to_quantity_features])

Unnamed: 0,column name,null count,null count %,values example


In [67]:
print_dataset_type(dataset_dum[string_quality_to_quantity_features])

Land Slope        int64
Exter Qual        int64
Exter Cond        int64
Bsmt Qual         int64
Bsmt Cond         int64
Bsmt Exposure     int64
BsmtFin Type 1    int64
BsmtFin Type 2    int64
Heating QC        int64
Kitchen Qual      int64
Fireplace Qu      int64
Garage Finish     int64
Garage Qual       int64
Paved Drive       int64
Pool QC           int64
Fence             int64
Garage Cond       int64
dtype: object


In [68]:
dataset_dum.to_csv('../datasets/test_df_cleaned.csv')

# Features: Continous / numeric ===========================================================

In [69]:
features_continous_numeric = [
    'Lot Frontage',
    'Lot Area',
    'Mas Vnr Area',
    'BsmtFin SF 1', 'BsmtFin SF 2', 'Bsmt Unf SF', 'Total Bsmt SF',
    '1st Flr SF',
    '2nd Flr SF',
#     'Low Qual Fin SF',
    'Gr Liv Area', 
    'Wood Deck SF', 'Open Porch SF', 'Enclosed Porch',
#     '3Ssn Porch', 
    'Screen Porch',
#     'Pool Area', 
    'Misc Val',
    
]


In [70]:
check_nulls(dataset_dum[features_continous_numeric])

Unnamed: 0,column name,null count,null count %,values example


In [71]:
# replace_null_with_mean(dataset_dum , ['Lot Frontage','Mas Vnr Area'])

In [72]:
# print_dataset_type (  dataset_dum[['Lot Frontage','Mas Vnr Area'] ]  )

In [73]:
# drop nulls < 30 
# dataset_dum.dropna(subset=null[null['null count'] < 30]['column name'], inplace = True)

In [74]:
# check_nulls(dataset_dum[features_continous_numeric])

In [75]:
# dataset_dum['Lot Frontage'].isnull().sum()

In [76]:
# plot features_continous_numeric

In [77]:
# plot_scatter_values(dataset_dum , features_continous_numeric[0:3])

In [78]:
print_dataset(dataset_dum)

Unnamed: 0,Id,PID,Lot Frontage,Lot Area,Alley,Land Slope,Year Built,Year Remod/Add,Mas Vnr Area,Exter Qual,Exter Cond,Foundation,Bsmt Qual,Bsmt Cond,Bsmt Exposure,BsmtFin Type 1,BsmtFin SF 1,BsmtFin Type 2,BsmtFin SF 2,Bsmt Unf SF,Total Bsmt SF,Heating QC,1st Flr SF,2nd Flr SF,Gr Liv Area,Bsmt Full Bath,Bsmt Half Bath,Full Bath,Half Bath,Bedroom AbvGr,Kitchen AbvGr,Kitchen Qual,TotRms AbvGrd,Fireplaces,Fireplace Qu,Garage Yr Blt,Garage Finish,Garage Cars,Garage Area,Garage Qual,Garage Cond,Paved Drive,Wood Deck SF,Open Porch SF,Enclosed Porch,Screen Porch,Pool QC,Fence,Misc Feature,Misc Val,Yr Sold,MS Zoning_FV,MS Zoning_I (all),MS Zoning_RH,MS Zoning_RL,MS Zoning_RM,Street_Pave,Street_Pave.1,Lot Shape_IR2,Lot Shape_IR3,Lot Shape_Reg,Land Contour_HLS,Land Contour_Low,Land Contour_Lvl,Utilities_NoSewr,Neighborhood_Blueste,Neighborhood_BrDale,Neighborhood_BrkSide,Neighborhood_ClearCr,Neighborhood_CollgCr,Neighborhood_Crawfor,Neighborhood_Edwards,Neighborhood_Gilbert,Neighborhood_Greens,Neighborhood_IDOTRR,Neighborhood_MeadowV,Neighborhood_Mitchel,Neighborhood_NAmes,Neighborhood_NPkVill,Neighborhood_NWAmes,Neighborhood_NoRidge,Neighborhood_NridgHt,Neighborhood_OldTown,Neighborhood_SWISU,Neighborhood_Sawyer,Neighborhood_SawyerW,Neighborhood_Somerst,Neighborhood_StoneBr,Neighborhood_Timber,Neighborhood_Veenker,Bldg Type_2fmCon,Bldg Type_Duplex,Bldg Type_Twnhs,Bldg Type_TwnhsE,House Style_1.5Unf,House Style_1Story,House Style_2.5Fin,House Style_2.5Unf,House Style_2Story,House Style_SFoyer,House Style_SLvl,Roof Style_Gable,Roof Style_Gambrel,Roof Style_Hip,Roof Style_Mansard,Roof Style_Shed,Roof Matl_Metal,Roof Matl_Roll,Roof Matl_Tar&Grv,Roof Matl_WdShake,Roof Matl_WdShngl,Exterior 1st_AsphShn,Exterior 1st_BrkComm,Exterior 1st_BrkFace,Exterior 1st_CemntBd,Exterior 1st_HdBoard,Exterior 1st_MetalSd,Exterior 1st_Plywood,Exterior 1st_PreCast,Exterior 1st_Stucco,Exterior 1st_VinylSd,Exterior 1st_Wd Sdng,Exterior 1st_WdShing,Exterior 2nd_AsphShn,Exterior 2nd_Brk Cmn,Exterior 2nd_BrkFace,Exterior 2nd_CBlock,Exterior 2nd_CmentBd,Exterior 2nd_HdBoard,Exterior 2nd_ImStucc,Exterior 2nd_MetalSd,Exterior 2nd_Other,Exterior 2nd_Plywood,Exterior 2nd_PreCast,Exterior 2nd_Stucco,Exterior 2nd_VinylSd,Exterior 2nd_Wd Sdng,Exterior 2nd_Wd Shng,Condition 1_Feedr,Condition 1_Norm,Condition 1_PosA,Condition 1_PosN,Condition 1_RRAe,Condition 1_RRAn,Condition 1_RRNe,Condition 1_RRNn,Condition 2_Norm,Condition 2_PosA,Mas Vnr Type_BrkFace,Mas Vnr Type_CBlock,Mas Vnr Type_NA,Mas Vnr Type_None,Mas Vnr Type_Stone,Heating_GasA,Heating_GasW,Heating_Grav,Central Air_Y,Electrical_FuseF,Electrical_FuseP,Electrical_SBrkr,Functional_Maj2,Functional_Min1,Functional_Min2,Functional_Mod,Functional_Typ,Garage Type_Attchd,Garage Type_Basment,Garage Type_BuiltIn,Garage Type_CarPort,Garage Type_Detchd,Garage Type_NA,Sale Type_CWD,Sale Type_Con,Sale Type_ConLD,Sale Type_ConLI,Sale Type_ConLw,Sale Type_New,Sale Type_Oth,Sale Type_VWD,Sale Type_WD,Lot Config_CulDSac,Lot Config_FR2,Lot Config_FR3,Lot Config_Inside,Overall Qual_3,Overall Qual_4,Overall Qual_5,Overall Qual_6,Overall Qual_7,Overall Qual_8,Overall Qual_9,Overall Qual_10,Overall Cond_2,Overall Cond_3,Overall Cond_4,Overall Cond_5,Overall Cond_6,Overall Cond_7,Overall Cond_8,Overall Cond_9,MS SubClass_30,MS SubClass_40,MS SubClass_45,MS SubClass_50,MS SubClass_60,MS SubClass_70,MS SubClass_75,MS SubClass_80,MS SubClass_85,MS SubClass_90,MS SubClass_120,MS SubClass_160,MS SubClass_180,MS SubClass_190,Mo Sold_2,Mo Sold_3,Mo Sold_4,Mo Sold_5,Mo Sold_6,Mo Sold_7,Mo Sold_8,Mo Sold_9,Mo Sold_10,Mo Sold_11,Mo Sold_12
0,2658,902301120,69.000000,9142,Grvl,1,1910,1950,0.0,2,1,Stone,1,2,0,1,0,1,0,1020,1020,3,908,1020,1928,0,0,2,0,4,2,1,9,0,0,1910.0,1,1,440,0,0,3,0,60,112,0,0,0,,0,2006,0,0,0,0,1,1,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0
1,2718,905108090,69.545961,9662,,1,1977,1977,0.0,2,2,CBlock,3,2,0,1,0,1,0,1967,1967,2,1967,0,1967,0,0,2,0,6,2,2,10,0,0,1977.0,3,2,580,2,2,3,170,0,0,0,0,0,,0,2006,0,0,0,1,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
2,2414,528218130,58.000000,17104,,1,2006,2006,0.0,3,2,PConc,3,3,2,6,554,1,0,100,654,4,664,832,1496,1,0,2,1,3,1,3,7,1,3,2006.0,2,2,426,2,2,3,100,24,0,0,0,0,,0,2006,0,0,0,1,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
875,1373,904100040,55.000000,8250,,1,1968,1968,0.0,2,2,CBlock,2,2,0,4,250,2,492,210,952,4,1211,0,1211,0,0,1,0,3,1,2,5,1,2,1968.0,1,1,322,2,2,3,0,63,0,0,0,0,,0,2008,0,0,0,1,0,1,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
876,1672,527425140,60.000000,9000,,1,1971,1971,0.0,2,2,PConc,2,2,0,5,616,1,0,248,864,2,864,0,864,0,0,1,0,3,1,2,5,0,0,1974.0,1,2,528,2,2,3,0,0,0,0,0,2,,0,2007,0,0,0,1,0,1,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
877,1939,535327160,70.000000,8400,,1,1955,1955,0.0,2,3,CBlock,2,2,0,4,793,1,0,130,923,2,925,0,925,1,0,1,0,3,1,2,5,2,2,1955.0,1,1,390,2,2,3,81,0,0,0,0,2,,0,2007,0,0,0,1,0,1,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0


In [79]:
# remove the dropped features from features_continous_numeric
# [features_continous_numeric.remove(elem) for elem in  ['Low Qual Fin SF', 'Pool Area', '3Ssn Porch'] ]

In [80]:
print_dataset(dataset_dum[features_continous_numeric])

Unnamed: 0,Lot Frontage,Lot Area,Mas Vnr Area,BsmtFin SF 1,BsmtFin SF 2,Bsmt Unf SF,Total Bsmt SF,1st Flr SF,2nd Flr SF,Gr Liv Area,Wood Deck SF,Open Porch SF,Enclosed Porch,Screen Porch,Misc Val
0,69.000000,9142,0.0,0,0,1020,1020,908,1020,1928,0,60,112,0,0
1,69.545961,9662,0.0,0,0,1967,1967,1967,0,1967,170,0,0,0,0
2,58.000000,17104,0.0,554,0,100,654,664,832,1496,100,24,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
875,55.000000,8250,0.0,250,492,210,952,1211,0,1211,0,63,0,0,0
876,60.000000,9000,0.0,616,0,248,864,864,0,864,0,0,0,0,0
877,70.000000,8400,0.0,793,0,130,923,925,0,925,81,0,0,0,0


In [81]:
check_nulls(dataset_dum[features_continous_numeric])

Unnamed: 0,column name,null count,null count %,values example


In [82]:
print_dataset_type( dataset_dum[features_continous_numeric] )

Lot Frontage      float64
Lot Area            int64
Mas Vnr Area      float64
BsmtFin SF 1        int64
BsmtFin SF 2        int64
Bsmt Unf SF         int64
Total Bsmt SF       int64
1st Flr SF          int64
2nd Flr SF          int64
Gr Liv Area         int64
Wood Deck SF        int64
Open Porch SF       int64
Enclosed Porch      int64
Screen Porch        int64
Misc Val            int64
dtype: object


In [83]:
#save
dataset_dum.to_csv('../datasets/test_df_cleaned.csv')

# Features: will be deleted ===========================================================

In [84]:
useless_features_toDrop = [
    'Id',
    'PID',
    'Alley',
#     'Alley'
]

In [85]:
print_dataset(dataset_dum)

Unnamed: 0,Id,PID,Lot Frontage,Lot Area,Alley,Land Slope,Year Built,Year Remod/Add,Mas Vnr Area,Exter Qual,Exter Cond,Foundation,Bsmt Qual,Bsmt Cond,Bsmt Exposure,BsmtFin Type 1,BsmtFin SF 1,BsmtFin Type 2,BsmtFin SF 2,Bsmt Unf SF,Total Bsmt SF,Heating QC,1st Flr SF,2nd Flr SF,Gr Liv Area,Bsmt Full Bath,Bsmt Half Bath,Full Bath,Half Bath,Bedroom AbvGr,Kitchen AbvGr,Kitchen Qual,TotRms AbvGrd,Fireplaces,Fireplace Qu,Garage Yr Blt,Garage Finish,Garage Cars,Garage Area,Garage Qual,Garage Cond,Paved Drive,Wood Deck SF,Open Porch SF,Enclosed Porch,Screen Porch,Pool QC,Fence,Misc Feature,Misc Val,Yr Sold,MS Zoning_FV,MS Zoning_I (all),MS Zoning_RH,MS Zoning_RL,MS Zoning_RM,Street_Pave,Street_Pave.1,Lot Shape_IR2,Lot Shape_IR3,Lot Shape_Reg,Land Contour_HLS,Land Contour_Low,Land Contour_Lvl,Utilities_NoSewr,Neighborhood_Blueste,Neighborhood_BrDale,Neighborhood_BrkSide,Neighborhood_ClearCr,Neighborhood_CollgCr,Neighborhood_Crawfor,Neighborhood_Edwards,Neighborhood_Gilbert,Neighborhood_Greens,Neighborhood_IDOTRR,Neighborhood_MeadowV,Neighborhood_Mitchel,Neighborhood_NAmes,Neighborhood_NPkVill,Neighborhood_NWAmes,Neighborhood_NoRidge,Neighborhood_NridgHt,Neighborhood_OldTown,Neighborhood_SWISU,Neighborhood_Sawyer,Neighborhood_SawyerW,Neighborhood_Somerst,Neighborhood_StoneBr,Neighborhood_Timber,Neighborhood_Veenker,Bldg Type_2fmCon,Bldg Type_Duplex,Bldg Type_Twnhs,Bldg Type_TwnhsE,House Style_1.5Unf,House Style_1Story,House Style_2.5Fin,House Style_2.5Unf,House Style_2Story,House Style_SFoyer,House Style_SLvl,Roof Style_Gable,Roof Style_Gambrel,Roof Style_Hip,Roof Style_Mansard,Roof Style_Shed,Roof Matl_Metal,Roof Matl_Roll,Roof Matl_Tar&Grv,Roof Matl_WdShake,Roof Matl_WdShngl,Exterior 1st_AsphShn,Exterior 1st_BrkComm,Exterior 1st_BrkFace,Exterior 1st_CemntBd,Exterior 1st_HdBoard,Exterior 1st_MetalSd,Exterior 1st_Plywood,Exterior 1st_PreCast,Exterior 1st_Stucco,Exterior 1st_VinylSd,Exterior 1st_Wd Sdng,Exterior 1st_WdShing,Exterior 2nd_AsphShn,Exterior 2nd_Brk Cmn,Exterior 2nd_BrkFace,Exterior 2nd_CBlock,Exterior 2nd_CmentBd,Exterior 2nd_HdBoard,Exterior 2nd_ImStucc,Exterior 2nd_MetalSd,Exterior 2nd_Other,Exterior 2nd_Plywood,Exterior 2nd_PreCast,Exterior 2nd_Stucco,Exterior 2nd_VinylSd,Exterior 2nd_Wd Sdng,Exterior 2nd_Wd Shng,Condition 1_Feedr,Condition 1_Norm,Condition 1_PosA,Condition 1_PosN,Condition 1_RRAe,Condition 1_RRAn,Condition 1_RRNe,Condition 1_RRNn,Condition 2_Norm,Condition 2_PosA,Mas Vnr Type_BrkFace,Mas Vnr Type_CBlock,Mas Vnr Type_NA,Mas Vnr Type_None,Mas Vnr Type_Stone,Heating_GasA,Heating_GasW,Heating_Grav,Central Air_Y,Electrical_FuseF,Electrical_FuseP,Electrical_SBrkr,Functional_Maj2,Functional_Min1,Functional_Min2,Functional_Mod,Functional_Typ,Garage Type_Attchd,Garage Type_Basment,Garage Type_BuiltIn,Garage Type_CarPort,Garage Type_Detchd,Garage Type_NA,Sale Type_CWD,Sale Type_Con,Sale Type_ConLD,Sale Type_ConLI,Sale Type_ConLw,Sale Type_New,Sale Type_Oth,Sale Type_VWD,Sale Type_WD,Lot Config_CulDSac,Lot Config_FR2,Lot Config_FR3,Lot Config_Inside,Overall Qual_3,Overall Qual_4,Overall Qual_5,Overall Qual_6,Overall Qual_7,Overall Qual_8,Overall Qual_9,Overall Qual_10,Overall Cond_2,Overall Cond_3,Overall Cond_4,Overall Cond_5,Overall Cond_6,Overall Cond_7,Overall Cond_8,Overall Cond_9,MS SubClass_30,MS SubClass_40,MS SubClass_45,MS SubClass_50,MS SubClass_60,MS SubClass_70,MS SubClass_75,MS SubClass_80,MS SubClass_85,MS SubClass_90,MS SubClass_120,MS SubClass_160,MS SubClass_180,MS SubClass_190,Mo Sold_2,Mo Sold_3,Mo Sold_4,Mo Sold_5,Mo Sold_6,Mo Sold_7,Mo Sold_8,Mo Sold_9,Mo Sold_10,Mo Sold_11,Mo Sold_12
0,2658,902301120,69.000000,9142,Grvl,1,1910,1950,0.0,2,1,Stone,1,2,0,1,0,1,0,1020,1020,3,908,1020,1928,0,0,2,0,4,2,1,9,0,0,1910.0,1,1,440,0,0,3,0,60,112,0,0,0,,0,2006,0,0,0,0,1,1,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0
1,2718,905108090,69.545961,9662,,1,1977,1977,0.0,2,2,CBlock,3,2,0,1,0,1,0,1967,1967,2,1967,0,1967,0,0,2,0,6,2,2,10,0,0,1977.0,3,2,580,2,2,3,170,0,0,0,0,0,,0,2006,0,0,0,1,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
2,2414,528218130,58.000000,17104,,1,2006,2006,0.0,3,2,PConc,3,3,2,6,554,1,0,100,654,4,664,832,1496,1,0,2,1,3,1,3,7,1,3,2006.0,2,2,426,2,2,3,100,24,0,0,0,0,,0,2006,0,0,0,1,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
875,1373,904100040,55.000000,8250,,1,1968,1968,0.0,2,2,CBlock,2,2,0,4,250,2,492,210,952,4,1211,0,1211,0,0,1,0,3,1,2,5,1,2,1968.0,1,1,322,2,2,3,0,63,0,0,0,0,,0,2008,0,0,0,1,0,1,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
876,1672,527425140,60.000000,9000,,1,1971,1971,0.0,2,2,PConc,2,2,0,5,616,1,0,248,864,2,864,0,864,0,0,1,0,3,1,2,5,0,0,1974.0,1,2,528,2,2,3,0,0,0,0,0,2,,0,2007,0,0,0,1,0,1,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
877,1939,535327160,70.000000,8400,,1,1955,1955,0.0,2,3,CBlock,2,2,0,4,793,1,0,130,923,2,925,0,925,1,0,1,0,3,1,2,5,2,2,1955.0,1,1,390,2,2,3,81,0,0,0,0,2,,0,2007,0,0,0,1,0,1,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0


In [86]:
# drop 'PID' feature
dataset_dum.drop(columns=['PID'], inplace= True)

In [87]:
# drop 'Alley' feature
dataset_dum.drop(columns=['Alley'], inplace= True)

In [88]:
print_dataset(dataset_dum)

Unnamed: 0,Id,Lot Frontage,Lot Area,Land Slope,Year Built,Year Remod/Add,Mas Vnr Area,Exter Qual,Exter Cond,Foundation,Bsmt Qual,Bsmt Cond,Bsmt Exposure,BsmtFin Type 1,BsmtFin SF 1,BsmtFin Type 2,BsmtFin SF 2,Bsmt Unf SF,Total Bsmt SF,Heating QC,1st Flr SF,2nd Flr SF,Gr Liv Area,Bsmt Full Bath,Bsmt Half Bath,Full Bath,Half Bath,Bedroom AbvGr,Kitchen AbvGr,Kitchen Qual,TotRms AbvGrd,Fireplaces,Fireplace Qu,Garage Yr Blt,Garage Finish,Garage Cars,Garage Area,Garage Qual,Garage Cond,Paved Drive,Wood Deck SF,Open Porch SF,Enclosed Porch,Screen Porch,Pool QC,Fence,Misc Feature,Misc Val,Yr Sold,MS Zoning_FV,MS Zoning_I (all),MS Zoning_RH,MS Zoning_RL,MS Zoning_RM,Street_Pave,Street_Pave.1,Lot Shape_IR2,Lot Shape_IR3,Lot Shape_Reg,Land Contour_HLS,Land Contour_Low,Land Contour_Lvl,Utilities_NoSewr,Neighborhood_Blueste,Neighborhood_BrDale,Neighborhood_BrkSide,Neighborhood_ClearCr,Neighborhood_CollgCr,Neighborhood_Crawfor,Neighborhood_Edwards,Neighborhood_Gilbert,Neighborhood_Greens,Neighborhood_IDOTRR,Neighborhood_MeadowV,Neighborhood_Mitchel,Neighborhood_NAmes,Neighborhood_NPkVill,Neighborhood_NWAmes,Neighborhood_NoRidge,Neighborhood_NridgHt,Neighborhood_OldTown,Neighborhood_SWISU,Neighborhood_Sawyer,Neighborhood_SawyerW,Neighborhood_Somerst,Neighborhood_StoneBr,Neighborhood_Timber,Neighborhood_Veenker,Bldg Type_2fmCon,Bldg Type_Duplex,Bldg Type_Twnhs,Bldg Type_TwnhsE,House Style_1.5Unf,House Style_1Story,House Style_2.5Fin,House Style_2.5Unf,House Style_2Story,House Style_SFoyer,House Style_SLvl,Roof Style_Gable,Roof Style_Gambrel,Roof Style_Hip,Roof Style_Mansard,Roof Style_Shed,Roof Matl_Metal,Roof Matl_Roll,Roof Matl_Tar&Grv,Roof Matl_WdShake,Roof Matl_WdShngl,Exterior 1st_AsphShn,Exterior 1st_BrkComm,Exterior 1st_BrkFace,Exterior 1st_CemntBd,Exterior 1st_HdBoard,Exterior 1st_MetalSd,Exterior 1st_Plywood,Exterior 1st_PreCast,Exterior 1st_Stucco,Exterior 1st_VinylSd,Exterior 1st_Wd Sdng,Exterior 1st_WdShing,Exterior 2nd_AsphShn,Exterior 2nd_Brk Cmn,Exterior 2nd_BrkFace,Exterior 2nd_CBlock,Exterior 2nd_CmentBd,Exterior 2nd_HdBoard,Exterior 2nd_ImStucc,Exterior 2nd_MetalSd,Exterior 2nd_Other,Exterior 2nd_Plywood,Exterior 2nd_PreCast,Exterior 2nd_Stucco,Exterior 2nd_VinylSd,Exterior 2nd_Wd Sdng,Exterior 2nd_Wd Shng,Condition 1_Feedr,Condition 1_Norm,Condition 1_PosA,Condition 1_PosN,Condition 1_RRAe,Condition 1_RRAn,Condition 1_RRNe,Condition 1_RRNn,Condition 2_Norm,Condition 2_PosA,Mas Vnr Type_BrkFace,Mas Vnr Type_CBlock,Mas Vnr Type_NA,Mas Vnr Type_None,Mas Vnr Type_Stone,Heating_GasA,Heating_GasW,Heating_Grav,Central Air_Y,Electrical_FuseF,Electrical_FuseP,Electrical_SBrkr,Functional_Maj2,Functional_Min1,Functional_Min2,Functional_Mod,Functional_Typ,Garage Type_Attchd,Garage Type_Basment,Garage Type_BuiltIn,Garage Type_CarPort,Garage Type_Detchd,Garage Type_NA,Sale Type_CWD,Sale Type_Con,Sale Type_ConLD,Sale Type_ConLI,Sale Type_ConLw,Sale Type_New,Sale Type_Oth,Sale Type_VWD,Sale Type_WD,Lot Config_CulDSac,Lot Config_FR2,Lot Config_FR3,Lot Config_Inside,Overall Qual_3,Overall Qual_4,Overall Qual_5,Overall Qual_6,Overall Qual_7,Overall Qual_8,Overall Qual_9,Overall Qual_10,Overall Cond_2,Overall Cond_3,Overall Cond_4,Overall Cond_5,Overall Cond_6,Overall Cond_7,Overall Cond_8,Overall Cond_9,MS SubClass_30,MS SubClass_40,MS SubClass_45,MS SubClass_50,MS SubClass_60,MS SubClass_70,MS SubClass_75,MS SubClass_80,MS SubClass_85,MS SubClass_90,MS SubClass_120,MS SubClass_160,MS SubClass_180,MS SubClass_190,Mo Sold_2,Mo Sold_3,Mo Sold_4,Mo Sold_5,Mo Sold_6,Mo Sold_7,Mo Sold_8,Mo Sold_9,Mo Sold_10,Mo Sold_11,Mo Sold_12
0,2658,69.000000,9142,1,1910,1950,0.0,2,1,Stone,1,2,0,1,0,1,0,1020,1020,3,908,1020,1928,0,0,2,0,4,2,1,9,0,0,1910.0,1,1,440,0,0,3,0,60,112,0,0,0,,0,2006,0,0,0,0,1,1,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0
1,2718,69.545961,9662,1,1977,1977,0.0,2,2,CBlock,3,2,0,1,0,1,0,1967,1967,2,1967,0,1967,0,0,2,0,6,2,2,10,0,0,1977.0,3,2,580,2,2,3,170,0,0,0,0,0,,0,2006,0,0,0,1,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
2,2414,58.000000,17104,1,2006,2006,0.0,3,2,PConc,3,3,2,6,554,1,0,100,654,4,664,832,1496,1,0,2,1,3,1,3,7,1,3,2006.0,2,2,426,2,2,3,100,24,0,0,0,0,,0,2006,0,0,0,1,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
875,1373,55.000000,8250,1,1968,1968,0.0,2,2,CBlock,2,2,0,4,250,2,492,210,952,4,1211,0,1211,0,0,1,0,3,1,2,5,1,2,1968.0,1,1,322,2,2,3,0,63,0,0,0,0,,0,2008,0,0,0,1,0,1,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
876,1672,60.000000,9000,1,1971,1971,0.0,2,2,PConc,2,2,0,5,616,1,0,248,864,2,864,0,864,0,0,1,0,3,1,2,5,0,0,1974.0,1,2,528,2,2,3,0,0,0,0,0,2,,0,2007,0,0,0,1,0,1,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
877,1939,70.000000,8400,1,1955,1955,0.0,2,3,CBlock,2,2,0,4,793,1,0,130,923,2,925,0,925,1,0,1,0,3,1,2,5,2,2,1955.0,1,1,390,2,2,3,81,0,0,0,0,2,,0,2007,0,0,0,1,0,1,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0


# Features: for possible engeeniering ===========================================================

In [89]:
features_to_be_decide = [

    'Year Built', 'Yr Sold',
    'Year Remod/Add',
    'Foundation',
    'Bsmt Full Bath', 'Bsmt Half Bath', 'Full Bath', 'Half Bath',
    'Bedroom AbvGr',
    'Kitchen AbvGr',
    'TotRms AbvGrd',
    'Fireplaces',
    'Garage Yr Blt',
    'Garage Cars', 'Garage Area',
#         'Misc Feature' # Elev, Gar2,    

    
    

]

In [90]:
null = check_nulls(dataset_dum[features_to_be_decide])
null

Unnamed: 0,column name,null count,null count %,values example


In [91]:
dataset_dum['Garage Yr Blt'].isna()

0      False
1      False
2      False
3      False
4      False
       ...  
873    False
874    False
875    False
876    False
877    False
Name: Garage Yr Blt, Length: 878, dtype: bool

In [92]:
# replace_null_with_mean(dataset_dum, ['Garage Yr Blt'])
# dataset_dum['Garage Yr Blt'] = dataset_dum['Garage Yr Blt'].apply(lambda x: int(x))

In [93]:
# dataset_dum

In [94]:
print_dataset_type(dataset_dum[features_to_be_decide])

Year Built          int64
Yr Sold             int64
Year Remod/Add      int64
Foundation         object
Bsmt Full Bath      int64
Bsmt Half Bath      int64
Full Bath           int64
Half Bath           int64
Bedroom AbvGr       int64
Kitchen AbvGr       int64
TotRms AbvGrd       int64
Fireplaces          int64
Garage Yr Blt     float64
Garage Cars         int64
Garage Area         int64
dtype: object


In [95]:
# drop 'Misc Feature' feature
# dataset_dum.drop(columns=['Misc Feature'], inplace= True)

In [96]:
# plt.plot(dataset_dum['SalePrice'],dataset_dum['Garage Yr Blt'],'.')

In [97]:
# ind = dataset_dum [dataset_dum['Garage Yr Blt'] > 2010].index
# dataset_dum.drop(index=ind , inplace=True)

In [98]:
# ind = dataset_dum[dataset_dum['SalePrice']<30000]['Garage Yr Blt'].index 
# dataset_dum.drop(index = ind,inplace=True)

In [99]:
dataset_dum['Garage Age'] = dataset_dum['Yr Sold'] - dataset_dum['Garage Yr Blt']

In [100]:
# plt.plot(dataset_dum['SalePrice'],dataset_dum['Garage Age'],'.');

In [101]:
# ind = dataset_dum[dataset_dum['SalePrice']>500000]['Garage Age'].index 
# dataset_dum.drop(index = ind,inplace=True)

In [102]:
# house age

In [103]:
dataset_dum[['Yr Sold','Year Built']].describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Yr Sold,878.0,2007.82574,1.327861,2006.0,2007.0,2008.0,2009.0,2010.0
Year Built,878.0,1970.492027,30.395895,1880.0,1954.0,1972.0,1999.75,2010.0


In [104]:
dataset_dum['Age'] =  dataset_dum['Yr Sold'] - dataset_dum['Year Built']

In [105]:
# plt.plot(dataset_dum['SalePrice'],dataset_dum['Age'],'.');

In [106]:
# remodel age

In [107]:
dataset_dum['remodel age'] = dataset_dum['Yr Sold'] - dataset_dum['Year Remod/Add']

In [108]:
# plt.plot(dataset_dum['SalePrice'],dataset_dum['remodel age'],'.');

In [109]:
# plt.plot (dataset_dum['SalePrice'], dataset_dum['Foundation'], '.');

In [110]:
# drop rows with ['Foundation']=='Wood'
# ind = dataset_dum[dataset_dum['Foundation']=='Wood'].index 
# dataset_dum.drop(index = ind,inplace=True)

In [111]:
list_cat=['Foundation']
x = [ ['Wood',0], ['Stone',0] , ['Slab',1], ['BrkTil',2], ['CBlock', 3], ['PConc', 4]]
dataset_dum[list_cat] = replace_quantity_to_quality(dataset_dum[list_cat], x )

In [112]:
# plt.plot (dataset_dum['SalePrice'], dataset_dum['Foundation'], '.');

In [113]:
dataset_dum['Foundation'].value_counts()

4    383
3    381
2     90
1     15
0      9
Name: Foundation, dtype: int64

In [114]:
# plt.plot(dataset_dum['SalePrice'],dataset_dum[['Bsmt Full Bath']],'.');

In [115]:
# plt.plot(dataset_dum['SalePrice'], dataset_dum[[ 'Full Bath']],'.');

In [116]:
# plt.plot(dataset_dum['SalePrice'], dataset_dum['Half Bath'],'.');

In [117]:
# plt.plot(dataset_dum['SalePrice'], dataset_dum[['Bedroom AbvGr']],'.');

In [118]:
# plt.plot(dataset_dum['SalePrice'], dataset_dum[['Kitchen AbvGr']],'.');

In [119]:
# plt.plot(dataset_dum['SalePrice'], dataset_dum[['TotRms AbvGrd']],'.')

In [120]:
# plt.plot(dataset_dum['SalePrice'], dataset_dum[['Fireplaces']],'.');

In [121]:
# plt.plot(dataset_dum['SalePrice'], dataset_dum[['Garage Cars']],'.');

In [122]:
# plt.plot(dataset_dum['SalePrice'], dataset_dum[['Garage Area',]],'.');

In [123]:
dataset_dum.drop(columns=['Misc Feature'], inplace= True)

In [124]:
print_dataset(dataset_dum)

Unnamed: 0,Id,Lot Frontage,Lot Area,Land Slope,Year Built,Year Remod/Add,Mas Vnr Area,Exter Qual,Exter Cond,Foundation,Bsmt Qual,Bsmt Cond,Bsmt Exposure,BsmtFin Type 1,BsmtFin SF 1,BsmtFin Type 2,BsmtFin SF 2,Bsmt Unf SF,Total Bsmt SF,Heating QC,1st Flr SF,2nd Flr SF,Gr Liv Area,Bsmt Full Bath,Bsmt Half Bath,Full Bath,Half Bath,Bedroom AbvGr,Kitchen AbvGr,Kitchen Qual,TotRms AbvGrd,Fireplaces,Fireplace Qu,Garage Yr Blt,Garage Finish,Garage Cars,Garage Area,Garage Qual,Garage Cond,Paved Drive,Wood Deck SF,Open Porch SF,Enclosed Porch,Screen Porch,Pool QC,Fence,Misc Val,Yr Sold,MS Zoning_FV,MS Zoning_I (all),MS Zoning_RH,MS Zoning_RL,MS Zoning_RM,Street_Pave,Street_Pave.1,Lot Shape_IR2,Lot Shape_IR3,Lot Shape_Reg,Land Contour_HLS,Land Contour_Low,Land Contour_Lvl,Utilities_NoSewr,Neighborhood_Blueste,Neighborhood_BrDale,Neighborhood_BrkSide,Neighborhood_ClearCr,Neighborhood_CollgCr,Neighborhood_Crawfor,Neighborhood_Edwards,Neighborhood_Gilbert,Neighborhood_Greens,Neighborhood_IDOTRR,Neighborhood_MeadowV,Neighborhood_Mitchel,Neighborhood_NAmes,Neighborhood_NPkVill,Neighborhood_NWAmes,Neighborhood_NoRidge,Neighborhood_NridgHt,Neighborhood_OldTown,Neighborhood_SWISU,Neighborhood_Sawyer,Neighborhood_SawyerW,Neighborhood_Somerst,Neighborhood_StoneBr,Neighborhood_Timber,Neighborhood_Veenker,Bldg Type_2fmCon,Bldg Type_Duplex,Bldg Type_Twnhs,Bldg Type_TwnhsE,House Style_1.5Unf,House Style_1Story,House Style_2.5Fin,House Style_2.5Unf,House Style_2Story,House Style_SFoyer,House Style_SLvl,Roof Style_Gable,Roof Style_Gambrel,Roof Style_Hip,Roof Style_Mansard,Roof Style_Shed,Roof Matl_Metal,Roof Matl_Roll,Roof Matl_Tar&Grv,Roof Matl_WdShake,Roof Matl_WdShngl,Exterior 1st_AsphShn,Exterior 1st_BrkComm,Exterior 1st_BrkFace,Exterior 1st_CemntBd,Exterior 1st_HdBoard,Exterior 1st_MetalSd,Exterior 1st_Plywood,Exterior 1st_PreCast,Exterior 1st_Stucco,Exterior 1st_VinylSd,Exterior 1st_Wd Sdng,Exterior 1st_WdShing,Exterior 2nd_AsphShn,Exterior 2nd_Brk Cmn,Exterior 2nd_BrkFace,Exterior 2nd_CBlock,Exterior 2nd_CmentBd,Exterior 2nd_HdBoard,Exterior 2nd_ImStucc,Exterior 2nd_MetalSd,Exterior 2nd_Other,Exterior 2nd_Plywood,Exterior 2nd_PreCast,Exterior 2nd_Stucco,Exterior 2nd_VinylSd,Exterior 2nd_Wd Sdng,Exterior 2nd_Wd Shng,Condition 1_Feedr,Condition 1_Norm,Condition 1_PosA,Condition 1_PosN,Condition 1_RRAe,Condition 1_RRAn,Condition 1_RRNe,Condition 1_RRNn,Condition 2_Norm,Condition 2_PosA,Mas Vnr Type_BrkFace,Mas Vnr Type_CBlock,Mas Vnr Type_NA,Mas Vnr Type_None,Mas Vnr Type_Stone,Heating_GasA,Heating_GasW,Heating_Grav,Central Air_Y,Electrical_FuseF,Electrical_FuseP,Electrical_SBrkr,Functional_Maj2,Functional_Min1,Functional_Min2,Functional_Mod,Functional_Typ,Garage Type_Attchd,Garage Type_Basment,Garage Type_BuiltIn,Garage Type_CarPort,Garage Type_Detchd,Garage Type_NA,Sale Type_CWD,Sale Type_Con,Sale Type_ConLD,Sale Type_ConLI,Sale Type_ConLw,Sale Type_New,Sale Type_Oth,Sale Type_VWD,Sale Type_WD,Lot Config_CulDSac,Lot Config_FR2,Lot Config_FR3,Lot Config_Inside,Overall Qual_3,Overall Qual_4,Overall Qual_5,Overall Qual_6,Overall Qual_7,Overall Qual_8,Overall Qual_9,Overall Qual_10,Overall Cond_2,Overall Cond_3,Overall Cond_4,Overall Cond_5,Overall Cond_6,Overall Cond_7,Overall Cond_8,Overall Cond_9,MS SubClass_30,MS SubClass_40,MS SubClass_45,MS SubClass_50,MS SubClass_60,MS SubClass_70,MS SubClass_75,MS SubClass_80,MS SubClass_85,MS SubClass_90,MS SubClass_120,MS SubClass_160,MS SubClass_180,MS SubClass_190,Mo Sold_2,Mo Sold_3,Mo Sold_4,Mo Sold_5,Mo Sold_6,Mo Sold_7,Mo Sold_8,Mo Sold_9,Mo Sold_10,Mo Sold_11,Mo Sold_12,Garage Age,Age,remodel age
0,2658,69.000000,9142,1,1910,1950,0.0,2,1,0,1,2,0,1,0,1,0,1020,1020,3,908,1020,1928,0,0,2,0,4,2,1,9,0,0,1910.0,1,1,440,0,0,3,0,60,112,0,0,0,0,2006,0,0,0,0,1,1,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,96.0,96,56
1,2718,69.545961,9662,1,1977,1977,0.0,2,2,3,3,2,0,1,0,1,0,1967,1967,2,1967,0,1967,0,0,2,0,6,2,2,10,0,0,1977.0,3,2,580,2,2,3,170,0,0,0,0,0,0,2006,0,0,0,1,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,29.0,29,29
2,2414,58.000000,17104,1,2006,2006,0.0,3,2,4,3,3,2,6,554,1,0,100,654,4,664,832,1496,1,0,2,1,3,1,3,7,1,3,2006.0,2,2,426,2,2,3,100,24,0,0,0,0,0,2006,0,0,0,1,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0.0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
875,1373,55.000000,8250,1,1968,1968,0.0,2,2,3,2,2,0,4,250,2,492,210,952,4,1211,0,1211,0,0,1,0,3,1,2,5,1,2,1968.0,1,1,322,2,2,3,0,63,0,0,0,0,0,2008,0,0,0,1,0,1,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,40.0,40,40
876,1672,60.000000,9000,1,1971,1971,0.0,2,2,4,2,2,0,5,616,1,0,248,864,2,864,0,864,0,0,1,0,3,1,2,5,0,0,1974.0,1,2,528,2,2,3,0,0,0,0,0,2,0,2007,0,0,0,1,0,1,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,33.0,36,36
877,1939,70.000000,8400,1,1955,1955,0.0,2,3,3,2,2,0,4,793,1,0,130,923,2,925,0,925,1,0,1,0,3,1,2,5,2,2,1955.0,1,1,390,2,2,3,81,0,0,0,0,2,0,2007,0,0,0,1,0,1,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,52.0,52,52


In [125]:
dataset_dum.isnull().sum().sort_values(ascending=False)

Id                      0
Exterior 1st_MetalSd    0
Condition 2_Norm        0
Condition 2_PosA        0
Mas Vnr Type_BrkFace    0
                       ..
Neighborhood_OldTown    0
Neighborhood_SWISU      0
Neighborhood_Sawyer     0
Neighborhood_SawyerW    0
remodel age             0
Length: 225, dtype: int64

In [126]:
# replace_null_with_mean(dataset_dum , ['Mas Vnr Area'])

In [127]:
# plt.plot(dataset_dum['SalePrice'], dataset_dum[['Lot Frontage']],'.');

In [128]:
# ind = dataset_dum[dataset_dum['Lot Frontage'] > 150].index
# dataset_dum.drop(index = ind, inplace=True)

# Features: Final Dtype check ===========================================================

In [129]:
print_dataset_type(dataset_dum)

Id                        int64
Lot Frontage            float64
Lot Area                  int64
Land Slope                int64
Year Built                int64
Year Remod/Add            int64
Mas Vnr Area            float64
Exter Qual                int64
Exter Cond                int64
Foundation                int64
Bsmt Qual                 int64
Bsmt Cond                 int64
Bsmt Exposure             int64
BsmtFin Type 1            int64
BsmtFin SF 1              int64
BsmtFin Type 2            int64
BsmtFin SF 2              int64
Bsmt Unf SF               int64
Total Bsmt SF             int64
Heating QC                int64
1st Flr SF                int64
2nd Flr SF                int64
Gr Liv Area               int64
Bsmt Full Bath            int64
Bsmt Half Bath            int64
Full Bath                 int64
Half Bath                 int64
Bedroom AbvGr             int64
Kitchen AbvGr             int64
Kitchen Qual              int64
TotRms AbvGrd             int64
Fireplac

In [130]:
print_dataset(dataset_dum)

Unnamed: 0,Id,Lot Frontage,Lot Area,Land Slope,Year Built,Year Remod/Add,Mas Vnr Area,Exter Qual,Exter Cond,Foundation,Bsmt Qual,Bsmt Cond,Bsmt Exposure,BsmtFin Type 1,BsmtFin SF 1,BsmtFin Type 2,BsmtFin SF 2,Bsmt Unf SF,Total Bsmt SF,Heating QC,1st Flr SF,2nd Flr SF,Gr Liv Area,Bsmt Full Bath,Bsmt Half Bath,Full Bath,Half Bath,Bedroom AbvGr,Kitchen AbvGr,Kitchen Qual,TotRms AbvGrd,Fireplaces,Fireplace Qu,Garage Yr Blt,Garage Finish,Garage Cars,Garage Area,Garage Qual,Garage Cond,Paved Drive,Wood Deck SF,Open Porch SF,Enclosed Porch,Screen Porch,Pool QC,Fence,Misc Val,Yr Sold,MS Zoning_FV,MS Zoning_I (all),MS Zoning_RH,MS Zoning_RL,MS Zoning_RM,Street_Pave,Street_Pave.1,Lot Shape_IR2,Lot Shape_IR3,Lot Shape_Reg,Land Contour_HLS,Land Contour_Low,Land Contour_Lvl,Utilities_NoSewr,Neighborhood_Blueste,Neighborhood_BrDale,Neighborhood_BrkSide,Neighborhood_ClearCr,Neighborhood_CollgCr,Neighborhood_Crawfor,Neighborhood_Edwards,Neighborhood_Gilbert,Neighborhood_Greens,Neighborhood_IDOTRR,Neighborhood_MeadowV,Neighborhood_Mitchel,Neighborhood_NAmes,Neighborhood_NPkVill,Neighborhood_NWAmes,Neighborhood_NoRidge,Neighborhood_NridgHt,Neighborhood_OldTown,Neighborhood_SWISU,Neighborhood_Sawyer,Neighborhood_SawyerW,Neighborhood_Somerst,Neighborhood_StoneBr,Neighborhood_Timber,Neighborhood_Veenker,Bldg Type_2fmCon,Bldg Type_Duplex,Bldg Type_Twnhs,Bldg Type_TwnhsE,House Style_1.5Unf,House Style_1Story,House Style_2.5Fin,House Style_2.5Unf,House Style_2Story,House Style_SFoyer,House Style_SLvl,Roof Style_Gable,Roof Style_Gambrel,Roof Style_Hip,Roof Style_Mansard,Roof Style_Shed,Roof Matl_Metal,Roof Matl_Roll,Roof Matl_Tar&Grv,Roof Matl_WdShake,Roof Matl_WdShngl,Exterior 1st_AsphShn,Exterior 1st_BrkComm,Exterior 1st_BrkFace,Exterior 1st_CemntBd,Exterior 1st_HdBoard,Exterior 1st_MetalSd,Exterior 1st_Plywood,Exterior 1st_PreCast,Exterior 1st_Stucco,Exterior 1st_VinylSd,Exterior 1st_Wd Sdng,Exterior 1st_WdShing,Exterior 2nd_AsphShn,Exterior 2nd_Brk Cmn,Exterior 2nd_BrkFace,Exterior 2nd_CBlock,Exterior 2nd_CmentBd,Exterior 2nd_HdBoard,Exterior 2nd_ImStucc,Exterior 2nd_MetalSd,Exterior 2nd_Other,Exterior 2nd_Plywood,Exterior 2nd_PreCast,Exterior 2nd_Stucco,Exterior 2nd_VinylSd,Exterior 2nd_Wd Sdng,Exterior 2nd_Wd Shng,Condition 1_Feedr,Condition 1_Norm,Condition 1_PosA,Condition 1_PosN,Condition 1_RRAe,Condition 1_RRAn,Condition 1_RRNe,Condition 1_RRNn,Condition 2_Norm,Condition 2_PosA,Mas Vnr Type_BrkFace,Mas Vnr Type_CBlock,Mas Vnr Type_NA,Mas Vnr Type_None,Mas Vnr Type_Stone,Heating_GasA,Heating_GasW,Heating_Grav,Central Air_Y,Electrical_FuseF,Electrical_FuseP,Electrical_SBrkr,Functional_Maj2,Functional_Min1,Functional_Min2,Functional_Mod,Functional_Typ,Garage Type_Attchd,Garage Type_Basment,Garage Type_BuiltIn,Garage Type_CarPort,Garage Type_Detchd,Garage Type_NA,Sale Type_CWD,Sale Type_Con,Sale Type_ConLD,Sale Type_ConLI,Sale Type_ConLw,Sale Type_New,Sale Type_Oth,Sale Type_VWD,Sale Type_WD,Lot Config_CulDSac,Lot Config_FR2,Lot Config_FR3,Lot Config_Inside,Overall Qual_3,Overall Qual_4,Overall Qual_5,Overall Qual_6,Overall Qual_7,Overall Qual_8,Overall Qual_9,Overall Qual_10,Overall Cond_2,Overall Cond_3,Overall Cond_4,Overall Cond_5,Overall Cond_6,Overall Cond_7,Overall Cond_8,Overall Cond_9,MS SubClass_30,MS SubClass_40,MS SubClass_45,MS SubClass_50,MS SubClass_60,MS SubClass_70,MS SubClass_75,MS SubClass_80,MS SubClass_85,MS SubClass_90,MS SubClass_120,MS SubClass_160,MS SubClass_180,MS SubClass_190,Mo Sold_2,Mo Sold_3,Mo Sold_4,Mo Sold_5,Mo Sold_6,Mo Sold_7,Mo Sold_8,Mo Sold_9,Mo Sold_10,Mo Sold_11,Mo Sold_12,Garage Age,Age,remodel age
0,2658,69.000000,9142,1,1910,1950,0.0,2,1,0,1,2,0,1,0,1,0,1020,1020,3,908,1020,1928,0,0,2,0,4,2,1,9,0,0,1910.0,1,1,440,0,0,3,0,60,112,0,0,0,0,2006,0,0,0,0,1,1,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,96.0,96,56
1,2718,69.545961,9662,1,1977,1977,0.0,2,2,3,3,2,0,1,0,1,0,1967,1967,2,1967,0,1967,0,0,2,0,6,2,2,10,0,0,1977.0,3,2,580,2,2,3,170,0,0,0,0,0,0,2006,0,0,0,1,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,29.0,29,29
2,2414,58.000000,17104,1,2006,2006,0.0,3,2,4,3,3,2,6,554,1,0,100,654,4,664,832,1496,1,0,2,1,3,1,3,7,1,3,2006.0,2,2,426,2,2,3,100,24,0,0,0,0,0,2006,0,0,0,1,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0.0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
875,1373,55.000000,8250,1,1968,1968,0.0,2,2,3,2,2,0,4,250,2,492,210,952,4,1211,0,1211,0,0,1,0,3,1,2,5,1,2,1968.0,1,1,322,2,2,3,0,63,0,0,0,0,0,2008,0,0,0,1,0,1,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,40.0,40,40
876,1672,60.000000,9000,1,1971,1971,0.0,2,2,4,2,2,0,5,616,1,0,248,864,2,864,0,864,0,0,1,0,3,1,2,5,0,0,1974.0,1,2,528,2,2,3,0,0,0,0,0,2,0,2007,0,0,0,1,0,1,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,33.0,36,36
877,1939,70.000000,8400,1,1955,1955,0.0,2,3,3,2,2,0,4,793,1,0,130,923,2,925,0,925,1,0,1,0,3,1,2,5,2,2,1955.0,1,1,390,2,2,3,81,0,0,0,0,2,0,2007,0,0,0,1,0,1,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,52.0,52,52


# Features: Target feature: Sale Price ===========================================================

In [131]:
dataset_dum.to_csv('../datasets/test_df_cleaned.csv')