In [114]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from ipywidgets import interact, fixed  
from itertools import product
import scipy.stats as ss
from sklearn.ensemble import RandomForestRegressor 
from sklearn.feature_selection import SelectFromModel
from sklearn.linear_model import Lasso, LassoCV, LinearRegression
from sklearn.feature_selection import RFE
from sklearn.feature_selection import RFECV
from sklearn.feature_selection import SelectKBest 
from sklearn.feature_selection import mutual_info_regression, chi2
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction import DictVectorizer

In [115]:
#!pip install ipywidgets

## Lecture des données 

In [116]:
df = pd.read_csv("housing_dataset.csv")

In [117]:
df.duplicated().sum()

0

In [118]:
del df["Id"]

In [119]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1460 entries, 0 to 1459
Data columns (total 80 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   MSSubClass     1460 non-null   int64  
 1   MSZoning       1460 non-null   object 
 2   LotFrontage    1201 non-null   float64
 3   LotArea        1460 non-null   int64  
 4   Street         1460 non-null   object 
 5   Alley          91 non-null     object 
 6   LotShape       1460 non-null   object 
 7   LandContour    1460 non-null   object 
 8   Utilities      1460 non-null   object 
 9   LotConfig      1460 non-null   object 
 10  LandSlope      1460 non-null   object 
 11  Neighborhood   1460 non-null   object 
 12  Condition1     1460 non-null   object 
 13  Condition2     1460 non-null   object 
 14  BldgType       1460 non-null   object 
 15  HouseStyle     1460 non-null   object 
 16  OverallQual    1460 non-null   int64  
 17  OverallCond    1460 non-null   int64  
 18  YearBuil

In [120]:
df.shape

(1460, 80)

# Extraction des données numériques 

In [121]:
numerical_df = df.select_dtypes(include =['int64'])

In [122]:
numerical_df.head()

Unnamed: 0,MSSubClass,LotArea,OverallQual,OverallCond,YearBuilt,YearRemodAdd,BsmtFinSF1,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,...,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,MiscVal,MoSold,YrSold,SalePrice
0,60,8450,7,5,2003,2003,706,0,150,856,...,0,61,0,0,0,0,0,2,2008,208500
1,20,9600,6,8,1976,1976,978,0,284,1262,...,298,0,0,0,0,0,0,5,2007,181500
2,60,11250,7,5,2001,2002,486,0,434,920,...,0,42,0,0,0,0,0,9,2008,223500
3,70,9550,7,5,1915,1970,216,0,540,756,...,0,35,272,0,0,0,0,2,2006,140000
4,60,14260,8,5,2000,2000,655,0,490,1145,...,192,84,0,0,0,0,0,12,2008,250000


In [123]:
numerical_df.isnull().sum().sum()

0

## Détection d'outliers et Nettoyage des données 

In [124]:
numerical_df.boxplot("SalePrice",figsize=(16,8))

<Axes: title={'center': 'k = 2'}, xlabel='GrLivArea', ylabel='SalePrice'>

### Fonction permettant de détercter et nettoyer les outliers dans une colonne donnée

Nous allons utiliser la méthode de l'écart intertquartile 

In [125]:
#@interact(df_arg=fixed(numerical_df),k=(3,0,-0.5))
def remove_outliers_iqr(df_arg,k = 1.5):
    df_iqr = df_arg.copy()
    #columns = [col for col in df_arg.columns]
    for col in df_arg.columns:
        if df_arg[col].dtypes!="objects":
            q25, q75 = q25, q75 = df_iqr[col].quantile(.25), df_iqr[col].quantile(.75)
            ecart_iqr = q75- q25
            cut_off = ecart_iqr*k
            lower = q25-cut_off
            upper = q75+cut_off
            df_iqr[col] =np.where(((df_iqr[col]< lower)|(df_iqr[col]> upper)),df_arg[col].median(),df_iqr[col])
    return df_iqr  

In [126]:
k1 = 3
k2 = 2.5
k3 = 2
df_res = remove_outliers_iqr(numerical_df, k1)
df_res2 = remove_outliers_iqr(numerical_df, k2)
df_res3 = remove_outliers_iqr(numerical_df,k3)

In [127]:
numerical_df.describe()

Unnamed: 0,MSSubClass,LotArea,OverallQual,OverallCond,YearBuilt,YearRemodAdd,BsmtFinSF1,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,...,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,MiscVal,MoSold,YrSold,SalePrice
count,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,...,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0
mean,56.9,10516.83,6.1,5.58,1971.27,1984.87,443.64,46.55,567.24,1057.43,...,94.24,46.66,21.95,3.41,15.06,2.76,43.49,6.32,2007.82,180921.2
std,42.3,9981.26,1.38,1.11,30.2,20.65,456.1,161.32,441.87,438.71,...,125.34,66.26,61.12,29.32,55.76,40.18,496.12,2.7,1.33,79442.5
min,20.0,1300.0,1.0,1.0,1872.0,1950.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2006.0,34900.0
25%,20.0,7553.5,5.0,5.0,1954.0,1967.0,0.0,0.0,223.0,795.75,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,2007.0,129975.0
50%,50.0,9478.5,6.0,5.0,1973.0,1994.0,383.5,0.0,477.5,991.5,...,0.0,25.0,0.0,0.0,0.0,0.0,0.0,6.0,2008.0,163000.0
75%,70.0,11601.5,7.0,6.0,2000.0,2004.0,712.25,0.0,808.0,1298.25,...,168.0,68.0,0.0,0.0,0.0,0.0,0.0,8.0,2009.0,214000.0
max,190.0,215245.0,10.0,9.0,2010.0,2010.0,5644.0,1474.0,2336.0,6110.0,...,857.0,547.0,552.0,508.0,480.0,738.0,15500.0,12.0,2010.0,755000.0


In [128]:
df_res.describe()

Unnamed: 0,MSSubClass,LotArea,OverallQual,OverallCond,YearBuilt,YearRemodAdd,BsmtFinSF1,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,...,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,MiscVal,MoSold,YrSold,SalePrice
count,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,...,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0
mean,56.9,9518.46,6.1,5.58,1971.27,1984.87,440.04,0.0,567.24,1047.98,...,92.65,42.62,0.0,0.0,0.0,0.0,0.0,6.32,2007.82,177536.6
std,42.3,3554.68,1.38,1.11,30.2,20.65,435.29,0.0,441.87,403.45,...,121.53,55.96,0.0,0.0,0.0,0.0,0.0,2.7,1.33,70399.27
min,20.0,1300.0,1.0,2.0,1872.0,1950.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2006.0,34900.0
25%,20.0,7553.5,5.0,5.0,1954.0,1967.0,0.0,0.0,223.0,795.75,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,2007.0,129975.0
50%,50.0,9477.75,6.0,5.0,1973.0,1994.0,383.25,0.0,477.5,991.25,...,0.0,25.0,0.0,0.0,0.0,0.0,0.0,6.0,2008.0,163000.0
75%,70.0,11321.0,7.0,6.0,2000.0,2004.0,712.0,0.0,808.0,1288.75,...,168.0,65.0,0.0,0.0,0.0,0.0,0.0,8.0,2009.0,212000.0
max,190.0,23595.0,10.0,9.0,2010.0,2010.0,2260.0,0.0,2336.0,2633.0,...,670.0,267.0,0.0,0.0,0.0,0.0,0.0,12.0,2010.0,465000.0


In [129]:
def plot_view(x_col,numerical_df = numerical_df,target = "SalePrice"):
    fig, (ax1,ax2,ax3) = plt.subplots(1, 3, figsize  =(20,8))
    numerical_df.plot.scatter(x=x_col, y = target, ax = ax1, color ="red")
    df_res.plot.scatter(x=x_col, y = target, ax = ax1, color = "green")
    numerical_df.plot.scatter(x=x_col, y = target, ax = ax2, color ="red")
    df_res2.plot.scatter(x=x_col, y = target, ax = ax2, color = "green")
    numerical_df.plot.scatter(x=x_col, y = target, ax = ax3, color ="red")
    df_res3.plot.scatter(x=x_col, y = target, ax = ax3, color = "green")
    ax1.set_title(f'k = {k1}')
    ax2.set_title(f'k = {k2}')
    ax3.set_title(f'k = {k3}')

### On constate que pour une valeur de K=0.5, on arrive a déecter et enlever toutes les valeurs aberrantes selon cette méthode d'écart interquartile

## Feature Selection

### Correaltion entre différentes variables numériques

In [130]:
correlation_num =numerical_df.corr()

In [131]:
correlation_num.head()

Unnamed: 0,MSSubClass,LotArea,OverallQual,OverallCond,YearBuilt,YearRemodAdd,BsmtFinSF1,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,...,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,MiscVal,MoSold,YrSold,SalePrice
MSSubClass,1.0,-0.14,0.03,-0.06,0.03,0.04,-0.07,-0.07,-0.14,-0.24,...,-0.01,-0.01,-0.01,-0.04,-0.03,0.01,-0.01,-0.01,-0.02,-0.08
LotArea,-0.14,1.0,0.11,-0.01,0.01,0.01,0.21,0.11,-0.0,0.26,...,0.17,0.08,-0.02,0.02,0.04,0.08,0.04,0.0,-0.01,0.26
OverallQual,0.03,0.11,1.0,-0.09,0.57,0.55,0.24,-0.06,0.31,0.54,...,0.24,0.31,-0.11,0.03,0.06,0.07,-0.03,0.07,-0.03,0.79
OverallCond,-0.06,-0.01,-0.09,1.0,-0.38,0.07,-0.05,0.04,-0.14,-0.17,...,-0.0,-0.03,0.07,0.03,0.05,-0.0,0.07,-0.0,0.04,-0.08
YearBuilt,0.03,0.01,0.57,-0.38,1.0,0.59,0.25,-0.05,0.15,0.39,...,0.22,0.19,-0.39,0.03,-0.05,0.0,-0.03,0.01,-0.01,0.52


### heatmap de la matrice de corrélation (r>.5)

In [132]:
plt.figure(figsize= (20,10))
plt.title('Correlation')
sns.heatmap(correlation_num,vmin = -1, cmap = "coolwarm",vmax = 1, annot = True)

<Axes: title={'center': 'Correlation'}>

### Focus sur la correlation entre la target et les autres features

In [133]:
def corr_feature_selector(threshold, target = "SalePrice",corr = correlation_num):
    target_corr= corr[corr[target].abs()>=threshold][target]
    return target_corr, target_corr.axes[0].to_list()


In [134]:
target_corr, corr_feat_list =corr_feature_selector(.5)
corr_feat_list

['OverallQual',
 'YearBuilt',
 'YearRemodAdd',
 'TotalBsmtSF',
 '1stFlrSF',
 'GrLivArea',
 'FullBath',
 'TotRmsAbvGrd',
 'GarageCars',
 'GarageArea',
 'SalePrice']

### Heatmap de la corrélation entre ces 11 variables 

In [135]:
plt.figure(figsize= (20,10))
plt.title('Correlation')
sns.heatmap(correlation_num[correlation_num.abs()>.5],vmin = -1,cmap = "coolwarm",vmax = 1, annot = True, robust=True)

<Axes: title={'center': 'Correlation'}>

In [136]:
target_corr.nlargest(7)

SalePrice     1.00
OverallQual   0.79
GrLivArea     0.71
GarageCars    0.64
GarageArea    0.62
TotalBsmtSF   0.61
1stFlrSF      0.61
Name: SalePrice, dtype: float64

### Liste des variables numériques ayant une corrélation supéireure ou égale à 0.5:
- OverallQual
- YearBuilt
- YearRemodAdd
- TotalBsmtSF
- 1stFlrSF 
- GrLivArea
- FullBath
- TotRmsAbvGrd
- GarageCars
- GarageArea


### Barplot des variables numérriques ayant une corrélation supérieure à 0.5 avec la target 

In [137]:
target_corr.plot.bar(figsize =(16,5), color=np.where(target_corr.values>.6,"green","red"))

<Axes: title={'center': 'Correlation'}>

### Toutes ces 10 variables évoluent dans le même sens que le SalePrice car corrélation positive

### Focus sur le sous-ensemble de données 11 variables 

In [138]:
top_11_numerical_df = numerical_df[corr_feat_list]

In [139]:
top_11_numerical_df.head()

Unnamed: 0,OverallQual,YearBuilt,YearRemodAdd,TotalBsmtSF,1stFlrSF,GrLivArea,FullBath,TotRmsAbvGrd,GarageCars,GarageArea,SalePrice
0,7,2003,2003,856,856,1710,2,8,2,548,208500
1,6,1976,1976,1262,1262,1262,2,6,2,460,181500
2,7,2001,2002,920,920,1786,2,6,2,608,223500
3,7,1915,1970,756,961,1717,1,7,3,642,140000
4,8,2000,2000,1145,1145,2198,2,9,3,836,250000


In [140]:
sns.pairplot(top_11_numerical_df)

<seaborn.axisgrid.PairGrid at 0x20edc4545b0>

In [141]:
#!pip install -U scikit-learn

### RandomForest regrssor

In [142]:
def rf_features_selector(df_arg,top_n,target_name ="SalePrice"):
    seed = np.random.seed(10)
    #df_w = df_arg.copy()
    features = [col for col in df_arg.columns if col!=target_name]
    X = df_arg.copy()[features]
    y = df_arg[target_name].values
    model = RandomForestRegressor(random_state = seed)
    model.fit(X,y)
    #get feaures importance
    importance = model.feature_importances_
    indices = np.argsort(importance)
    feat_importances = pd.Series(importance, index=X.columns)
    plot = feat_importances.nlargest(top_n).plot(kind='barh', figsize =(10,5))
    plt.xlabel('importance')
    rf_features = pd.DataFrame(feat_importances.nlargest(top_n)).axes[0].tolist()
    return plot, rf_features

In [143]:
plot, rf_feat_list = rf_features_selector(numerical_df,15)
rf_feat_list

['OverallQual',
 'GrLivArea',
 'TotalBsmtSF',
 '2ndFlrSF',
 'BsmtFinSF1',
 '1stFlrSF',
 'LotArea',
 'GarageCars',
 'GarageArea',
 'YearBuilt',
 'YearRemodAdd',
 'FullBath',
 'TotRmsAbvGrd',
 'WoodDeckSF',
 'OverallCond']

### LasssoRegression

In [144]:
def lassoReg_feat_selector(df_arg=numerical_df,target_name ="SalePrice"):
    np.random.seed(10)
    features = [col for col in df_arg.columns if col!=target_name]
    X = df_arg.copy()[features]
    y = df_arg[target_name].values
    estimator = LassoCV(cv=5)
    sfm = SelectFromModel(estimator, prefit=False, norm_order=1, max_features=None)
    sfm.fit(X,y)
    feature_idx = sfm.get_support()
    Lasso_features = X.columns[feature_idx].tolist()
    return Lasso_features

In [145]:
lasso_feat_list = lassoReg_feat_selector()
lasso_feat_list

['LotArea',
 'YearBuilt',
 'YearRemodAdd',
 'BsmtFinSF1',
 'BsmtFinSF2',
 'BsmtUnfSF',
 'TotalBsmtSF',
 '2ndFlrSF',
 'GrLivArea',
 'GarageArea',
 'WoodDeckSF',
 'MiscVal']

### Recursive feature selection

In [146]:
def rfe_feature_selector(df_arg= numerical_df,target_name ="SalePrice"):
    np.random.seed(10)
    features = [col for col in df_arg.columns if col!=target_name]
    X = df_arg.copy()[features]
    y = df_arg[target_name].values
    rfe= RFE(estimator=LinearRegression(),n_features_to_select=15)
    rfe.fit(X,y)
    rfe_support = rfe.get_support()
    rfe_feat = X.loc[:,rfe_support].columns.to_list()
    return rfe_feat
    

In [147]:
rfe_feat_list = rfe_feature_selector()
rfe_feat_list


['OverallQual',
 'BsmtFinSF1',
 'BsmtFinSF2',
 'BsmtUnfSF',
 'TotalBsmtSF',
 '1stFlrSF',
 '2ndFlrSF',
 'LowQualFinSF',
 'GrLivArea',
 'BsmtFullBath',
 'BsmtHalfBath',
 'FullBath',
 'BedroomAbvGr',
 'KitchenAbvGr',
 'GarageCars']

### Mutual information feature selection

In [148]:
def mif_feature_selector(df_arg= numerical_df,target_name ="SalePrice"):
    np.random.seed(10)
    features = [col for col in df_arg.columns if col!=target_name]
    X = df_arg.copy()[features]
    y = df_arg[target_name].values
    mif= SelectKBest(score_func=mutual_info_regression,k =15)
    mif.fit(X, y)
    mif_support = mif.get_support()
    mif_feat = X.loc[:,mif_support].columns.to_list()
    return mif_feat
    
    
    

In [149]:
mif_feat_list = mif_feature_selector()
mif_feat_list

['LotArea',
 'OverallQual',
 'OverallCond',
 'YearBuilt',
 'YearRemodAdd',
 'BsmtFinSF1',
 'TotalBsmtSF',
 '1stFlrSF',
 '2ndFlrSF',
 'GrLivArea',
 'FullBath',
 'TotRmsAbvGrd',
 'Fireplaces',
 'GarageCars',
 'GarageArea']

## Combinaison des features selectionnées par différentes méthodes 

In [150]:
from collections import Counter
combined_feat_list  =rfe_feat_list+lasso_feat_list+rfe_feat_list+corr_feat_list+mif_feat_list
feat_freq= Counter(combined_feat_list)
feat_freq_df = pd.DataFrame({"Feature":feat_freq.keys(),"Frequence":feat_freq.values()})
feat_freq_df.sort_values("Frequence", ascending =False)
feat_freq_df = feat_freq_df[feat_freq_df.Feature!="SalePrice"]

In [151]:
feat_freq_df

Unnamed: 0,Feature,Frequence
0,OverallQual,4
1,BsmtFinSF1,4
2,BsmtFinSF2,3
3,BsmtUnfSF,3
4,TotalBsmtSF,5
5,1stFlrSF,4
6,2ndFlrSF,4
7,LowQualFinSF,2
8,GrLivArea,5
9,BsmtFullBath,2


### storing 

In [153]:
#feat_freq_df.to_csv("Selected_numerical_features.csv",index = False)

In [154]:
feat_freq_df.sort_values("Frequence", ascending=False).plot.bar("Feature", figsize = (16,8))

<Axes: xlabel='Feature'>

In [None]:
selected_features = feat_freq_df[feat_freq_df.Frequence>0].Feature.values
selected_features

array(['OverallQual', 'BsmtFinSF1', 'BsmtFinSF2', 'BsmtUnfSF',
       'TotalBsmtSF', '1stFlrSF', '2ndFlrSF', 'LowQualFinSF', 'GrLivArea',
       'BsmtFullBath', 'BsmtHalfBath', 'FullBath', 'BedroomAbvGr',
       'KitchenAbvGr', 'GarageCars', 'LotArea', 'YearBuilt',
       'YearRemodAdd', 'GarageArea', 'WoodDeckSF', 'MiscVal',
       'TotRmsAbvGrd', 'OverallCond', 'Fireplaces'], dtype=object)

In [None]:
len(selected_features)

24

### Suppression des colonnes
- BsmtFinSF2
- BsmtFinSF1 et 
- BsmtUnSF 

Car elles sont toutes combinées dans la colonne **TotalBsmtSF**

In [None]:
final_df = numerical_df[selected_features]
final_features = final_df.drop(columns=["BsmtFinSF1","BsmtFinSF2","BsmtUnfSF","LowQualFinSF","BsmtFullBath","BsmtHalfBath"]).columns

### Visualisation des résultats de nettoyage

In [None]:
for col in final_df.columns:
    plot_view(col)

#### Installation du package lazypredict 

In [None]:
%pip install lazypredict




# Prediction avec les modéles classique 

In [None]:
import lazypredict
from lazypredict.Supervised import LazyRegressor

In [None]:
def lazy_predict(features,df_arg,target_name="SalePrice", k= 2.5):
    df_cleaned =remove_outliers_iqr(df_arg.copy(),k)
    df_cleaned = df_cleaned.dropna(axis=0)
    if target_name in features:
        features.remove(target_name)
    X = df_cleaned[features].values
    y = df_cleaned.SalePrice.values
    x_slc = MinMaxScaler()
    y_scl = MinMaxScaler()
    X_scaled = x_slc.fit_transform(X)
    y_scaled=y_scl.fit_transform(y.reshape(-1,1))
    trainX,testX,trainY,testY = train_test_split(X_scaled,y_scaled,train_size =0.7)
    reg = LazyRegressor(verbose=0,ignore_warnings=True, custom_metric=None)
    models,predictions = reg.fit(trainX,testX, trainY,testY)
    return models, predictions
    

In [None]:
experiment_selected_features = feat_freq_df[feat_freq_df.Frequence>1].Feature.values
experiment_selected_features

array(['OverallQual', 'BsmtFinSF1', 'BsmtFinSF2', 'BsmtUnfSF',
       'TotalBsmtSF', '1stFlrSF', '2ndFlrSF', 'LowQualFinSF', 'GrLivArea',
       'BsmtFullBath', 'BsmtHalfBath', 'FullBath', 'BedroomAbvGr',
       'KitchenAbvGr', 'GarageCars', 'LotArea', 'YearBuilt',
       'YearRemodAdd', 'GarageArea', 'TotRmsAbvGrd'], dtype=object)

In [None]:
models,predictions = lazy_predict(experiment_selected_features,numerical_df,3)
print(models) 

100%|██████████| 42/42 [00:21<00:00,  1.93it/s]

                               Adjusted R-Squared  R-Squared  RMSE  Time Taken
Model                                                                         
LGBMRegressor                                0.88       0.89  0.06        0.20
HistGradientBoostingRegressor                0.88       0.89  0.06        0.61
GradientBoostingRegressor                    0.88       0.89  0.06        0.39
ExtraTreesRegressor                          0.87       0.88  0.07        0.50
NuSVR                                        0.87       0.88  0.07        0.14
RandomForestRegressor                        0.86       0.87  0.07        2.06
XGBRegressor                                 0.86       0.87  0.07        0.26
SGDRegressor                                 0.85       0.86  0.07        0.02
RidgeCV                                      0.85       0.86  0.07        0.03
BayesianRidge                                0.85       0.86  0.07        0.02
Ridge                                        0.85   




In [None]:
models[models>=0][["Adjusted R-Squared","R-Squared","RMSE"]].nlargest(10,["Adjusted R-Squared","R-Squared","RMSE"]).plot.bar(figsize  =(16,8))

<Axes: xlabel='Model'>

### Netoyage de tous le dataset


In [None]:
df_num_cleaned = remove_outliers_iqr(numerical_df, 3)

In [None]:
df_final = pd.concat([df_num_cleaned,df.select_dtypes(include=["object"])])

In [None]:
df_final.isnull().sum() 

MSSubClass       1460
LotArea          1494
OverallQual      1460
OverallCond      1461
YearBuilt        1460
                 ... 
PoolQC           2913
Fence            2639
MiscFeature      2866
SaleType         1460
SaleCondition    1460
Length: 77, dtype: int64