# Airbnb Descriptive

<p>Now that we have the predictive model complete, we now look to address the qualities that most affect the experience of Airbnb guests. 
Namely, these include quality features, bedrooms, bathrooms and seasonal features that enchance/detract from one's stay.</p>

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
prices = pd.read_csv('../../data/Ames_Housing_Price_Data.csv',index_col = 0)


In [2]:
#Creating a price per square foot column to help with feature engineering
prices['Price_sqft'] = prices['SalePrice']/prices['GrLivArea']

In [3]:
#Engineering new features relevant to Airbnb experience
prices['YrSinceUpdate'] = 2010-prices['YearRemodAdd']
prices.drop('YearRemodAdd',axis = 1,inplace = True)
prices['Total_FullBaths'] = prices['BsmtFullBath'] + prices['FullBath']
prices['Total_HalfBaths'] = prices['BsmtHalfBath'] + prices['HalfBath']
prices.drop(['BsmtFullBath','FullBath','BsmtHalfBath','HalfBath'],axis = 1, inplace = True)

In [4]:
airbnb_cols = ['SalePrice','GrLivArea','BldgType','HouseStyle','OverallQual','OverallCond',
              'YearBuilt','YrSinceUpdate','ExterQual','ExterCond','BsmtQual','BsmtCond','BsmtFinType1',
              'BsmtFinType2','HeatingQC','CentralAir','Total_FullBaths','Total_HalfBaths','FireplaceQu',
               'Neighborhood',
              'BedroomAbvGr','KitchenQual','GarageQual','GarageCond','PoolQC','Fence','MiscFeature','Functional']
prices = prices[airbnb_cols]
prices

Unnamed: 0,SalePrice,GrLivArea,BldgType,HouseStyle,OverallQual,OverallCond,YearBuilt,YrSinceUpdate,ExterQual,ExterCond,...,FireplaceQu,Neighborhood,BedroomAbvGr,KitchenQual,GarageQual,GarageCond,PoolQC,Fence,MiscFeature,Functional
1,126000,856,1Fam,1Story,6,6,1939,60,TA,TA,...,Gd,SWISU,2,TA,TA,TA,,,,Typ
2,139500,1049,TwnhsE,1Story,5,5,1984,26,Gd,TA,...,,Edwards,2,Gd,TA,TA,,,,Typ
3,124900,1001,1Fam,1Story,5,9,1930,3,Gd,TA,...,,IDOTRR,2,Gd,TA,Po,,,,Typ
4,114000,1039,1Fam,2Story,4,8,1900,7,Gd,Gd,...,,OldTown,2,TA,TA,TA,,,,Typ
5,227000,1665,1Fam,2Story,8,6,2001,9,Gd,TA,...,,NWAmes,3,Gd,TA,TA,,,,Typ
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
759,121000,952,1Fam,1.5Unf,6,6,1916,60,TA,TA,...,Gd,BrkSide,2,Fa,Fa,Po,,,,Typ
760,139600,1733,1Fam,1Story,3,5,1955,55,TA,TA,...,Gd,Edwards,4,TA,TA,TA,,,,Min2
761,145000,2002,Duplex,2Story,5,6,1949,60,TA,TA,...,,Crawfor,4,TA,TA,TA,,,,Typ
762,217500,1842,1Fam,2Story,7,5,2000,10,Gd,TA,...,TA,CollgCr,3,Gd,TA,TA,,,,Typ


### Ensure brand quality by filtering out lower quality houses

In [5]:
prices = prices.loc[(prices.BsmtFinType1!= 'LwQ') & (prices.BsmtFinType1 !='Unf')]
prices = prices.loc[(prices.BsmtFinType2!= 'LwQ') & (prices.BsmtFinType2 !='Unf')]
prices = prices.loc[(prices.Functional == 'Typ')&(prices.OverallQual >=5)&(prices.OverallCond >=5)]

127000    4
235000    4
143000    4
140000    4
147000    4
         ..
131250    1
120500    1
203000    1
99000     1
160000    1
Name: SalePrice, Length: 138, dtype: int64
864     4
925     4
914     3
1256    2
1337    2
       ..
1183    1
1694    1
1027    1
2715    1
1210    1
Name: GrLivArea, Length: 161, dtype: int64
1Fam      154
TwnhsE     12
Duplex      9
2fmCon      3
Twnhs       3
Name: BldgType, dtype: int64
1Story    115
2Story     31
SLvl       18
1.5Fin     13
SFoyer      4
Name: HouseStyle, dtype: int64
5     90
6     41
7     30
8     17
10     2
9      1
Name: OverallQual, dtype: int64
5    82
7    41
6    40
8    14
9     4
Name: OverallCond, dtype: int64
1959    10
1977     8
1965     8
1968     7
1963     6
        ..
1987     1
1992     1
1995     1
1999     1
1900     1
Name: YearBuilt, Length: 62, dtype: int64
60    9
45    7
10    7
51    7
33    6
6     6
7     6
14    5
31    5
15    5
47    5
42    5
35    5
8     5
12    4
5     4
2     4
11    4
32    4

# Converting quality/condition categoricals to numerical

In [6]:
def ordinal_scale(Series):
    '''Returns a pandas Series where quality scores are converted to integers
    
    Args:
    
    Series: a pandas Series of categorical quality scores'''
    return prices[Series].replace({'Po':None,'Fa':1,'TA':2,'Gd':3,'Ex':4})

prices['KitchenQual'] = ordinal_scale('KitchenQual')
prices['ExterQual'] = ordinal_scale('ExterQual')
prices['ExterCond'] = ordinal_scale('ExterCond')
prices['HeatingQC'] = ordinal_scale('HeatingQC')

In [7]:
prices['BsmtCond'].value_counts()

TA    143
Gd     14
Fa      1
Ex      1
Name: BsmtCond, dtype: int64

In [8]:
def switch_ordinals(Series):
    '''
    Returns a pandas series replacing null values with DNE (Does Not Exist) so they can be imputed.
    
    Args:
    

    Series: str, column name in the pandas dataframe.
    '''
    prices[Series].fillna('DNE',inplace = True)
    return prices[Series]
    
impute_cols = ['BsmtQual','BsmtCond','BsmtFinType1',
               'BsmtFinType2','FireplaceQu',
               'GarageQual','GarageCond','PoolQC']

for i in impute_cols:
    prices[i] = switch_ordinals(i)

for i in impute_cols:
    prices[i].replace({'Po':None,'DNE':1,'Fa':2,'TA':3,'Gd':4,'Ex':5},inplace = True)
    
prices['Fence'].fillna('DNE',inplace = True)



In [9]:
prices.columns

Index(['SalePrice', 'GrLivArea', 'BldgType', 'HouseStyle', 'OverallQual',
       'OverallCond', 'YearBuilt', 'YrSinceUpdate', 'ExterQual', 'ExterCond',
       'BsmtQual', 'BsmtCond', 'BsmtFinType1', 'BsmtFinType2', 'HeatingQC',
       'CentralAir', 'Total_FullBaths', 'Total_HalfBaths', 'FireplaceQu',
       'Neighborhood', 'BedroomAbvGr', 'KitchenQual', 'GarageQual',
       'GarageCond', 'PoolQC', 'Fence', 'MiscFeature', 'Functional'],
      dtype='object')

In [10]:
summer_cols = ['PoolQC','CentralAir']
winter_cols = ['FireplaceQu','HeatingQC']

In [11]:
nulls = prices.isnull().sum()
nulls[nulls>1]

FireplaceQu      4
MiscFeature    170
dtype: int64

In [12]:
prices['MiscFeature'].fillna(0,inplace = True)
prices['MiscFeature'] = prices['MiscFeature'].apply(lambda M: 1 if M !=0 else M)
prices['CentralAir'].replace({'N':0,'Y':1},inplace = True)
prices['Functional'].replace({'Typ':0,'Min1':-1,'Min2':-2,
                              'Mod':-3,'Maj1':-4,'Maj2':-5,'Sev':-6,
                              'Sal':-7},inplace = True)

prices['Total_FullBaths'] = prices['Total_FullBaths'].fillna(prices['Total_FullBaths'].mean())
prices['Total_HalfBaths'] = prices['Total_HalfBaths'].fillna(prices['Total_HalfBaths'].mean())


In [13]:
nulls = prices.isnull().sum(axis = 0)
nulls[nulls>1]

FireplaceQu    4
dtype: int64

In [14]:
prices.dropna(axis = 0, how = 'any',inplace = True)
prices.drop('Neighborhood',axis = 1, inplace = True)

In [15]:
prices.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 177 entries, 2 to 742
Data columns (total 27 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   SalePrice        177 non-null    int64  
 1   GrLivArea        177 non-null    int64  
 2   BldgType         177 non-null    object 
 3   HouseStyle       177 non-null    object 
 4   OverallQual      177 non-null    int64  
 5   OverallCond      177 non-null    int64  
 6   YearBuilt        177 non-null    int64  
 7   YrSinceUpdate    177 non-null    int64  
 8   ExterQual        177 non-null    int64  
 9   ExterCond        177 non-null    int64  
 10  BsmtQual         177 non-null    int64  
 11  BsmtCond         177 non-null    int64  
 12  BsmtFinType1     177 non-null    object 
 13  BsmtFinType2     177 non-null    object 
 14  HeatingQC        177 non-null    int64  
 15  CentralAir       177 non-null    int64  
 16  Total_FullBaths  177 non-null    float64
 17  Total_HalfBaths 

In [16]:
prices['FireplaceQu'].value_counts()

1.0    87
3.0    47
4.0    32
2.0     8
5.0     3
Name: FireplaceQu, dtype: int64

In [17]:
airbnb_winter = prices.drop(summer_cols,axis = 1)
airbnb_summer = prices.drop(winter_cols,axis = 1)

In [18]:
airbnb_summer.columns

Index(['SalePrice', 'GrLivArea', 'BldgType', 'HouseStyle', 'OverallQual',
       'OverallCond', 'YearBuilt', 'YrSinceUpdate', 'ExterQual', 'ExterCond',
       'BsmtQual', 'BsmtCond', 'BsmtFinType1', 'BsmtFinType2', 'CentralAir',
       'Total_FullBaths', 'Total_HalfBaths', 'BedroomAbvGr', 'KitchenQual',
       'GarageQual', 'GarageCond', 'PoolQC', 'Fence', 'MiscFeature',
       'Functional'],
      dtype='object')

In [27]:
airbnb_summer['PoolQC'].value_counts()

1    177
Name: PoolQC, dtype: int64

## Reducing to numeric features

In [26]:
airbnb_summer.sample(5)

Unnamed: 0,SalePrice,GrLivArea,BldgType,HouseStyle,OverallQual,OverallCond,YearBuilt,YrSinceUpdate,ExterQual,ExterCond,...,Total_FullBaths,Total_HalfBaths,BedroomAbvGr,KitchenQual,GarageQual,GarageCond,PoolQC,Fence,MiscFeature,Functional
426,143000,1328,1Fam,1Story,6,5,1963,47,2,2,...,2.0,1.0,3,2,3,3,1,DNE,0,0
764,141500,1337,1Fam,1Story,5,8,1950,4,2,3,...,2.0,0.0,3,3,3,3,1,DNE,0,0
2,139500,1049,TwnhsE,1Story,5,5,1984,26,3,2,...,3.0,0.0,2,3,3,3,1,DNE,0,0
569,136000,1040,1Fam,1Story,6,7,1953,57,2,2,...,1.0,1.0,3,2,3,3,1,MnPrv,0,0
178,188000,1696,1Fam,2Story,6,5,2003,7,3,2,...,3.0,1.0,3,3,3,3,1,DNE,0,0


In [20]:
prices['BsmtFinType2'].value_counts()

Rec    64
BLQ    40
ALQ    35
1      23
GLQ    15
Name: BsmtFinType2, dtype: int64

In [21]:
prices['BsmtFinType1'].value_counts()

ALQ    49
GLQ    40
BLQ    35
Rec    31
1      22
Name: BsmtFinType1, dtype: int64

In [22]:

cats = prices.select_dtypes(object)
cats

Unnamed: 0,BldgType,HouseStyle,BsmtFinType1,BsmtFinType2,Fence
2,TwnhsE,1Story,GLQ,ALQ,DNE
6,1Fam,SFoyer,1,1,DNE
20,1Fam,SLvl,Rec,GLQ,DNE
22,1Fam,1Story,ALQ,BLQ,DNE
29,1Fam,2Story,ALQ,BLQ,DNE
...,...,...,...,...,...
652,1Fam,2Story,BLQ,GLQ,DNE
706,1Fam,1.5Fin,Rec,BLQ,MnPrv
717,1Fam,1Story,BLQ,Rec,DNE
719,1Fam,1Story,ALQ,Rec,DNE


In [23]:
dummy = pd.get_dummies(cats)

In [24]:
dummy

Unnamed: 0,BldgType_1Fam,BldgType_2fmCon,BldgType_Duplex,BldgType_Twnhs,BldgType_TwnhsE,HouseStyle_1.5Fin,HouseStyle_1Story,HouseStyle_2Story,HouseStyle_SFoyer,HouseStyle_SLvl,...,BsmtFinType2_1,BsmtFinType2_ALQ,BsmtFinType2_BLQ,BsmtFinType2_GLQ,BsmtFinType2_Rec,Fence_DNE,Fence_GdPrv,Fence_GdWo,Fence_MnPrv,Fence_MnWw
2,0,0,0,0,1,0,1,0,0,0,...,0,1,0,0,0,1,0,0,0,0
6,1,0,0,0,0,0,0,0,1,0,...,1,0,0,0,0,1,0,0,0,0
20,1,0,0,0,0,0,0,0,0,1,...,0,0,0,1,0,1,0,0,0,0
22,1,0,0,0,0,0,1,0,0,0,...,0,0,1,0,0,1,0,0,0,0
29,1,0,0,0,0,0,0,1,0,0,...,0,0,1,0,0,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
652,1,0,0,0,0,0,0,1,0,0,...,0,0,0,1,0,1,0,0,0,0
706,1,0,0,0,0,1,0,0,0,0,...,0,0,1,0,0,0,0,0,1,0
717,1,0,0,0,0,0,1,0,0,0,...,0,0,0,0,1,1,0,0,0,0
719,1,0,0,0,0,0,1,0,0,0,...,0,0,0,0,1,1,0,0,0,0
