In [1]:
import pandas as pd
import numpy as np
import pickle
import os

In [2]:
from scipy.special import cbrt

In [3]:
# Move into the pickles directory so the bare file names opened in the next
# cells resolve. The path is relative to the current working directory, so
# re-running this cell moves the working directory again.
os.chdir('../pickles')

In [4]:
def _load_pickle(path):
    """Return the object unpickled from ``path``, closing the file afterwards."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Artifacts read from the current working directory.
# NOTE(review): pickle.load can execute arbitrary code; only load files that
# this project produced itself.
numerical = _load_pickle('numerical_final.pickle')        # numeric column names (used for usecols and the loops below)
categorical = _load_pickle('categorical.pickle')          # categorical column names encoded into dummy columns
conversion_dict = _load_pickle('conversion_dict.pickle')  # per-column category encoding table
scale = _load_pickle('scale.pickle')                      # object whose .transform() is applied before prediction
imputation_cols = _load_pickle('imputation_cols.pickle')  # columns fed to the imputation models
dummy_cols = _load_pickle('dummies_final.pickle')         # dummy column names kept for the final frame
pca = _load_pickle('pca.pickle')                          # loaded but not referenced in the visible cells

In [5]:
# Columns passed to the scaler/model at prediction time.
# The file must be opened in binary mode: pickle.load needs a bytes stream, and
# the default text mode fails on Python 3. The context manager also closes the
# handle once the object is loaded.
with open('model_columns.pickle', 'rb') as handle:
    model_cols = pickle.load(handle)

In [6]:
# Switch to the input-data directory; test.csv is read from here by bare file name.
# The path is relative to the current working directory.
os.chdir('../Input_Data')

In [7]:
# Drop the target from the numeric feature list before reading test.csv
# (presumably because the test file has no SalePrice column - confirm).
# The membership guard keeps the cell idempotent: list.remove raises ValueError
# when the value is already gone, e.g. when this cell is run a second time.
if 'SalePrice' in numerical:
    numerical.remove('SalePrice')

In [8]:
# Read only the columns the pipeline uses: the numeric features, the
# categorical features and the row identifier.
df = pd.read_csv(
    'test.csv',
    header=0,
    usecols=[*numerical, *categorical, 'Id'],
)

In [9]:
# Use the row identifier as the index so it follows each row through the pipeline.
df = df.set_index('Id')

In [10]:
# Replace every categorical column by the dummy columns described in conversion_dict.
#
# * One entry for the column: the entry is the category value itself and a
#   single 0/1 indicator column '<col>_dum_<category>' is created.
# * Otherwise each entry is indexed as (category, code). The code's decimal
#   digits are consumed least-significant first, one digit per dummy column
#   '<col>_dum_<j>' (presumably a binary code written out as a base-10 integer
#   - confirm against the notebook that built conversion_dict).
#
# Rows whose value matches none of the listed categories keep 0 in every dummy column.
for col in categorical:
    entries = conversion_dict[col]
    prefix = col + '_dum_'
    if len(entries) == 1:
        only_category = entries[0]
        flag_column = prefix + str(only_category)
        df[flag_column] = 0
        df.loc[df[col] == only_category, flag_column] = 1
    else:
        # Number of dummy columns = number of binary digits needed to write the category count.
        n_dummies = len(format(len(entries), 'b'))
        labels = [entry[0] for entry in entries]
        remaining_codes = [entry[1] for entry in entries]
        for position in range(n_dummies):
            digit_column = prefix + str(position)
            df[digit_column] = 0
            for idx, label in enumerate(labels):
                # Peel off the lowest remaining decimal digit of this category's code.
                df.loc[df[col] == label, digit_column] = remaining_codes[idx] % 10
                remaining_codes[idx] = remaining_codes[idx] // 10
    # The raw categorical column is no longer needed once it is encoded.
    df.drop(columns=col, inplace=True)
    print(col + ' done')

MSZoning done
Street done
Alley done
LotShape done
LandContour done
Utilities done
LotConfig done
LandSlope done
Neighborhood done
Condition1 done
Condition2 done
BldgType done
HouseStyle done
RoofStyle done
RoofMatl done
Exterior1st done
Exterior2nd done
MasVnrType done
ExterQual done
ExterCond done
Foundation done
BsmtQual done
BsmtCond done
BsmtExposure done
BsmtFinType1 done
BsmtFinType2 done
Heating done
HeatingQC done
CentralAir done
Electrical done
KitchenQual done
Functional done
FireplaceQu done
GarageType done
GarageFinish done
GarageQual done
GarageCond done
PavedDrive done
PoolQC done
Fence done
MiscFeature done
SaleType done
SaleCondition done


In [11]:
# Maps a transformation name to the callable that applies it to a value.
transform_dict = {
    'log': np.log,
    'sqr': lambda value: value**2,
    'sqrt': np.sqrt,
    'exp': np.exp,
    'cube': lambda value: value**3,
    'cuberoot': cbrt,
}

In [12]:
# Switch to the statistics directory; edd_v05.csv is read from here by bare file name.
# The path is relative to the current working directory.
os.chdir('../Statistics')

In [13]:
# Load the statistics sheet; the transformation loop further down reads its
# 'Var' column (variable name) and 'Status' column (key into transform_dict).
edd = pd.read_csv('edd_v05.csv',header=0)

In [14]:
def transform(x,function):
    """Apply ``function`` to ``x`` unless ``x`` is a missing value.

    Parameters
    ----------
    x : scalar
        Value to transform. ``None`` and NaN scalars count as missing.
    function : callable
        One-argument callable applied to non-missing values.

    Returns
    -------
    ``function(x)`` for a present value, ``np.nan`` for a missing one.
    """
    # pandas stores missing numeric values as NaN, not None, so an identity
    # check against None alone never catches them. Only scalars are tested;
    # anything else is passed straight to ``function`` as before.
    if x is None or (np.isscalar(x) and pd.isnull(x)):
        return np.nan
    return function(x)

In [15]:
# Apply to each numeric column the transformation named in the statistics sheet.
# Columns with no non-null 'Status' entry are left untouched.
for col in numerical:
    # Take the Status from the same filtered rows that decide whether a
    # transformation exists, so a null Status on an earlier row for the same
    # variable can never be picked up as the dictionary key.
    statuses = edd.loc[(edd['Var']==col)&(edd['Status'].notnull()),'Status']
    if statuses.empty:
        continue
    function = transform_dict[statuses.iloc[0]]
    df[col] = df[col].apply(lambda x: transform(x,function))

In [16]:
# Switch to the directory holding the '<column>_impute.pickle' files opened by the next cell.
# The path is relative to the current working directory.
os.chdir('../Imputation_models')

In [17]:
# Fill missing values in each numeric column with the predictions of that
# column's imputation model, using the imputation_cols columns as features.
for col in numerical:
    missing = df[col].isnull()
    if missing.any():
        # Unpickle the model only when there is something to impute, and close
        # the file handle as soon as the model is loaded.
        with open(col+'_impute.pickle','rb') as handle:
            model = pickle.load(handle)
        # A boolean mask selects exactly the rows with a missing value.
        df.loc[missing,col]=np.array(model.predict(np.array(df.loc[missing,imputation_cols])))
        del model
    print(col+' imputed')

3SsnPorch imputed
BsmtUnfSF imputed
ScreenPorch imputed
BsmtFullBath imputed
OpenPorchSF imputed
HalfBath imputed
OverallCond imputed
BsmtHalfBath imputed
GarageCars imputed
BedroomAbvGr imputed
WoodDeckSF imputed


In [18]:
# Keep only the numeric features followed by the dummy columns, in that order.
df = df.loc[:, [*numerical, *dummy_cols]]

In [42]:
# Switch to the directory holding RF.pickle, which the next cell opens by bare file name.
# The path is relative to the current working directory.
os.chdir('../Models/Inferential_models')

In [43]:
# Load the fitted model used for the final predictions; the context manager
# closes the file handle once the object is unpickled.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open('RF.pickle','rb') as handle:
    model = pickle.load(handle)

In [44]:
# Scale the model's input columns, predict, and collect the predictions in a
# one-column frame named 'SalePrice'.
df_submission = pd.DataFrame(
    {'SalePrice': model.predict(scale.transform(np.array(df[model_cols])))}
)

In [45]:
# Reuse df's index (the 'Id' column set as index earlier) so every prediction
# is labelled with the identifier of the row it was computed from.
df_submission.index = df.index

In [46]:
# Switch to the submissions directory, where the next cell writes submission.csv.
# The path is relative to the current working directory.
os.chdir('../../Submissions')

In [47]:
# Write the predictions to submission.csv in the current working directory.
# No index argument is passed, so to_csv's default applies and the index is
# written alongside the SalePrice column.
df_submission.to_csv('submission.csv')