# Housing Prices: Advanced Regression


In [21]:
import random
import os
import numpy as np
import pandas as pd
import tensorflow as tf

In [19]:
def encode_features(df_train, df_test):
    '''
    Replace every string-valued (object dtype) column with integer codes
    so the data can be fed to a regression model.

    The columns to encode are the object-dtype columns of df_train.
    Codes start at 1 and are numbered over the categories found in the
    train rows followed by the test rows, so both frames share a single
    mapping per column.

    Both frames are modified in place and also returned as
    (df_train, df_test).
    '''
    object_columns = list(df_train.select_dtypes(include=['object']).columns)
    stacked = pd.concat([df_train[object_columns], df_test[object_columns]])

    for name in object_columns:
        # One shared lookup table per column: category -> 1-based code
        codes = {
            value: code
            for code, value in enumerate(stacked[name].unique(), start=1)
        }
        for frame in (df_train, df_test):
            frame[name] = frame[name].map(codes)

    return df_train, df_test


def cleanup(df):
    '''
    Cleans data
        1. Drops unwanted features
        2. Fills missing values in each column with that column's mode

    Returns a new DataFrame; the frame passed in is left untouched.
    A column that contains no non-missing values has no mode and is
    left as it is.
    Raises KeyError (from DataFrame.drop) if any of the unwanted
    features is absent from df.
    '''
    to_drop = ['MiscFeature', 'MiscVal', 'GarageArea', 'GarageYrBlt']
    df = df.drop(to_drop, axis=1)

    # Collect one fill value per column. A bare df.fillna(x) would spread
    # a single column's mode over every missing cell in the whole frame.
    fill_values = {}
    for column in df.columns:
        # value_counts() ignores NaN and sorts by frequency, so the first
        # index entry is the most common value.
        counts = df[column].value_counts()
        if not counts.empty:
            fill_values[column] = counts.index[0]

    if fill_values:
        # A dict makes fillna work column by column
        df = df.fillna(value=fill_values)
    return df

In [46]:
data_dir = 'data'

# Read both splits first, then clean each one independently
train_dataset, test_dataset = (
    pd.read_csv(os.path.join(data_dir, file_name))
    for file_name in ('train.csv', 'test.csv')
)
train_dataset, test_dataset = map(cleanup, (train_dataset, test_dataset))

# Encode the two splits together so they share one category mapping
train_dataset, test_dataset = encode_features(train_dataset, test_dataset)

In [47]:
# DataFrame/Series.as_matrix() was deprecated in pandas 0.23 and removed in
# 1.0; .values returns the same NumPy array on both old and new pandas.
# Target vector: the sale price of every training row
y_train = train_dataset['SalePrice'].values
# Feature matrix: every remaining column except the row identifier
x_train = train_dataset.drop(['SalePrice', 'Id'], axis=1).values

## Training with only Linear Regression
Using stochastic gradient descent with a batch size of 16

In [52]:
batch_size = 16
# First axis of x_train counts examples, second axis counts features
train_size, num_features = np.shape(x_train)[:2]

graph = tf.Graph()
with graph.as_default():

    # Skeleton only; the sections below are still to be filled in.

    # Input

    # Variables

    # Model

    # Loss Computation

    # Optimizer

    # Predictions
    # Stand-in op so the graph has at least one node
    test = tf.zeros(5)

## Training with Neural Networks and Regression