## Support Vector Regression
This notebook fits a support vector regression (SVR) model to predict housing prices.

In [1]:
# LOAD THE PACKAGES
from sklearn import svm
import pandas as pd
import numpy as np

In [3]:
# Read the feature tables first, then the target column.
X = pd.read_csv('train_x2.csv')
Xtest = pd.read_csv('test_x2.csv')
# The target file has no header row; flatten the frame into a 1-D array.
y = pd.read_csv('train_y2.csv', header=None).values.ravel()

In [4]:
# Display the training target array as a quick sanity check.
# NOTE(review): values around 12 suggest the prices are log-transformed — confirm
# against the preprocessing that produced train_y2.csv.
y

array([ 12.24769432,  12.10901093,  12.31716669, ...,  12.49312952,
        11.86446223,  11.90158345])

In [7]:
# Baseline SVR fitted with the library defaults, used as an example prediction.
# Defaults of the sklearn version this notebook was run with:
#    (kernel='rbf', degree=3, gamma='auto', coef0=0.0, tol=0.001, C=1.0, epsilon=0.1,
#     shrinking=True, cache_size=200, verbose=False, max_iter=-1)
clf = svm.SVR()
clf.fit(X,y)
clf.predict(Xtest)

array([ 11.67864878,  12.00371923,  12.12761163, ...,  11.96145194,
        11.65217886,  12.42242196])

In [8]:
# R^2 of the baseline SVR on the same data it was fitted on: this is a
# training-set score, not an estimate of out-of-sample performance.
clf.score(X,y)

0.96225932433179728

In [9]:
# Scratch check: np.arange(start, stop, step) excludes the stop value.
np.arange(1,5,1)

array([1, 2, 3, 4])

In [10]:
# grid_para_svm = [
#     {'C': [11], 'epsilon': [0.001,0.01,0.005,0.05,0.1,1], 'gamma': np.arange(0.006,0.012,0.002), 'kernel': ['rbf']},
#     {'C': [11], 'epsilon': [0.001,0.01,0.005,0.05,0.1,1], 'degree': np.arange(1,6,1), 'kernel': ['poly']}
# ]

In [11]:
# Parameter grid handed to the grid search: a single RBF-kernel candidate.
grid_para = [
    dict(C=[260], epsilon=[0.05], gamma=[0.0001], kernel=['rbf']),
]

In [12]:
# Echo the grid to confirm what will be searched.
grid_para

[{'C': [260], 'epsilon': [0.05], 'gamma': [0.0001], 'kernel': ['rbf']}]

In [13]:
# 3-fold cross-validated grid search over grid_para, scored by negative MSE.
from sklearn import model_selection

grid_search_svm = model_selection.GridSearchCV(
    estimator=clf,
    param_grid=grid_para,
    scoring='neg_mean_squared_error',
    cv=3,
    n_jobs=-1,  # run the fits in parallel on all available cores
)
grid_search_svm.fit(X, y)

GridSearchCV(cv=3, error_score='raise',
       estimator=SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='auto',
  kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False),
       fit_params=None, iid=True, n_jobs=-1,
       param_grid=[{'C': [260], 'gamma': [0.0001], 'kernel': ['rbf'], 'epsilon': [0.05]}],
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring='neg_mean_squared_error', verbose=0)

In [14]:
# Display the best parameters found by the grid search.
# grid_para lists exactly one value per parameter, so this is necessarily
# that single combination; the search only cross-validates it.
grid_search_svm.best_params_

{'C': 260, 'epsilon': 0.05, 'gamma': 0.0001, 'kernel': 'rbf'}

In [15]:
# Cross-validated RMSE of the best parameter set: best_score_ is the mean
# negative MSE over the CV folds, so negate it and take the square root.
# NOTE(review): y appears to be log-transformed prices, which would make this
# an RMSE in log space — confirm against the preprocessing step.
(-grid_search_svm.best_score_)** 0.5

0.13726033287701736

In [16]:
# Function that converts to kaggle submission formatted pandas Dataframe
def kaggle(x, first_id=1461):
    """Build a Kaggle submission frame from model predictions.

    Parameters
    ----------
    x : array-like
        One-dimensional model predictions. They are passed through
        ``np.exp`` to produce the ``SalePrice`` column.
    first_id : int, optional
        ``Id`` assigned to the first prediction; later rows count up by one.
        Defaults to 1461, the value that was previously hard-coded.

    Returns
    -------
    pandas.DataFrame
        Columns ``Id`` and ``SalePrice``, one row per prediction, ready to be
        written to CSV for submission.
    """
    # Size the Id column from the predictions themselves instead of the
    # notebook-level ``Xtest``: the function no longer relies on hidden kernel
    # state, and the two columns cannot disagree in length.
    ids = np.arange(len(x)) + first_id
    return pd.DataFrame({'Id': ids, 'SalePrice': np.exp(x)})

In [128]:
# Predict on the test features with the fitted grid-search object and wrap the
# predictions in the Kaggle submission frame via kaggle().
result = kaggle(grid_search_svm.predict(Xtest))

In [129]:
# Write the submission to result.csv in the parent directory, without the
# DataFrame index column.
result.to_csv('../result.csv', index = False)