In [57]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import warnings

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.decomposition import IncrementalPCA
from sklearn.metrics import classification_report
from sklearn.svm import SVR
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_recall_curve
from sklearn import metrics
from sklearn.metrics import mean_squared_error

In [58]:
warnings.filterwarnings('ignore')

In [59]:
dataset = pd.read_csv('Housing.csv')

df = dataset.drop('furnishingstatus', axis=1)

In [60]:
svar_list = ['mainroad', 'guestroom', 'basement', 'hotwaterheating', 'airconditioning', 'prefarea']
def binary_mapping(x):
    return x.map({'yes':1, 'no':0})
df[svar_list] = df[svar_list].apply(binary_mapping)

df.head()

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea
0,13300000,7420,4,2,3,1,0,0,0,1,2,1
1,12250000,8960,4,4,4,1,0,0,0,1,3,0
2,12250000,9960,3,2,2,1,0,1,0,0,2,1
3,12215000,7500,4,2,2,1,0,1,0,1,3,1
4,11410000,7420,4,1,2,1,1,1,0,1,2,0


In [61]:
X = df[['area','bedrooms','bathrooms','stories','mainroad','guestroom','basement','hotwaterheating','airconditioning','parking','prefarea']]
y = dataset['price']

X.shape

(545, 11)

In [62]:
y.shape

(545,)

In [67]:
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size = 0.8, test_size = 0.2, random_state = 0)

sc = StandardScaler()

Xsc_train = sc.fit_transform(X_train)
Xsc_test = sc.transform(X_test)

In [81]:
svr_rbf = SVR(kernel='rbf', C=1e6, gamma=0.1)
svr_lin = SVR(kernel='linear', C=1e6)
svr_ply = SVR(kernel='poly', C=1e6, degree=2)

y_rbf = svr_rbf.fit(Xsc_train,y_train).predict(Xsc_test)
y_lin = svr_lin.fit(Xsc_train,y_train).predict(Xsc_test)
y_ply = svr_ply.fit(Xsc_train,y_train).predict(Xsc_test)

print('Loss:', mean_squared_error(y_test, y_lin), mean_squared_error(y_test, y_rbf), mean_squared_error(y_test, y_ply))

Loss: 947860338641.9976 1279027590158.7593 1622321986946.383


In [82]:
model = PCA()

model.fit(Xsc_train)

PCA()

In [83]:
PCA_train = IncrementalPCA(n_components = 11)
df_train = PCA_train.fit_transform(Xsc_train)
df_test = PCA_train.transform(Xsc_test)

In [86]:
y_rbf_pca = svr_rbf.fit(df_train,y_train).predict(X_test)
y_lin_pca = svr_lin.fit(df_train,y_train).predict(X_test)
y_ply_pca = svr_ply.fit(df_train,y_train).predict(X_test)

In [90]:
print('Loss:', mean_squared_error(y_test, y_lin_pca), mean_squared_error(y_test, y_rbf_pca), mean_squared_error(y_test, y_ply_pca))

Loss: 2.662630291102947e+19 3086028271056.9614 4.5372558100900655e+25
