In [17]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_percentage_error as mape
from matplotlib import pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.manifold import TSNE
from sklearn.neighbors import KNeighborsRegressor as KNR
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVR, LinearSVR


In [2]:
# Load the raw listings table; the path is relative to the notebook's working directory.
DATA_PATH = "final.csv"
data = pd.read_csv(DATA_PATH)

In [3]:
# Keep only the rows that have an "Area" value; the next cell divides Price by it.
has_area = data["Area"].notna()
data = data[has_area]
data.shape

(34953, 16)

In [4]:
# Replace the total price with the price per unit of area.
# `assign` returns a new frame instead of writing a column into the
# boolean-filtered frame, which avoids SettingWithCopyWarning on pandas
# versions that do not use Copy-on-Write.
# NOTE: this cell is not idempotent -- re-running it divides by Area again,
# so re-run from the load cell if it has to be executed a second time.
data = data.assign(Price=data["Price"] / data["Area"])

In [5]:
# Columns kept for modelling; "Price" is the target, every other column is
# dropped from the frame at this point (including "Area").
MODEL_COLUMNS = [
    'House Direction',
    'Balcony Direction',
    'Bedrooms',
    'Toilets',
    'Legits',
    'Floors',
    'Facade',
    'Entrance',
    'X',
    "Y",
    'Price',
]
data = data[MODEL_COLUMNS]

In [6]:
# Keep rows whose per-area price is strictly above 100; rows with a missing
# price fail the comparison and are dropped as well.
price_above_floor = data["Price"].gt(100)
data = data.loc[price_above_floor]

In [7]:
# Outlier filter on Price: keep values inside [Q1 - 3*IQR, Q3 + 3*IQR].
# Both quartiles are computed in a single quantile call.
q1, q3 = data.Price.quantile([0.25, 0.75])
IQR = q3 - q1
Lower_fence = q1 - (IQR * 3)
Upper_fence = q3 + (IQR * 3)

# `between` is inclusive on both ends, matching the >= / <= comparison.
data = data[data.Price.between(Lower_fence, Upper_fence)]
data.Price.describe()

count    29570.000000
mean      1316.770876
std       1248.936842
min        100.500000
25%        260.666502
50%       1044.968705
75%       1818.181818
max       6722.689076
Name: Price, dtype: float64

In [8]:
# One-hot encode the frame; by default pandas expands only the
# object/string/category columns and passes numeric columns through unchanged.
hotdf = pd.get_dummies(data=data)

In [9]:
# Fill missing "Entrance" values with the column's most frequent value.
# The result is assigned back to the column instead of calling
# `fillna(..., inplace=True)` on the selected Series: that chained in-place
# form acts on a temporary object, emits a FutureWarning on pandas 2.x and
# leaves `hotdf` unchanged once Copy-on-Write is active (pandas 3).
entrance_mode = hotdf["Entrance"].mode().iloc[0]
hotdf["Entrance"] = hotdf["Entrance"].fillna(entrance_mode)

In [10]:
# Every column of the encoded frame except the target is a model input.
features = [column for column in hotdf.columns if column != "Price"]

In [11]:
# Hold out 20% of the rows as the test set, then 10% of the remaining rows
# as the dev (validation) set.
# A fixed seed keeps the three partitions identical across kernel restarts;
# without it the reported errors change on every run.
SEED = 42
train, test = train_test_split(hotdf, test_size=0.2, random_state=SEED)
train, dev = train_test_split(train, test_size=0.1, random_state=SEED)

# Work on explicit copies: the scaling cell writes columns into these frames,
# and doing that on row-subsets of `hotdf` can raise SettingWithCopyWarning.
train, dev, test = train.copy(), dev.copy(), test.copy()

In [12]:
# Min-max scale the feature columns. The scaler is fitted on the training
# split only and the same fitted scaler is then applied to all three splits.
scaler = MinMaxScaler()
scaler.fit(train[features])

for split in (train, test, dev):
    split[features] = scaler.transform(split[features])

In [118]:
# K-nearest-neighbours regressor: 2 neighbours, each weighted by the inverse
# of its distance to the query point, using the Minkowski metric.
knr = KNR(
    n_neighbors=2,
    weights="distance",
    metric="minkowski",
)

In [119]:
# Fit the KNN regressor on the scaled training features against Price.
knr.fit(X=train[features], y=train["Price"])

KNeighborsRegressor(n_neighbors=2, weights='distance')

In [120]:
# Mean absolute percentage error of the KNN model on the dev split
# (reported as a fraction, not multiplied by 100).
pred = knr.predict(X=dev[features])
print(mape(y_true=dev["Price"], y_pred=pred))

0.6631183434614026


In [121]:
# Mean absolute percentage error of the KNN model on the held-out test split
# (reported as a fraction, not multiplied by 100).
pred = knr.predict(X=test[features])
print(mape(y_true=test["Price"], y_pred=pred))

0.5931645870218909


In [18]:
# Linear support-vector regressor trained on the scaled training features.
# The solver is randomized, so the seed is pinned to make the fitted
# coefficients (and the score reported afterwards) repeatable between runs.
svr = LinearSVR(random_state=42)
svr.fit(train[features], train["Price"])

LinearSVR()

In [19]:
# Mean absolute percentage error of the linear SVR on the held-out test split
# (reported as a fraction, not multiplied by 100).
pred = svr.predict(X=test[features])
print(mape(y_true=test["Price"], y_pred=pred))

0.7803384472200945
