# Libraries

In [None]:
import pandas as pd
import numpy as np
import pickle
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split


In [535]:
df = pd.read_csv("data.csv")
df

Unnamed: 0,date,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,condition,sqft_above,sqft_basement,yr_built,yr_renovated,street,city,statezip,country
0,5/2/2014 0:00,313000.0,3.0,1.5,1340.0,7912.0,1.5,0.0,0.0,3,1340.0,0.0,1955.0,2005,18810 Densmore Ave N,Shoreline,WA 98133,USA
1,5/2/2014 0:00,2384000.0,5.0,2.5,3650.0,9050.0,2.0,0.0,4.0,5,3370.0,280.0,1921.0,0,709 W Blaine St,Seattle,WA 98119,USA
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4598,7/10/2014 0:00,203400.0,4.0,2.0,2090.0,6630.0,1.0,0.0,0.0,3,1070.0,1020.0,1974.0,0,5148 S Creston St,Seattle,WA 98178,USA
4599,7/10/2014 0:00,220600.0,3.0,2.5,1490.0,8102.0,2.0,0.0,0.0,4,1490.0,0.0,1990.0,0,18717 SE 258th St,Covington,WA 98042,USA


In [536]:

def fillNaObjMode(cols):
    for i in cols:
        df[i] = df[i].fillna(df[i].mode()[0])

columns = ['street','city','statezip','country']
fillNaObjMode(columns)

In [537]:
def fillNaIntMode(cols):
    for i in cols:
        df[i] = df[i].fillna(df[i].mode()[0])

columns = ['bedrooms','bathrooms','floors','waterfront','view','yr_built']
fillNaIntMode(columns)

In [538]:
def fillNaFloat(cols):
    for i in cols:
        df[i] = df[i].fillna(df[i].mean())

columns = ['price','sqft_living','sqft_lot','sqft_above','sqft_basement']
fillNaFloat(columns)

In [539]:


def convertFloatintoInt(cols):
    for i in cols:
        df[i] = df[i].astype('int64')

columns = ['bedrooms','bathrooms','floors','waterfront','view','yr_built','price','sqft_living','sqft_lot','sqft_above','sqft_basement']
convertFloatintoInt(columns)

In [540]:
df['price'].min()

np.int64(0)

In [541]:
def dataEncoder(cols):
    for i in cols:
        dataLabelEncoder = LabelEncoder()
        df[i] = dataLabelEncoder.fit_transform(df[i])

columns = ['city','statezip']
dataEncoder(columns)

In [542]:
df = df.drop('date', axis=1)
df = df.drop('street', axis=1)
df = df.drop('city', axis=1)
df = df.drop('country', axis=1)
df = df.drop('statezip', axis=1)


In [543]:
price_range = [0, 200000, 1000000,5000000, 30000000] 
labels = ['low', 'medium', 'high','very high']  
df['price_category'] = pd.cut(df['price'], bins=price_range, labels=labels, right=False)

In [544]:
df.to_csv(r'encoded-data.csv', index = False, header = True)

# Train-Test Split

In [545]:

trainData, testData = train_test_split(df, test_size=0.2, shuffle=False)

In [546]:

train_x = trainData.iloc()[:, :-1]
test_x  = testData.iloc()[:, :-1]

train_y = trainData.iloc()[:, -1]
test_y  = testData.iloc()[:, -1]


# Model Apply / Classifier Application

In [547]:

model = SVC()
model.fit(train_x, train_y)

print(model)

SVC()


In [548]:
# Saving Trained Model
pickle.dump(model, open('model_svc.pkl', 'wb'))

In [549]:
# Load saved Model
model = pickle.load(open('model_svc.pkl', 'rb'))

In [550]:
model_predictions = model.predict(test_x)

In [551]:
model_accuracy_score = accuracy_score(test_y, model_predictions)

print("-- Model Accuracy Score: ", end='')
print(round(model_accuracy_score,3))

-- Model Accuracy Score: 0.979


In [552]:
testdata_predict = testData.copy(deep=True)
pd.options.mode.chained_assignment = None

testdata_predict['Prediction'] = model_predictions

In [553]:
model_accuracy_score = accuracy_score(testdata_predict['price_category'], testdata_predict['Prediction'])

print("-- Model Accuracy Score: ", end='')
print(round(model_accuracy_score,3))

-- Model Accuracy Score: 0.979
