# House-Price-Prediction - use Neural Network

使用Keras/tensorflow Neural Network 來達成波士頓房地產價錢預測

    Objectives:
    1. Predict the sale price for each house.
    2. Minimize the difference between the predicted and actual price (RMSE/MSE)

    資料集：波士頓房產
    特徵：房地產客觀數據，如年份、平面大小
    預測目標：房地產價格
    機器學習方法：ANN

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
from keras.datasets import boston_housing
from sklearn.metrics import mean_squared_error
import keras
from sklearn.metrics import r2_score

In [None]:
# Load the housing table; the first CSV column is used as the row index.
data=pd.read_csv('Boston.csv',index_col=0)
print(data.shape)
# Features: every column except the target column 'medv'.
x=data.drop(['medv'], axis=1)
# Target: the 'medv' column.
y=data['medv']
# NOTE(review): a notebook cell only displays its last expression, so this
# preview is presumably never shown — confirm and move it to its own cell.
data.head(10)
y.shape

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows as the test set; random_state pins the shuffle so
# the split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=5)

In [None]:

# Standardize every feature to zero mean and unit variance.
#
# Note that the quantities used for normalizing the test data are computed using the
# training data. You should never use in your workflow any quantity computed on the
# test data, even for something as simple as data normalization.

mean = X_train.mean(axis=0)

# Rebind instead of using -= / /= so the frames returned by the split are not
# mutated in place.
X_train = X_train - mean
# A constant feature has a standard deviation of 0; dividing by it would fill
# the column with NaN/inf, so such columns are scaled by 1 instead.
std = X_train.std(axis=0).replace(0, 1)
X_train = X_train / std

X_test = (X_test - mean) / std
print('mean:', mean)

In [None]:
from keras import models
from keras import layers

def build_model():
    """Build and compile a small fully connected regression network.

    The network has two hidden ``Dense`` layers of 64 ReLU units followed by
    a single linear output unit. The input width is read from the
    module-level ``X_train``, which must exist before this is called.

    Returns:
        The compiled ``Sequential`` model (Adam optimizer, MSE loss, MAE
        metric).
    """
    n_features = X_train.shape[1]

    model = models.Sequential()
    model.add(layers.Dense(64, activation='relu', input_shape=(n_features,)))
    model.add(layers.Dense(64, activation='relu'))
    model.add(layers.Dense(1))

    # scikit-learn's r2_score operates on NumPy arrays, not on the tensors
    # Keras hands to a metric function, so it cannot be used as a compiled
    # metric. Track MAE here; R^2 is computed after training from
    # model.predict().
    model.compile(optimizer='adam',  # alternatives: sgd, rmsprop
                  loss='mse',
                  metrics=['mae'])
    return model

In [None]:
# Instantiate the network and print its layer/parameter summary.
model = build_model()
model.summary()


Keras `Callback` API reference (methods available for custom callbacks): https://www.tensorflow.org/api_docs/python/tf/keras/callbacks/Callback#methods_2

In [None]:


class Metrics(keras.callbacks.Callback):
    """Keras callback that records validation R^2 and MSE after every epoch.

    Args:
        val_data: Optional ``(X_val, y_val)`` pair to score on. When omitted,
            the callback falls back to a ``validation_data`` attribute if one
            has been set on the instance, and otherwise records nothing.
    """

    def __init__(self, val_data=None):
        super().__init__()
        self.val_data = val_data
        self._data = []

    def on_train_begin(self, logs=None):
        """Clear the recorded history at the start of a training run."""
        self._data = []

    def on_epoch_end(self, epoch, logs=None):
        """Score the model being trained on the validation data and store the result."""
        val_data = self.val_data
        if val_data is None:
            val_data = getattr(self, 'validation_data', None)
        if val_data is None:
            # Nothing to evaluate on; skip instead of failing mid-training.
            return

        X_val, y_val = val_data[0], val_data[1]
        # Flatten both sides so (n, 1) predictions line up with (n,) targets.
        y_true = np.ravel(y_val)
        # Use the model attached to this callback rather than a global name.
        y_predict = np.ravel(self.model.predict(X_val))
        self._data.append({
            'val_r2_score': r2_score(y_true, y_predict),
            'val_mse': mean_squared_error(y_true, y_predict),
        })

    def get_data(self):
        """Return the list of per-epoch score dicts recorded so far."""
        return self._data

metrics = Metrics()

In [None]:
# Inspect the dimensions of the training feature matrix.
X_train.shape

In [None]:
# Inspect the dimensions of the training target vector.
y_train.shape

In [None]:
# Train for 100 epochs with mini-batches of 16, holding out 20% of the training
# rows for validation. The returned History object is used for the loss plot.
# Earlier variant without a validation split:
#train_history=model.fit(X_train, y_train, epochs=50, batch_size=16, verbose=2)
train_history=model.fit(X_train, y_train, epochs=100, batch_size=16, verbose=2 ,validation_split=0.2,shuffle=True) #, callbacks=[metrics]);  #shuffle default is true

In [None]:
# R^2 of the fitted model on the training set.
r2_score(y_train,model.predict(X_train))

In [None]:
# R^2 of the fitted model on the held-out test set.
r2_score(y_test,model.predict(X_test))

In [None]:
import matplotlib.pyplot as plt

# One wide figure with two side-by-side panels.
plt.figure(figsize=(18,4))
plt.subplot(1, 2, 1)

# The title must name the optimizer actually used: build_model() compiles
# the network with 'adam'.
plt.suptitle('Optimizer : adam', fontsize=12)
# The left panel is reserved for the per-epoch validation R2 collected by the
# Metrics callback. That callback is not passed to fit(), so the panel stays
# empty. To enable it (note the list name must not shadow r2_score):
#   plt.ylabel('R2 Score', fontsize=20)
#   r2_history = [entry['val_r2_score'] for entry in metrics.get_data()]
#   plt.plot(r2_history, color='r', label='R2 Score')
#   plt.legend(loc='lower right')

# Right panel: training vs. validation loss per epoch.
plt.subplot(1, 2, 2)
plt.ylabel('Loss', fontsize=20)
plt.plot(train_history.history['loss'], color='b', label='Training Loss')
plt.plot(train_history.history['val_loss'], color='r', label='Validation Loss')
plt.legend(loc='upper right')
plt.show()

In [None]:
def mse(predictions,y):
    """Return the mean squared error between predictions and targets.

    Both arguments are converted to float arrays. When they hold the same
    number of elements but differ in shape (for example ``(n, 1)`` model
    output against ``(n,)`` targets), the predictions are reshaped to the
    targets' shape so the subtraction is element-wise instead of
    broadcasting to an ``(n, n)`` matrix.

    Args:
        predictions: Predicted values (array-like).
        y: Target values (array-like); its length is the averaging count.

    Returns:
        Sum of squared differences divided by ``len(y)``.

    Raises:
        ValueError: If ``y`` is empty.
    """
    predictions = np.asarray(predictions, dtype=float)
    y = np.asarray(y, dtype=float)
    m = len(y)
    if m == 0:
        raise ValueError("mse() requires at least one target value")
    if predictions.shape != y.shape and predictions.size == y.size:
        predictions = predictions.reshape(y.shape)
    return np.sum((predictions - y) ** 2) / m



In [None]:
# Training-set MSE via the hand-written helper; the targets are reshaped to a
# column so they line up with the 2-D output of predict().
mse(model.predict(X_train.values),y_train.values.reshape(-1,1))

In [None]:
# evaluate() returns the loss (MSE, per compile()) followed by the compiled
# metric. Use two distinct names so the metric does not overwrite the loss.
train_mse_score, train_metric_score = model.evaluate(X_train, y_train)
train_mse_score

In [None]:
# evaluate() returns the loss (MSE) followed by the metric given to compile().
# NOTE(review): the second name says "mae"; confirm it matches the metric the
# model was actually compiled with.
test_mse_score, test_mae_score = model.evaluate(X_test, y_test)
test_mse_score

In [None]:
# Predictions for the first ten test rows.
model.predict(X_test[:10])

In [None]:
# Actual targets for the same first ten test rows, for side-by-side comparison.
y_test.values[:10]

In [None]:
def scatter_plot(predict_data, actual_data,size):
    """Scatter predicted values against actual values and print the MSE.

    Draws on the current matplotlib axes. A red diagonal spanning the range
    of the actual values marks where perfect predictions would fall.

    Args:
        predict_data: Model predictions, one per sample.
        actual_data: Ground-truth targets; must provide ``min()`` and
            ``max()``.
        size: Number of samples; only echoed in the printed summary.
    """
    lower, upper = actual_data.min(), actual_data.max()

    plt.title('scatter plot',fontsize='large') #set title
    plt.scatter(actual_data,predict_data)
    plt.plot([lower, upper], [lower, upper], 'r', lw=2)
    plt.xlabel('actual y')
    plt.ylabel('predicted y')
    # The first parameter of grid() was renamed from ``b`` to ``visible`` in
    # Matplotlib 3.5; passing it positionally works with both old and new
    # releases.
    plt.grid(True, linewidth=0.3)
    print("size:",size)
    # scikit-learn convention is (y_true, y_pred); MSE is symmetric, so the
    # printed value is unchanged.
    print("mse:",mean_squared_error(actual_data, predict_data))
In [None]:
# Plot predicted vs. actual for the whole training set.
scatter_plot(model.predict(X_train),y_train.values,len(y_train))

In [None]:
# Plot predicted vs. actual for the first 30 training rows only.
scatter_plot(model.predict(X_train[:30]),y_train[:30],len(y_train[:30]))

In [None]:
# Plot predicted vs. actual for the held-out test set.
scatter_plot(model.predict(X_test),y_test,len(y_test))

In [None]:
# Inspect the dimensions of the test targets as a NumPy array.
y_test.values.shape

In [None]:
# Predict on the test set and show the result flattened to one dimension.
predictions=model.predict(X_test.values)
np.ravel(predictions)

In [None]:
import seaborn as sns
# Residuals on the test set: actual minus predicted (predictions flattened to
# match the 1-D targets).
predictions=model.predict(X_test.values)
error = y_test.values - np.ravel(predictions)
print(np.mean(error))

print(np.std(error))
# Distribution of the residuals. distplot() has been deprecated since
# seaborn 0.11; use its histplot() replacement when the installed seaborn
# provides it and fall back to distplot() on older releases.
if hasattr(sns, 'histplot'):
    sns.histplot(error, kde=True, stat='density', kde_kws=dict(cut=3))
else:
    sns.distplot(error)

In [None]:
# Display the raw per-sample residuals computed in the previous cell.
error