In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import tensorflow as tf
import matplotlib.pyplot as plt

In [None]:
# Load the house-price dataset into a DataFrame and preview the first rows.
# A forward slash is used as the path separator: "\k" is not a valid string
# escape (newer Python versions warn about it) and a backslash separator only
# resolves on Windows, whereas "/" works on every platform.
df = pd.read_csv("Dados/kc-house-data.csv")
df.head()

In [None]:
# Inspect the last 10 rows of the dataset.
df.tail(10)

In [None]:
# Print the column names, dtypes and non-null counts of the dataset.
df.info()

# Visualização de dados

In [None]:
# Scatter plot of price against living area, one point per row of the dataset.
sns.scatterplot(
    data=df,
    x="sqft_living",
    y="price",
)

In [None]:
# Annotated heatmap of the pairwise correlations between numeric columns.
f, ax = plt.subplots(figsize = (20, 10))

# Restrict the frame to numeric columns before calling corr(): recent pandas
# versions raise instead of silently skipping columns that cannot be converted
# to float. select_dtypes() behaves the same on old and new pandas.
numeric_corr = df.select_dtypes(include = "number").corr()

# Draw on the axes created above explicitly rather than relying on the
# "current axes" pyplot state.
sns.heatmap(numeric_corr, annot = True, ax = ax)

In [None]:
# One 20-bin histogram per column of the dataset, drawn in green on a 20x20 figure.
df.hist(bins = 20, figsize = (20, 20), color = "g")

# Tratamento da base de dados

In [None]:
# Baseline experiment: seven columns are used as model inputs and
# "price" is the regression target.
selected_features = [
    'bedrooms',
    'bathrooms',
    'sqft_living',
    'sqft_lot',
    'floors',
    'sqft_above',
    'sqft_basement',
]
y = df["price"]
X = df[selected_features]
X.head()

In [None]:
# Preview the first values of the target column.
y.head()

In [None]:
# (rows, columns) of the feature frame.
X.shape

In [None]:
# Shape of the target; still a 1-D Series at this point.
y.shape

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Learn the per-column min/max of the features, then rescale them with the
# scaler's default output range. Note that `x_scaler` is the scaled feature
# array, not a scaler object.
scaler = MinMaxScaler().fit(X)
x_scaler = scaler.transform(X)
x_scaler

In [None]:
# Shape of the scaled feature array.
x_scaler.shape

In [None]:
# Per-column maximum seen by the scaler during its most recent fit.
scaler.data_max_

In [None]:
# Per-column minimum seen by the scaler during its most recent fit.
scaler.data_min_

In [None]:
# Turn the target into a column vector of shape (n_samples, 1), the 2-D layout
# that scaler.fit_transform() is given below.
# np.asarray() is used instead of `.values` so the cell is safe to re-run:
# after the first run `y` is already an ndarray, which has no `.values`.
y = np.asarray(y).reshape(-1, 1)
y

In [None]:
# Shape of the target after reshaping: (n_samples, 1).
y.shape

In [None]:
# NOTE: this re-fits the same `scaler` object that was previously fitted on the
# features, so from here on `scaler` holds the price min/max rather than the
# feature ranges. The later `scaler.inverse_transform(...)` calls on predictions
# rely on exactly that state.
y_scaled = scaler.fit_transform(y)
y_scaled

In [None]:
# Train / test split (75% / 25%).

from sklearn.model_selection import train_test_split

# A fixed random_state makes the split identical on every run, so the metrics
# computed further down are comparable between executions of the notebook.
x_train, x_test, y_train, y_test = train_test_split(x_scaler, y_scaled,
                                                    test_size = 0.25,
                                                    random_state = 42)

In [None]:
# Size of the training partition (rows, features).
x_train.shape

In [None]:
# Size of the test partition (rows, features).
x_test.shape

# Modelo de regressão com rede neural artificial (ANN)

In [None]:
# Fully connected regression network: three hidden ReLU layers of 100 units
# each, followed by a single linear output unit that predicts the scaled price.
# The input width is read from the training data instead of being hard-coded,
# so the model stays in sync with whatever feature columns were selected.
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Dense(units = 100, activation = "relu", input_shape = (x_train.shape[1], )))
model.add(tf.keras.layers.Dense(units = 100, activation = "relu"))
model.add(tf.keras.layers.Dense(units = 100, activation = "relu"))
model.add(tf.keras.layers.Dense(units = 1, activation = "linear"))

model.summary()
# Adam optimizer minimising the mean squared error.
model.compile(optimizer = "Adam", loss = "mean_squared_error")

In [None]:
# Train for 100 epochs in batches of 50 samples; 20% of the training data is
# held out for validation. `hist` keeps the per-epoch loss values.
hist = model.fit(x_train, y_train, epochs = 100, batch_size = 50, validation_split = 0.2)

In [None]:
# List the metrics recorded during training.
hist.history.keys()

In [None]:
# Training and validation loss per epoch, drawn in the same order as the legend.
for curve in ("loss", "val_loss"):
    plt.plot(hist.history[curve])

plt.title('Model Loss Progress During Training')
plt.xlabel('Epoch')
plt.ylabel('Training and Validation Loss')
plt.legend(['Training Loss', 'Validation Loss']);

# Previsão

In [None]:
# Predict the price of one hand-written house.
# Column order: 'bedrooms', 'bathrooms', 'sqft_living', 'sqft_lot', 'floors', 'sqft_above', 'sqft_basement'
X_test_1 = np.array([[ 4, 3, 1960, 5000, 1, 2000, 3000 ]])

# The sample has to be scaled with the min/max of the data the model was
# trained on. Fitting a fresh MinMaxScaler on this single row maps every
# feature to 0 (min == max for each column), so the network would always be
# fed an all-zero input regardless of the values above.
# The shared `scaler` cannot be reused for the features: by this point it has
# been re-fitted on the price column. A dedicated scaler is therefore fitted on
# the same seven feature columns, taken straight from `df`.
baseline_features = ['bedrooms', 'bathrooms', 'sqft_living', 'sqft_lot',
                     'floors', 'sqft_above', 'sqft_basement']
scaler_1 = MinMaxScaler()
scaler_1.fit(df[baseline_features].values)
X_test_scaled_1 = scaler_1.transform(X_test_1)

y_predict_1 = model.predict(X_test_scaled_1)

# `scaler` currently holds the price range, so this maps the scaled prediction
# back to price units.
y_predict_1 = scaler.inverse_transform(y_predict_1)
y_predict_1

In [None]:
# Predicted vs. true values on the test set (both still in scaled units).
y_predict = model.predict(x_test)
plt.plot(y_test, y_predict, "o", color = "g")
# The x axis carries y_test (ground truth) and the y axis the predictions;
# the labels were previously swapped.
plt.xlabel("True Values")
plt.ylabel("Model Predictions")

In [None]:
# Map the scaled predictions back to price units. This relies on `scaler`
# having been fitted on the price column most recently.
y_predict_orig = scaler.inverse_transform(y_predict)
y_predict_orig

In [None]:
# Map the scaled test targets back to price units with the same scaler.
y_test_orig = scaler.inverse_transform(y_test)
y_test_orig

In [None]:
# Predicted vs. true prices on the test set, in original price units.
plt.plot(y_test_orig, y_predict_orig, "o", color = "g")
# The x axis carries the true prices and the y axis the predictions;
# the labels were previously swapped.
plt.xlabel('True Values')
plt.ylabel('Model Predictions')
# Clip both axes to the chosen display window.
plt.xlim(0, 5000000)
plt.ylim(0, 3000000);

In [None]:
# k: number of feature columns in the test set (used for adjusted R2 below).
k = x_test.shape[1]
k

In [None]:
# n: number of samples in the test set (used for adjusted R2 below).
n = len(x_test)
n

**Métricas: RMSE, MSE, MAE, R² e R² ajustado**

In [None]:
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from math import sqrt

# Regression metrics on the test set, computed in original price units.
MSE = mean_squared_error(y_test_orig, y_predict_orig)
MAE = mean_absolute_error(y_test_orig, y_predict_orig)
R2 = r2_score(y_test_orig, y_predict_orig)

# RMSE is the square root of the MSE, kept to three decimal places.
RMSE = float(format(np.sqrt(MSE), ".3f"))

# Adjusted R2 penalises R2 for the number of predictors k given n samples.
penalty = (n - 1) / (n - k - 1)
adj_r2 = 1 - (1 - R2) * penalty

In [None]:
# Root mean squared error, rounded to three decimals.
RMSE

In [None]:
# Mean squared error.
MSE

In [None]:
# Mean absolute error.
MAE

In [None]:
# Coefficient of determination.
R2

In [None]:
# R2 adjusted for the number of predictors.
adj_r2

In [None]:
# Print all baseline metrics together, one per line.
print('RMSE =',RMSE, '\nMSE =',MSE, '\nMAE =',MAE, '\nR2 =', R2, '\nAdjusted R2 =', adj_r2) 

# Aumento da quantidade de características

In [None]:
# Second experiment: widen the feature set with location, quality and
# neighbourhood columns.
# NOTE(review): 'sqft_above' is listed twice, so X carries a duplicated column
# (19 columns, 18 distinct). Dropping the duplicate changes the input width
# expected by the model defined further down, so both must be changed together.
selected_features = ['bedrooms','bathrooms','sqft_living','sqft_lot','floors', 'sqft_above', 'sqft_basement', 'waterfront', 'view', 'condition', 'grade', 'sqft_above', 'yr_built', 
'yr_renovated', 'zipcode', 'lat', 'long', 'sqft_living15', 'sqft_lot15']

X = df[selected_features]

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Fresh scaler for the extended feature set: learn the per-column min/max
# first, then rescale the features with it.
scaler = MinMaxScaler().fit(X)
X_scaled = scaler.transform(X)
X_scaled

In [None]:
# Re-read the target column from the original frame (a 1-D Series again).
y = df["price"]
y

In [None]:
# Turn the target into a column vector of shape (n_samples, 1).
# np.asarray() is used instead of `.values` so the cell is safe to re-run:
# after the first run `y` is already an ndarray, which has no `.values`.
y = np.asarray(y).reshape(-1, 1)

# NOTE: this re-fits the shared `scaler` on the price column, replacing the
# feature ranges it held before; the later `scaler.inverse_transform(...)`
# calls on predictions rely on that state.
y_scaled = scaler.fit_transform(y)
y

In [None]:
# Scaled target values.
y_scaled

In [None]:
from sklearn.model_selection import train_test_split

# 75% / 25% train/test split of the extended feature set. A fixed random_state
# makes the split identical on every run, so the reported metrics are
# comparable between executions of the notebook.
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_scaled, test_size = 0.25,
                                                    random_state = 42)

In [None]:
# Size of the training partition (rows, features) for the extended feature set.
X_train.shape

In [None]:
# Size of the test partition (rows, features) for the extended feature set.
X_test.shape

In [None]:
# Same architecture as the first network (three hidden ReLU layers of 100
# units, one linear output), sized for the extended feature set.
# The input width is read from the training data instead of being hard-coded,
# so it always matches the number of selected feature columns.
model2 = tf.keras.models.Sequential()
model2.add(tf.keras.layers.Dense(units = 100, activation = "relu", input_shape = (X_train.shape[1], )))
model2.add(tf.keras.layers.Dense(units = 100, activation = "relu"))
model2.add(tf.keras.layers.Dense(units = 100, activation = "relu"))
model2.add(tf.keras.layers.Dense(units = 1, activation = "linear"))

# Adam optimizer minimising the mean squared error.
model2.compile(optimizer = "Adam", loss = "mean_squared_error")
model2.summary()

In [None]:
# Train the second model with the same settings as the first (100 epochs,
# batches of 50, 20% of the training data held out for validation).
# `hist` is overwritten with this run's history.
hist = model2.fit(X_train, y_train, epochs = 100, batch_size = 50, validation_split = 0.2)

In [None]:
# Training and validation loss per epoch for the second model, drawn in the
# same order as the legend.
for curve in ('loss', 'val_loss'):
    plt.plot(hist.history[curve])

plt.title('Model Loss Progress During Training')
plt.ylabel('Training and Validation Loss')
plt.xlabel('Epoch number')
plt.legend(['Training Loss', 'Validation Loss']);

In [None]:
# Predict with the second model. `model` is the first network and was built for
# the smaller feature set, so it cannot consume this wider X_test; `model2` is
# the network trained on these features.
y_predict = model2.predict(X_test)
plt.plot(y_test, y_predict, "o", color = 'b')
# The x axis carries y_test (ground truth) and the y axis the predictions;
# the labels were previously swapped.
plt.xlabel("True Value (ground Truth)")
plt.ylabel("Model Predictions")
plt.title('Linear Regression Predictions')
plt.show()

In [None]:
# Map the scaled predictions back to price units. This relies on `scaler`
# having been fitted on the price column most recently.
y_predict_orig = scaler.inverse_transform(y_predict)
y_predict_orig 

In [None]:
# Map the scaled test targets back to price units with the same scaler.
y_test_orig = scaler.inverse_transform(y_test)
y_test_orig

In [None]:
# Regression metrics for the second model, computed in original price units.
RMSE = float(format(np.sqrt(mean_squared_error(y_test_orig, y_predict_orig)),'.3f'))
MSE = mean_squared_error(y_test_orig, y_predict_orig)
MAE = mean_absolute_error(y_test_orig, y_predict_orig)
r2 = r2_score(y_test_orig, y_predict_orig)

# Adjusted R2 must use the sample and predictor counts of THIS test set. The
# notebook-level `n` and `k` were computed for the first, smaller feature set,
# so reusing them would apply the wrong penalty here.
n_samples, n_features = X_test.shape
adj_r2 = 1-(1-r2)*(n_samples-1)/(n_samples-n_features-1)

print('RMSE =',RMSE, '\nMSE =',MSE, '\nMAE =',MAE, '\nR2 =', r2, '\nAdjusted R2 =', adj_r2) 