# Modelo de regressão para previsão de preços de imóveis do Airbnb no Rio de Janeiro

In [None]:
from platform import python_version

# Record which interpreter version executed this notebook.
versao_python = python_version()
print('Versão do Python neste projeto:', versao_python)

In [None]:
!pip install -q -U watermark

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objs as go

In [None]:
# (Re)load the watermark extension, then print the author name and the versions of the
# packages imported in this session.
%reload_ext watermark
%watermark -a "Rafael Gallo" --iversions

In [None]:
# matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and later releases
# removed the old names, so pick whichever variant this installation actually provides
# instead of failing with OSError on the hard-coded legacy name.
estilo_matplotlib = next(
    (
        nome
        for nome in ("seaborn-v0_8-darkgrid", "seaborn-darkgrid")
        if nome in plt.style.available
    ),
    None,
)
if estilo_matplotlib is not None:
    plt.style.use(estilo_matplotlib)

# seaborn's own dark-grid theme is applied regardless of the matplotlib style found above.
sns.set_style("darkgrid")

In [None]:
# Load the semicolon-separated HousePrices_HalfMil.csv file and preview its first rows.
# NOTE(review): `df` is not referenced by any other cell of this notebook; the analysis
# below uses df_train / df_test only.
df = pd.read_csv("Dados/HousePrices_HalfMil.csv", sep = ";")
df.head()

In [None]:
# Load the two Airbnb tables used in the rest of the notebook.
# NOTE(review): despite the names, these are two different files (calendar vs. listings
# summary), not a train/test partition; the actual train/test split is made later with
# train_test_split on data taken from df_test.
df_train = pd.read_csv("Dados 2/calendar.csv")
df_test = pd.read_csv("Dados 2/listings_summary.csv")

In [None]:
# Preview the first rows of df_train (loaded from calendar.csv).
df_train.head()

In [None]:
# Preview the first rows of df_test (loaded from listings_summary.csv).
df_test.head()

In [None]:
# Preview the last rows of df_train.
df_train.tail()

In [None]:
# Preview the last rows of df_test.
df_test.tail()

In [None]:
# Portuguese labels for df_test, assigned positionally: one entry per existing column,
# in the order the columns appear in the loaded file.
nomes_colunas = [
    "ID",
    "Nome",
    "hospede_id",
    "hospede_nome",
    "Bairro",
    "Vizinhança",
    "latitude",
    "longitude",
    "Tipo de sala",
    "Preço",
    "Noites mínimas",
    "Número de comentários",
    "Última revisão",
    "Avaliações por mês",
    "Contagem de listagens de host calculada",
    "Disponibilidade 365",
]
df_test.columns = nomes_colunas

# Show the first rows with the new labels.
df_test.head()

# Análise Exploratória

In [None]:
# (rows, columns) of df_train.
df_train.shape

In [None]:
# (rows, columns) of df_test.
df_test.shape

In [None]:
# Column labels of df_train.
df_train.columns

In [None]:
# Column labels of df_test (the Portuguese names assigned earlier in the notebook).
df_test.columns

In [None]:
# Data type of each df_train column.
df_train.dtypes

In [None]:
# Data type of each df_test column.
df_test.dtypes

In [None]:
# Summary statistics of df_train.
df_train.describe()

In [None]:
# Summary statistics of df_test.
df_test.describe()

In [None]:
# Pairwise correlation of the numeric columns only. Since pandas 2.0, DataFrame.corr() no
# longer drops non-numeric columns silently and raises when it meets strings, so the
# numeric subset is selected explicitly (this also works on older pandas versions).
df_train_corr = df_train.select_dtypes(include="number").corr()
df_train_corr

In [None]:
# Pairwise correlation of the numeric columns only. df_test contains text columns, and
# since pandas 2.0 DataFrame.corr() raises on them instead of dropping them silently, so
# the numeric subset is selected explicitly (this also works on older pandas versions).
df_test_corr = df_test.select_dtypes(include="number").corr()
df_test_corr

In [None]:
# Annotated heatmap of the df_train correlation matrix on a 12x9 inch figure.
fig, eixo_corr = plt.subplots(figsize=(12, 9))
sns.heatmap(data=df_train_corr, annot=True, cmap='plasma', ax=eixo_corr)
plt.show()

In [None]:
# Annotated heatmap of the df_test correlation matrix on a 12x9 inch figure.
fig, eixo_corr = plt.subplots(figsize=(12, 9))
sns.heatmap(data=df_test_corr, annot=True, cmap='plasma', ax=eixo_corr)
plt.show()

In [None]:
plt.figure(figsize=(18, 8))

# Scatter of "Preço" (x) against "Noites mínimas" (y), coloured by "Bairro".
# The title and axis labels name the columns that are actually plotted (the previous
# text referred to "Condomínio", which is not a column of this dataset).
ax = sns.scatterplot(x="Preço", y="Noites mínimas", hue='Bairro', data=df_test)
ax.set_title('Preço x Noites mínimas')
ax.set_ylabel('Noites mínimas')
ax.set_xlabel('Preço')

In [None]:
# Line plot of "Preço" as a function of "Noites mínimas" on a 10x8 inch figure.
eixo_linha = plt.figure(figsize=(10, 8)).gca()
sns.lineplot(data=df_test, x="Noites mínimas", y="Preço", ax=eixo_linha)

In [None]:
# Line plot of "Preço" as a function of "Avaliações por mês" on a 10x8 inch figure.
eixo_linha = plt.figure(figsize=(10, 8)).gca()
sns.lineplot(data=df_test, x="Avaliações por mês", y="Preço", ax=eixo_linha)

In [None]:
# Listing coordinates (latitude on x, longitude on y), coloured by "Tipo de sala".
eixo_coord = plt.figure(figsize=(10, 6)).gca()
sns.scatterplot(data=df_test, x="latitude", y="longitude", hue="Tipo de sala", ax=eixo_coord)

In [None]:
# Map of the listings where both marker size and marker colour encode "Preço".
apt = df_test[["latitude", "longitude", "Preço"]]

fig = px.scatter_mapbox(
    apt,
    lat="latitude",
    lon="longitude",
    size="Preço",
    # Without a `color` column the continuous colour scale below is silently ignored.
    color="Preço",
    color_continuous_scale=px.colors.cyclical.IceFire,
    size_max=15,
    zoom=10,
)

# Single layout update: title, OpenStreetMap tiles (no access token needed) and no margins.
fig.update_layout(
    title='Mapa região de apartamentos Rio de Janeiro',
    mapbox_style="open-street-map",
    margin={"r": 0, "t": 0, "l": 0, "b": 0},
)
fig.show()

In [None]:
# Map of the listings where both marker size and marker colour encode "Disponibilidade 365".
apt = df_test[["latitude", "longitude", "Disponibilidade 365"]]

fig = px.scatter_mapbox(
    apt,
    lat="latitude",
    lon="longitude",
    size="Disponibilidade 365",
    # Without a `color` column the continuous colour scale below is silently ignored.
    color="Disponibilidade 365",
    color_continuous_scale=px.colors.cyclical.IceFire,
    size_max=15,
    zoom=10,
)

# The data is the same Rio de Janeiro listings table used by the other cells, so the
# title names Rio de Janeiro (it previously said "Nova York").
fig.update_layout(
    title='Mapa região de apartamentos Rio de Janeiro',
    mapbox_style="open-street-map",
    margin={"r": 0, "t": 0, "l": 0, "b": 0},
)
fig.show()

# Limpeza de dados

In [None]:
# Column dtypes and non-null counts of df_test, used to decide what to clean.
df_test.info()

In [None]:
# Identifier, coordinate, host and date columns that are not used from this point on.
colunas_descartadas = [
    "ID",
    "Nome",
    "latitude",
    "longitude",
    "hospede_id",
    "Última revisão",
    "hospede_nome",
]

# Rebind instead of mutating in place, and ignore labels that are already gone, so the
# cell can be re-run without raising KeyError.
df_test = df_test.drop(columns=colunas_descartadas, errors="ignore")
df_test.head()

In [None]:
# Replace missing values in "Avaliações por mês" with 0, then display the frame.
df_test["Avaliações por mês"] = df_test["Avaliações por mês"].fillna(0)
df_test

In [None]:
# Number of missing values remaining in each df_test column.
df_test.isna().sum()

In [None]:
# Numeric listing attributes used as predictors. The target column "Preço" must NOT be
# part of the feature matrix: using it as its own predictor makes the model learn y = x
# and report a meaningless perfect fit.
colunas_features = [
    "Noites mínimas",
    "Número de comentários",
    "Avaliações por mês",
    "Contagem de listagens de host calculada",
    "Disponibilidade 365",
]

# x: 2-D array with one column per feature; y: target as a single-column 2-D array.
x = df_test[colunas_features].values
y = df_test["Preço"].values.reshape(-1, 1)

In [None]:
# Display the feature array built in the previous cell.
x

In [None]:
# Display the target array built from the "Preço" column.
y

In [None]:
# Dimensions of the feature array.
x.shape

In [None]:
# Dimensions of the target array.
y.shape

In [None]:
from sklearn.preprocessing import StandardScaler

# One scaler per array. Refitting a single StandardScaler on y overwrites the statistics
# learned from x, which makes it impossible to transform new inputs or to map predictions
# back to the original scale afterwards.
scaler_entrada = StandardScaler()
scaler_alvo = StandardScaler()

# `scaler` is kept as a name for compatibility; as before, it ends up holding the
# statistics of y.
scaler = scaler_alvo

# NOTE(review): both scalers are fitted on the full arrays before the train/test split
# that follows, so the held-out rows contribute to the scaling statistics.
scaler_x = scaler_entrada.fit_transform(x)
scaler_y = scaler_alvo.fit_transform(y)

In [None]:
# Display the standardized feature array (scaler_x is an array, not a scaler object).
scaler_x

In [None]:
# Display the standardized target array (scaler_y is an array, not a scaler object).
scaler_y

In [None]:
# Dimensions of the standardized feature array.
scaler_x.shape

In [None]:
# Dimensions of the standardized target array.
scaler_y.shape

# Treino e Teste

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the standardized samples for testing; random_state=0 makes the split
# reproducible.
# NOTE(review): scaler_x / scaler_y were produced by fit_transform on the full arrays
# before this split, so the test rows influenced the scaling statistics.
X_train, X_test, y_train, y_test = train_test_split(scaler_x, scaler_y, test_size = 0.2, random_state = 0)

In [None]:
# Dimensions of the training features.
X_train.shape

In [None]:
# Dimensions of the training target.
y_train.shape

In [None]:
# Dimensions of the test features.
X_test.shape

In [None]:
# Dimensions of the test target.
y_test.shape

# Modelo de regressão linear

In [None]:
from sklearn.linear_model import LinearRegression

# Fit the linear model on the training split; fit() returns the estimator itself, so the
# construction and the fit can be chained.
reg_line = LinearRegression().fit(X_train, y_train)

# In-sample predictions, shown as the cell output.
reg_pred = reg_line.predict(X_train)
reg_pred

In [None]:
# R² of the model on the training split (the same data it was fitted on).
reg_line.score(X_train, y_train)

In [None]:
# Fitted coefficient(s) of the linear model.
reg_line.coef_

In [None]:
# Manual evaluation of the fitted line (coef * value + intercept) at the input 27.74456356.
# NOTE(review): the origin of this constant is not shown in the notebook. The model was
# fitted on standardized inputs, so confirm the value is expressed on that scale.
reg_line.coef_ * 27.74456356 + reg_line.intercept_

In [None]:
# Predictions for the training split (same call that produced reg_pred earlier).
pred = reg_line.predict(X_train)
pred

In [None]:
# Predictions for the held-out test split; used by the metrics cell.
y_pred = reg_line.predict(X_test)
y_pred

In [None]:
# Training residuals: observed target minus predicted value.
# NOTE: despite its name, pred2 holds residuals, not predictions.
pred2 = y_train - pred
pred2

In [None]:
# Predicted-versus-observed diagnostic on the training split. The predictions are computed
# locally so the cell does not depend on the global `pred`, which a later cell rebinds
# to a single prediction.
pred_treino = reg_line.predict(X_train)

plt.figure(figsize=(18, 8))
# Points: observed target against the model's prediction for the same row.
plt.scatter(pred_treino, y_train, label="Observado")
# Red reference line: prediction against itself (where a perfect fit would place the points).
plt.plot(pred_treino, pred_treino, color="red", label="Previsto")
plt.title("Grafico de regressão linear - AirBnb RJ", fontsize=20)
# Axis and legend labels describe what is actually drawn on each axis.
plt.xlabel("Valor previsto")
plt.ylabel("Valor real")
plt.legend()
plt.show()

In [None]:
# The title announces the distribution of the residuals, so plot the residuals (observed
# minus predicted on the training split) rather than the predictions themselves. They are
# computed locally and flattened to 1-D so the cell does not rely on other globals.
residuos = (y_train - reg_line.predict(X_train)).ravel()

# sns.distplot is deprecated; histplot with a KDE overlay on a density scale is the
# replacement recommended by seaborn.
ax = sns.histplot(residuos, kde=True, stat="density")
ax.figure.set_size_inches(20, 8)
ax.set_title('Distribuição de Frequências dos Resíduos', fontsize=18)
ax.set_xlabel('Resíduo', fontsize=14)
ax

In [None]:
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from sklearn import metrics
from math import sqrt

# Error metrics on the held-out test split. y_test comes from the standardized target, so
# every value below is expressed in standardized units, not in the original price scale.
mse = mean_squared_error(y_test, y_pred)

# The line labelled MSE previously printed the square root (i.e. the RMSE twice).
print('MSE:', mse)
print('MAE:', mean_absolute_error(y_test, y_pred))
print("RMSE", np.sqrt(mse))
print("R2", r2_score(y_test, y_pred))

# Previsão do imóvel

In [None]:
# First row of the test features; slicing (rather than indexing) keeps it 2-D.
X_test[0:1]

In [None]:
# Keep the first test row as a single-sample input for the prediction below.
a1 = X_test[0: 1]
a1

In [None]:
# Predict for the single sample a1 and take the first row of the result.
# NOTE(review): this rebinds `pred`, which an earlier cell set to the training-set
# predictions used by the residual and plotting cells; re-running those cells after this
# one, without first re-running `pred = reg_line.predict(X_train)`, would pick up this value.
pred = reg_line.predict(a1)[0]
pred

# Salvando o modelo de regressão linear

In [None]:
import pickle

# Serialize the fitted regression model. Previously the file was opened and closed without
# anything being written, which left an empty file on disk. The context manager also
# guarantees the handle is closed if pickling fails.
# NOTE(review): the model was trained on standardized inputs/targets, so whoever loads it
# presumably also needs the fitted scalers — confirm and persist them if required.
with open("modelo_previsao_imóvel_AirBnb-RJ", "wb") as saida:
    pickle.dump(reg_line, saida)