# Normalizando Dados

In [5]:
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression


# Load the property listings from a CSV file resolved relative to the current
# working directory; the next cell builds its feature table from columns of `data`.
data = pd.read_csv('imovel_limpo.csv')


In [9]:



# Source column in `data` -> short feature name used in the rest of the notebook.
mapa_colunas = {
    'Quartos': 'quartos',
    'Suítes': 'suites',
    'Área do imóvel': 'area',
    'Tipo do imóvel': 'tipo',
    'Bairro': 'bairro',
    'Preço': 'preco',
}

# Select by label so that a missing column fails with a KeyError naming it
# (wrapping `data.get(...)` in `list()` failed with an opaque
# "'NoneType' object is not iterable" instead). Rows are renumbered from 0
# before the incomplete ones are dropped, matching the index produced by
# building the frame from plain lists.
dados = (
    data[list(mapa_colunas)]
    .rename(columns=mapa_colunas)
    .reset_index(drop=True)
    .dropna()
)

# --- Numeric cleanup ---
# Area: remove the 'm²' unit and spaces, accept a decimal comma, convert to float.
# NOTE(review): a '.' is kept as a decimal point here, so an area written with a
# thousands separator ('1.200') would parse as 1.2 — confirm the raw format.
dados['area'] = (
    dados['area']
    .astype(str)
    .str.replace('m²', '', regex=False)
    .str.replace(' ', '', regex=False)
    .str.replace(',', '.', regex=False)
    .astype(float)
)

# Price: values are expected in Brazilian currency format, e.g. 'R$ 250.000,00'.
if pd.api.types.is_numeric_dtype(dados['preco']):
    # Already numeric: a round trip through str would render 250000.0 as
    # '250000.0', and stripping the '.' would silently yield 2500000.0.
    dados['preco'] = dados['preco'].astype(float)
else:
    dados['preco'] = (
        dados['preco']
        .astype(str)
        .str.replace('R$', '', regex=False)
        .str.replace(' ', '', regex=False)
        # '.' is the thousands separator and ',' the decimal mark. Converting
        # the comma (instead of deleting only a literal ',00') keeps prices
        # with non-zero cents such as '1.234,50' parseable.
        .str.replace('.', '', regex=False)
        .str.replace(',', '.', regex=False)
        .astype(float)
    )

# Feature groups: the single place that decides which columns feed the model.
colunas_categoricas = ['bairro', 'tipo']
colunas_numericas = ['quartos', 'suites', 'area']

# Design matrix and target, derived from the groups above.
X = dados[colunas_numericas + colunas_categoricas]
y = dados['preco']

# One-hot encode the categorical columns and pass the numeric ones through
# unchanged. Both groups are listed explicitly, so every column is selected by
# name and the transformer does not depend on the column order of the frame it
# receives later. The output layout is unchanged: encoded categories first,
# then quartos, suites, area.
# NOTE(review): with drop='first' together with handle_unknown='ignore', a
# category not seen during fit is encoded as all zeros, which is the same
# encoding as the dropped reference category — confirm this is acceptable.
preprocessador = ColumnTransformer([
    ('cat', OneHotEncoder(drop='first', handle_unknown='ignore'), colunas_categoricas),
    ('num', 'passthrough', colunas_numericas),
])

# --- Train/test split ---
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_treino, X_teste, y_treino, y_teste = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# --- Build and fit the model ---
# The preprocessor is fitted on the training rows only; its output is the
# matrix the linear regression is trained on.
X_treino_codificado = preprocessador.fit_transform(X_treino)
modelo = LinearRegression()
modelo.fit(X_treino_codificado, y_treino)

# --- Results ---
# Fitted parameters of the linear model.
print("Coeficientes:", modelo.coef_)
print("Intercepto:", modelo.intercept_)

# R² on the held-out rows, encoded with the already-fitted preprocessor.
X_teste_codificado = preprocessador.transform(X_teste)
r2_teste = modelo.score(X_teste_codificado, y_teste)
print("Score R² no teste:", r2_teste)

# --- Predict the price of a new listing ---
novo = pd.DataFrame({
    'quartos': [2],
    'suites': [1],
    'area': [70],
    'tipo': ['Residencial'],
    'bairro': ['Centro'],
})
# Present the columns in exactly the order of the training frame, so the
# prediction does not depend on how the transformer resolves pass-through
# columns; this also fails loudly if a training column is missing here.
novo = novo[list(X.columns)]
# NOTE(review): 'Residencial' and 'Centro' are assumed to occur in the training
# data; the encoder is configured with handle_unknown='ignore', so an unseen
# value would be encoded silently rather than raise — confirm.

preco_previsto = modelo.predict(preprocessador.transform(novo))
print("Preço estimado:", preco_previsto)



Coeficientes: [ 2.62940504e+04  1.55840114e+04  1.82154727e+04  1.90870830e+04
 -5.34131731e+03  8.83051528e+04  5.95385750e+04  9.29454013e+03
  2.16829718e+04  7.23585825e+03  1.92496016e+04  1.50645070e+04
  1.16079544e+04  1.41829707e+04 -2.55331624e+03  9.67664585e+03
 -4.34780876e+03 -1.37737643e+04  1.84343150e+04  2.70140832e+04
  3.89286608e+04  2.53515784e+04  1.15462625e+04  2.26166090e+04
  1.65942107e+04  2.82828032e+04  1.47770609e+04  9.60386564e+03
  9.56619425e+03  2.63304028e+04  7.01652230e+03  7.25110333e+03
  4.30677452e+03  2.15064104e+03 -2.06546458e+03  2.48159868e+04
  4.33813746e+04 -1.60219142e+04  4.50122983e+04 -3.09137146e+04
 -4.32956149e+03 -2.82884439e+03 -4.89801468e+04 -3.46150007e+04
 -2.69233105e+04  6.39810208e+03 -1.07575023e+05 -5.79216037e+05
  2.38071619e+04  4.90967876e+03  3.24871214e+04 -1.09877293e+03
  2.33178931e+04 -4.32280340e+03  9.30642100e+03  1.24714403e+04
  5.95793108e+03  1.36387417e+04  9.87058233e+03  9.83695396e+03
 -1.4994477

