<a href="https://colab.research.google.com/github/LuizSampaio-cpu/Colab-Kaggle/blob/main/house_prices_kaggle.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

House Prices - Advanced Regression Techniques

In [1]:
# Step 1: Upload the Kaggle CSV files into the Colab runtime.
from pathlib import Path

from google.colab import files

# Prompt for an upload only when a required file is missing. Re-running the
# cell with the files already present would otherwise block on the upload
# widget again, and Colab stores a re-uploaded duplicate under a new unique
# name (e.g. "train (1).csv"), so the read_csv calls further down would keep
# reading the older copy.
required_files = ('train.csv', 'test.csv')
if all(Path(name).is_file() for name in required_files):
    uploaded = {}  # nothing was uploaded on this run
else:
    uploaded = files.upload()

# Passo 2: Importar bibliotecas necessárias
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

# Step 3: Read the training and test sets from the working directory.
train, test = (pd.read_csv(csv_name) for csv_name in ('train.csv', 'test.csv'))

# Step 4: Separate the target from the features.
# 'Id' is dropped from both feature frames; the test ids are kept aside
# because they are needed again when the submission file is written.
y = train['SalePrice']
X = train.drop(columns=['SalePrice', 'Id'])
test_ids, test = test['Id'], test.drop(columns='Id')

# Step 5: Hold out 20% of the training rows for validation (fixed seed so the
# split is the same on every run).
X_train, X_valid, y_train, y_valid = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Step 6: Group the feature column names by dtype; each group is routed to
# its own preprocessing pipeline.
numeric_features = X.select_dtypes(include=np.number).columns
categorical_features = X.select_dtypes(include='object').columns

# Step 7: Preprocessing pipelines.

# Numeric columns: fill missing values with the median, then standardize.
numeric_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
])

# Categorical columns: fill missing values with the literal 'missing', then
# one-hot encode; categories not seen during fit are ignored at transform time.
categorical_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
])

# Apply each pipeline to its own group of columns.
preprocessor = ColumnTransformer([
    ('num', numeric_transformer, numeric_features),
    ('cat', categorical_transformer, categorical_features),
])

# Steps 8-9: Chain the preprocessing with a linear regression and train the
# whole pipeline on the training split. fit() returns the fitted pipeline, so
# `model` is ready for predict() right after this statement.
model = Pipeline([
    ('preprocessor', preprocessor),
    ('regressor', LinearRegression()),
]).fit(X_train, y_train)

# Step 10: Score the model on the held-out validation split.
y_pred = model.predict(X_valid)
# RMSE is the square root of the mean squared error, in the target's units.
mse = mean_squared_error(y_valid, y_pred)
rmse = np.sqrt(mse)
print(f'RMSE: {rmse}')

# Step 11: Predict sale prices for the Kaggle test set.
test_preds = model.predict(test)

# Step 12: Write the submission file: one row per test id with its
# predicted price, without the DataFrame index.
submission_path = 'submission.csv'
submission = pd.DataFrame({'Id': test_ids, 'SalePrice': test_preds})
submission.to_csv(submission_path, index=False)

# Step 13: Trigger a browser download of the submission file from Colab.
files.download(submission_path)


Saving train.csv to train.csv
Saving test.csv to test.csv
RMSE: 65387.55949158339


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>