In [None]:
# KAGGLE HOUSE PRICE SUBMISSION

import pandas as pd
import numpy as np

from sklearn.linear_model import LinearRegression
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# 1. Load Data
# Both CSV files are read from the current working directory.
train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")

# 2. Separate target and features
# The target is extracted first so a missing "sale_price" column fails loudly
# here with a KeyError instead of being masked by errors="ignore" below.
y = train["sale_price"]
# "id" is only a row identifier: it is needed for the submission file but must
# not be used as a predictor, otherwise the regression fits a coefficient to
# an arbitrary row number. errors="ignore" keeps this working when train.csv
# has no "id" column.
X = train.drop(columns=["sale_price", "id"], errors="ignore")

# 3. Identify column types
# "number" matches every integer/float width rather than only int64/float64.
# Columns that end up in neither list are discarded by the ColumnTransformer
# (its default is remainder="drop"), so a too-narrow dtype filter would
# silently lose features.
numeric_features = X.select_dtypes(include="number").columns
categorical_features = X.select_dtypes(include=["object"]).columns

# 4. Preprocessing for numerical data
# Missing numeric values are replaced by the column median.
numeric_transformer = SimpleImputer(strategy="median")

# 5. Preprocessing for categorical data
# Missing categories are replaced by the most frequent value, then one-hot
# encoded. handle_unknown="ignore" lets prediction proceed when the test set
# contains a category that was never seen during fitting.
categorical_transformer = Pipeline(steps=[
    ("imputer", SimpleImputer(strategy="most_frequent")),
    ("onehot", OneHotEncoder(handle_unknown="ignore")),
])

# 6. Combine preprocessing
# Each transformer is applied only to its own list of columns.
preprocessor = ColumnTransformer(
    transformers=[
        ("num", numeric_transformer, numeric_features),
        ("cat", categorical_transformer, categorical_features),
    ]
)

# 7. Define model
model = LinearRegression()

# 8. Create full pipeline
# Preprocessing and model are chained so the exact same transformations are
# applied at fit time and at predict time.
clf = Pipeline(steps=[
    ("preprocessor", preprocessor),
    ("model", model),
])

# 9. Train model
clf.fit(X, y)

# 10. Predict on test data
# Select exactly the training feature columns, in training order. This keeps
# "id" out of the model input and raises a KeyError if test.csv is missing a
# column the model was trained on.
predictions = clf.predict(test[X.columns])

# 11. Create submission file
# One row per test record: its original "id" and the predicted price.
submission = pd.DataFrame({
    "id": test["id"],
    "sale_price": predictions,
})

submission.to_csv("submission.csv", index=False)

print("✅ submission.csv created successfully using Linear Regression!")

✅ submission.csv created successfully using Linear Regression!
