In [None]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import pandas as pd
import matplotlib.pyplot as plt

# Sacramento housing dataset, fetched from the selva86/datasets GitHub repo.
SACRAMENTO_CSV_URL = "https://raw.githubusercontent.com/selva86/datasets/master/Sacramento.csv"
df = pd.read_csv(SACRAMENTO_CSV_URL)

In [None]:
# Preview the first five rows to check the columns that were loaded.
df.head(n=5)

Unnamed: 0,city,zip,beds,baths,sqft,type,price,latitude,longitude
0,SACRAMENTO,z95838,2,1.0,836,Residential,59222,38.631913,-121.434879
1,SACRAMENTO,z95823,3,1.0,1167,Residential,68212,38.478902,-121.431028
2,SACRAMENTO,z95815,2,1.0,796,Residential,68880,38.618305,-121.443839
3,SACRAMENTO,z95815,2,1.0,852,Residential,69307,38.616835,-121.439146
4,SACRAMENTO,z95824,2,1.0,797,Residential,81900,38.51947,-121.435768


## Split Train-Test

In [None]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split identical across re-runs.
df_train, df_test = train_test_split(
    df,
    test_size=0.2,
    random_state=42,
)

## Creación de modelo lineal múltiple y evaluación



In [None]:
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import make_pipeline

# Columns fed to the model, grouped by how they are preprocessed.
categorical_features = ['city', 'zip', 'type']
numerical_features = ['sqft', 'beds', 'baths']
features = categorical_features + numerical_features
target = 'price'

# ColumnTransformer drops every column that is not assigned to a transformer
# (remainder='drop' is the default), so the numeric columns are listed
# explicitly with 'passthrough'; otherwise the regression would only ever see
# the one-hot encoded categoricals.
# handle_unknown='ignore' encodes a category that is absent from the training
# split (e.g. a rare zip code) as all zeros instead of raising at predict time.
ct = ColumnTransformer([
    ('onehot', OneHotEncoder(handle_unknown='ignore'), categorical_features),
    ('numeric', 'passthrough', numerical_features)
])

X_train = df_train[features]
y_train = df_train[target]
X_test = df_test[features]
y_test = df_test[target]


# Preprocessing and regression live in one pipeline so the encoder is fitted
# on the training rows only and then reused unchanged on the test rows.
model = make_pipeline(ct, LinearRegression())
model.fit(X_train, y_train)

# Error on held-out data.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error Test: \t{mse}")

# Error on the training data, for comparison against the test error.
y_pred_train = model.predict(X_train)
mse = mean_squared_error(y_train, y_pred_train)
print(f"Mean Squared Error Train: \t{mse}")

Mean Squared Error Test: 	10424160595.847422
Mean Squared Error Train: 	6983008129.57997


## Creación de modelo polinómico múltiple y evaluación

In [None]:
from sklearn.preprocessing import PolynomialFeatures

# Columns fed to the model, grouped by how they are preprocessed.
categorical_features = ['city', 'zip', 'type']
numerical_features = ['sqft', 'beds', 'baths']
features = categorical_features + numerical_features
target = 'price'

# Degree of the polynomial expansion applied to the numeric columns.
degree = 2
# include_bias=False: LinearRegression already fits an intercept, so the
# constant column PolynomialFeatures adds by default would be redundant.
poly_features = PolynomialFeatures(degree=degree, include_bias=False)

# handle_unknown='ignore' encodes a category that is absent from the training
# split (e.g. a rare zip code) as all zeros instead of raising at predict time.
ct = ColumnTransformer([
    ('onehot', OneHotEncoder(handle_unknown='ignore'), categorical_features),
    ('polynomial', poly_features, numerical_features)
])

X_train = df_train[features]
y_train = df_train[target]
X_test = df_test[features]
y_test = df_test[target]


# Preprocessing and regression live in one pipeline so the transformers are
# fitted on the training rows only and then reused unchanged on the test rows.
model = make_pipeline(ct, LinearRegression())
model.fit(X_train, y_train)

# Error on held-out data.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error Test: \t{mse}")

# Error on the training data, for comparison against the test error.
y_pred_train = model.predict(X_train)
mse = mean_squared_error(y_train, y_pred_train)
print(f"Mean Squared Error Train: \t{mse}")

Mean Squared Error Test: 	7950495961.564276
Mean Squared Error Train: 	6343144838.624906
