In [3]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler, RobustScaler, PolynomialFeatures
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import SGDRegressor
from sklearn.linear_model import LinearRegression

In [4]:
df_2 = pd.read_csv('./cal_housing.csv')
X = df_2[['longitude', 'latitude', 'housingMedianAge', 'totalRooms', 'totalBedrooms', 'population', 'households', 'medianIncome']].values
y = df_2['medianHouseValue'].values
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state = 0, shuffle = True, train_size=0.8, test_size=0.2)

In [5]:
def fit_and_transform(x:np.array, degree:int, type:object):
    x_scaled = x.copy()
    if degree > 1:
        poly = PolynomialFeatures(degree=degree)
        x_scaled = poly.fit_transform(x_scaled)
        if type:
            x_scaled = type.fit_transform(x_scaled)
    else:
        x_scaled = type.fit_transform(x_scaled)
    return x_scaled

In [6]:
print("Modelo LR : MSE : R2")
model = LinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
lireg = ['Regresión Lineal', mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)]
print(lireg)

model = make_pipeline(PolynomialFeatures(degree=2, include_bias=False), LinearRegression())
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
lreg_2_data = ['Regresión Polinomial de grado 2', mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)]
print(lreg_2_data)

standard = StandardScaler()
x_train_std = fit_and_transform(X_train, 2, standard)
x_test_std = fit_and_transform(X_test, 2, standard)

model = LinearRegression()
model.fit(x_train_std, y_train)
y_pred = model.predict(x_test_std)

lreg_2_std_data = ['Grado 2 con escalamiento estándar', mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)]
print(lreg_2_std_data)

robust = RobustScaler()
x_train_rbs = fit_and_transform(X_train, 2, robust)
x_test_rbs = fit_and_transform(X_test, 2, robust)

model = LinearRegression()
model.fit(x_train_rbs, y_train)
y_pred = model.predict(x_test_rbs)


lreg_2_rbs_data = ['Grado 2 con escalamiento robusto', mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)]
print(lreg_2_rbs_data)

x_train = fit_and_transform(X_train, 3, None)
x_test = fit_and_transform(X_test, 3, None)

model = LinearRegression()
model.fit(x_train, y_train)
y_pred = model.predict(x_test)


lreg_3_data = ['Regresión Polinomial de grado 3', mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)]
print(lreg_3_data)

x_train = fit_and_transform(X_train, 3, None)
x_test = fit_and_transform(X_test, 3, None)

model = LinearRegression()
model.fit(x_train, y_train)
y_pred = model.predict(x_test)


lreg_3_data = ['Regresión Polinomial de grado 3', mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)]
standard = StandardScaler()
x_train_std = fit_and_transform(X_train, 3, standard)
x_test_std = fit_and_transform(X_test, 3, standard)

model = LinearRegression()
model.fit(x_train_std, y_train)
y_pred = model.predict(x_test_std)


lreg_3_std_data = ['Grado 3 con escalamiento estándar', mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)]
print(lreg_3_std_data)

robust = RobustScaler()
x_train_rbs = fit_and_transform(X_train, 3, robust)
x_test_rbs = fit_and_transform(X_test, 3, robust)

model = LinearRegression()
model.fit(x_train_rbs, y_train)
y_pred = model.predict(x_test_rbs)


lreg_3_rbs_data = ['Grado 3 con escalamiento robusto', mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)]
print(lreg_3_rbs_data)

Modelo LR : MSE : R2
['Regresión Lineal', 4853781771.94797, 0.6277645980446447]
['Regresión Polinomial de grado 2', 4076152912.267421, 0.6874007755152081]
['Grado 2 con escalamiento estándar', 4111950340.711703, 0.6846554790037462]
['Grado 2 con escalamiento robusto', 5047513814.436849, 0.6129073324946656]
['Regresión Polinomial de grado 3', 4127511615.803442, 0.6834620884145384]
['Grado 3 con escalamiento estándar', 8862039818.049318, 0.3203722151496613]
['Grado 3 con escalamiento robusto', 8684145318246.899, -664.9850967875368]


### Resultados

In [28]:
data = [lireg, lreg_2_data, lreg_2_std_data, lreg_2_rbs_data, lreg_3_data, lreg_3_std_data, lreg_3_rbs_data]
results = pd.DataFrame(data, columns = ['Model', 'MSE', 'R^2'])
results

Unnamed: 0,Model,MSE,R^2
0,Regresión Lineal,4853782000.0,0.627765
1,Regresión Polinomial de grado 2,4076153000.0,0.687401
2,Grado 2 con escalamiento estándar,4111950000.0,0.684655
3,Grado 2 con escalamiento robusto,5047514000.0,0.612907
4,Regresión Polinomial de grado 3,4127512000.0,0.683462
5,Grado 3 con escalamiento estándar,8862040000.0,0.320372
6,Grado 3 con escalamiento robusto,8684145000000.0,-664.985097
