In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures, StandardScaler, RobustScaler
from sklearn.metrics import mean_squared_error, r2_score

In [2]:
df = pd.read_csv('cal_housing.csv')
df

Unnamed: 0,longitude,latitude,housingMedianAge,totalRooms,totalBedrooms,population,households,medianIncome,medianHouseValue
0,-122.23,37.88,41.0,880.0,129.0,322.0,126.0,8.3252,452600.0
1,-122.22,37.86,21.0,7099.0,1106.0,2401.0,1138.0,8.3014,358500.0
2,-122.24,37.85,52.0,1467.0,190.0,496.0,177.0,7.2574,352100.0
3,-122.25,37.85,52.0,1274.0,235.0,558.0,219.0,5.6431,341300.0
4,-122.25,37.85,52.0,1627.0,280.0,565.0,259.0,3.8462,342200.0
...,...,...,...,...,...,...,...,...,...
20635,-121.09,39.48,25.0,1665.0,374.0,845.0,330.0,1.5603,78100.0
20636,-121.21,39.49,18.0,697.0,150.0,356.0,114.0,2.5568,77100.0
20637,-121.22,39.43,17.0,2254.0,485.0,1007.0,433.0,1.7000,92300.0
20638,-121.32,39.43,18.0,1860.0,409.0,741.0,349.0,1.8672,84700.0


In [3]:
x = df.drop('medianHouseValue', axis=1).values
y = df['medianHouseValue'].values
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0, shuffle=True)

In [4]:
def TransformaryEscalar(x,grado,escalador):
    x_transformado = x.copy()
    if grado > 1:
        poly = PolynomialFeatures(degree=grado)
        x_transformado = poly.fit_transform(x_transformado)
        if escalador:
            x_transformado = escalador.fit_transform(x_transformado)
    else:
        x_transformado = escalador.fit_transform(x_transformado)
    return x_transformado

In [5]:
def Regresiones(x_train, x_test, y_train, y_test):
    grados = [2, 3]
    escaladores = [None, StandardScaler(), RobustScaler()]
    
    model = LinearRegression()
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    print(f'Regresión Lineal: \tMSE: {mse} \tR2: {r2}')

    for grado in grados:
        for escalador in escaladores:
            x_train_poly = TransformaryEscalar(x_train, grado, escalador)
            x_test_poly = TransformaryEscalar(x_test, grado, escalador)
            model = LinearRegression()
            model.fit(x_train_poly, y_train)
            y_pred = model.predict(x_test_poly)
            mse = mean_squared_error(y_test, y_pred)
            r2 = r2_score(y_test, y_pred)
            print(f'Regresión Polinomial de grado {grado} con escalador {escalador}: \tMSE: {mse} \tR2: {r2}')
            
Regresiones(x_train, x_test, y_train, y_test)


Regresión Lineal: 	MSE: 4853781771.947972 	R2: 0.6277645980446447
Regresión Polinomial de grado 2 con escalador None: 	MSE: 4076152899.55463 	R2: 0.687400776490149
Regresión Polinomial de grado 2 con escalador StandardScaler(): 	MSE: 4111950340.7117085 	R2: 0.6846554790037458
Regresión Polinomial de grado 2 con escalador RobustScaler(): 	MSE: 5047513814.436847 	R2: 0.6129073324946657
Regresión Polinomial de grado 3 con escalador None: 	MSE: 4167021206.2995405 	R2: 0.6804321070535295
Regresión Polinomial de grado 3 con escalador StandardScaler(): 	MSE: 8862039818.079517 	R2: 0.32037221514734526
Regresión Polinomial de grado 3 con escalador RobustScaler(): 	MSE: 8684145318357.839 	R2: -664.9850967960448
