In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn import datasets

# Normalización
from sklearn.preprocessing import MinMaxScaler

# Train, Test
from sklearn.model_selection import train_test_split

# Métricas
from sklearn.metrics import jaccard_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import roc_auc_score

from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score

In [2]:
# Read the dataset from disk and separate the regression target from the
# feature columns. NOTE(review): the file name suggests the data has already
# been preprocessed and "Precio_log" looks like a log-transformed price --
# confirm against the notebook that produced the CSV.
df = pd.read_csv('../data/datos_procesados.csv')

# Target column first, then every remaining column as a feature.
y = df["Precio_log"]
X = df.drop(columns=["Precio_log"])

In [3]:
# Feature scaling (min-max).
#
# The scaler is fitted on the training rows only, so the minimum/maximum of the
# held-out rows never influence the transformation (no test-set leakage).
# The split parameters used here must stay identical to the ones passed to
# train_test_split in the split cell below (test_size = 0.3, random_state = 42):
# for the same number of rows they select exactly the same training rows.
#
# Expects X to still be the feature DataFrame produced by the loading cell
# (run the notebook top to bottom).
train_rows, _ = train_test_split(np.arange(len(X)), test_size = 0.3, random_state = 42)

x_scaler = MinMaxScaler()
x_scaler.fit(X.iloc[train_rows])

# Every row is transformed with the training-set statistics, so held-out rows
# may fall slightly outside the [0, 1] interval.
X = x_scaler.transform(X)

X

array([[0.82142857, 1.        , 0.12121212, ..., 0.        , 0.        ,
        0.        ],
       [0.46428571, 0.33333333, 0.08080808, ..., 0.        , 0.        ,
        0.        ],
       [0.39285714, 1.        , 0.14848485, ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.85714286, 1.        , 0.06565657, ..., 0.        , 0.        ,
        0.        ],
       [0.82142857, 1.        , 0.0959596 , ..., 0.        , 0.        ,
        0.        ],
       [0.82142857, 1.        , 0.09090909, ..., 0.        , 0.        ,
        0.        ]])

In [4]:
# Hold out 30% of the rows for evaluation; the fixed seed keeps the partition
# identical from one run to the next.
split_parts = train_test_split(X, y, test_size = 0.3, random_state = 42)
X_train, X_test, y_train, y_test = split_parts

# Report the shape of every partition (one line for train, one for test).
print(
    f"X_train: {X_train.shape}, y_train: {y_train.shape}",
    f"X_test: {X_test.shape},  y_test: {y_test.shape}",
    sep = "\n",
)

X_train: (36443, 40), y_train: (36443,)
X_test: (15619, 40),  y_test: (15619,)


In [5]:
from sklearn.ensemble import AdaBoostRegressor
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC



In [6]:
# AdaBoost ensemble with 50 boosting rounds, trained on the training partition.
# random_state is pinned because AdaBoostRegressor draws random numbers while
# fitting; without a seed every run produces a slightly different model and
# therefore different metrics in the cells below.
model = AdaBoostRegressor(n_estimators = 50, random_state = 42)
model.fit(X_train, y_train)

In [7]:
# Predict the target for the test features; the bare name on the last line
# makes the notebook display the resulting array.
yhat = model.predict(X = X_test)

yhat

array([10.13608252,  9.57307788,  9.70277311, ..., 10.8052002 ,
        9.81770317, 10.00968606])

In [8]:
# Model residuals and deviations of the mean-only baseline, both on the test set
residuals = np.subtract(y_test, yhat)
baseline_dev = np.subtract(y_test, np.mean(y_test))

# Relative Absolute Error: total |residual| relative to the baseline's total |deviation|
RAE = np.sum(np.abs(residuals)) / np.sum(np.abs(baseline_dev))

# Relative Square Error: same ratio, with squared terms
RSE = np.sum(np.square(residuals)) / np.sum(np.square(baseline_dev))

# Adjusted R**2: corrects the test-set score for the number of feature columns
n_obs = len(y_test)
n_features = X_test.shape[1]
r2_test = model.score(X_test, y_test)
r2_ajustada = 1 - (1 - r2_test) * (n_obs - 1) / (n_obs - n_features - 1)

In [9]:
# Compute the standard regression metrics once, then print the full report
# (one metric per line) together with the relative metrics computed earlier.
mae = mean_absolute_error(y_test, yhat)
mse = mean_squared_error(y_test, yhat)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, yhat)

print(
    f"MAE:\t {mae}",
    f"MSE:\t {mse}",
    f"RMSE:\t {rmse}",
    f"R**2:\t {r2}",
    f"RAE:\t {RAE}",
    f"RSE:\t {RSE}",
    f"Adjusted R**2:\t {r2_ajustada}",
    sep = "\n",
)

MAE:	 0.23772574110746156
MSE:	 0.09266707854846809
RMSE:	 0.3044126780350452
R**2:	 0.6699513745496679
RAE:	 0.6063265374639092
RSE:	 0.33004862545033214
Adjusted R**2:	 0.6691039008676796
