# Predicción con modelos de ml

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split as tts
from sklearn.linear_model import LinearRegression as LinReg

from sklearn import metrics
import numpy as np

## Preparamos los datos

In [2]:
df = pd.read_csv("datos_limpios.csv")

In [3]:
dicc_gas = {'E10':1,'SP98':0}

In [4]:
df['gas_num'] = df.gas_type.map(dicc_gas)

### Dividimos el data set en test y train y variable objetivo __CONSUMO__


In [5]:
X = df.drop(["consume","gas_type","refill gas"], axis=1)

In [6]:
X.head()

Unnamed: 0,distance,speed,temp_inside,temp_outside,AC,rain,sun,refill liters,snow,time,gas_num
0,28.0,26,21.5,12,0,0,0,45.0,0,1.076923,1
1,12.0,30,21.5,13,0,0,0,0.0,0,0.4,1
2,11.2,38,21.5,15,0,0,0,0.0,0,0.294737,1
3,12.9,36,21.5,14,0,0,0,0.0,0,0.358333,1
4,18.5,46,21.5,15,0,0,0,0.0,0,0.402174,1


In [7]:
y = df.consume

In [8]:
y.head()

0    5.0
1    4.2
2    5.5
3    3.9
4    4.5
Name: consume, dtype: float64

In [9]:
X_train, X_test, y_train, y_test = tts(X,y, test_size=0.2)

### Regresión lineal

In [10]:
linreg=LinReg()

In [11]:
linreg

LinearRegression()

In [12]:
linreg.fit(X_train, y_train)

LinearRegression()

In [13]:
linreg.coef_

array([ 0.05109789, -0.04603424, -0.06396241, -0.03586475,  0.53038224,
        0.56448863, -0.18581565, -0.01334067, -0.18084374, -3.2134378 ,
        0.11158914])

In [14]:
linreg.intercept_

8.958520748604407

In [15]:
y_pred = linreg.predict(X_test)

In [16]:
print('MAE - Error Medio Absoluto', metrics.mean_absolute_error(y_test, y_pred))
print('MSE - Error Cuadratico Medio', metrics.mean_squared_error(y_test, y_pred))
print('RMSE - Raiz Error Cuadratico Medio', np.sqrt(metrics.mean_squared_error(y_test, y_pred)))
print('R2 - Coeficiente de Determinacion', metrics.r2_score(y_test, y_pred))

MAE - Error Medio Absoluto 0.5505315372256793
MSE - Error Cuadratico Medio 0.5759304273610153
RMSE - Raiz Error Cuadratico Medio 0.7589008020558519
R2 - Coeficiente de Determinacion 0.25561677428951035


### Intentamos otros modelos

In [17]:
from sklearn.linear_model import Ridge, Lasso
from sklearn.linear_model import SGDRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.svm import SVR

In [18]:
models={
    'linreg': LinReg(),
    'ridge': Ridge(),
    'lasso': Lasso(),
    'sgd': SGDRegressor(),
    'knn': KNeighborsRegressor(),
    'grad': GradientBoostingRegressor(),
}

In [19]:
for name, model in models.items():
    print("ENTRENANDO: ", name)
    model.fit(X_train, y_train)

ENTRENANDO:  linreg
ENTRENANDO:  ridge
ENTRENANDO:  lasso
ENTRENANDO:  sgd
ENTRENANDO:  knn
ENTRENANDO:  grad


In [20]:
for name, model in models.items():
    y_pred = model.predict(X_test)
    print(f"------{name}------")
    print('MAE - ', metrics.mean_absolute_error(y_test, y_pred))
    print('MSE - ', metrics.mean_squared_error(y_test, y_pred))
    print('RMSE - ', np.sqrt(metrics.mean_squared_error(y_test, y_pred)))
    print('R2 - ', metrics.r2_score(y_test, y_pred))

------linreg------
MAE -  0.5505315372256793
MSE -  0.5759304273610153
RMSE -  0.7589008020558519
R2 -  0.25561677428951035
------ridge------
MAE -  0.5401928172430882
MSE -  0.5737699245678379
RMSE -  0.7574760224375673
R2 -  0.2584091984469056
------lasso------
MAE -  0.6097643112013893
MSE -  0.7145463782080519
RMSE -  0.8453084515181732
R2 -  0.07645730688779151
------sgd------
MAE -  866893917791.8882
MSE -  9.470786932822281e+23
RMSE -  973179681909.8866
R2 -  -1.2240879439856125e+24
------knn------
MAE -  0.4407692307692308
MSE -  0.29202564102564116
RMSE -  0.540393968346836
R2 -  0.62256033310673
------grad------
MAE -  0.3831278288744068
MSE -  0.2959447775634445
RMSE -  0.5440080675536388
R2 -  0.617494895756289


### Como conclusión no parece viable predecir con precisión el consumo de manera general. Veamos con los datos segun longitud de trayecto.

In [21]:
cd = pd.read_csv("cd_limpios.csv")
md = pd.read_csv("md_limpios.csv")
ld = pd.read_csv("ld_limpios.csv")

In [22]:
cd['gas_num'] = cd.gas_type.map(dicc_gas)
md['gas_num'] = md.gas_type.map(dicc_gas)
ld['gas_num'] = ld.gas_type.map(dicc_gas)

# cd
### Dividimos el data set en test y train y variable objetivo __CONSUMO__


In [23]:
X = cd.drop(["consume","gas_type","refill gas"], axis=1)

In [24]:
X.head()

Unnamed: 0,distance,speed,temp_inside,temp_outside,AC,rain,sun,refill liters,snow,time,lkm,gas_num
0,28.0,26,21.5,12,0,0,0,45.0,0,1.076923,17.86,1
1,12.0,30,21.5,13,0,0,0,0.0,0,0.4,35.0,1
2,11.2,38,21.5,15,0,0,0,0.0,0,0.294737,49.11,1
3,12.9,36,21.5,14,0,0,0,0.0,0,0.358333,30.23,1
4,18.5,46,21.5,15,0,0,0,0.0,0,0.402174,24.32,1


In [25]:
y = cd.consume

In [26]:
y.head()

0    5.0
1    4.2
2    5.5
3    3.9
4    4.5
Name: consume, dtype: float64

In [27]:
X_train, X_test, y_train, y_test = tts(X,y, test_size=0.2)

In [28]:
models={
    'linreg': LinReg(),
    'ridge': Ridge(),
    'lasso': Lasso(),
    'sgd': SGDRegressor(),
    'knn': KNeighborsRegressor(),
    'grad': GradientBoostingRegressor(),
}

In [29]:
for name, model in models.items():
    print("ENTRENANDO: ", name)
    model.fit(X_train, y_train)

ENTRENANDO:  linreg
ENTRENANDO:  ridge
ENTRENANDO:  lasso
ENTRENANDO:  sgd
ENTRENANDO:  knn
ENTRENANDO:  grad


In [30]:
for name, model in models.items():
    y_pred = model.predict(X_test)
    print(f"------{name}------")
    print('MAE - ', metrics.mean_absolute_error(y_test, y_pred))
    print('MSE - ', metrics.mean_squared_error(y_test, y_pred))
    print('RMSE - ', np.sqrt(metrics.mean_squared_error(y_test, y_pred)))
    print('R2 - ', metrics.r2_score(y_test, y_pred))

------linreg------
MAE -  0.3450889330847283
MSE -  0.178549460344216
RMSE -  0.42255113340779954
R2 -  0.6390928747329445
------ridge------
MAE -  0.3473138578891215
MSE -  0.16797817358166683
RMSE -  0.4098514042694826
R2 -  0.6604609186827277
------lasso------
MAE -  0.49865934479644686
MSE -  0.35880341735807175
RMSE -  0.599002017824708
R2 -  0.27474040165088576
------sgd------
MAE -  2862246527931.198
MSE -  9.067496493611171e+24
RMSE -  3011228402763.758
R2 -  -1.8328389716605072e+25
------knn------
MAE -  0.31781250000000005
MSE -  0.21581875000000003
RMSE -  0.4645629666686746
R2 -  0.5637593947858013
------grad------
MAE -  0.1854290837087971
MSE -  0.09274215322941531
RMSE -  0.30453596377015196
R2 -  0.8125376360781068
