In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
plt.style.use('seaborn-dark')

In [3]:
vehiculos = pd.read_csv("vehiculos_procesado.csv", usecols = ["consumo","cilindros","desplazamiento", "co2"])
litros_por_galon = 3.78541
vehiculos["consumo_litros_milla"] = litros_por_galon / vehiculos.consumo                        

In [4]:
vehiculos.shape

(35539, 5)

In [5]:
vehiculos.head()

Unnamed: 0,desplazamiento,cilindros,consumo,co2,consumo_litros_milla
0,2.5,4.0,17,522.764706,0.222671
1,4.2,6.0,13,683.615385,0.291185
2,2.5,4.0,16,555.4375,0.236588
3,4.2,6.0,13,683.615385,0.291185
4,3.8,6.0,16,555.4375,0.236588


In [6]:
from IPython.display import Image
#mostrar imagen de formula de regresión lineal

In [8]:
pct_entranamiento = 0.8
vehiculos_train = vehiculos.sample(frac = pct_entranamiento)
vehiculos_test = vehiculos[~vehiculos.index.isin(vehiculos_train.index)]

print(vehiculos_train.shape)
print(vehiculos_test.shape)

(28431, 5)
(7108, 5)


In [9]:
variables_independientes = ["desplazamiento","cilindros", "consumo_litros_milla"]
variable_dependiente ='co2'

X = vehiculos_train[variables_independientes].values
y = vehiculos_train[variable_dependiente].values

In [10]:
X

array([[2.        , 4.        , 0.13519321],
       [1.6       , 4.        , 0.14020037],
       [3.7       , 6.        , 0.21030056],
       ...,
       [3.5       , 6.        , 0.1514164 ],
       [3.3       , 6.        , 0.22267118],
       [4.        , 6.        , 0.23658813]])

In [11]:
y

array([317.        , 329.14814815, 493.72222222, ..., 358.        ,
       522.76470588, 555.4375    ])

In [12]:
X_T = X.T
X_T

array([[2.        , 1.6       , 3.7       , ..., 3.5       , 3.3       ,
        4.        ],
       [4.        , 4.        , 6.        , ..., 6.        , 6.        ,
        6.        ],
       [0.13519321, 0.14020037, 0.21030056, ..., 0.1514164 , 0.22267118,
        0.23658813]])

In [15]:
# Calculos de los coeficientes Beta
betas = np.linalg.inv(X_T @ X) @ X_T @ y
betas

array([ 3.73703682e+00, -6.20494994e-01,  2.30702088e+03])

In [16]:
# Calculo de Alfa
alfa = y.mean() - np.dot(betas,vehiculos_train[variables_independientes].mean().values)
alfa

0.35689448820812686

In [18]:
def predecir(r):
    return alfa + np.dot(betas, r.values)

In [19]:
vehiculos_train["co2_pred"] = vehiculos_train[variables_independientes].apply(
predecir,axis = 1)

In [20]:
vehiculos_train

Unnamed: 0,desplazamiento,cilindros,consumo,co2,consumo_litros_milla,co2_pred
19973,2.0,4.0,28,317.000000,0.135193,317.242557
26520,1.6,4.0,27,329.148148,0.140200,327.299356
19670,3.7,6.0,18,493.722222,0.210301,495.628734
23437,3.2,6.0,21,423.190476,0.180258,424.450534
26391,4.0,6.0,16,555.437500,0.236588,557.395817
...,...,...,...,...,...,...
14991,4.3,6.0,16,555.437500,0.236588,558.516928
3120,4.9,8.0,15,592.466667,0.252361,595.905743
259,3.5,6.0,25,358.000000,0.151416,359.034350
26134,3.3,6.0,17,522.764706,0.222671,522.673200


In [21]:
vehiculos_test["co2_pred"] = vehiculos_test[variables_independientes].apply(
predecir,axis = 1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  vehiculos_test["co2_pred"] = vehiculos_test[variables_independientes].apply(


In [22]:
vehiculos_test

Unnamed: 0,desplazamiento,cilindros,consumo,co2,consumo_litros_milla,co2_pred
3,4.2,6.0,13,683.615385,0.291185,684.100242
4,3.8,6.0,16,555.437500,0.236588,556.648409
8,2.3,4.0,22,403.954545,0.172064,403.425550
10,3.0,6.0,20,444.350000,0.189271,444.496031
14,2.5,5.0,20,444.350000,0.189271,443.248007
...,...,...,...,...,...,...
35502,2.0,4.0,25,353.000000,0.151416,354.669785
35520,5.6,8.0,13,683.615385,0.291185,688.091104
35522,5.6,8.0,13,683.615385,0.291185,688.091104
35526,5.6,8.0,13,683.615385,0.291185,688.091104


In [25]:
#Encontrar Error
def error_cuadriatico_medio(y,y_pred):
    return np.sum((y-y_pred)**2)/len(y)

In [26]:
error_test = error_cuadriatico_medio(vehiculos_test.co2,vehiculos_test.co2_pred )
error_test

137.77152491881418