In [1]:
import pandas as pd      # para trabajar con archivos de datos csv, excel, etc: https://pandas.pydata.org/docs/getting_started/tutorials.html
import chardet           # para detectar la codificación de texto en archivos
import numpy as np
from holoviews.examples.gallery.apps.bokeh.streaming_psutil import callback

from sklearn import preprocessing, model_selection, metrics, model_selection
from sklearn.model_selection import train_test_split


import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Dense, Flatten, Input
from tensorflow.keras import optimizers

from matplotlib import pyplot as plt
import plotly.express as px

nombre_archivo = '../Datos/Autos.csv' # archivo de hojas

#-- detectando la codificación de caracteres usada ----
with open(nombre_archivo, 'rb') as f:
    result = chardet.detect(f.read())  # or readline if the file is large

# recupera el archivo en un objeto dataframe de pandas utilizando la codificación detectada
# %% Carga de dataset con formato R, G, B, Color
df = pd.read_csv(nombre_archivo, encoding=result['encoding'])

df = df.select_dtypes(include=[np.number])
# Verificar los valores faltantes
print(df.isnull().sum())
df.head()


normalized-losses    41
wheel-base            0
length                0
width                 0
height                0
curb-weight           0
engine-size           0
bore                  4
stroke                4
compression-ratio     0
horsepower            2
peak-rpm              2
city-mpg              0
highway-mpg           0
price                 4
symboling             0
dtype: int64


Unnamed: 0,normalized-losses,wheel-base,length,width,height,curb-weight,engine-size,bore,stroke,compression-ratio,horsepower,peak-rpm,city-mpg,highway-mpg,price,symboling
0,,88.6,168.8,64.1,48.8,2548,130,3.47,2.68,9.0,111.0,5000.0,21,27,13495.0,3
1,,88.6,168.8,64.1,48.8,2548,130,3.47,2.68,9.0,111.0,5000.0,21,27,16500.0,3
2,,94.5,171.2,65.5,52.4,2823,152,2.68,3.47,9.0,154.0,5000.0,19,26,16500.0,1
3,164.0,99.8,176.6,66.2,54.3,2337,109,3.19,3.4,10.0,102.0,5500.0,24,30,13950.0,2
4,164.0,99.4,176.6,66.4,54.3,2824,136,3.19,3.4,8.0,115.0,5500.0,18,22,17450.0,2


In [2]:
# Completar valores faltantes para columnas numéricas
for column in df.select_dtypes(include=np.number).columns:
    df[column].fillna(df[column].mean(), inplace=True)

# Verificamos que este todo bien
print(df.isnull().sum())

# Separar las características (X) y las etiquetas (y)
X = df.drop(columns=['price', 'highway-mpg'])
y = df[['price', 'highway-mpg']]

# Convertir variables categóricas en variables dummy
X = pd.get_dummies(X, drop_first=True)

# Normalizar las características
scaler = preprocessing.StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)


normalized-losses    0
wheel-base           0
length               0
width                0
height               0
curb-weight          0
engine-size          0
bore                 0
stroke               0
compression-ratio    0
horsepower           0
peak-rpm             0
city-mpg             0
highway-mpg          0
price                0
symboling            0
dtype: int64


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[column].fillna(df[column].mean(), inplace=True)


In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping

# Cargar y preparar tus datos
# (Ejemplo: df es tu DataFrame)

# Supongamos que ya tienes tus características X y etiquetas y definidas
X = df.drop(columns=['price', 'highway-mpg'])  # Ajusta según tus columnas
y = df[['price', 'highway-mpg']]

# Dividir en conjunto de entrenamiento y validación
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalizar las características
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Definir el modelo
modelo = Sequential()
modelo.add(Dense(64, activation='relu', input_shape=(X_train.shape[1],)))
modelo.add(Dense(2, activation='linear'))  # Ajusta la salida según tu problema

# Compilar el modelo
modelo.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

# Configurar el callback de parada temprana
early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Entrenar el modelo con parada temprana
history = modelo.fit(X_train, y_train, validation_data=(X_val, y_val),
                      epochs=1000, batch_size=32, callbacks=[early_stop])


Epoch 1/1000


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - loss: 115000040.0000 - mae: 6617.3843 - val_loss: 129854168.0000 - val_mae: 6756.5166
Epoch 2/1000
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 112482344.0000 - mae: 6508.7847 - val_loss: 129850280.0000 - val_mae: 6756.3862
Epoch 3/1000
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 103373680.0000 - mae: 6312.1450 - val_loss: 129846496.0000 - val_mae: 6756.2568
Epoch 4/1000
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 115680096.0000 - mae: 6551.0166 - val_loss: 129842704.0000 - val_mae: 6756.1250
Epoch 5/1000
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 105615304.0000 - mae: 6426.7437 - val_loss: 129838976.0000 - val_mae: 6755.9917
Epoch 6/1000
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 118048176.0000 - mae: 6633.7432 - val_loss: 129834912.0000 -