In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
# se utiliza para codificar variables categóricas en variables numéricas
from sklearn.preprocessing import LabelEncoder

In [None]:
# Read the smartwatch price dataset from its CSV file into a DataFrame.
ruta_csv = '/content/Smart watch prices.csv'
data = pd.read_csv(ruta_csv)

In [None]:
# Print column names, non-null counts and dtypes of the freshly loaded frame.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 379 entries, 0 to 378
Data columns (total 13 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   Brand                      378 non-null    object 
 1   Model                      378 non-null    object 
 2   Operating System           376 non-null    object 
 3   Connectivity               378 non-null    object 
 4   Display Type               377 non-null    object 
 5   Display Size (inches)      376 non-null    float64
 6   Resolution                 375 non-null    object 
 7   Water Resistance (meters)  378 non-null    object 
 8   Battery Life (days)        378 non-null    object 
 9   Heart Rate Monitor         378 non-null    object 
 10  GPS                        378 non-null    object 
 11  NFC                        378 non-null    object 
 12  Price (USD)                378 non-null    object 
dtypes: float64(1), object(12)
memory usage: 38.6+ KB


In [None]:
# Clean the price text so it can later be parsed as a number.
columnas_eliminar = ['Price (USD)']
simbolo = '$'
# Thousands separator: values such as "1,299" cannot be parsed by
# pd.to_numeric and would silently become NaN under errors='coerce'.
separador_miles = ','

# The loop runs once because the list holds a single column name.
for columna in columnas_eliminar:
    # regex=False makes pandas treat "$" and "," as literal text rather than
    # as regular-expression syntax ("$" is an end-of-string anchor in a regex).
    data[columna] = (
        data[columna]
        .str.replace(simbolo, '', regex=False)
        .str.replace(separador_miles, '', regex=False)
        .str.strip()  # drop stray leading/trailing whitespace as well
    )

In [None]:
# Convert the price text column into a numeric column with a shorter name.
columna_categorica = 'Price (USD)'
columna_numerica = 'Price'

# errors='coerce' replaces every value that cannot be parsed as a number
# with NaN instead of raising.
precios_numericos = pd.to_numeric(data[columna_categorica], errors='coerce')
data[columna_numerica] = precios_numericos

# Remove the original text column; inplace=True mutates `data` directly.
data.drop(columns=[columna_categorica], inplace=True)

In [None]:
# Categorical variables: names of every column whose dtype is still `object`.
columnas_categoricas = data.select_dtypes(include='object').columns

In [None]:
# Encode every text column as integer codes.
for columna in columnas_categoricas:
    # A fresh encoder per column, so each column gets its own code table.
    le = LabelEncoder()
    # fit_transform learns the distinct values and maps each one to an integer.
    codigos = le.fit_transform(data[columna])
    data[columna] = codigos

In [None]:
# Re-inspect dtypes and non-null counts after the categorical columns were encoded.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 379 entries, 0 to 378
Data columns (total 13 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   Brand                      379 non-null    int64  
 1   Model                      379 non-null    int64  
 2   Operating System           379 non-null    int64  
 3   Connectivity               379 non-null    int64  
 4   Display Type               379 non-null    int64  
 5   Display Size (inches)      376 non-null    float64
 6   Resolution                 379 non-null    int64  
 7   Water Resistance (meters)  379 non-null    int64  
 8   Battery Life (days)        379 non-null    int64  
 9   Heart Rate Monitor         379 non-null    int64  
 10  GPS                        379 non-null    int64  
 11  NFC                        379 non-null    int64  
 12  Price                      372 non-null    float64
dtypes: float64(2), int64(11)
memory usage: 38.6 KB


In [None]:
# Fill the remaining missing values in the two float columns.
columnas_especificas = ['Price', 'Display Size (inches)']
subconjunto = data[columnas_especificas]

# Column-wise mean; NaN entries are skipped by default.
media_columnas = subconjunto.mean()

# Each NaN is replaced by the mean of its own column.
# NOTE(review): 'Price' is later used as the regression target, so this also
# imputes target values - confirm that is intended.
data[columnas_especificas] = subconjunto.fillna(media_columnas)

In [None]:
# Confirm that no column has missing values after the mean fill.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 379 entries, 0 to 378
Data columns (total 13 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   Brand                      379 non-null    int64  
 1   Model                      379 non-null    int64  
 2   Operating System           379 non-null    int64  
 3   Connectivity               379 non-null    int64  
 4   Display Type               379 non-null    int64  
 5   Display Size (inches)      379 non-null    float64
 6   Resolution                 379 non-null    int64  
 7   Water Resistance (meters)  379 non-null    int64  
 8   Battery Life (days)        379 non-null    int64  
 9   Heart Rate Monitor         379 non-null    int64  
 10  GPS                        379 non-null    int64  
 11  NFC                        379 non-null    int64  
 12  Price                      379 non-null    float64
dtypes: float64(2), int64(11)
memory usage: 38.6 KB


In [None]:
# Split the frame into the feature matrix (X) and the target vector (y).
columnas_entrada = [
    'Brand',
    'Model',
    'Operating System',
    'Connectivity',
    'Display Type',
    'Display Size (inches)',
    'Resolution',
    'Water Resistance (meters)',
    'Battery Life (days)',
    'Heart Rate Monitor',
    'GPS',
    'NFC',
]
X = torch.tensor(data[columnas_entrada].to_numpy(), dtype=torch.float32)
y = torch.tensor(data['Price'].to_numpy(), dtype=torch.float32)

In [None]:
# Show the feature tensor.
print(X)

tensor([[  1., 127.,  34.,  ...,   0.,   1.,   1.],
        [ 30.,  36.,  31.,  ...,   0.,   1.,   1.],
        [  8., 105.,   9.,  ...,   0.,   1.,   0.],
        ...,
        [  9., 119.,  12.,  ...,   0.,   1.,   1.],
        [ 26., 118.,   5.,  ...,   0.,   0.,   1.],
        [ 35.,  71.,  31.,  ...,   0.,   1.,   1.]])


In [None]:
# Show the frame summary again; building the tensors does not modify `data`.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 379 entries, 0 to 378
Data columns (total 13 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   Brand                      379 non-null    int64  
 1   Model                      379 non-null    int64  
 2   Operating System           379 non-null    int64  
 3   Connectivity               379 non-null    int64  
 4   Display Type               379 non-null    int64  
 5   Display Size (inches)      379 non-null    float64
 6   Resolution                 379 non-null    int64  
 7   Water Resistance (meters)  379 non-null    int64  
 8   Battery Life (days)        379 non-null    int64  
 9   Heart Rate Monitor         379 non-null    int64  
 10  GPS                        379 non-null    int64  
 11  NFC                        379 non-null    int64  
 12  Price                      379 non-null    float64
dtypes: float64(2), int64(11)
memory usage: 38.6 KB


In [None]:
# Regression model definition
class RegressionModel(nn.Module):
    """Linear regression: a single fully connected layer with one output.

    Args:
        input_size: number of input features per sample.
    """

    def __init__(self, input_size):
        super().__init__()
        # Maps `input_size` features to a single predicted value.
        self.linear = nn.Linear(in_features=input_size, out_features=1)

    def forward(self, x):
        """Return the linear layer's output for `x`."""
        prediccion = self.linear(x)
        return prediccion

In [None]:
# Build the model with one input per feature column of X.
input_size = X.size(1)
model = RegressionModel(input_size=input_size)

# Mean squared error as the loss, Adam as the optimizer.
criterion = nn.MSELoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

In [None]:
# Train the model with mini-batch gradient descent.
num_epochs = 100
batch_size = 32

# Make sure the model is in training mode, even if this cell is re-run after
# a later cell has switched it to evaluation mode.
model.train()

for epoch in range(num_epochs):
    # Sum of per-batch mean losses, each weighted by its batch size, so the
    # epoch figure is the mean over all samples (the last batch is smaller
    # whenever len(X) is not a multiple of batch_size).
    epoch_loss = 0.0

    # Walk over the data in consecutive slices of `batch_size` rows.
    for i in range(0, len(X), batch_size):
        batch_X = X[i:i+batch_size]
        batch_y = y[i:i+batch_size]

        # Forward pass; the target is reshaped to a column so it matches the
        # model output instead of being broadcast against it.
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y.view(-1, 1))

        # Backward pass and weight update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item() * len(batch_X)

    # Report the mean loss over the whole epoch every 10 epochs, rather than
    # the loss of whichever batch happened to come last.
    if (epoch+1) % 10 == 0:
        mean_loss = epoch_loss / max(len(X), 1)
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {mean_loss:.4f}')

Epoch [10/100], Loss: 100220.6094
Epoch [20/100], Loss: 91338.3672
Epoch [30/100], Loss: 83450.5547
Epoch [40/100], Loss: 76479.5625
Epoch [50/100], Loss: 70349.9609
Epoch [60/100], Loss: 64989.2266
Epoch [70/100], Loss: 60327.6797
Epoch [80/100], Loss: 56298.3945
Epoch [90/100], Loss: 52837.1602
Epoch [100/100], Loss: 49882.4805


In [None]:
# Positional index of the row to inspect.
fila_deseada = 11

# Guard against asking for a row past the end of the frame.
if fila_deseada >= len(data):
    print("La fila no existe")
else:
    fila_completa = data.iloc[fila_deseada].tolist()
    print(fila_completa)

[37.0, 57.0, 35.0, 0.0, 0.0, 1.2, 25.0, 4.0, 13.0, 0.0, 0.0, 0.0, 179.0]


In [None]:
# Feature values for a single sample.
# NOTE(review): these look like the first 12 values of the row printed by the
# previous cell (everything except the price) - confirm.
valores_entrada = [
    37.0, 57.0, 35.0, 0.0, 0.0, 1.2,
    25.0, 4.0, 13.0, 0.0, 0.0, 0.0,
]
X_input = torch.tensor(valores_entrada, dtype=torch.float32)

# Switch to evaluation mode and predict without tracking gradients.
model.eval()
with torch.no_grad():
    y_pred = model(X_input)

print(f'Precio predicho: {y_pred.item()}')

Precio predicho: 181.4658660888672
