# Predecir datos de suelo actuales con PyTorch

Este notebook implementa un modelo de regresión en PyTorch para predecir datos de suelo basados en datos históricos.

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Importar bibliotecas necesarias

In [None]:
import torch
import torch.nn as nn
from torch.optim import Adam
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
import pandas as pd
import numpy as np
from torch.utils.data import DataLoader, TensorDataset

## Cargar los datos

In [None]:
data = pd.read_csv('/content/drive/MyDrive/datos_limpios/soil_data_guatemala_limpios.csv')

data.head()

Unnamed: 0,municipio,fecha,nitrogen,phosphorus,potassium,ph
0,"Mazatenango, Suchitepéquez",2019-01-01,0.89,8.83,146.92,7.71
1,"Chicacao, Suchitepéquez",2019-01-01,1.21,7.07,179.13,5.87
2,"San Antonio, Suchitepéquez",2019-01-01,0.81,6.38,255.3,7.31
3,"Quetzaltenango, Quetzaltenango",2019-01-01,1.32,13.23,461.49,7.76
4,"Cantel, Quetzaltenango",2019-01-01,0.28,27.43,382.81,6.08


## Preprocesamiento: One-hot encoding y normalización

In [None]:
# One-hot encoding para la columna 'municipio'
encoder = OneHotEncoder(sparse_output=False)
municipio_encoded = encoder.fit_transform(data[['municipio']])
municipio_columns = encoder.get_feature_names_out(['municipio'])

# Concatenar las características codificadas con las otras características
encoded_municipio_df = pd.DataFrame(municipio_encoded, columns=municipio_columns)
data = pd.concat([data, encoded_municipio_df], axis=1)

# Normalizar las columnas numéricas
scaler = StandardScaler()
data[['nitrogen', 'phosphorus', 'potassium', 'ph']] = scaler.fit_transform(data[['nitrogen', 'phosphorus', 'potassium', 'ph']])

# Mostrar las primeras filas para comprobar el preprocesamiento
data.head()

Unnamed: 0,municipio,fecha,nitrogen,phosphorus,potassium,ph,"municipio_Cantel, Quetzaltenango","municipio_Chicacao, Suchitepéquez","municipio_Concepción Chiquirichapa, Quetzaltenango","municipio_Escuintla, Escuintla","municipio_Mazatenango, Suchitepéquez","municipio_Quetzaltenango, Quetzaltenango","municipio_San Antonio, Suchitepéquez","municipio_Santa Lucía Cotzumalguapa, Escuintla","municipio_Tiquisate, Escuintla"
0,"Mazatenango, Suchitepéquez",2019-01-01,0.211955,-1.196369,-1.320538,1.325749,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
1,"Chicacao, Suchitepéquez",2019-01-01,1.005843,-1.440312,-1.041032,-1.243249,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"San Antonio, Suchitepéquez",2019-01-01,0.013483,-1.535948,-0.380059,0.767271,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
3,"Quetzaltenango, Quetzaltenango",2019-01-01,1.278742,-0.586512,1.409178,1.395559,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
4,"Cantel, Quetzaltenango",2019-01-01,-1.301395,1.381661,0.726423,-0.950048,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## Dividir los datos en características (X) y etiquetas (y)

In [None]:
X = data[['nitrogen', 'phosphorus', 'potassium', 'ph'] + list(municipio_columns)].values
y = data[['nitrogen', 'phosphorus', 'potassium', 'ph']].values

# Convertir a tensores de PyTorch
X_tensor = torch.tensor(X, dtype=torch.float32)
y_tensor = torch.tensor(y, dtype=torch.float32)

# Dividir los datos en conjunto de entrenamiento y validación
X_train, X_val, y_train, y_val = train_test_split(X_tensor, y_tensor, test_size=0.2, random_state=42)

# Crear DataLoader para cargar los datos en lotes
train_data = TensorDataset(X_train, y_train)
val_data = TensorDataset(X_val, y_val)

train_loader = DataLoader(train_data, batch_size=128, shuffle=True)
val_loader = DataLoader(val_data, batch_size=128, shuffle=False)

## 4. Definir el modelo

In [None]:
class SoilModel(nn.Module):
    def __init__(self):
        super(SoilModel, self).__init__()
        self.fc1 = nn.Linear(4 + len(municipio_columns), 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 4)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

## Configuración del optimizador y función de pérdida

In [None]:
model = SoilModel()

# Configurar el optimizador y la función de pérdida
optimizer = Adam(model.parameters(), lr=0.001)
loss_fn = nn.MSELoss()

# Inicializar variables para early stopping
best_val_loss = float('inf')
patience = 30
patience_counter = 0

## Entrenamiento del modelo

In [None]:
# Entrenamiento del modelo
epochs = 1000  # Un número de épocas más razonable
for epoch in range(epochs):
    model.train()
    running_loss = 0.0

    # Entrenamiento
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = loss_fn(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    train_loss = running_loss / len(train_loader)

    # Validación
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for inputs, labels in val_loader:
            outputs = model(inputs)
            loss = loss_fn(outputs, labels)
            val_loss += loss.item()

    val_loss /= len(val_loader)

    print(f"Epoch {epoch+1}/{epochs}, Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}")

    # Early stopping
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        patience_counter = 0
    else:
        patience_counter += 1
        if patience_counter >= patience:
            print(f"Early stopping at epoch {epoch+1}")
            break

Epoch 1/1000, Loss: 0.2007, Validation Loss: 0.0036
Epoch 2/1000, Loss: 0.0024, Validation Loss: 0.0017
Epoch 3/1000, Loss: 0.0013, Validation Loss: 0.0010
Epoch 4/1000, Loss: 0.0009, Validation Loss: 0.0008
Epoch 5/1000, Loss: 0.0007, Validation Loss: 0.0006
Epoch 6/1000, Loss: 0.0006, Validation Loss: 0.0005
Epoch 7/1000, Loss: 0.0005, Validation Loss: 0.0005
Epoch 8/1000, Loss: 0.0004, Validation Loss: 0.0004
Epoch 9/1000, Loss: 0.0004, Validation Loss: 0.0004
Epoch 10/1000, Loss: 0.0003, Validation Loss: 0.0003
Epoch 11/1000, Loss: 0.0003, Validation Loss: 0.0003
Epoch 12/1000, Loss: 0.0003, Validation Loss: 0.0003
Epoch 13/1000, Loss: 0.0002, Validation Loss: 0.0002
Epoch 14/1000, Loss: 0.0002, Validation Loss: 0.0002
Epoch 15/1000, Loss: 0.0002, Validation Loss: 0.0002
Epoch 16/1000, Loss: 0.0002, Validation Loss: 0.0002
Epoch 17/1000, Loss: 0.0002, Validation Loss: 0.0002
Epoch 18/1000, Loss: 0.0002, Validation Loss: 0.0002
Epoch 19/1000, Loss: 0.0002, Validation Loss: 0.0002
Ep

## Evaluación y predicción

In [None]:
model.eval()
predictions = []
true_values = []

with torch.no_grad():

    for inputs, labels in val_loader:
        outputs = model(inputs)
        predictions.append(outputs.numpy())
        true_values.append(labels.numpy())

# Convertir a arrays de numpy para evaluar
predictions = np.concatenate(predictions, axis=0)
true_values = np.concatenate(true_values, axis=0)

# Calcular el error cuadrático medio (MSE) de las predicciones
mse = mean_squared_error(true_values, predictions)
print(f'Mean Squared Error: {mse:.4f}')

# Si tienes más municipios codificados, asegúrate de incluir todas las columnas one-hot necesarias
new_data = np.array([[1.0, 0.5, 0.8, 7.0] + [0, 1, 0, 0, 0, 0, 1, 0, 0]])

# Verificar el tamaño de la entrada
print("Tamaño de new_data:", new_data.shape)

# Convertir a tensor
new_data_tensor = torch.tensor(new_data, dtype=torch.float32)

# Verificar el tamaño de new_data_tensor
print("Tamaño de new_data_tensor:", new_data_tensor.shape)

# Realizar la predicción con el modelo entrenado
model.eval()
with torch.no_grad():
    prediction = model(new_data_tensor)
    print(f"Predicción para los nuevos datos: {prediction.numpy()}")

Mean Squared Error: 0.0000
Tamaño de new_data: (1, 13)
Tamaño de new_data_tensor: torch.Size([1, 13])
Predicción para los nuevos datos: [[0.9733514  0.65019834 0.8690365  6.974343  ]]


## 8. Guardar el modelo entrenado

In [None]:
# Guardar el modelo en un archivo
model_path = "soil_model.pth"
torch.save(model.state_dict(), model_path)
print(f"Modelo guardado en: {model_path}")

Modelo guardado en: soil_model.pth


## 9. Cargar el modelo guardado

In [None]:
# Crear una nueva instancia del modelo y cargar los pesos
loaded_model = SoilModel(input_size=X_train.shape[1], output_size=y_train.shape[1])
loaded_model.load_state_dict(torch.load(model_path))
loaded_model.eval()
print("Modelo cargado exitosamente.")