In [1]:
import pandas as pd

# Cargando el dataset
data = pd.read_csv('./data/train_clean.csv')

# Mostrando las primeras filas del dataset para tener una idea del contenido
data.head()

data['extent'] = data['extent']/ 10


In [2]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

# Seleccionando las características y la variable objetivo
X = data[['growth_stage', 'damage']]
y = data['extent']

# Realizando one-hot encoding para las características categóricas
encoder = OneHotEncoder(drop='first', sparse=False)
X_encoded = encoder.fit_transform(X)

# Dividiendo el conjunto de datos en conjuntos de entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(X_encoded, y, test_size=0.2, random_state=42)

X_train.shape, X_test.shape




((20856, 10), (5215, 10))

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

# Convirtiendo los datos a tensores de PyTorch
X_train_tensor = torch.FloatTensor(X_train)
y_train_tensor = torch.FloatTensor(y_train.values).view(-1, 1)
X_test_tensor = torch.FloatTensor(X_test)
y_test_tensor = torch.FloatTensor(y_test.values).view(-1, 1)

# Creando conjuntos de datos y cargadores de datos
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

X_train_tensor.shape, y_train_tensor.shape


(torch.Size([20856, 10]), torch.Size([20856, 1]))

In [4]:
class ClassificationNN(nn.Module):
    def __init__(self):
        super(ClassificationNN, self).__init__()
        self.fc1 = nn.Linear(X_train.shape[1], 64)  # Capa oculta
        self.fc2 = nn.Linear(64, 11)  # Capa de salida con 11 neuronas

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x



In [5]:
model = ClassificationNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [6]:
y_train_indices = (y_train / 10).astype(int)
y_test_indices = (y_test / 10).astype(int)

y_train_tensor = torch.LongTensor(y_train_indices.values)
y_test_tensor = torch.LongTensor(y_test_indices.values)


In [7]:
from torch.utils.tensorboard import SummaryWriter

# Crear un SummaryWriter para escribir a la carpeta './runs'
writer = SummaryWriter('runs/classification_nn')


In [8]:
def evaluate_model(model, test_loader, criterion):
    """
    Evalúa el modelo en el conjunto de validación.
    """
    model.eval()  # Pone el modelo en modo de evaluación
    total_loss = 0.0
    with torch.no_grad():  # Desactiva el cálculo de gradientes durante la evaluación
        for data, target in test_loader:
            outputs = model(data)
            loss = criterion(outputs, target)
            total_loss += loss.item()
    average_loss = total_loss / len(test_loader)
    return average_loss


In [9]:
y_train_tensor = torch.LongTensor(y_train.values)  # Not .view(-1, 1)
y_test_tensor = torch.LongTensor(y_test.values)    # Not .view(-1, 1)
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)


In [10]:
num_epochs = 1000
# Training loop modificado para incluir "validation loss":
for epoch in range(num_epochs):
    model.train()  # Pone el modelo en modo de entrenamiento
    for batch_idx, (data, target) in enumerate(train_loader):
        optimizer.zero_grad()
        outputs = model(data)
        loss = criterion(outputs, target)
        loss.backward()
        optimizer.step()

    # Evaluar el "validation loss"
    val_loss = evaluate_model(model, test_loader, criterion)

    # Imprimir la pérdida cada 10 épocas
    if epoch % 100 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Training Loss: {loss.item():.4f}, Validation Loss: {val_loss:.4f}')
        
        # Escribir en TensorBoard
        writer.add_scalar('training loss', loss.item(), global_step= epoch)
        writer.add_scalar('validation loss', val_loss, global_step=epoch)
        writer.add_scalars('Losses', {
        'Training Loss': loss.item(),
        'Validation Loss': val_loss
        }, global_step=epoch)


Epoch [1/1000], Training Loss: 1.1935, Validation Loss: 0.9764
Epoch [101/1000], Training Loss: 1.1496, Validation Loss: 0.9473
Epoch [201/1000], Training Loss: 0.9797, Validation Loss: 0.9507
Epoch [301/1000], Training Loss: 0.8369, Validation Loss: 0.9528
Epoch [401/1000], Training Loss: 0.8397, Validation Loss: 0.9562
Epoch [501/1000], Training Loss: 1.0711, Validation Loss: 0.9604
Epoch [601/1000], Training Loss: 0.9531, Validation Loss: 0.9631
Epoch [701/1000], Training Loss: 1.0963, Validation Loss: 0.9653
Epoch [801/1000], Training Loss: 1.0105, Validation Loss: 0.9664
Epoch [901/1000], Training Loss: 0.9771, Validation Loss: 0.9695


In [11]:
# Evaluar un dato
# Assuming `data` is a single data point and `target` is the corresponding target value

ejemplo = {
   # "filename": "L355F02268C02S08916Rp30595.jpg",
    "growth_stage": "F",
    "damage":"DR",
   # "season":"LR2021"
}

new_data = pd.DataFrame([ejemplo])
new_data.head()





Unnamed: 0,growth_stage,damage
0,F,DR


In [12]:
new_data_encoded = encoder.transform(new_data)

new_data_encoded.shape

(1, 10)

In [14]:
# 1. Preprocesar el nuevo dato

# Realizar one-hot encoding con el encoder ya entrenado
new_data_encoded = encoder.transform(new_data)

# Convertir el numpy array a un tensor de PyTorch
new_data_tensor = torch.FloatTensor(new_data_encoded)

# 2. Realizar una predicción con el modelo

# Por ejemplo, para evaluar:
model.eval()
with torch.no_grad():
    outputs = model(new_data_tensor)
    probabilities = torch.nn.functional.softmax(outputs, dim=1)
    predicted_class = torch.argmax(probabilities, dim=1)
    predicted_label = predicted_class * 10  # Convertir índice de clase nuevamente a la etiqueta original
    print(predicted_label)


tensor([10])
