In [2]:
import pandas as pd

# Cargando el dataset
data = pd.read_csv('./data/train_clean.csv')

# Mostrando las primeras filas del dataset para tener una idea del contenido
data.head()


Unnamed: 0,filename,growth_stage,damage,extent,season
0,L398F01276C01S07896Rp26856.jpg,F,DR,0,LR2021
1,L355F02268C02S08916Rp30595.jpg,F,DR,0,LR2021
2,L134F00879C39S14248Rp42482.jpg,F,DR,10,SR2021
3,L1095F01359C01S08260Rp24587.jpg,F,DR,10,LR2021
4,L134F00777C39S14171Rp45993.jpg,F,DR,10,SR2021


In [3]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

# Seleccionando las características y la variable objetivo
X = data[['growth_stage', 'damage']]
y = data['extent']

# Realizando one-hot encoding para las características categóricas
encoder = OneHotEncoder(drop='first', sparse=False)
X_encoded = encoder.fit_transform(X)

# Dividiendo el conjunto de datos en conjuntos de entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(X_encoded, y, test_size=0.2, random_state=42)

X_train.shape, X_test.shape




((20856, 10), (5215, 10))

In [4]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Entrenando un modelo de regresión lineal
lr_model = LinearRegression()
lr_model.fit(X_train, y_train)

# Predicciones en el conjunto de prueba
y_pred = lr_model.predict(X_test)

# Calculando el error cuadrático medio (MSE) para evaluar el rendimiento del modelo
mse = mean_squared_error(y_test, y_pred)

mse


182.84969544019617

In [5]:
from sklearn.ensemble import RandomForestRegressor

# Entrenando un modelo de Random Forest
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# Predicciones en el conjunto de prueba
y_pred_rf = rf_model.predict(X_test)

# Calculando el error cuadrático medio (MSE) para evaluar el rendimiento del modelo
mse_rf = mean_squared_error(y_test, y_pred_rf)

mse_rf


180.7766557051277

In [7]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

# Convirtiendo los datos a tensores de PyTorch
X_train_tensor = torch.FloatTensor(X_train)
y_train_tensor = torch.FloatTensor(y_train.values).view(-1, 1)
X_test_tensor = torch.FloatTensor(X_test)
y_test_tensor = torch.FloatTensor(y_test.values).view(-1, 1)

# Creando conjuntos de datos y cargadores de datos
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

X_train_tensor.shape, y_train_tensor.shape


(torch.Size([20856, 10]), torch.Size([20856, 1]))

In [8]:
class SimpleNN(nn.Module):
    def __init__(self):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(10, 64)  # Capa oculta
        self.fc2 = nn.Linear(64, 1)   # Capa de salida

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x


In [21]:
model = SimpleNN()
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [20]:
from torch.utils.tensorboard import SummaryWriter

# Crear un SummaryWriter para escribir a la carpeta './runs'
writer = SummaryWriter('runs/simple_nn3')


In [22]:
num_epochs = 1000
def evaluate_model(model, test_loader, criterion):
    """
    Evalúa el modelo en el conjunto de validación.
    """
    model.eval()  # Pone el modelo en modo de evaluación
    total_loss = 0.0
    with torch.no_grad():  # Desactiva el cálculo de gradientes durante la evaluación
        for data, target in test_loader:
            outputs = model(data)
            loss = criterion(outputs, target)
            total_loss += loss.item()
    average_loss = total_loss / len(test_loader)
    return average_loss

# Training loop modificado para incluir "validation loss":
for epoch in range(num_epochs):
    model.train()  # Pone el modelo en modo de entrenamiento
    for batch_idx, (data, target) in enumerate(train_loader):
        optimizer.zero_grad()
        outputs = model(data)
        loss = criterion(outputs, target)
        loss.backward()
        optimizer.step()

    # Evaluar el "validation loss"
    val_loss = evaluate_model(model, test_loader, criterion)

    # Imprimir la pérdida cada 10 épocas
    if epoch % 100 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Training Loss: {loss.item():.4f}, Validation Loss: {val_loss:.4f}')
        
        # Escribir en TensorBoard
        writer.add_scalar('training loss', loss.item(), global_step= epoch)
        writer.add_scalar('validation loss', val_loss, global_step=epoch)
        writer.add_scalars('Losses', {
        'Training Loss': loss.item(),
        'Validation Loss': val_loss
        }, global_step=epoch)


Epoch [1/1000], Training Loss: 397.5710, Validation Loss: 294.7775
Epoch [101/1000], Training Loss: 144.4411, Validation Loss: 180.6687
Epoch [201/1000], Training Loss: 179.0697, Validation Loss: 180.9320
Epoch [301/1000], Training Loss: 109.5014, Validation Loss: 181.7579
Epoch [401/1000], Training Loss: 197.4581, Validation Loss: 181.4959
Epoch [501/1000], Training Loss: 102.2847, Validation Loss: 181.3546
Epoch [601/1000], Training Loss: 291.3906, Validation Loss: 181.3379
Epoch [701/1000], Training Loss: 229.0615, Validation Loss: 181.3561
Epoch [801/1000], Training Loss: 112.8123, Validation Loss: 181.0569
Epoch [901/1000], Training Loss: 146.2479, Validation Loss: 181.5776


In [26]:
# Evaluar un dato
# Assuming `data` is a single data point and `target` is the corresponding target value

ejemplo = {
   # "filename": "L355F02268C02S08916Rp30595.jpg",
    "growth_stage": "F",
    "damage":"DR",
   # "season":"LR2021"
}

new_data = pd.DataFrame([ejemplo])
new_data.head()





Unnamed: 0,growth_stage,damage
0,F,DR


In [27]:
new_data_encoded = encoder.transform(new_data)

new_data_encoded.shape

(1, 10)

In [33]:
# 1. Preprocesar el nuevo dato

# Realizar one-hot encoding con el encoder ya entrenado
new_data_encoded = encoder.transform(new_data)

# Convertir el numpy array a un tensor de PyTorch
new_data_tensor = torch.FloatTensor(new_data_encoded)

# 2. Realizar una predicción con el modelo

model.eval()  # Put the model in evaluation mode
with torch.no_grad():  # Disable gradient calculation during evaluation
    output = model(new_data_tensor)

# Output de tensor a int
output_int = output.item()
print(output_int)


tensor([[33.6498]])
33.64975357055664
