In [1]:
!pip install onnx
!pip install onnxruntime

Collecting onnx
  Downloading onnx-1.15.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (15.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m15.7/15.7 MB[0m [31m61.8 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: onnx
Successfully installed onnx-1.15.0
Collecting onnxruntime
  Downloading onnxruntime-1.17.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (6.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.8/6.8 MB[0m [31m30.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting coloredlogs (from onnxruntime)
  Downloading coloredlogs-15.0.1-py2.py3-none-any.whl (46 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.0/46.0 kB[0m [31m8.7 MB/s[0m eta [36m0:00:00[0m
Collecting humanfriendly>=9.1 (from coloredlogs->onnxruntime)
  Downloading humanfriendly-10.0-py2.py3-none-any.whl (86 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.8/86.8 kB[0m [31m16.1 MB/s[0m

In [14]:
import torch
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
import torch.nn.functional as F
from torch.optim import SGD
import torch.onnx
import onnx
import onnxruntime

In [15]:
D_in, H, D_out = 10, 100, 10

model = torch.nn.Sequential(
    torch.nn.Linear(D_in, H),
    torch.nn.ReLU(),
    torch.nn.Linear(H, D_out),
)

In [16]:
outputs = model(torch.randn(64, 10))
outputs.shape

torch.Size([64, 10])

In [17]:
df = pd.read_csv('riceClassification.csv')
X = df.iloc[:, :10]
Y = df.iloc[:, -1]

X.shape, Y.shape

((17995, 10), (17995,))

In [18]:
x_2 = np.array(X)
y_2 = np.array(Y)

scaler = StandardScaler().fit(x_2)
x_2_normalized = scaler.transform(x_2)

X_train, X_test, y_train, y_test = train_test_split(x_2_normalized, y_2, test_size=0.2, random_state=42)

y_train = y_train.astype(np.int32)
y_test = y_test.astype(np.int32)


In [19]:
def softmax(x):
    return torch.exp(x) / torch.exp(x).sum(axis=-1,keepdims=True)

def cross_entropy(output, target):
    logits = output[torch.arange(len(output)), target]
    loss = - logits + torch.log(torch.sum(torch.exp(output), axis=-1))
    loss = loss.mean()
    return loss

Clase Dataset


In [20]:
class RiceDataset(torch.utils.data.Dataset):
    def __init__(self, X, Y):
        self.X = torch.from_numpy(X).float()
        self.Y = torch.from_numpy(Y).long()
    def __len__(self):
        return len(self.X)

    def __getitem__(self, ix):
        return self.X[ix], self.Y[ix]

In [21]:
dataset = RiceDataset(X_train, y_train)
len(dataset)

14396

In [22]:
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = SGD(model.parameters(), lr=0.3)

epochs = 1000
log_each = 100
checkpoint_each = 20
l = []

for e in range(1, epochs + 1):
    y_pred = model(dataset.X)
    loss = loss_fn(y_pred, dataset.Y)
    l.append(loss.item())

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if e % log_each == 0:
        print(f"Epoch {e}/{epochs} Loss {np.mean(l):.5f}")

    if e % checkpoint_each == 0:
        torch.save({
            'epoch': e,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': loss
        }, f"checkpoint_epoch_{e}.pt")

Epoch 100/1000 Loss 0.93312
Epoch 200/1000 Loss 0.77380
Epoch 300/1000 Loss 0.67042
Epoch 400/1000 Loss 0.59080
Epoch 500/1000 Loss 0.52804
Epoch 600/1000 Loss 0.47871
Epoch 700/1000 Loss 0.43944
Epoch 800/1000 Loss 0.40747
Epoch 900/1000 Loss 0.38090
Epoch 1000/1000 Loss 0.35843


In [23]:
def evaluate(x):
    model.eval()
    y_pred = model(x)
    y_probas = softmax(y_pred)
    return torch.argmax(y_probas, axis=1)

y_pred = evaluate(torch.from_numpy(X_test).float())
accuracy_score(y_test, y_pred.cpu().numpy())

0.9680466796332314

Clase DataLoader


In [24]:
dataloader = torch.utils.data.DataLoader(dataset, batch_size=62, shuffle=True)

In [25]:
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = SGD(model.parameters(), lr=0.3)

epochs = 1000
log_each = 100
l = []

for e in range(1, epochs + 1):
    epoch_loss = 0.0
    for inputs, targets in dataloader:
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = loss_fn(outputs, targets)
        epoch_loss += loss.item()

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

    l.append(epoch_loss / len(dataloader))  # Average loss for the epoch

    if e % log_each == 0:
        print(f"Epoch {e}/{epochs} Loss {np.mean(l):.5f}")

Epoch 100/1000 Loss 0.10331
Epoch 200/1000 Loss 0.09236
Epoch 300/1000 Loss 0.08699
Epoch 400/1000 Loss 0.08321
Epoch 500/1000 Loss 0.08018
Epoch 600/1000 Loss 0.07772
Epoch 700/1000 Loss 0.07564
Epoch 800/1000 Loss 0.07401
Epoch 900/1000 Loss 0.07249
Epoch 1000/1000 Loss 0.07110


Evaluar Dataloader

In [27]:
def evaluate_model(model, dataloader):
    model.eval()  # Set the model to evaluation mode
    all_predictions = []
    all_targets = []

    with torch.no_grad():
        for inputs, targets in dataloader:
            outputs = model(inputs)
            _, predictions = torch.max(outputs, 1)  # Get the predicted classes
            all_predictions.extend(predictions.cpu().numpy())
            all_targets.extend(targets.cpu().numpy())

    accuracy = accuracy_score(all_targets, all_predictions)
    return accuracy

# Example usage:
# Assuming you have `model` and `test_dataloader` defined
accuracy = evaluate_model(model, dataloader)
print("Accuracy:", accuracy)

Accuracy: 0.9791608780216727


Cargar todo los checkpoints basados en los epochs


In [12]:
def evaluate(model, x):
    model.eval()
    y_pred = model(x)
    _, predicted = torch.max(y_pred, 1)
    return predicted

accuracies = []
for epoch in range(checkpoint_each, epochs + 1, checkpoint_each):
    checkpoint = torch.load(f"checkpoint_epoch_{epoch}.pt")
    model.load_state_dict(checkpoint['model_state_dict'])

    y_pred = evaluate(model, torch.from_numpy(X_test).float())
    accuracy = accuracy_score(y_test, y_pred.cpu().numpy())
    accuracies.append(accuracy)
    print(f"Accuracy at epoch {epoch}: {accuracy:.4f}")

print(f"Average accuracy: {np.mean(accuracies):.4f}")

Accuracy at epoch 20: 0.6149
Accuracy at epoch 40: 0.6935
Accuracy at epoch 60: 0.7599
Accuracy at epoch 80: 0.8047
Accuracy at epoch 100: 0.8266
Accuracy at epoch 120: 0.8441
Accuracy at epoch 140: 0.7858
Accuracy at epoch 160: 0.7327
Accuracy at epoch 180: 0.7755
Accuracy at epoch 200: 0.7833
Accuracy at epoch 220: 0.7899
Accuracy at epoch 240: 0.8030
Accuracy at epoch 260: 0.8152
Accuracy at epoch 280: 0.8266
Accuracy at epoch 300: 0.8363
Accuracy at epoch 320: 0.8491
Accuracy at epoch 340: 0.8572
Accuracy at epoch 360: 0.8661
Accuracy at epoch 380: 0.8772
Accuracy at epoch 400: 0.8858
Accuracy at epoch 420: 0.8916
Accuracy at epoch 440: 0.8997
Accuracy at epoch 460: 0.9072
Accuracy at epoch 480: 0.9147
Accuracy at epoch 500: 0.9178
Accuracy at epoch 520: 0.9239
Accuracy at epoch 540: 0.9280
Accuracy at epoch 560: 0.9314
Accuracy at epoch 580: 0.9355
Accuracy at epoch 600: 0.9397
Accuracy at epoch 620: 0.9439
Accuracy at epoch 640: 0.9469
Accuracy at epoch 660: 0.9500
Accuracy at ep

Cargar un Epoch a la vez


In [13]:
def evaluate(model, x):
    model.eval()
    y_pred = model(x)
    _, predicted = torch.max(y_pred, 1)
    return predicted

checkpoint_file = "checkpoint_epoch_1000.pt"

checkpoint = torch.load(checkpoint_file)
model.load_state_dict(checkpoint['model_state_dict'])

y_pred = evaluate(model, torch.from_numpy(X_test).float())
accuracy = accuracy_score(y_test, y_pred.cpu().numpy())
print(f"Accuracy using {checkpoint_file}: {accuracy:.4f}")

Accuracy using checkpoint_epoch_1000.pt: 0.9686


Torchscript

In [28]:
scripted_model = torch.jit.script(model)
torch.jit.save(scripted_model, 'scripted_model.pt')

In [29]:
scripted_model = torch.jit.load("scripted_model.pt")
X_t = torch.from_numpy(X_test).float()

def evaluate_model(model, X_test, y_test):
    model.eval()
    with torch.no_grad():
        predicted = model(X_test)
        _, predicted_labels = torch.max(predicted, 1)
        accuracy = accuracy_score(y_test, predicted_labels.numpy())
        return accuracy

test_accuracy = evaluate_model(scripted_model, X_t, y_test)
print(f"Test Accuracy: {test_accuracy:.4f}")

Test Accuracy: 0.9700


ONNX


In [31]:
dummy_input = torch.randn(64, 10)
torch.onnx.export(model, dummy_input, "model.onnx")