<a href="https://colab.research.google.com/github/NeuralDataMind/PyTorch/blob/main/Datasets_%26_Dataloaders.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

In [2]:
digits = load_digits()
X, y = digits.data, digits.target

In [3]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 11)

In [4]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [7]:
class CustomDataset(Dataset):
  def __init__(self, data, target):
    self.data = torch.tensor(data, dtype = torch.float32)
    self.target = torch.tensor(target, dtype = torch.long)

  def __len__(self):
    return len(self.data)

  def __getitem__(self, index):
    sample = {'data': self.data[index], 'target': self.target[index]}
    return sample

In [8]:
train_dataset = CustomDataset(X_train, y_train)
test_dataset = CustomDataset(X_test, y_test)

In [10]:
len(train_dataset), len(test_dataset)

(1437, 360)

In [13]:
train_dataset[0]

{'data': tensor([ 0.0000,  0.7855,  1.4473,  0.9870,  0.9711,  1.6287, -0.4107, -0.1187,
         -0.0558,  1.5645,  0.4803, -1.2699, -0.4852,  1.3371, -0.5054, -0.1243,
         -0.0354, -0.7298, -1.5627, -1.2109,  0.1465,  1.0232, -0.5341, -0.1049,
         -0.0373, -0.7800, -0.3372,  1.0441,  0.9914,  1.4639,  2.3861, -0.0264,
          0.0000,  0.1808,  1.1503,  0.4545,  0.7962, -0.8123, -0.2567,  0.0000,
         -0.0577, -0.5369, -0.9146,  0.7433, -0.0886, -1.4491, -0.8084, -0.0940,
         -0.0334, -0.4056, -0.9975,  1.2375, -1.3842, -1.4744, -0.7676, -0.2048,
         -0.0264, -0.2901,  1.4802, -0.7153, -2.4151, -1.1653, -0.4980, -0.1942]),
 'target': tensor(7)}

In [14]:
train_data_loader = DataLoader(dataset = train_dataset, batch_size = 32, shuffle = True, num_workers = 2)
test_data_loader =  DataLoader(dataset = train_dataset, batch_size = 32, shuffle = False, num_workers = 2)

In [22]:
class SimpleNN(nn.Module):
  def __init__(self, input_size, hidden_size, output_size):
    super(SimpleNN, self).__init__()
    self.layer_1 = nn.Linear(input_size, hidden_size)
    self.relu = nn.ReLU()
    self.layer_2 = nn.Linear(hidden_size, output_size)

  def forward(self, x):
    x = self.layer_1(x)
    x = self.relu(x)
    x = self.layer_2(x)
    return x

In [18]:
input_size = X_train.shape[1]
hidden_size = 64
output_size = len(set(y_train))

In [23]:
model = SimpleNN(input_size, hidden_size, output_size)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr = 0.001)

In [26]:
num_epochs = 100

for epoch in range(num_epochs):
  model.train()
  running_loss = 0.0

  for batch in train_data_loader:
    inputs = batch['data']
    targets = batch['target']

    optimizer.zero_grad()
    outputs = model(inputs)

    loss = criterion(outputs, targets)
    loss.backward()
    optimizer.step()

    running_loss += loss.item()

  print(f"Epoch {epoch + 1} / {num_epochs}, Loss: {running_loss / len(train_data_loader)}")

Epoch 1 / 100, Loss: 0.07617658091088136
Epoch 2 / 100, Loss: 0.06730937597652277
Epoch 3 / 100, Loss: 0.05955544879866971
Epoch 4 / 100, Loss: 0.053112912591960694
Epoch 5 / 100, Loss: 0.04698667666978306
Epoch 6 / 100, Loss: 0.04221758654134141
Epoch 7 / 100, Loss: 0.03808704862991969
Epoch 8 / 100, Loss: 0.03458450726336903
Epoch 9 / 100, Loss: 0.03126906418345041
Epoch 10 / 100, Loss: 0.028401612573199803
Epoch 11 / 100, Loss: 0.026010103772083917
Epoch 12 / 100, Loss: 0.02386175019459592
Epoch 13 / 100, Loss: 0.021827108268108634
Epoch 14 / 100, Loss: 0.02037390876147482
Epoch 15 / 100, Loss: 0.018404939698262346
Epoch 16 / 100, Loss: 0.016993618735836612
Epoch 17 / 100, Loss: 0.015776541073703104
Epoch 18 / 100, Loss: 0.014503854989177651
Epoch 19 / 100, Loss: 0.013630189643137985
Epoch 20 / 100, Loss: 0.012582287218214737
Epoch 21 / 100, Loss: 0.011737938835803005
Epoch 22 / 100, Loss: 0.010862363698995776
Epoch 23 / 100, Loss: 0.010195854502833552
Epoch 24 / 100, Loss: 0.009527

In [28]:
# evaluation

model.eval()
all_predictions = []
all_targets = []

with torch.no_grad():
  for batch in test_data_loader:
    inputs = batch['data']
    targets = batch['target']

    outputs = model(inputs)
    predictions = torch.argmax(outputs, dim = 1)

    all_predictions.extend(predictions.cpu().numpy())
    all_targets.extend(targets.cpu().numpy())

accuracy = accuracy_score(all_targets, all_predictions)
print(f"Accuracy Test: {accuracy * 100: .3f}%")

Accuracy Test:  100.000%
