# Getting dataset using make_classification

In [None]:
from sklearn.datasets import make_classification
# Synthetic 3-class problem: 100 samples, 4 features of which 3 are informative.
# random_state pins the generator so the dataset is the same on every run
# (the train/test split below is seeded too, but that alone is not enough).
X, Y = make_classification(n_samples = 100, n_features = 4, n_redundant = 0,
                           n_informative = 3, n_clusters_per_class = 2, n_classes = 3,
                           random_state = 42)

In [None]:
X.shape

(100, 4)

In [None]:
Y.shape

(100,)

In [None]:
import numpy as np
np.unique(Y)

array([0, 1, 2])

In [None]:
X.dtype

dtype('float64')

# Train test Split

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 33% of the samples for testing; random_state fixes the shuffle so the
# split is repeatable for a given X, Y.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.33 , random_state = 42)

# Converting the data to tensors

### Example: converting a NumPy array to a tensor

In [None]:
import torch
# from_numpy wraps the array as a tensor without changing its dtype, so Xt is
# still float64 here (X.dtype above); the Data class below does the float32 cast.
Xt = torch.from_numpy(X_train)

In [None]:
Xt.shape

torch.Size([67, 4])

In [None]:
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader         # Dataset?
class Data(Dataset):                                     # Data inherits from torch's Dataset
  """Expose NumPy feature/label arrays as a map-style torch ``Dataset``.

  Args:
    X_train: NumPy array of features, one row per sample; stored as float32.
    y_train: NumPy array of integer class labels, one per sample; stored as
      int64 (torch "Long").
  """

  def __init__(self, X_train, y_train):
    # nn.Linear parameters are float32 by default, so float64 features must be
    # down-cast; otherwise the forward pass fails with a Float/Double
    # dtype-mismatch RuntimeError.
    self.X = torch.from_numpy(X_train.astype(np.float32))
    # nn.CrossEntropyLoss needs class-index targets of dtype Long (int64).
    # NumPy's default integer is int32 on some platforms, so cast explicitly
    # instead of relying on whatever dtype the labels arrive with.
    self.y = torch.from_numpy(y_train.astype(np.int64))
    self.len = self.X.shape[0]

  # magic or dunder methods

  def __getitem__(self, index):             # allows the indexing to get traindata
    """Return the ``(features, label)`` pair(s) selected by ``index``."""
    return self.X[index], self.y[index]

  def __len__(self):
    """Return the number of samples."""
    return self.len

In [None]:
traindata = Data(X_train, Y_train)

In [None]:
traindata

<__main__.Data at 0x7f1a301cf3a0>

In [None]:
traindata[0:5]

(tensor([[ 0.2366,  1.5950,  0.6542,  0.1789],
         [-0.3634,  1.5775,  1.0315,  0.0161],
         [-2.1551, -0.3461,  0.2691,  0.3024],
         [-0.6549,  1.1609, -1.8496,  0.7394],
         [ 0.7300, -0.9385, -0.2187, -2.3223]]), tensor([0, 0, 1, 1, 1]))

In [None]:
batch_size = 4
# Shuffled mini-batches for training. num_workers = 0 loads in the main process:
# worker processes cannot import a Dataset class defined inside a notebook on
# spawn-based platforms (Windows/macOS), and for a dataset this small they only
# add start-up overhead.
trainloader = DataLoader(traindata, batch_size = batch_size, shuffle = True, num_workers = 0)

In [None]:
trainloader

<torch.utils.data.dataloader.DataLoader at 0x7f19b760e310>

# Building a NN

In [None]:
import torch.nn as nn
# number of features (len of X cols)
input_dim = 4
# number of units in the single hidden layer (a layer width, not a layer
# count; the variable name is kept so existing references keep working)
hidden_layers = 25
# number of classes (unique of y)
output_dim = 3
class Network(nn.Module):
  """Feed-forward classifier with one sigmoid-activated hidden layer.

  Args:
    input_dim: number of input features per sample.
    hidden_dim: number of units in the hidden layer.
    output_dim: number of classes, i.e. scores returned per sample.

  All arguments default to the module-level constants above, so ``Network()``
  builds the same 4 -> 25 -> 3 model as before.
  """
  def __init__(self, input_dim = input_dim, hidden_dim = hidden_layers, output_dim = output_dim):
    super().__init__()
    self.linear1 = nn.Linear(input_dim, hidden_dim)
    self.linear2 = nn.Linear(hidden_dim, output_dim)
  def forward(self, x):
    """Return raw class scores; no softmax is applied here."""
    x = torch.sigmoid(self.linear1(x))
    x = self.linear2(x)
    return x

In [None]:
clf = Network()

In [None]:
# Print the model summary. `clf.parameters` without parentheses is only the
# bound method, which is why the old output started with "<bound method ...".
print(clf)

<bound method Module.parameters of Network(
  (linear1): Linear(in_features=4, out_features=25, bias=True)
  (linear2): Linear(in_features=25, out_features=3, bias=True)
)>


In [None]:
# CrossEntropyLoss is fed the raw scores returned by Network.forward together
# with the integer class labels from the dataset.
criterion = nn.CrossEntropyLoss()
# NOTE(review): lr=0.1 is well above Adam's default of 1e-3, and the logged loss
# goes back up at epoch 6 — a smaller rate may train more smoothly; confirm.
optimizer = torch.optim.Adam(clf.parameters(), lr=0.1)

In [None]:
epochs = 10
# Number of mini-batches per epoch, read from the loader so the report below
# does not depend on a loop variable that is undefined when the loader is empty.
num_batches = len(trainloader)
for epoch in range(epochs):
  # sum of the per-batch losses seen in this epoch (not an average)
  running_loss = 0.0
  for inputs, labels in trainloader:
    # clear the gradients left over from the previous batch
    optimizer.zero_grad()
    # forward propagation
    outputs = clf(inputs)
    loss = criterion(outputs, labels)
    # backward propagation: compute the gradients of the loss
    loss.backward()
    # optimize
    optimizer.step()
    running_loss += loss.item()
  # display statistics: [epoch, batches in epoch] summed loss
  print(f'[{epoch + 1}, {num_batches:5d}] loss: {running_loss:.5f}')

[1,    17] loss: 18.89952
[2,    17] loss: 15.78039
[3,    17] loss: 12.51222
[4,    17] loss: 10.31081
[5,    17] loss: 8.82788
[6,    17] loss: 9.99770
[7,    17] loss: 7.90330
[8,    17] loss: 5.68096
[9,    17] loss: 5.73005
[10,    17] loss: 4.52860


In [None]:
clf.state_dict()

OrderedDict([('linear1.weight',
              tensor([[ 6.4617e-01, -6.2798e-01,  1.1565e-01, -1.3092e+00],
                      [ 9.3136e-01,  2.8523e+00,  5.0200e-01, -5.7020e-01],
                      [ 1.4039e+00, -2.4039e+00,  1.0425e+00, -1.5485e-01],
                      [ 4.4637e-01,  5.4531e-01,  3.8703e+00,  1.3449e+00],
                      [-1.4853e+00, -6.0801e-01, -1.2851e+00,  1.4467e+00],
                      [ 1.1012e+00,  3.1892e+00,  7.0156e-01, -4.5884e-01],
                      [ 2.7136e+00, -9.1211e-02, -2.9659e+00, -4.7106e-01],
                      [ 5.4055e-01,  9.0803e-01,  4.3241e-01, -6.4346e-01],
                      [-3.0380e+00, -1.0926e-01, -1.1991e+00,  6.9709e-01],
                      [ 2.6767e+00,  9.6382e-01, -4.1089e+00, -1.3055e+00],
                      [-2.2710e+00, -3.9366e-02, -1.1182e+00,  5.4076e-01],
                      [ 5.8593e-01,  1.1672e+00,  1.9244e+00, -2.4074e+00],
                      [ 2.3862e+00, -2.5761e+00,  5.0184

In [None]:
# save the trained model
# Only the state_dict (the parameter tensors) is written, not the Network class
# itself, so a Network instance has to be built before loading it back.
PATH = './mymodel.pth'
torch.save(clf.state_dict(), PATH)

In [None]:
# Build a fresh, untrained network and overwrite its parameters with the saved ones.
clf = Network()
clf.load_state_dict(torch.load(PATH))

<All keys matched successfully>

In [None]:
testdata = Data(X_test, Y_test)
# Evaluation does not benefit from shuffling; a fixed order keeps the batch
# inspected below the same on every run. num_workers=0 loads in the main
# process, which avoids worker start-up cost and the failure to import a
# notebook-defined Dataset on spawn-based platforms (Windows/macOS).
testloader = DataLoader(testdata, batch_size=batch_size,
                        shuffle=False, num_workers=0)

In [None]:
# Pull a single batch (batch_size samples) from the test loader for manual inspection.
dataiter = iter(testloader)
inputs, labels = next(dataiter)

In [None]:
inputs

tensor([[ 1.5097,  0.2101, -1.3231,  0.5879],
        [ 1.2197,  0.7961, -0.5712,  0.2742],
        [ 1.7085,  0.5393, -0.2179, -0.1527],
        [ 0.0703, -2.7599,  2.0869, -0.6138]])

In [None]:
labels

tensor([1, 0, 2, 2])

In [None]:
# Raw class scores for the batch: one row per sample, one column per class.
# They are unnormalised (Network.forward applies no softmax).
outputs = clf(inputs)
print(outputs)

tensor([[-3.0240,  0.4437,  1.4500],
        [-1.0547, -1.6635,  1.1216],
        [-0.1609, -1.6499,  0.4523],
        [-0.5794, -0.4949,  1.7394]], grad_fn=<AddmmBackward0>)


In [None]:
# Inference only, so skip autograd bookkeeping.
with torch.no_grad():
  outputs = clf(inputs)
# Predicted class = column index of the largest score in each row.
# (Avoids binding a throwaway value to `__`, which IPython uses for output history.)
predicted = torch.max(outputs, 1).indices
print(predicted)

tensor([2, 2, 2, 2])


In [None]:
correct, total = 0, 0
# no need to calculate gradients during inference
with torch.no_grad():
  for inputs, labels in testloader:
    # calculate output by running through the network
    outputs = clf(inputs)
    # predicted class = index of the largest score per row; `.data` is not
    # needed under no_grad, and `__` is left alone because IPython uses it
    # for output history
    predicted = torch.max(outputs, 1).indices
    # update results
    total += labels.size(0)
    correct += (predicted == labels).sum().item()
# `//` floors the percentage to a whole number
print(f'Accuracy of the network on the {len(testdata)} test data: {100 * correct // total} %')

Accuracy of the network on the 33 test data: 69 %


___