In [23]:
from sklearn.datasets import make_classification
import torch
from torch.utils.data import DataLoader, Dataset, TensorDataset

In [24]:
# Tiny synthetic binary-classification problem: 10 samples with 5 features,
# all of them informative, generated with a fixed seed.
X, y = make_classification(
    n_samples=10,
    n_features=5,
    n_informative=5,
    n_redundant=0,
    n_classes=2,
    random_state=42,
)

In [25]:
# Show the feature matrix's dimensions, then its contents.
print(X.shape, X, sep="\n")

(10, 5)
[[-0.61585508  2.58130764  1.62963662  0.2085941  -4.03188055]
 [-0.22730521 -1.37373457 -0.594329    1.38230672 -1.16037018]
 [-2.03801522  0.41396683 -2.60284125  1.80617675 -1.73328175]
 [ 0.4663646   2.03142219 -1.5444069   2.26477017  0.32119417]
 [-0.51804757 -0.76118614 -1.56591406  1.67254839 -0.25264082]
 [-1.28661346 -0.20591774 -1.23644371  1.47525773 -0.61134412]
 [ 1.17951155 -1.13445243  1.76200957 -2.72110395  3.93604668]
 [ 1.4204896   1.62418883  1.44165171 -1.38234717  1.2629718 ]
 [-1.84482584  0.18879659 -0.62827939  0.12570307 -2.10762465]
 [ 2.48577339 -0.48905762  0.41917299  1.4018005   2.41277893]]


In [6]:
# Show the label vector's dimensions, then its contents.
print(y.shape, y, sep="\n")

(10,)
[1 1 0 0 0 1 1 1 0 0]


## One approach for this scenario: wrap the data in a `TensorDataset`, split it with `torch.utils.data.random_split`, and then create a `DataLoader`

In [10]:
# Convert the NumPy features and labels to tensors and pair them sample-by-sample.
# NOTE: this rebinds `X` from the raw feature array to a TensorDataset, so any
# later cell that reads `X` sees the dataset, not the array.
feature_tensor = torch.tensor(X)
label_tensor = torch.tensor(y)
X = TensorDataset(feature_tensor, label_tensor)

In [14]:
# Show the first (features, label) pair and the number of samples in the dataset.
print(X[0], len(X), sep="\n")

(tensor([-0.6159,  2.5813,  1.6296,  0.2086, -4.0319], dtype=torch.float64), tensor(1))
10


In [18]:
# Split the 10 samples into two subsets of 5. A seeded generator makes the
# random assignment identical on every fresh run of the notebook.
X_train, X_test = torch.utils.data.random_split(
    X,
    [5, 5],
    generator=torch.Generator().manual_seed(42),
)

In [20]:
# Report the size of each subset produced by the split.
print(len(X_train), len(X_test), sep="\n")

5
5


## Using a custom `Dataset` class (subclassing `torch.utils.data.Dataset`)

In [26]:
# An earlier cell rebinds `X` to a TensorDataset, so on a top-to-bottom run `X`
# is no longer the raw feature array at this point. Recover the underlying
# feature tensor in that case so this cell works regardless of execution order.
if isinstance(X, TensorDataset):
    X = X.tensors[0]

# torch.as_tensor accepts both ndarrays and existing tensors, which also makes
# this cell safe to re-run (torch.tensor warns when handed a tensor).
X = torch.as_tensor(X, dtype=torch.float32)
y = torch.as_tensor(y, dtype=torch.int)

In [28]:
# Display the feature tensor produced by the conversion cell above.
X

tensor([[-0.6159,  2.5813,  1.6296,  0.2086, -4.0319],
        [-0.2273, -1.3737, -0.5943,  1.3823, -1.1604],
        [-2.0380,  0.4140, -2.6028,  1.8062, -1.7333],
        [ 0.4664,  2.0314, -1.5444,  2.2648,  0.3212],
        [-0.5180, -0.7612, -1.5659,  1.6725, -0.2526],
        [-1.2866, -0.2059, -1.2364,  1.4753, -0.6113],
        [ 1.1795, -1.1345,  1.7620, -2.7211,  3.9360],
        [ 1.4205,  1.6242,  1.4417, -1.3823,  1.2630],
        [-1.8448,  0.1888, -0.6283,  0.1257, -2.1076],
        [ 2.4858, -0.4891,  0.4192,  1.4018,  2.4128]])

In [38]:
class Custom_prac(Dataset):
    """Map-style dataset that pairs each feature row with its label.

    Args:
        features: Indexable, sized collection of samples (e.g. a 2-D tensor
            with one row per sample).
        labels: Indexable, sized collection of targets, one per sample.

    Raises:
        ValueError: If ``features`` and ``labels`` do not have the same length.
    """

    def __init__(self, features, labels):
        # Fail fast on misaligned inputs instead of surfacing an IndexError
        # (or silently ignoring extra labels) later during iteration.
        if len(features) != len(labels):
            raise ValueError(
                f"features and labels must have the same length, "
                f"got {len(features)} and {len(labels)}"
            )
        self.features = features
        self.labels = labels

    def __len__(self):
        """Return the number of samples."""
        # len() matches .shape[0] for tensors/arrays and also supports plain sequences.
        return len(self.features)

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        return self.features[index], self.labels[index]


In [39]:
# Wrap the feature and label tensors in the custom dataset.
dataset = Custom_prac(features=X, labels=y)

In [40]:
# Show the first (features, label) pair and the total number of samples.
print(dataset[0], len(dataset), sep="\n")

(tensor([-0.6159,  2.5813,  1.6296,  0.2086, -4.0319]), tensor(1, dtype=torch.int32))
10


In [41]:
# Split the dataset into two subsets of 5 samples each. A seeded generator
# makes the random assignment identical on every fresh run of the notebook.
train, test = torch.utils.data.random_split(
    dataset,
    [5, 5],
    generator=torch.Generator().manual_seed(42),
)

In [49]:
# Confirm how many samples ended up in the training subset.
print(len(train))

5


In [50]:
# Batches of 2 from the training subset. The seeded generator makes the
# shuffled batch order reproducible across fresh runs of the notebook.
train_loader = DataLoader(
    train,
    batch_size=2,
    shuffle=True,
    generator=torch.Generator().manual_seed(42),
)
# The test subset is iterated in its stored order (no shuffling).
test_loader = DataLoader(test, batch_size=2, shuffle=False)

In [51]:
# Display the DataLoader object itself; iterating it is what yields batches.
train_loader

<torch.utils.data.dataloader.DataLoader at 0x1669b4cd0>

In [53]:
# Walk the training loader once, printing each mini-batch's features followed
# by its labels.
for batch_feature, batch_labels in train_loader:
    print(batch_feature, batch_labels, sep="\n")


tensor([[-0.5180, -0.7612, -1.5659,  1.6725, -0.2526],
        [ 1.4205,  1.6242,  1.4417, -1.3823,  1.2630]])
tensor([0, 1], dtype=torch.int32)
tensor([[-0.2273, -1.3737, -0.5943,  1.3823, -1.1604],
        [-1.8448,  0.1888, -0.6283,  0.1257, -2.1076]])
tensor([1, 0], dtype=torch.int32)
tensor([[-1.2866, -0.2059, -1.2364,  1.4753, -0.6113]])
tensor([1], dtype=torch.int32)
