<a href="https://colab.research.google.com/github/ShoaibMuhammad123/Pytorch/blob/main/Pytorch6_Dataset_DataLoader.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import torch as nn

In [None]:
from sklearn.datasets import make_classification
import torch

In [None]:
X,y = make_classification(
    n_samples=10,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_classes=2,
    random_state=42
)

In [None]:
X

array([[ 1.06833894, -0.97007347],
       [-1.14021544, -0.83879234],
       [-2.8953973 ,  1.97686236],
       [-0.72063436, -0.96059253],
       [-1.96287438, -0.99225135],
       [-0.9382051 , -0.54304815],
       [ 1.72725924, -1.18582677],
       [ 1.77736657,  1.51157598],
       [ 1.89969252,  0.83444483],
       [-0.58723065, -1.97171753]])

In [None]:
y

array([1, 0, 0, 0, 0, 1, 1, 1, 1, 0])

In [None]:
X.shape

(10, 2)

In [None]:
X = torch.tensor(X,dtype=torch.float32)
y = torch.tensor(y,dtype=torch.long)

In [None]:
X

tensor([[ 1.0683, -0.9701],
        [-1.1402, -0.8388],
        [-2.8954,  1.9769],
        [-0.7206, -0.9606],
        [-1.9629, -0.9923],
        [-0.9382, -0.5430],
        [ 1.7273, -1.1858],
        [ 1.7774,  1.5116],
        [ 1.8997,  0.8344],
        [-0.5872, -1.9717]])

In [None]:
y

tensor([1, 0, 0, 0, 0, 1, 1, 1, 1, 0])

# **Dataset and DataLoader**

In [None]:
from torch.utils.data import Dataset , DataLoader

* CustomClass has
  - def __init__()
  - def __len__()
  - def __getitem__()

In [None]:
# First we have to make a class Custom Dataset (you can choose any name for it)

class CustomDataset(Dataset):

  # constructor get two input one is features and the other is labels
  def __init__(self,features,labels):
    self.features = features
    self.labels = labels

  def __len__(self):
    return self.features.shape[0]  # total number of rows in your data

  def __getitem__(self,index):    # it get one input which is index and return the row of that index

    return self.features[index],self.labels[index]


# this become our custom dataset



In [None]:
dataset = CustomDataset(X,y)

In [None]:
dataset

<__main__.CustomDataset at 0x7bb7b69f63d0>

In [None]:
len(dataset)

10

In [None]:
dataset[0]

(tensor([ 1.0683, -0.9701]), tensor(1))

### **Data Loader**

In [None]:
dataloader = DataLoader(dataset,batch_size=2,shuffle=True)    # for this we need to pass two main thing 1- object of dataset
                              # 2- batch_size = i.e 2
                              # 3- shuffile = True

In [None]:
dataloader

<torch.utils.data.dataloader.DataLoader at 0x7bb7b69c8a50>

In [None]:
for features,labels in dataloader:
  print(features,labels)
  print('-'*50)

tensor([[ 1.7273, -1.1858],
        [-0.5872, -1.9717]]) tensor([1, 0])
--------------------------------------------------
tensor([[-0.7206, -0.9606],
        [ 1.8997,  0.8344]]) tensor([0, 1])
--------------------------------------------------
tensor([[-1.1402, -0.8388],
        [-2.8954,  1.9769]]) tensor([0, 0])
--------------------------------------------------
tensor([[-1.9629, -0.9923],
        [ 1.7774,  1.5116]]) tensor([0, 1])
--------------------------------------------------
tensor([[ 1.0683, -0.9701],
        [-0.9382, -0.5430]]) tensor([1, 1])
--------------------------------------------------


## **Transformation**

In [None]:
# You can apply transformation inside __getitem__() before returning the items
# i.e
# 1- converting into black and white of an image
# 2- resizeing
# 2- lower case,lamitization, stopwards etc in nlp

In [None]:
class Customdataset(Dataset):
  def __init__(self,features ,labels):
    self.features = features
    self.label = labels
  def __len__(self):
    return self.features.shape[0]

  def __getitem__(self,index):
    return self.features[index],self.label[index]




In [None]:
dataset = Customdataset(X,y)

In [None]:
dataset

<__main__.Customdataset at 0x7bb7b6a01750>

In [None]:
len(dataset)

10

In [None]:
# data loader

dataloader1 = DataLoader(dataset,batch_size=2,shuffle=True)

In [None]:
dataloader1

<torch.utils.data.dataloader.DataLoader at 0x7bb7b6a089d0>

In [None]:
for batch_features,batch_label in dataloader1:
  print(batch_features,batch_label)
  print('*'*40)

tensor([[-0.7206, -0.9606],
        [ 1.8997,  0.8344]]) tensor([0, 1])
****************************************
tensor([[ 1.7774,  1.5116],
        [ 1.7273, -1.1858]]) tensor([1, 1])
****************************************
tensor([[-2.8954,  1.9769],
        [ 1.0683, -0.9701]]) tensor([0, 1])
****************************************
tensor([[-0.9382, -0.5430],
        [-1.9629, -0.9923]]) tensor([1, 0])
****************************************
tensor([[-0.5872, -1.9717],
        [-1.1402, -0.8388]]) tensor([0, 0])
****************************************


# **Whole Pipeline with Dataset and Data Loader**

In [None]:
df = pd.read_csv('/content/score_100.csv')

In [None]:
df.head()

Unnamed: 0,runs,form,score_100
0,28,6,0
1,48,9,1
2,9,2,0
3,55,5,1
4,17,4,0


In [None]:
x = df.iloc[:,:2]
y = df.iloc[:,-1]

In [None]:
y.head()

Unnamed: 0,score_100
0,0
1,1
2,0
3,1
4,0


In [None]:
x.head()

Unnamed: 0,runs,form
0,28,6
1,48,9
2,9,2
3,55,5
4,17,4


In [None]:
x = np.array(x)

In [None]:
x[:5]

array([[28,  6],
       [48,  9],
       [ 9,  2],
       [55,  5],
       [17,  4]])

In [None]:
y = np.array(y)
y[:5]

array([0, 1, 0, 1, 0])

In [None]:
from sklearn.model_selection import train_test_split

In [None]:


x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=.1,random_state=42)

In [None]:
# converting into tensor

x_train_tensor = torch.tensor(x_train,requires_grad=True,dtype=torch.float32)
x_test_tensor = torch.tensor(x_test,requires_grad=True,dtype=torch.float32)
y_train_tensor = torch.tensor(y_train,requires_grad=True,dtype=torch.float32)
y_test_tensor = torch.tensor(y_test,requires_grad=True,dtype=torch.float32)

### **CustomDataset Class and Data Loader**

In [None]:
class CustomDataset(Dataset):

  def __init__(self,features,labels):
    self.features = features
    self.labels = labels

  def __len__(self):
    return self.features.shape[0]

  def __getitem__(self, index):
    return self.features[index],self.labels[index]



In [None]:
train_dataset = CustomDataset(x_train_tensor,y_train_tensor)
test_dataset = CustomDataset(x_test_tensor,y_test_tensor)

In [None]:
train_dataset[10]

(tensor([66.,  1.], grad_fn=<SelectBackward0>),
 tensor(0., grad_fn=<SelectBackward0>))

In [None]:
test_dataset[1]

(tensor([31.,  3.], grad_fn=<SelectBackward0>),
 tensor(0., grad_fn=<SelectBackward0>))

### **Data Loader**

In [None]:
train_loader = DataLoader(train_dataset,batch_size=5,shuffle=True)
test_loader = DataLoader(test_dataset,batch_size=5,shuffle=True)

In [None]:
for fe , idx in train_loader:
  print(fe,idx)

tensor([[ 93.,   3.],
        [ 13.,   7.],
        [100.,   1.],
        [ 66.,   3.],
        [ 50.,   6.]], grad_fn=<StackBackward0>) tensor([1., 0., 1., 0., 1.], grad_fn=<StackBackward0>)
tensor([[46.,  5.],
        [45.,  3.],
        [55.,  5.],
        [14.,  9.],
        [65.,  7.]], grad_fn=<StackBackward0>) tensor([1., 0., 1., 1., 1.], grad_fn=<StackBackward0>)
tensor([[47.,  7.],
        [14.,  9.],
        [26., 10.],
        [45.,  1.],
        [88.,  9.]], grad_fn=<StackBackward0>) tensor([1., 1., 1., 0., 1.], grad_fn=<StackBackward0>)
tensor([[48.,  9.],
        [14.,  4.],
        [94.,  5.],
        [58.,  5.],
        [39.,  1.]], grad_fn=<StackBackward0>) tensor([1., 0., 1., 1., 0.], grad_fn=<StackBackward0>)
tensor([[27.,  4.],
        [96.,  3.],
        [ 9., 10.],
        [32.,  5.],
        [43., 10.]], grad_fn=<StackBackward0>) tensor([0., 1., 1., 0., 1.], grad_fn=<StackBackward0>)
tensor([[ 5.,  8.],
        [53.,  1.],
        [54.,  1.],
        [11.,  1.],


In [None]:
import torch.nn as nn

# **Model Designing**

In [None]:
class Classifier_NN(nn.Module):

  def __init__(self,num_features):
    super().__init__()
    self.linear = nn.Linear(num_features,1)
    self.sigmoid = nn.Sigmoid()

  def forward(self,features):
    out = self.linear(features)
    out = self.sigmoid(out)

    return out


In [None]:
learning_rate = 0.1
epochs = 20


In [None]:
# object of the model
model = Classifier_NN(x_train_tensor.shape[1])

# optimizer
optimizer = torch.optim.SGD(model.parameters(),lr = learning_rate)

# loss fucntion
loss_function = nn.BCELoss()

## **Training Pipeline**

In [None]:
for epoch in range(epochs):

  for batch_features,batch_label in train_loader:

    # forward pass
    y_pred = model(batch_features)

    # loss function
    loss = loss_function(y_pred,batch_label.reshape(-1,1))

    # clearing gradients
    optimizer.zero_grad()

    # Back propagation
    loss.backward()

    # updating parameters
    optimizer.step()


  print('Epoch: {} and Loss: {}'.format(epoch+1,loss))





Epoch: 1 and Loss: 40.0
Epoch: 2 and Loss: 60.0
Epoch: 3 and Loss: 80.0
Epoch: 4 and Loss: 82.2735366821289
Epoch: 5 and Loss: 60.060691833496094
Epoch: 6 and Loss: 40.19451141357422
Epoch: 7 and Loss: 0.0
Epoch: 8 and Loss: 60.0
Epoch: 9 and Loss: 20.0
Epoch: 10 and Loss: 43.04984664916992
Epoch: 11 and Loss: 17.391250610351562
Epoch: 12 and Loss: 6.821396827697754
Epoch: 13 and Loss: 20.0
Epoch: 14 and Loss: 40.0
Epoch: 15 and Loss: 80.0
Epoch: 16 and Loss: 60.0
Epoch: 17 and Loss: 40.0
Epoch: 18 and Loss: 20.0
Epoch: 19 and Loss: 40.0
Epoch: 20 and Loss: 40.0


2

## **Evaluation**

In [None]:
# model evaluation using data loader

model.eval() # set the model to evaluation mode

accuracy_list = []

with torch.no_grad():
  for batch_features,batch_label in test_loader:
    y_pred = model(batch_features)

    y_pred = (y_pred >0.5).float()    # converting probabilitites into binaries

    # calculate accuracy fo rthe current batch
    batch_accuracy = (y_pred.view(-1)==batch_label).float().mean().item()
    accuracy_list.append(batch_accuracy)

overall_accuracy = sum(accuracy_list)/len(accuracy_list)
print('Accuracy',overall_accuracy)

Accuracy 0.6000000238418579
