### *Data Processing Pipeline by use Pytorch*

In [64]:
import torch
import torch.nn as nn
import pandas as pd
from torchvision import transforms
import os

In [65]:
''' Data Load '''
root_dir = '/Users/mahadiur/Desktop/Deep Learning Data Processing/Data_Processing_Pipeline/Data'
dataset_path = os.path.join(root_dir, 'DigitDataset.csv')

digit_data = pd.read_csv(
    dataset_path
)
digit_data.head()

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [66]:
''' Check Device '''
Device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(Device)

cpu


In [67]:
''' Processing single example of the dataset '''
pixels = digit_data.iloc[0].values[1:].astype('float')
label = digit_data.iloc[0].values[0:1]
print(pixels.shape)
print(label.shape)
pixels = pixels.reshape(-1, 28, 28)
print(pixels.shape)

(784,)
(1,)
(1, 28, 28)


In [68]:
''' Transformation '''
Tranformations = transforms.Compose([
    transforms.Normalize(
        mean=torch.tensor([0.1307]),
        std=torch.tensor([0.3081])
    )
])

### *Data Processing Pipeline*

In [69]:
from torch.utils.data import Dataset

class DataPipeline(Dataset):
    def __init__(self, file_path, transformation=None):
        super().__init__()
        self.data = pd.read_csv(file_path)
        print(self.data.head())
        self.transformation = transformation
    
    # Dataset length
    def __len__(self):
        return len(self.data)
    
    # Process Data
    def __getitem__(self, index):
        pixels = self.data.iloc[index].values[1:].astype('float32')
        label = int(self.data.iloc[index].values[0])

        pixels = torch.tensor(pixels)
        label = torch.tensor(label)

        pixels = pixels.reshape(28, 28).unsqueeze(0) / 255
        if self.transformation:
            pixels = Tranformations(pixels)
        return pixels, label
    

dataset = DataPipeline(
    file_path=dataset_path,
    transformation=Tranformations
    )
print(len(dataset))

   label  pixel0  pixel1  pixel2  pixel3  pixel4  pixel5  pixel6  pixel7  \
0      1       0       0       0       0       0       0       0       0   
1      0       0       0       0       0       0       0       0       0   
2      1       0       0       0       0       0       0       0       0   
3      4       0       0       0       0       0       0       0       0   
4      0       0       0       0       0       0       0       0       0   

   pixel8  ...  pixel774  pixel775  pixel776  pixel777  pixel778  pixel779  \
0       0  ...         0         0         0         0         0         0   
1       0  ...         0         0         0         0         0         0   
2       0  ...         0         0         0         0         0         0   
3       0  ...         0         0         0         0         0         0   
4       0  ...         0         0         0         0         0         0   

   pixel780  pixel781  pixel782  pixel783  
0         0         0         

### *Train-Test-Validation*

In [70]:
'''
TrainDataset > BatchSize > Step 
All Batch run > 1 epoch 
'''
from torch.utils.data import  random_split

torch.manual_seed(50)

train_size = int(0.7 * len(dataset))
validation_size = int(0.15 * len(dataset))
test_size = len(dataset) - train_size - validation_size

Train_dataset, Validation_dataset, Test_dataset = random_split(
    dataset=dataset,
    lengths=[train_size, validation_size, test_size]
)

print(len(Train_dataset))
print(len(Test_dataset))
print(len(Validation_dataset))

29399
6301
6300


### **Train, Test & Validation Dataloader**

In [71]:
from torch.utils.data import DataLoader

Train_Dataloader = DataLoader(
    dataset=Train_dataset,
    batch_size=16,
    shuffle=True
)

Test_Dataloader = DataLoader(
    dataset=Test_dataset,
    batch_size=16,
    shuffle=False
)

Validation_Dataloader = DataLoader(
    dataset=Validation_dataset,
    batch_size=16,
    shuffle=False
)

In [72]:
for pixel_batch, label_batch in Train_Dataloader:
    print(pixel_batch.shape)
    print(label_batch.shape)
    break

torch.Size([16, 1, 28, 28])
torch.Size([16])


### **Digit Classifiar**

In [73]:
class DigitClassifiar(nn.Module):
    def __init__(self):
        super().__init__()
        self.layer1 = nn.Linear(28 * 28, 512)
        self.layer2 = nn.Linear(512, 256)
        self.layer3 = nn.Linear(256, 128)
        self.layer4 = nn.Linear(128, 10)
    
    def forward(self, x):
        x = x.view(-1, 28 * 28)
        x = self.layer1(x) # cross 1
        x = torch.relu(x)
        x = self.layer2(x) # cross 2
        x = torch.relu(x)
        x = self.layer3(x) # cross 3
        x = torch.relu(x)
        x = self.layer4(x) # cross 4
        return x
    


### **Run epochs**

In [74]:
def Modle_evaluation(model, data_loader, gredient, device):
    model.eval()
    total_loss = 0
    actual_val = 0
    with torch.no_grad():
        for batch_idx, (data, target) in enumerate(data_loader):
            data, target = data.to(device), target.to(device)
            outputs = model(data)
            loss = gredient(outputs, target)
            total_loss += loss.item()

            _, predict = torch.max(outputs.data, 1)
            actual_val += (predict == target).sum().item() 
        total_loss /= len(data_loader.dataset)
        actual_val /= len(data_loader.dataset)
    return total_loss, actual_val


### **Train Model**

In [75]:
gredient = nn.CrossEntropyLoss()
import torch.optim as optimizer

def Train_Model(model, train_loader, val_loader, device, gredient, optimizer, checkpoint_path, num_epochs ):
    model.train()  
    best_model_accurecy = 0
    patience = 3

    checkpoint_dir = os.path.dirname(checkpoint_path)
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)    

    for epoch in range(num_epochs):
        train_loss = 0
        for batch_idx, (pixel_batch, label_batch) in enumerate(train_loader):
            pixel_batch, label_batch = pixel_batch.to(device), label_batch.to(device)
            optimizer.zero_grad()
            outputs = model(pixel_batch)
            loss = gredient(outputs, label_batch)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
        train_loss /= len(train_loader.dataset)
        val_loss, val_accuracy = Modle_evaluation(
            model=model,
            data_loader=val_loader,
            gredient=gredient,
            device=device
        )

        if  val_accuracy > best_model_accurecy:
           best_model_accurecy = val_accuracy
           torch.save(model.state_dict(), checkpoint_path)
           print('Save mode best parameter')
        else:
           patience = patience - 1
    

        print(f'epochs {epoch+1} / {num_epochs} loss {train_loss:3f}')
        print(f'Validation loss {val_loss:3f}')
        print(f'Validation Accuracy {val_accuracy:3f}')

        if patience == 0:
           print('Model Permormance is not improving.')
           break

In [76]:
model = DigitClassifiar().to(Device)
optimizer = optimizer.Adam(model.parameters(), lr=0.001)

In [77]:
checkpoint_path = os.path.join(
    os.getcwd(), 'checkpoints', 'Best_Model.pth'
)

In [78]:
if os.path.exists(checkpoint_path):
    model.load_state_dict(torch.load(checkpoint_path))
    print('Model Loaded Successfully from checkpoint')

Model Loaded Successfully from checkpoint


In [79]:
Train_Model(
    model=model,
    train_loader=Train_Dataloader,
    val_loader=Validation_Dataloader,
    gredient=gredient,
    optimizer=optimizer,
    checkpoint_path=checkpoint_path,
    device=Device,
    num_epochs=5
)

Save mode best parameter
epochs 1 / 5 loss 0.002503
Validation loss 0.018030
Validation Accuracy 0.970476
epochs 2 / 5 loss 0.002159
Validation loss 0.015473
Validation Accuracy 0.969841
epochs 3 / 5 loss 0.001995
Validation loss 0.015391
Validation Accuracy 0.969206
epochs 4 / 5 loss 0.001573
Validation loss 0.015947
Validation Accuracy 0.969683
Model Permormance is not improving.
