## Imports

In [1]:
#Data Procssing
import pandas
import matplotlib.pylab as plt #numpy + pyplot as 1 library

#Image Processing
from PIL import Image
from matplotlib.pyplot import imshow

#File handling
import os
import glob

#Pytorch
# import torch
# from torch.utils.data import Dataset

#IBM library 
# import skillsnetworkb

## Dataset

**Link:** https://data.mendeley.com/datasets/5y9wdsg2zt/2

**Size:** 203MB

**Classes:** 2 - positive(cracked),negative(not cracked)

**Images:** 40000 , 227 x 227, RGB channels.

## Helper Functions

Function to plot images

In [2]:
def show_data(data_sample, shape = (28, 28)):
    plt.imshow(data_sample[0].numpy().reshape(shape), cmap='gray')
    plt.title('y = ' + data_sample[1])

Function to plot image from path

In [3]:
def show_image(path,title):
    image = Image.open(path)
    plt.imshow(image)
    plt.title(title)
    plt.show()

Function to return list of images full path

In [4]:
def file_maker(directory,positive_directory,negative_directory):
    positive_file_path = os.path.join(directory,positive_directory)
    positive_files=[os.path.join(positive_file_path,file) for file in  os.listdir(positive_file_path) if file.endswith(".jpg")]
    positive_files.sort()

    negative_file_path = os.path.join(directory,negative_directory)
    negative_files=[os.path.join(negative_file_path,file) for file in  os.listdir(negative_file_path) if file.endswith(".jpg")]
    negative_files.sort()
    
    return positive_files,negative_files

Function to build Dataset

In [5]:
class Dataset(Dataset):

    # Constructor
    def __init__(self,transform=None,train=True):
        directory="/resources/data"
        positive="Positive"
        negative="Negative"
        
        #define paths
        positive_file_path=os.path.join(directory,positive)
        negative_file_path=os.path.join(directory,negative)
        
        #make list of full image paths
        positive_files=[os.path.join(positive_file_path,file) for file in  os.listdir(positive_file_path) if file.endswith(".jpg")]
        negative_files=[os.path.join(negative_file_path,file) for file in  os.listdir(negative_file_path) if file.endswith(".jpg")]
        
        #sort lists
        positive_files.sort()
        negative_files.sort()
        
        #total number of images
        number_of_samples = len(positive_files) + len(negative_files)
        
        #Combine both list. Even = +ve, Odd = -ve
        self.all_files=[None]*number_of_samples
        self.all_files[::2]=positive_files
        self.all_files[1::2]=negative_files 
        
        # The transform is goint to be used on image
        self.transform = transform
        
        #List of output class 
        self.Y=torch.zeros([number_of_samples]).type(torch.LongTensor)
        #Even = 1/positive, Odd = 0/negative
        self.Y[::2]=1
        self.Y[1::2]=0
        
        
        #if train on [0-30000], val on [30000-40000]
        if train:
            self.Y=self.Y[0:30000]
            self.len=len(self.all_files)
        else:

            self.Y=self.Y[30000:]
            self.len=len(self.all_files)
       
    # Get the length
    def __len__(self):
        return self.len
    
    # Getter
    def __getitem__(self, idx):
        #open image tensor and output
        image=Image.open(self.all_files[idx])
        y=self.Y[idx]
          
        # If there is any transform method, apply it onto the image
        if self.transform:
            image = self.transform(image)

        return image, y

NameError: name 'Dataset' is not defined

Function to train model

In [7]:
def train_model(num_epochs,model,input_size):
    for epoch in range(num_epochs):
        #initialize model for training
        model.train()
        running_loss = 0.0
        correct_train = 0
        total_train = 0
        
        #Train model and optimize weights/bias
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs.view(-1,input_size))
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            
            #calculate loss and prediction per batch
            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total_train += labels.size(0)
            correct_train += (predicted == labels).sum().item()
        
        #Calculate Training loss and accuracy per epoch
        train_loss = running_loss / len(train_loader)
        train_acc = (correct_train / total_train) * 100
        train_losses.append(train_loss)
        train_accuracy.append(train_acc)
        
        #Initialize model for testing
        model.eval()
        running_loss = 0.0
        correct_test = 0
        total_test = 0
        
        #Turn off gradients for memory saving
        with torch.no_grad():
            
            #Test model on Test data
            for inputs, labels in validation_loader:
                outputs = model(inputs.view(-1,input_size))
                loss = criterion(outputs, labels)
                running_loss += loss.item()
                _, predicted = torch.max(outputs.data, 1)
                total_test += labels.size(0)
                correct_test += (predicted == labels).sum().item()
        #Calculate Validation loss and accuracy per epoch
        test_loss = running_loss / len(validation_loader)
        test_acc = (correct_test / total_test) * 100
        test_losses.append(test_loss)
        test_accuracy.append(test_acc)

        print(f'Epoch [{epoch + 1}/{num_epochs}] - '
              f'Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}% - '
              f'Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.2f}%')
        
        return train_loss,train_acc,test_loss,test_acc

Function to plot results

In [8]:
def plot_results(train_loss,train_acc,test_loss,test_acc):
    # Plot training and testing loss
    plt.figure(figsize=(12, 4))

    plt.subplot(1, 2, 1)
    plt.plot(train_losses, label='Train Loss')
    plt.plot(test_losses, label='Test Loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()

    # Plot training and testing accuracy
    plt.subplot(1, 2, 2)
    plt.plot(train_accuracy, label='Train Accuracy')
    plt.plot(test_accuracy, label='Test Accuracy')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy (%)')
    plt.legend()

    plt.tight_layout()
    plt.show()

## Steps to follow

1. Download Data
2. Make a transform compose
3. Make Dataset objects
4. Create softmax module
5. Build softmax object
6. Build optimizer and criterion
7. Build Dataloader objects
8. Train model
9. Analyze results

### Step 1

### Step 2

In [9]:
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

transform =transforms.Compose([ transforms.ToTensor(), transforms.Normalize(mean, std)])

NameError: name 'transforms' is not defined

### Step 3

In [10]:
dataset_train=Dataset(transform=transform,train=True)
dataset_val=Dataset(transform=transform,train=False)

NameError: name 'Dataset' is not defined

### Step 4

In [None]:
size_of_image=3*227*227
torch.manual_seed(0)

In [11]:
class Softmax(torch.nn.Module):
    def __init__(self, n_inputs, n_outputs):
        super().__init__()
        self.linear = torch.nn.Linear(n_inputs, n_outputs)

    def forward(self, x):
        pred = self.linear(x)
        return pred

NameError: name 'torch' is not defined

### Step 5

In [None]:
input_dim = size_of_image
output_dim = 2 #1/0

model_softmax = Softmax(input_dim,output_dim)
print(model_softmax.state_dict())

### Step 6

In [None]:
learning_rate = 0.1
momentum_term = 0.1

optimizer = torch.optim.SGD(model_softmax.parameters(), lr=learning_rate, momentum=momentum_term)
criterion = nn.CrossEntropyLoss()

### Step 7

In [None]:
batch_size_train = 5
train_loader = torch.utils.data.DataLoader(dataset=dataset_train, batch_size=batch_size_train)
validation_loader = torch.utils.data.DataLoader(dataset=dataset_val)

### Step 8

In [None]:
n_epochs = 5
train_losses = []
test_losses = []
train_accuracy = []
test_accuracy = []

train_model(n_epochs,model=model_softmax,input_size=input_dim)

### Step 9

In [None]:
plot_results(train_losses,train_accuracy,test_loss,test_acc)