In [1]:
# Problem 3 - used VSCode for this Problem

# import libraries
import numpy as np
import torch, os, copy, shutil
import torch.nn as nn
from torch.utils.data import DataLoader, random_split
from torchvision import models, datasets, transforms
from torchvision.datasets import ImageFolder
import pandas as pd
from sklearn.metrics import confusion_matrix

if torch.cuda.is_available():
    torch.backends.cudnn.deterministic = True

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f'Using {device} for inference')

Using cuda for inference


In [2]:
# upload dataset onto VSCode

#base_dir = "Kaggle Dogs vs Cats"
base_dir = "Cat vs Dog Dataset"
train_dir = os.path.join(base_dir,"train")

if not os.path.exists(f"{train_dir}/cat"):
        os.makedirs(f"{train_dir}/cat")
        
if not os.path.exists(f"{train_dir}/dog"):
        os.makedirs(f"{train_dir}/dog")

# sort the cats and dogs into two separate files
files = [filename for filename in os.listdir(train_dir) if not os.path.isdir(os.path.join(train_dir,filename))]

for filename in files:
        
        label = filename.split(".")[0]
        
        if(label == "cat"):
                shutil.move(f"{train_dir}/{filename}" , f"{train_dir}/cat/{filename}")
                
        elif(label == "dog"):
                shutil.move(f"{train_dir}/{filename}" , f"{train_dir}/dog/{filename}")

In [3]:
# set parameters and loaders
batches = 50
epochs = 5          
classes = 2           # cat and dog

data_transform = transforms.Compose( [transforms.Resize((224, 224)), transforms.ToTensor()])
dataset = datasets.ImageFolder(train_dir,transform=data_transform)
size = len(dataset)

train_data,test_data = random_split(dataset, [int(0.7*size) , int(0.3*size)] ) # train 70%, test 30%


train_loader = DataLoader(dataset=train_data, batch_size=batches, shuffle=True)
test_loader = DataLoader(dataset=test_data, batch_size = batches, shuffle=False)

dataset_classes = dataset.classes 

In [4]:
# load ResNet18 and evaluate the model
resnet18 = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
print(resnet18.fc)

resnet18.fc = nn.Linear(resnet18.fc.in_features,2) # only need 2 classes (cat and dog)
print()
print(resnet18.fc)

resnet18 = resnet18.to(device)

Linear(in_features=512, out_features=1000, bias=True)

Linear(in_features=512, out_features=2, bias=True)


In [5]:
# compare true and predicted label
def compute_accuracy (model, data_loader, device):

    model = model.to(device)
    model = model.eval()
    
    num_correct_prediction = 0
    num_total_labels = 0
    
    with torch.no_grad():
        
        for i, (inputs, labels) in enumerate(data_loader):
            inputs = inputs.to(device)
            labels = labels
            
            probabilities = model(inputs.to(device))
            predicted_class = torch.argmax(probabilities.cpu(), dim=1)
            
            num_total_labels += labels.size()[0]
            num_correct_prediction += (predicted_class == labels).sum()

    return num_correct_prediction/num_total_labels * 100

In [6]:
# train ResNet18 model
def train_model (model, data_loader, learning_rate, num_epochs, device):
    
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

    best_weights = copy.deepcopy(model.state_dict())
    best_accuracy = 0
    
    for epochs in range(num_epochs):
        
        print(f"\nEPOCH: {epochs+1}\n")
      
        model.train()
        for batch_index, (inputs, labels) in enumerate(data_loader):
            inputs = inputs.to(device)
            labels = labels.to(device)   
             
            probabilities = model(inputs)
            
            optimizer.zero_grad()
            loss = nn.functional.cross_entropy(probabilities,labels)
                
            loss.backward()
            optimizer.step()
            
            
            if (batch_index % 50 == 0) :
                print(f"Batch: {batch_index}/{len(data_loader)}    Loss: {loss}")
            
        model.eval()      # evaluate the model
        with torch.set_grad_enabled(False):
            accuracy = compute_accuracy(model,data_loader,device)
            print(f"\nAccuracy: {accuracy}%\n")
            print("-----------------------------------")
        if (accuracy > best_accuracy):
            best_accuracy = accuracy
            best_weights = model.state_dict()
    
    model.load_state_dict(best_weights)
    return model


In [7]:
resnet18 = train_model(resnet18,train_loader,0.001,epochs,device)


EPOCH: 1

Batch: 0/350    Loss: 0.6471872925758362
Batch: 50/350    Loss: 0.31590649485588074
Batch: 100/350    Loss: 0.2364652305841446
Batch: 150/350    Loss: 0.1781313121318817
Batch: 200/350    Loss: 0.15078173577785492
Batch: 250/350    Loss: 0.1603356897830963
Batch: 300/350    Loss: 0.1370546668767929

Accuracy: 97.45143127441406%

-----------------------------------

EPOCH: 2

Batch: 0/350    Loss: 0.17332641780376434
Batch: 50/350    Loss: 0.10430306196212769
Batch: 100/350    Loss: 0.0916546881198883
Batch: 150/350    Loss: 0.04703783988952637
Batch: 200/350    Loss: 0.23093071579933167
Batch: 250/350    Loss: 0.05816836282610893
Batch: 300/350    Loss: 0.09287645667791367

Accuracy: 98.02857208251953%

-----------------------------------

EPOCH: 3

Batch: 0/350    Loss: 0.1497117578983307
Batch: 50/350    Loss: 0.06633371114730835
Batch: 100/350    Loss: 0.08906285464763641
Batch: 150/350    Loss: 0.08916011452674866
Batch: 200/350    Loss: 0.11589488387107849
Batch: 250/35

In [8]:
# test ResNet18 after training
predicted_class18 = torch.empty(0).to(device)
true_label_18 = torch.empty(0)

resnet18 = resnet18.eval()

for batch_index, (inputs, labels) in enumerate(test_loader):

    true_label_18 = torch.cat((true_label_18,labels))
    
    with torch.no_grad():
        probabilities = resnet18(inputs.to(device))
        predicted_class18 = torch.cat((predicted_class18,torch.argmax(probabilities,dim=1)))

with torch.no_grad():
        prob = resnet18(inputs.to(device))
        pred_class = torch.argmax(prob, dim=1)

print(f"\n\nPredicted Class: {dataset_classes[int(pred_class[10].item())]}")
print(f"Actual Class: {dataset_classes[int(labels[10].item())]}")
print(f"\nTotal Accuracy: {(compute_accuracy(resnet18,test_loader,device))}%\n")



Predicted Class: dog
Actual Class: dog

Total Accuracy: 98.54666900634766%



In [9]:
# sklearn results - resnet18
y_true = true_label_18.to('cpu')
y_pred = predicted_class18.to('cpu')

class_label = ["cat","dog"]

confusion = confusion_matrix(y_true,y_pred)

cfm = pd.DataFrame(confusion,index=class_label, columns=class_label)
print("\n\nConfusion Matrix for ResNet18:\n")
print(cfm)

tp = np.array([confusion[i][i] for i in range(len(confusion[0]))])
fp = np.array([sum(confusion[:,i]) for i in range(len(confusion[0]))]) - tp
fn = np.array([sum(confusion[i,:]) for i in range(len(confusion[0]))])  - tp
tn = np.array([sum(sum(confusion)) for i in range(len(confusion[0]))]) - tp - fp - fn

precision = tp / (fp+tp)
recall = tp / (fn+tp)
fscore = 2 * (precision * recall)/(precision +recall)
accuracy = tp / (fn+tp)

# showing average values
print(f"\nAccuracy: {sum(accuracy)/len(accuracy)}") 
print(f"F-score: {sum(fscore)/len(fscore)}")
print(f"Precison: {sum(precision)/len(precision)}")
print(f"Recall: {sum(recall)/len(recall)}")



Confusion Matrix for ResNet18:

      cat   dog
cat  3733    26
dog    83  3658

Accuracy: 0.9854483428491214
F-score: 0.9854652131879854
Precison: 0.9855959648727457
Recall: 0.9854483428491214


In [10]:
epoch = 3 # ResNet50 has more layers than ResNet18, so I reduced the number of epochs

# load and train ResNet50 model
resnet50 = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)
print(resnet50.fc)

# we only need 2 classes need to cahnge ouput layer
resnet50.fc = nn.Linear(resnet50.fc.in_features,2)
print()
print(resnet50.fc)

resnet50 = resnet50.to(device)
resnet50 = train_model(resnet50,train_loader,0.001,epoch,device)

Linear(in_features=2048, out_features=1000, bias=True)

Linear(in_features=2048, out_features=2, bias=True)

EPOCH: 1

Batch: 0/350    Loss: 0.6808891296386719
Batch: 50/350    Loss: 0.5249034762382507
Batch: 100/350    Loss: 0.4600221514701843
Batch: 150/350    Loss: 0.3469408452510834
Batch: 200/350    Loss: 0.34465453028678894
Batch: 250/350    Loss: 0.2929426431655884
Batch: 300/350    Loss: 0.2628886103630066

Accuracy: 97.84571838378906%

-----------------------------------

EPOCH: 2

Batch: 0/350    Loss: 0.25009748339653015
Batch: 50/350    Loss: 0.21329748630523682
Batch: 100/350    Loss: 0.2063545286655426
Batch: 150/350    Loss: 0.1599852740764618
Batch: 200/350    Loss: 0.14842699468135834
Batch: 250/350    Loss: 0.11750787496566772
Batch: 300/350    Loss: 0.1434956043958664

Accuracy: 98.34285736083984%

-----------------------------------

EPOCH: 3

Batch: 0/350    Loss: 0.10729196667671204
Batch: 50/350    Loss: 0.11569489538669586
Batch: 100/350    Loss: 0.1261781305074

In [11]:
# testing ResNet50 model
predicted_class50 = torch.empty(0).to(device)
true_label50 = torch.empty(0)

resnet50 = resnet50.eval()

for batch_index, (inputs, labels) in enumerate(test_loader):

    true_label50 = torch.cat((true_label50,labels))
    
    with torch.no_grad():
        probabilities = resnet50(inputs.to(device))
        predicted_class50 = torch.cat((predicted_class50,torch.argmax(probabilities,dim=1)))


with torch.no_grad():
        prob = resnet50(inputs.to(device))
        pred_class = torch.argmax(prob, dim=1)

print(f"\n\nPredicted Class: {dataset_classes[int(pred_class[1].item())]}")
print(f"Actual Class: {dataset_classes[int(labels[1].item())]}")
print(f"\nTotal Accuracy: {(compute_accuracy(resnet50,test_loader,device))}%\n")



Predicted Class: dog
Actual Class: dog

Total Accuracy: 98.41333770751953%



In [12]:
# sklearn results

y_true = true_label50.to('cpu')
y_pred = predicted_class50.to('cpu')

confusion = confusion_matrix(y_true,y_pred)

class_label = ["cat","dog"]

cfm = pd.DataFrame(confusion,index=class_label,columns=class_label)
print("\n\nConfusion Matrix for ResNet50:\n")
print(cfm)

tp = np.array([confusion[i][i] for i in range(len(confusion[0]))])
fp = np.array([sum(confusion[:,i]) for i in range(len(confusion[0]))]) - tp
fn = np.array([sum(confusion[i,:]) for i in range(len(confusion[0]))])  - tp
tn = np.array([sum(sum(confusion)) for i in range(len(confusion[0]))]) - tp - fp - fn

precision = tp / (fp+tp)
recall = tp / (fn+tp)
fscore = 2 * (precision * recall)/(precision +recall)
accuracy = tp / (fn+tp)

# showing average values
print(f"\nAccuracy: {sum(accuracy)/len(accuracy)}") 
print(f"F-score: {sum(fscore)/len(fscore)}")
print(f"Precison: {sum(precision)/len(precision)}")
print(f"Recall: {sum(recall)/len(recall)}")



Confusion Matrix for ResNet50:

      cat   dog
cat  3715    44
dog    75  3666

Accuracy: 0.9841233218836674
F-score: 0.9841326560445718
Precison: 0.9841756217596314
Recall: 0.9841233218836674
