In [1]:
import data_loader
from data_loader import DataFolder

2781
597




In [2]:
from __future__ import print_function, division

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy
import pandas as pd

plt.ion()   # interactive mode

<matplotlib.pyplot._IonContext at 0x1ba9afca3a0>

In [3]:
from torch.utils.data import DataLoader
from sklearn.model_selection import KFold

def k_fold_train(model,data,optimizer,criterion,scheduler,k_folds=5, num_epochs=25, batch_size=64):  
    # Set fixed random number seed
    torch.manual_seed(42)

    kfold = KFold(n_splits=k_folds, shuffle=True)
    best_result = 0.0
    for fold, (train_ids, test_ids) in enumerate(kfold.split(data)):
        print('---- Fold {}/{} ----'.format(fold+1, k_folds))
        train_subset = []
        for id in train_ids:
            train_subset.append(data.__getitem__(id))
        test_subset = []
        for id in test_ids:
            test_subset.append(data.__getitem__(id))
        train_sampler = data_loader.classes(train_subset)
        test_sampler = data_loader.classes(test_subset)
        #torch.utils.data.SubsetRandomSampler(test_ids)
        
        train_subsampler = data_loader.sampler(train_sampler[0],train_sampler[1],train_sampler[2])
        test_subsampler = data_loader.sampler(test_sampler[0],test_sampler[1],test_sampler[2])

        train_loader = DataLoader(dataset=train_subset, batch_size=batch_size,sampler= train_subsampler)
        test_loader = DataLoader(dataset=test_subset, batch_size=batch_size,sampler = test_subsampler)
    
        dataloaders = {'train': train_loader, 'test': test_loader}
        dataset_sizes = {'train': len(train_subsampler), 'test': len(test_subsampler)}
    
        since = time.time()

        best_model_wts = copy.deepcopy(model.state_dict())
        best_acc = 0.0

        for epoch in range(num_epochs):
            print('Epoch {}/{}'.format(epoch+1, num_epochs))
            print('-' * 10)

            # Each epoch has a training and validation phase
            for phase in ['train', 'test']:
                if phase == 'train':
                    model.train()  # Set model to training mode
                else:
                    model.eval()   # Set model to evaluate mode

                running_loss = 0.0
                running_corrects = 0

                # Iterate over data.
                for inputs, labels in dataloaders[phase]:
                    inputs = inputs.to(device)
                    labels = labels.to(device)

                # zero the parameter gradients
                    optimizer.zero_grad()

                    # forward
                    # track history if only in train
                    with torch.set_grad_enabled(phase == 'train'):
                        outputs = model(inputs)
                        _, preds = torch.max(outputs, 1)
                        loss = criterion(outputs, labels)

                        # backward + optimize only if in training phase
                        if phase == 'train':
                            loss.backward()
                            optimizer.step()

                    # statistics
                    running_loss += loss.item() * inputs.size(0)
                    running_corrects += torch.sum(preds == labels.data)
                if phase == 'train':
                    scheduler.step()

                epoch_loss = running_loss / dataset_sizes[phase]
                epoch_acc = running_corrects.double() / dataset_sizes[phase]

                print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                    phase, epoch_loss, epoch_acc))

                # deep copy the model
                if phase == 'test' and epoch_acc > best_acc:
                    best_acc = epoch_acc
                    best_model_wts = copy.deepcopy(model.state_dict())

            print()

        time_elapsed = time.time() - since
        print('Training complete in {:.0f}m {:.0f}s'.format(
            time_elapsed // 60, time_elapsed % 60))
        print('Best val Acc: {:4f}'.format(best_acc))

        # load best model weights
        model.load_state_dict(best_model_wts)

        if best_acc > best_result:
            best_result=best_acc
            best_model=model
    print('Best val Acc after k-folds: {:4f}'.format(best_result))        
    model_fit=best_model
    return model_fit


In [29]:
from random_baseline import stats
def test_model(testloader, model):

    model.eval()
    class_names = {'0': 'cctv', '1': 'other'}
    total_labels = []
    total_predicted = []
    # since we're not training, we don't need to calculate the gradients for our outputs
    with torch.no_grad():
        for data in testloader:
            images, labels = data
            # calculate outputs by running images through the network
            outputs = model(images)
            # the class with the highest energy is what we choose as prediction
            _,predicted = torch.max(outputs.data,1)
            total_labels+=labels.numpy().tolist()
            total_predicted+=predicted.numpy().tolist()
            
    df = pd.DataFrame({'labels':total_labels, 'predictions':total_predicted})
    #df.to_csv('resnet18_10_predictions.csv')

    #Computes the test statistics of the predictions
    for c in np.array([0, 1]):
        accuracy, precision, recall, f1 = stats(df, ['labels', 'predictions'], c)
        print(class_names.get(str(c)) + ' simple accuracy is: ' + str(round(accuracy, 4)))
        print(class_names.get(str(c)) + ' precision is: ' + str(round(precision, 4)))
        print(class_names.get(str(c)) + ' recall is: ' + str(round(recall, 4)))
        print(class_names.get(str(c)) + ' f1 is: ' + str(round(f1, 4)))

In [9]:
k_folds = 5
num_epochs = 25
batch_size = 64

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

model = models.resnet18(pretrained=True)
num_ftrs = model.fc.in_features
# Here the size of each output sample is set to 2.
# Alternatively, it can be generalized to nn.Linear(num_ftrs, len(class_names)).
model.fc = nn.Linear(num_ftrs, 2)

model = model.to(device)

criterion = nn.CrossEntropyLoss()

# Observe that all parameters are being optimized
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

# Decay LR by a factor of 0.1 every 7 epochs
scheduler = lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)
data = data_loader.train_dataset
print(data)

<data_loader.DataFolder object at 0x000001BA97590190>


In [10]:
model_fit = k_fold_train(model,data,optimizer,criterion,scheduler,k_folds,num_epochs,batch_size)

---- Fold 1/5 ----
Epoch 1/25
----------
train Loss: 0.3984 Acc: 0.8138
test Loss: 0.2147 Acc: 0.9138

Epoch 2/25
----------
train Loss: 0.1451 Acc: 0.9505
test Loss: 0.1343 Acc: 0.9461

Epoch 3/25
----------
train Loss: 0.0779 Acc: 0.9780
test Loss: 0.1711 Acc: 0.9336

Epoch 4/25
----------
train Loss: 0.0526 Acc: 0.9829
test Loss: 0.1627 Acc: 0.9318

Epoch 5/25
----------
train Loss: 0.0386 Acc: 0.9906
test Loss: 0.2360 Acc: 0.9156

Epoch 6/25
----------
train Loss: 0.0262 Acc: 0.9933
test Loss: 0.1649 Acc: 0.9210

Epoch 7/25
----------
train Loss: 0.0217 Acc: 0.9978
test Loss: 0.2634 Acc: 0.9066

Epoch 8/25
----------
train Loss: 0.0241 Acc: 0.9964
test Loss: 0.1884 Acc: 0.9138

Epoch 9/25
----------
train Loss: 0.0187 Acc: 0.9964
test Loss: 0.1861 Acc: 0.9372

Epoch 10/25
----------
train Loss: 0.0184 Acc: 0.9987
test Loss: 0.1679 Acc: 0.9264

Epoch 11/25
----------
train Loss: 0.0174 Acc: 0.9978
test Loss: 0.1787 Acc: 0.9246

Epoch 12/25
----------
train Loss: 0.0216 Acc: 0.9969
t

train Loss: 0.2291 Acc: 0.9196
test Loss: 0.1650 Acc: 0.9281

Epoch 21/25
----------
train Loss: 0.2292 Acc: 0.9240
test Loss: 0.1763 Acc: 0.9478

Epoch 22/25
----------
train Loss: 0.2344 Acc: 0.9204
test Loss: 0.1194 Acc: 0.9532

Epoch 23/25
----------
train Loss: 0.1978 Acc: 0.9294
test Loss: 0.1240 Acc: 0.9514

Epoch 24/25
----------
train Loss: 0.2514 Acc: 0.9263
test Loss: 0.1945 Acc: 0.9353

Epoch 25/25
----------
train Loss: 0.2064 Acc: 0.9308
test Loss: 0.2031 Acc: 0.9281

Training complete in 39m 28s
Best val Acc: 0.964029
---- Fold 5/5 ----
Epoch 1/25
----------
train Loss: 0.2562 Acc: 0.9236
test Loss: 0.0947 Acc: 0.9676

Epoch 2/25
----------
train Loss: 0.2165 Acc: 0.9276
test Loss: 0.1413 Acc: 0.9496

Epoch 3/25
----------
train Loss: 0.2193 Acc: 0.9290
test Loss: 0.1338 Acc: 0.9460

Epoch 4/25
----------
train Loss: 0.2287 Acc: 0.9196
test Loss: 0.1293 Acc: 0.9496

Epoch 5/25
----------
train Loss: 0.2627 Acc: 0.9133
test Loss: 0.1137 Acc: 0.9568

Epoch 6/25
----------


In [34]:
torch.save(model_fit, './models/resnet18_pretrained.pth')

In [31]:
model = torch.load('./models/resnet18_pretrained.pth')

In [16]:
model = torch.load('./models/resnet_18.pth')

In [32]:
test_model(data_loader.test_loader, model)

cctv simple accuracy is: 0.943
cctv precision is: 0.9088
cctv recall is: 0.9789
cctv f1 is: 0.9426
other simple accuracy is: 0.943
other precision is: 0.9793
other recall is: 0.9103
other f1 is: 0.9435
