In [1]:
%load_ext autoreload
%autoreload 2

## Preprocessing
Run to generate the folder for the Torch ImageLoader class

In [None]:
# find closest image to resistivity timestamp
#def find_closest_image(ts):
    # Given a presorted list of timestamps:  s = sorted(index)
#    i = bisect_left(s, ts)
#    return min(s[max(0, i-1): i+2], key=lambda t: abs(ts - t))

import io
import csv
import shutil
img_folder = 'data/timelapse_images_fast'

with open('data/labels.csv') as csvfile:
    readCSV = csv.reader(csvfile, delimiter=',')
    for row in readCSV:
        img_name = row[1]
        label = int(row[2]=='True')
        month_folder = row[0][:10]
        #print(month_folder)
        #print(img_name,label)
        #print(os.path.join(img_folder,month_folder,img_name))
        shutil.copyfile(os.path.join(img_folder,month_folder,img_name),os.path.join(img_folder,str(label),img_name))

In [2]:
import os
import torch
import pandas as pd
#from skimage import io, transform
import matplotlib.pyplot as plt

import numpy as np
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import torch.utils.data as data
from torch.utils.data.sampler import SubsetRandomSampler

from torch.autograd import Variable
import torch.nn.functional as F
import torchvision.models as models
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm

In [3]:
seed = 42
np.random.seed(seed)
torch.manual_seed(seed)

<torch._C.Generator at 0x7fd42c0efc90>

In [4]:
opts = {}
opts['epochs'] = 10
opts['batch_size'] = 64
opts['lr'] = 0.003
opts['nr_classes'] = 2

TRAIN_DATA_PATH = "./data/binary_classification"
TEST_DATA_PATH = "./test_named_cl/"
TRANSFORM_IMG = transforms.Compose([
    transforms.CenterCrop(64),
    #transforms.Resize(256),
    transforms.ToTensor(),
    #transforms.Normalize(mean=[0.485, 0.456, 0.406],
    #                     std=[0.229, 0.224, 0.225] )
    ])

writer = SummaryWriter()

train_data = torchvision.datasets.ImageFolder(root=TRAIN_DATA_PATH, transform=TRANSFORM_IMG)
print(len(train_data.imgs))
# split into (train,val,test)
dataset_size = len(train_data)
indices = list(range(dataset_size))
split = int(np.floor(0.1 * dataset_size))
np.random.shuffle(indices)
train_indices, val_indices = indices[split:], indices[:split]
train_sampler = SubsetRandomSampler(train_indices)
valid_sampler = SubsetRandomSampler(val_indices)
train_data_loader = data.DataLoader(train_data, batch_size=opts['batch_size'], sampler=train_sampler,  num_workers=4)
valid_data_loader = data.DataLoader(train_data, batch_size=opts['batch_size'], sampler=valid_sampler,  num_workers=4)

#train_iter = iter(train_data_loader)
#test_data = torchvision.datasets.ImageFolder(root=TEST_DATA_PATH, transform=TRANSFORM_IMG)
#test_data_loader  = data.DataLoader(test_data, batch_size=BATCH_SIZE, shuffle=True, num_workers=4) 

102716


## Models

In [5]:
class BinaryCNN(torch.nn.Module):    
    def __init__(self):
        super(BinaryCNN, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))
        self.layer2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))
        self.drop_out = nn.Dropout()
        self.fc1 = nn.Linear(7 * 7 * 64, 1000)
        self.fc2 = nn.Linear(1000, 4)
        
    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = out.reshape(out.size(0), -1)
        out = self.drop_out(out)
        out = self.fc1(out)
        out = self.fc2(out)
        return out
    
class CNN2(nn.Module):
    # initializers
    def __init__(self, d=32):
        super(CNN2, self).__init__()
        self.conv1 = nn.Conv2d(3, d, 4, 2, 1)
        self.conv2 = nn.Conv2d(d, d*2, 4, 2, 1)
        self.conv2_bn = nn.BatchNorm2d(d*2)
        self.conv3 = nn.Conv2d(d*2, d*4, 4, 2, 1)
        self.conv3_bn = nn.BatchNorm2d(d*4)
        self.conv4 = nn.Conv2d(d*4, d*8, 4, 2, 1)
        self.conv4_bn = nn.BatchNorm2d(d*8)
        #print(self.conv4_bn)
        self.linear = nn.Linear(d*8*4*4,2)
        #self.conv5 = nn.Conv2d(d*8, 4, 4, 1, 0)

    # weight_init
    def weight_init(self, mean, std):
        for m in self._modules:
            normal_init(self._modules[m], mean, std)

    # forward method
    def forward(self, input):
        x = F.leaky_relu(self.conv1(input), 0.2)
        x = F.leaky_relu(self.conv2_bn(self.conv2(x)), 0.2)
        x = F.leaky_relu(self.conv3_bn(self.conv3(x)), 0.2)
        x = F.leaky_relu(self.conv4_bn(self.conv4(x)), 0.2)
        x = x.reshape(x.size(0), -1)
        #x = F.sigmoid(self.conv5(x))
        x = self.linear(x)

        return x
    
def normal_init(m, mean, std):
    if isinstance(m, nn.ConvTranspose2d) or isinstance(m, nn.Conv2d):
        m.weight.data.normal_(mean, std)
        m.bias.data.zero_()

## Training loop

In [6]:
import torch.optim as optim

net = CNN2()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=opts['lr'])
bins = np.array([0,91,182,273,366])
nr_batches = len(train_data_loader)

for epoch in range(opts['epochs']):  
    running_loss = 0.0
    for i, sample in tqdm(enumerate(train_data_loader, 0)):
        inputs, labels = sample
        # quantize labels to 4 bins
        #labels_q = torch.tensor(np.digitize(labels,bins))
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0
        
        # tensorboard 
        writer.add_scalar('Loss/train', loss, epoch*nr_batches + i)
    
    # run on test set
    test_losses = []
    for i, test_sample in tqdm(enumerate(valid_data_loader, 0)):
        test_inputs, test_labels = test_sample
        # forward
        test_outputs = net(test_inputs)
        test_loss = criterion(test_outputs, test_labels)
        test_losses.append(test_loss)
        
    writer.add_scalar('Loss/test', np.mean(test_losses), epoch)
    
print('Finished Training')

1445it [03:16,  7.36it/s]
161it [00:08, 18.83it/s]
1445it [03:05,  7.81it/s]
161it [00:08, 18.84it/s]
1445it [03:04,  7.82it/s]
161it [00:08, 18.79it/s]
1445it [03:08,  7.66it/s]
161it [00:08, 19.02it/s]
1445it [03:05,  7.78it/s]
161it [00:08, 18.51it/s]
1445it [03:06,  7.76it/s]
161it [00:08, 19.03it/s]
1445it [03:04,  7.84it/s]
161it [00:08, 19.06it/s]
1207it [02:35,  7.51it/s]

KeyboardInterrupt: 

## Misc

In [9]:
#import seaborn as sns
#data = pd.read_csv("data/timeseries_derived_data_products/MH11_resistivity_rock_2017.csv") 
# Preview the first 5 lines of the loaded data 
data = pd.read_csv("data/labels.csv") 
#pd.DataFrame.plot.line(data)
data

Unnamed: 0,2017-01-01 07:00:00+00:00,20170101_070009.JPG,True
0,2017-01-01 07:04:00+00:00,20170101_070409.JPG,True
1,2017-01-01 07:08:00+00:00,20170101_070809.JPG,True
2,2017-01-01 07:12:00+00:00,20170101_071211.JPG,True
3,2017-01-01 07:16:00+00:00,20170101_071610.JPG,True
4,2017-01-01 07:20:00+00:00,20170101_072011.JPG,True
...,...,...,...
102710,2017-12-31 23:00:00+00:00,20171231_234008.JPG,True
102711,2017-12-31 23:00:00+00:00,20171231_234407.JPG,True
102712,2017-12-31 23:00:00+00:00,20171231_234807.JPG,True
102713,2017-12-31 23:00:00+00:00,20171231_235207.JPG,True


In [37]:
labels = np.array([2,30,330,102,35,67,82,230])
bins = np.array([1,90,180,270,360])
np.histogram(labels, bins)
np.digitize(labels,bins)

array([1, 1, 4, 2, 1, 1, 1, 3])

In [None]:
for batch_idx, sample in enumerate(train_data_loader):
    images, labels = sample
    labels_q = np.digitize(labels,bins)
    plt.imshow(  sample[0][0].permute(1, 2, 0)  )


In [None]:
max_val = 0
min_val = 100
for i, sample in tqdm(enumerate(train_data_loader, 0)):
    max_tmp = torch.max(sample[1])
    min_tmp = torch.min(sample[1])

    if max_tmp > max_val:
        max_val = max_tmp
    if min_tmp < min_val:
        min_val = min_tmp

print(max_val)
print(min_val)

In [None]:
resnet18 = models.resnet18()