In [1]:
import time
import os
import faiss
import numpy as np
import torch
import torchvision.transforms as transforms
from torch.utils.data import Dataset
import torch.nn as nn
import pandas as pd
from PIL import Image

import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim
import torch.utils.data
import torchvision.datasets as datasets
import torchvision.models as models

In [2]:
# Expose only CUDA device 1 to this process.
# Note: the next cell sets the same variable to "0", which overrides this value.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

In [2]:
# Expose only CUDA device 0 to this process; this replaces the "1" assigned in the preceding cell.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

In [3]:
class GPSDataset(Dataset):
    """Image dataset driven by a CSV metadata file.

    The first column of every CSV row is used as an image path relative to
    ``root_dir``. Each image is opened with PIL and converted to RGB.

    Args:
        metadata: Path (or buffer) handed to ``pandas.read_csv``.
        root_dir: Directory prepended to each image file name.
        transform1: Optional callable producing the first view of the image.
            When ``None`` the untransformed RGB image is used as the first view.
        transform2: Optional callable producing a second view of the same
            image. When given, items are ``(img1, img2, idx)``; otherwise
            they are ``(img1, idx)``.
    """

    def __init__(self, metadata, root_dir,transform1=None, transform2=None):
        self.metadata = pd.read_csv(metadata).values
        self.root_dir = root_dir
        self.transform1 = transform1
        self.transform2 = transform2

    def __len__(self):
        """Number of rows in the metadata table."""
        return len(self.metadata)

    def __getitem__(self, idx):
        """Load image ``idx`` and return its view(s) together with ``idx``."""
        img_name = os.path.join(self.root_dir, self.metadata[idx][0])
        # Close the file handle deterministically; convert() returns a loaded
        # copy, so the image stays usable after the file is closed.
        with Image.open(img_name) as raw:
            image = raw.convert('RGB')

        # Previously img1 was left unbound when transform1 was None, which
        # raised UnboundLocalError; fall back to the plain RGB image instead.
        img1 = self.transform1(image) if self.transform1 is not None else image

        if self.transform2 is not None:
            img2 = self.transform2(image)
            return img1, img2, idx

        return img1, idx

class AUGLoss(nn.Module):
    """Sum of per-row Euclidean distances between two batches of vectors."""

    def __init__(self):
        super().__init__()

    def forward(self, x1, x2):
        """Return the L2 distance between ``x1`` and ``x2`` over dim 1, summed over the remaining dims."""
        diff = x1 - x2
        per_row = torch.sqrt((diff * diff).sum(1))
        return per_row.sum()

# The code below comes from the GitHub code of "Deep Clustering for Unsupervised Learning of Visual Features".
def preprocess_features(npdata, pca):
    """PCA-whiten a feature matrix with Faiss, then L2-normalise each row.

    Args:
        npdata: 2-D array of features (one row per sample); cast to float32.
        pca: Output dimensionality given to ``faiss.PCAMatrix``.

    Returns:
        The projected features, each row divided by its own L2 norm.
    """
    _, n_features = npdata.shape
    samples = npdata.astype('float32')

    # PCA with eigen_power=-0.5 (whitening), fitted on the data it is applied to.
    whitener = faiss.PCAMatrix(n_features, pca, eigen_power=-0.5)
    whitener.train(samples)
    assert whitener.is_trained
    projected = whitener.apply_py(samples)

    # Row-wise L2 normalisation.
    norms = np.linalg.norm(projected, axis=1, keepdims=True)
    return projected / norms

def cluster_assign(images_lists, dataset):
    """Flatten per-cluster image lists into parallel index / pseudo-label lists.

    Args:
        images_lists: Sequence where entry ``c`` holds the image indexes
            assigned to cluster ``c``.
        dataset: Passed through unchanged to ``ReassignedDataset``.

    Returns:
        ``ReassignedDataset(image_indexes, pseudolabels, dataset, transform)``.

    NOTE(review): ``ReassignedDataset`` is not defined or imported in the
    visible part of this notebook — calling this function would raise
    NameError unless it is provided elsewhere. Confirm.
    """
    assert images_lists is not None

    image_indexes, pseudolabels = [], []
    for cluster_id, members in enumerate(images_lists):
        image_indexes += members
        pseudolabels += [cluster_id] * len(members)

    # Random crop + flip augmentation followed by ImageNet-style normalisation.
    augment = transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])

    return ReassignedDataset(image_indexes, pseudolabels, dataset, augment)


def run_kmeans(x, nmb_clusters):
    """Cluster the rows of ``x`` with Faiss k-means on a GPU flat-L2 index.

    Args:
        x: 2-D feature array; its second dimension is used as the vector size.
        nmb_clusters: Number of clusters requested from ``faiss.Clustering``.

    Returns:
        A tuple ``(assignments, final_loss)`` where ``assignments`` is a list
        with one int per row of ``x`` (the id returned by a 1-nearest search
        in the index) and ``final_loss`` is the objective recorded for the
        last k-means iteration.
    """
    # n_data is unpacked but not used below.
    n_data, d = x.shape

    # faiss implementation of k-means
    clus = faiss.Clustering(d, nmb_clusters)

    # Change faiss seed at each k-means so that the randomly picked
    # initialization centroids do not correspond to the same feature ids
    # from an epoch to another.
    # The seed is drawn from NumPy's global RNG, so np.random.seed() controls it.
    clus.seed = np.random.randint(1234)

    clus.niter = 20
    clus.max_points_per_centroid = 10000000
    res = faiss.StandardGpuResources()
    flat_config = faiss.GpuIndexFlatConfig()
    flat_config.useFloat16 = False
    # Device 0 is relative to the devices left visible by CUDA_VISIBLE_DEVICES.
    flat_config.device = 0
    index = faiss.GpuIndexFlatL2(res, d, flat_config)

    # perform the training
    clus.train(x, index)
    # Nearest single index entry for every row; presumably the index holds the
    # trained centroids at this point, making I the cluster assignment — confirm.
    _, I = index.search(x, 1)
#     losses = faiss.vector_to_array(clus.obj)
    # Per-iteration objective values read from the clustering's iteration stats.
    stats = clus.iteration_stats
    losses = np.array([stats.at(i).obj for i in range(stats.size())])
    print('k-means loss evolution: {0}'.format(losses))

    return [int(n[0]) for n in I], losses[-1]


def compute_features(dataloader, model, N, batch_size):
    """Run ``model`` over ``dataloader`` and collect its outputs as features.

    The model is switched to eval mode and is left in eval mode on return;
    callers that continue training must call ``model.train()`` themselves.

    Args:
        dataloader: Sized iterable yielding ``(inputs, _)`` batches in a fixed
            order; the second element of each batch is ignored.
        model: Module whose output can be reshaped to ``(-1, 1280)``.
        N: Total number of samples, i.e. the number of rows of the result.
        batch_size: Batch size used by ``dataloader``; it determines where
            each batch is written in the result.

    Returns:
        A float32 NumPy array of shape ``(N, 1280)``.

    Raises:
        ValueError: If ``dataloader`` yields no batches.
    """
    model.eval()

    # Send inputs to the device the model lives on. A model without
    # parameters falls back to CUDA, which is what the code always used before.
    try:
        device = next(model.parameters()).device
    except StopIteration:
        device = torch.device('cuda')

    features = None
    n_batches = len(dataloader)

    # Inference only: disabling autograd avoids building a graph per batch.
    with torch.no_grad():
        # discard the label information in the dataloader
        for i, (inputs, _) in enumerate(dataloader):
            inputs = inputs.to(device)
            aux = model(inputs).detach().cpu().numpy()
            aux = aux.reshape(-1, 1280).astype('float32')
            if features is None:
                features = np.zeros((N, aux.shape[1]), dtype='float32')

            if i < n_batches - 1:
                features[i * batch_size: (i + 1) * batch_size] = aux
            else:
                # The last batch may be smaller; it fills the remaining rows.
                features[i * batch_size:] = aux

    if features is None:
        raise ValueError('compute_features: dataloader yielded no batches')

    return features


class Kmeans(object):
    """Clusters feature vectors into ``k`` groups via ``run_kmeans``."""

    def __init__(self, k):
        self.k = k

    def cluster(self, data,pca):
        """Preprocess ``data``, run k-means and record the assignments.

        Args:
            data: Feature matrix, one row per sample.
            pca: Target dimensionality forwarded to ``preprocess_features``.

        Returns:
            ``(loss, label)``: the final k-means loss and a CUDA tensor with
            the cluster id of every row of ``data``. ``self.images_lists`` is
            set to a list of ``k`` lists holding the row indexes per cluster.
        """
        started = time.time()

        # PCA-reducing, whitening and L2-normalization
        reduced = preprocess_features(data, pca)

        # cluster the data
        assignments, loss = run_kmeans(reduced, self.k)

        self.images_lists = [[] for _ in range(self.k)]
        label = []
        for sample_idx in range(len(data)):
            cluster_id = assignments[sample_idx]
            label.append(cluster_id)
            self.images_lists[cluster_id].append(sample_idx)

        label = torch.tensor(label).cuda()
        print(label)

        print('k-means time: {0:.0f} s'.format(time.time() - started))

        return loss, label


In [4]:
# Seed torch (CPU and all CUDA devices) and NumPy's global RNG.
# run_kmeans draws its faiss seed from NumPy's global RNG, so this also fixes that seed.
torch.manual_seed(3)
torch.cuda.manual_seed_all(3)
np.random.seed(3)

# torch.load unpickles a complete model object: only load checkpoints from a trusted source.
# NOTE(review): this checkpoint is under res_zl12_effnet_b0_9.7km, while the city/nature cells
# further down reload res_pretrained.pt from res_zl12_effnet_v4 — confirm which one is intended.
model = torch.load('/home/haoying/res_zl12_effnet_b0_9.7km/res_pretrained.pt')

In [5]:
# Display the loaded architecture (the recorded output below is truncated).
model

EfficientNet(
  (_conv_stem): Conv2dStaticSamePadding(
    3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False
    (static_padding): ZeroPad2d(padding=(0, 1, 0, 1), value=0.0)
  )
  (_bn0): BatchNorm2d(32, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)
  (_blocks): ModuleList(
    (0): MBConvBlock(
      (_depthwise_conv): Conv2dStaticSamePadding(
        32, 32, kernel_size=(3, 3), stride=[1, 1], groups=32, bias=False
        (static_padding): ZeroPad2d(padding=(1, 1, 1, 1), value=0.0)
      )
      (_bn1): BatchNorm2d(32, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)
      (_se_reduce): Conv2dStaticSamePadding(
        32, 8, kernel_size=(1, 1), stride=(1, 1)
        (static_padding): Identity()
      )
      (_se_expand): Conv2dStaticSamePadding(
        8, 32, kernel_size=(1, 1), stride=(1, 1)
        (static_padding): Identity()
      )
      (_project_conv): Conv2dStaticSamePadding(
        32, 16, kernel_size=

In [6]:
# feature = model._fc.in_features
# model._fc = nn.Linear(in_features=feature,out_features=3,bias=True)
# print(model)

In [6]:
# Replace the final linear layer (_fc) and the _swish module with identity layers so the
# model output is no longer passed through them; later cells reshape that output to (-1, 1280).
# NOTE(review): if the model's forward pass uses _swish in more than one place, this removes
# the activation everywhere it is used — confirm that is intended.
model._fc = nn.Identity()
model._swish = nn.Identity()
# model = nn.Sequential(*(list(model.children())[:-3])) # strips off last linear layer
model = model.cuda()

In [7]:
# Deterministic preprocessing used when extracting features for clustering.
cluster_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# First augmented view: same resize/crop plus random horizontal and vertical flips.
train_transform1 = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Second augmented view: defined identically, but its flips are drawn independently.
train_transform2 = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [8]:
# criterion: cross-entropy between the linear head's logits and the k-means pseudo-labels.
# criterion2: summed Euclidean distance between the features of the two augmented views.
criterion = nn.CrossEntropyLoss().cuda()
criterion2 = AUGLoss().cuda()

In [9]:
# rural
clusterset = GPSDataset('/home/haoying/res_zl12_effnet_b0_9.7km/nightlights_labeled1.csv', '/home/haoying/data_zl12/', cluster_transform)
trainset = GPSDataset('/home/haoying/res_zl12_effnet_b0_9.7km/nightlights_labeled1.csv', '/home/haoying/data_zl12/', train_transform1, train_transform2)

In [10]:
# Print the row counts of the three labelled CSV splits (1, 2, 0).
# NOTE(review): these are read from res_zl12_effnet_v4, whereas the rural datasets above are
# built from res_zl12_effnet_b0_9.7km — confirm both directories hold the same split.
ddf1 = pd.read_csv('/home/haoying/res_zl12_effnet_v4/nightlights_labeled1.csv')
ddf2 = pd.read_csv('/home/haoying/res_zl12_effnet_v4/nightlights_labeled2.csv')
ddf0 = pd.read_csv('/home/haoying/res_zl12_effnet_v4/nightlights_labeled0.csv')

print(len(ddf1),len(ddf2),len(ddf0))

19397 9125 9214


In [13]:
# Rural setup: extract features, cluster them into pseudo-labels, then create the linear
# head and the optimizers used by the training loop.
# clusterloader is not shuffled, so feature row i corresponds to dataset index i.
clusterloader = torch.utils.data.DataLoader(clusterset, batch_size=30, shuffle=False, num_workers=0)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=30, shuffle=True, num_workers=0, drop_last = True)
# 8 clusters for this split.
deepcluster = Kmeans(8)

# The last argument must equal clusterloader's batch_size (30): it is used to place each batch.
features = compute_features(clusterloader, model, len(clusterset),30) 
# NOTE(review): pca=10 presumably comes from the sklearn PCA cell below (80% variance) — confirm.
clustering_loss, p_label = deepcluster.cluster(features,pca=10)
# Kmeans.cluster already returns a CUDA tensor; this round trip through a list rebuilds it.
p_label = p_label.tolist()
p_label = torch.tensor(p_label).cuda()
# compute_features put the model in eval mode; switch back before training.
model.train()

# Linear head mapping the 1280-d features to the 8 pseudo-classes.
fc = nn.Linear(1280, 8)
fc.weight.data.normal_(0, 0.01)
fc.bias.data.zero_()
fc.cuda()

# Separate plain-SGD optimizers for the backbone and for the head.
optimizer = torch.optim.SGD(model.parameters(),lr=0.001)
optimizer1 = torch.optim.SGD(fc.parameters(),lr=0.001)

k-means loss evolution: [12055.38769531  7210.95703125  6853.24609375  6726.71240234
  6666.53662109  6633.046875    6610.97021484  6590.04882812
  6569.46142578  6554.92041016  6543.77197266  6535.80712891
  6531.55126953  6529.04101562  6527.16748047  6525.515625
  6524.50341797  6523.54736328  6522.63427734  6522.15332031]
tensor([4, 3, 2,  ..., 2, 4, 2], device='cuda:0')
k-means time: 6 s


In [14]:
# Fit sklearn PCA on the extracted features with n_components=0.80 (a variance fraction)
# and show the shape of the reduced matrix.
# NOTE(review): the resulting column count presumably determines the `pca` argument given to
# deepcluster.cluster — confirm. This import sits mid-notebook rather than in the import cell.
from sklearn.decomposition import PCA

X_ = features
pca = PCA(n_components = 0.80) 
pca.fit(X_)
reduced_X = pca.transform(X_)
reduced_X.shape

(21672, 10)

In [None]:
# Rural training loop: trains the backbone and the linear head against the k-means pseudo-labels.
for epoch in range(0, 100):
    print("Epoch : %d"% (epoch))
    
    for batch_idx, (inputs1, inputs2, indexes) in enumerate(trainloader):
        inputs1, inputs2, indexes = inputs1.cuda(), inputs2.cuda(), indexes.cuda()           
        batch_size = inputs1.shape[0]
        # Pseudo-label of each sample, looked up by its dataset index.
        labels = p_label[indexes].cuda()
        # Both views go through the model in one forward pass; the first batch_size rows are view 1.
        inputs = torch.cat([inputs1, inputs2])
        outputs = model(inputs)
        outputs = outputs.reshape(-1,1280)
        outputs1 = outputs[:batch_size]
        outputs2 = outputs[batch_size:]
        # Only view 1 is classified.
        outputs3 = fc(outputs1)
        ce_loss = criterion(outputs3, labels) 
        # NOTE(review): 60 is a hard-coded divisor (the loaders use batch_size=30) — confirm the
        # intended normalisation.
        aug_loss = criterion2(outputs1, outputs2) / 60
#         aug_loss = criterion2(outputs1, outputs2) / 10
        # NOTE(review): `loss` is only used for logging. backward() below is called on ce_loss
        # alone, so aug_loss contributes no gradient — confirm this is intentional.
        loss = ce_loss + aug_loss
        optimizer.zero_grad()
        optimizer1.zero_grad()
        ce_loss.backward()
#         aug_loss.backward()
        optimizer.step()
        optimizer1.step()

        # Log every 20th batch.
        if batch_idx % 20 == 0:
            print("[BATCH_IDX : ", batch_idx, "LOSS : ",loss.item(), "CE_LOSS : ",ce_loss.item(),"AUG_LOSS : ",aug_loss.item(),"]" )

Epoch : 0
[BATCH_IDX :  0 LOSS :  36.797725677490234 CE_LOSS :  2.5759634971618652 AUG_LOSS :  34.221763610839844 ]
[BATCH_IDX :  20 LOSS :  35.96944808959961 CE_LOSS :  1.6660598516464233 AUG_LOSS :  34.30338668823242 ]
[BATCH_IDX :  40 LOSS :  34.961509704589844 CE_LOSS :  1.3149278163909912 AUG_LOSS :  33.646583557128906 ]
[BATCH_IDX :  60 LOSS :  36.10052490234375 CE_LOSS :  1.6823190450668335 AUG_LOSS :  34.41820526123047 ]
[BATCH_IDX :  80 LOSS :  35.3938102722168 CE_LOSS :  1.6207516193389893 AUG_LOSS :  33.7730598449707 ]
[BATCH_IDX :  100 LOSS :  36.46091842651367 CE_LOSS :  1.9249355792999268 AUG_LOSS :  34.53598403930664 ]
[BATCH_IDX :  120 LOSS :  35.322486877441406 CE_LOSS :  1.713403344154358 AUG_LOSS :  33.60908508300781 ]
[BATCH_IDX :  140 LOSS :  35.98539733886719 CE_LOSS :  1.5343314409255981 AUG_LOSS :  34.45106506347656 ]
[BATCH_IDX :  160 LOSS :  35.101646423339844 CE_LOSS :  1.9689923524856567 AUG_LOSS :  33.132652282714844 ]
[BATCH_IDX :  180 LOSS :  35.906974792

[BATCH_IDX :  80 LOSS :  35.91375732421875 CE_LOSS :  1.5454859733581543 AUG_LOSS :  34.36827087402344 ]
[BATCH_IDX :  100 LOSS :  35.39961624145508 CE_LOSS :  1.308115839958191 AUG_LOSS :  34.09149932861328 ]
[BATCH_IDX :  120 LOSS :  35.32647705078125 CE_LOSS :  1.5051932334899902 AUG_LOSS :  33.821285247802734 ]
[BATCH_IDX :  140 LOSS :  35.375728607177734 CE_LOSS :  1.1032346487045288 AUG_LOSS :  34.27249526977539 ]
[BATCH_IDX :  160 LOSS :  35.622589111328125 CE_LOSS :  1.5080959796905518 AUG_LOSS :  34.11449432373047 ]
[BATCH_IDX :  180 LOSS :  35.238189697265625 CE_LOSS :  1.0033652782440186 AUG_LOSS :  34.234825134277344 ]
[BATCH_IDX :  200 LOSS :  36.092918395996094 CE_LOSS :  1.658701777458191 AUG_LOSS :  34.4342155456543 ]
[BATCH_IDX :  220 LOSS :  35.33893585205078 CE_LOSS :  1.1230380535125732 AUG_LOSS :  34.21589660644531 ]
[BATCH_IDX :  240 LOSS :  35.56914520263672 CE_LOSS :  1.3499760627746582 AUG_LOSS :  34.21916961669922 ]
[BATCH_IDX :  260 LOSS :  36.101783752441406

[BATCH_IDX :  160 LOSS :  35.005889892578125 CE_LOSS :  1.111380934715271 AUG_LOSS :  33.894508361816406 ]
[BATCH_IDX :  180 LOSS :  34.993499755859375 CE_LOSS :  1.1406716108322144 AUG_LOSS :  33.85282897949219 ]
[BATCH_IDX :  200 LOSS :  35.44406509399414 CE_LOSS :  1.2645494937896729 AUG_LOSS :  34.17951583862305 ]
[BATCH_IDX :  220 LOSS :  35.9193229675293 CE_LOSS :  1.5053240060806274 AUG_LOSS :  34.413997650146484 ]
[BATCH_IDX :  240 LOSS :  35.50479507446289 CE_LOSS :  1.4339512586593628 AUG_LOSS :  34.07084274291992 ]
[BATCH_IDX :  260 LOSS :  35.740478515625 CE_LOSS :  1.2723681926727295 AUG_LOSS :  34.468109130859375 ]
[BATCH_IDX :  280 LOSS :  35.556121826171875 CE_LOSS :  1.3998572826385498 AUG_LOSS :  34.15626525878906 ]
[BATCH_IDX :  300 LOSS :  35.642757415771484 CE_LOSS :  0.9844444990158081 AUG_LOSS :  34.6583137512207 ]
[BATCH_IDX :  320 LOSS :  34.93177795410156 CE_LOSS :  1.281515121459961 AUG_LOSS :  33.65026092529297 ]
[BATCH_IDX :  340 LOSS :  34.962608337402344 

[BATCH_IDX :  240 LOSS :  35.90045166015625 CE_LOSS :  0.9947369694709778 AUG_LOSS :  34.90571594238281 ]
[BATCH_IDX :  260 LOSS :  35.24053192138672 CE_LOSS :  1.1928753852844238 AUG_LOSS :  34.04765701293945 ]
[BATCH_IDX :  280 LOSS :  35.26482009887695 CE_LOSS :  1.500867486000061 AUG_LOSS :  33.763954162597656 ]
[BATCH_IDX :  300 LOSS :  35.560546875 CE_LOSS :  1.6166983842849731 AUG_LOSS :  33.94384765625 ]
[BATCH_IDX :  320 LOSS :  35.836822509765625 CE_LOSS :  1.252665400505066 AUG_LOSS :  34.58415603637695 ]
[BATCH_IDX :  340 LOSS :  35.90696716308594 CE_LOSS :  0.8413085341453552 AUG_LOSS :  35.06565856933594 ]
[BATCH_IDX :  360 LOSS :  35.87959671020508 CE_LOSS :  1.2604871988296509 AUG_LOSS :  34.619110107421875 ]
[BATCH_IDX :  380 LOSS :  35.2229118347168 CE_LOSS :  1.0990946292877197 AUG_LOSS :  34.123817443847656 ]
[BATCH_IDX :  400 LOSS :  35.95759963989258 CE_LOSS :  1.1736432313919067 AUG_LOSS :  34.78395462036133 ]
[BATCH_IDX :  420 LOSS :  35.475730895996094 CE_LOSS 

KeyboardInterrupt: 

In [20]:
# Save the whole model object (not a state_dict) as rural.pt in the v4 results directory.
# os.chdir changes the working directory for every later cell as well.
os.chdir('/home/haoying/res_zl12_effnet_v4')
torch.save(model, 'rural.pt')

In [21]:
# city
clusterset = GPSDataset('/home/haoying/res_zl12_effnet_v4/nightlights_labeled2.csv', '/home/haoying/data_zl12/', cluster_transform)
trainset = GPSDataset('/home/haoying/res_zl12_effnet_v4/nightlights_labeled2.csv', '/home/haoying/data_zl12/', train_transform1, train_transform2)

In [22]:
# Start the city split from a freshly loaded pretrained checkpoint and strip the same two layers.
# torch.load unpickles a complete model object: only load checkpoints from a trusted source.
model = torch.load('/home/haoying/res_zl12_effnet_v4/res_pretrained.pt')
model._fc = nn.Identity()
model._swish = nn.Identity()
# model = nn.Sequential(*(list(model.children())[:-3])) # strips off last linear layer
model = model.cuda()

In [24]:
# Fit sklearn PCA (80% variance fraction) on `features` and show the reduced shape.
# NOTE(review): in notebook order this cell comes before the cell that recomputes `features`
# for the city split, so on a top-to-bottom run `features` would still hold the previous
# split's features — confirm the intended execution order.
from sklearn.decomposition import PCA

X_ = features
pca = PCA(n_components = 0.80) 
pca.fit(X_)
reduced_X = pca.transform(X_)
reduced_X.shape

(9125, 13)

In [25]:
# City setup: extract features, cluster them into pseudo-labels, then create the linear head
# and the optimizers used by the training loop.
# clusterloader is not shuffled, so feature row i corresponds to dataset index i.
clusterloader = torch.utils.data.DataLoader(clusterset, batch_size=10, shuffle=False, num_workers=0)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=10, shuffle=True, num_workers=0, drop_last = True)
# 5 clusters for this split.
deepcluster = Kmeans(5)

# The last argument must equal clusterloader's batch_size (10): it is used to place each batch.
features = compute_features(clusterloader, model, len(clusterset), 10) 
# NOTE(review): pca=13 presumably comes from the sklearn PCA cell (80% variance) — confirm.
clustering_loss, p_label = deepcluster.cluster(features,pca=13)
# Kmeans.cluster already returns a CUDA tensor; this round trip through a list rebuilds it.
p_label = p_label.tolist()
p_label = torch.tensor(p_label).cuda()
# compute_features put the model in eval mode; switch back before training.
model.train()

# Linear head mapping the 1280-d features to the 5 pseudo-classes.
fc = nn.Linear(1280, 5)
fc.weight.data.normal_(0, 0.01)
fc.bias.data.zero_()
fc.cuda()

# Separate plain-SGD optimizers for the backbone and for the head.
optimizer = torch.optim.SGD(model.parameters(),lr=0.001)
optimizer1 = torch.optim.SGD(fc.parameters(),lr=0.001)

k-means loss evolution: [5362.55761719 2958.05908203 2809.74853516 2777.84570312 2739.91455078
 2663.68432617 2569.99804688 2534.28881836 2521.49609375 2513.04248047
 2507.09472656 2502.73754883 2497.34277344 2492.24169922 2487.96850586
 2483.46069336 2476.3112793  2467.15991211 2461.41064453 2458.2668457 ]
tensor([3, 3, 3,  ..., 0, 3, 3], device='cuda:0')
k-means time: 2 s


In [None]:
# City training loop: trains the backbone and the linear head against the k-means pseudo-labels.
for epoch in range(0, 100):
    print("Epoch : %d"% (epoch))
    
    for batch_idx, (inputs1, inputs2, indexes) in enumerate(trainloader):
        inputs1, inputs2, indexes = inputs1.cuda(), inputs2.cuda(), indexes.cuda()           
        batch_size = inputs1.shape[0]
        # Pseudo-label of each sample, looked up by its dataset index.
        labels = p_label[indexes].cuda()
        # Both views go through the model in one forward pass; the first batch_size rows are view 1.
        inputs = torch.cat([inputs1, inputs2])
        outputs = model(inputs)
        outputs=outputs.reshape(-1,1280)
        outputs1 = outputs[:batch_size]
        outputs2 = outputs[batch_size:]
        # Only view 1 is classified.
        outputs3 = fc(outputs1)
        ce_loss = criterion(outputs3, labels)
        # NOTE(review): 30 is a hard-coded divisor (the loaders use batch_size=10) — confirm the
        # intended normalisation.
        aug_loss = criterion2(outputs1, outputs2) / 30
        # NOTE(review): `loss` is only used for logging. backward() below is called on ce_loss
        # alone, so aug_loss contributes no gradient — confirm this is intentional.
        loss = ce_loss + aug_loss
        optimizer.zero_grad()
        optimizer1.zero_grad()
        ce_loss.backward()
        optimizer.step()
        optimizer1.step()

        # Log every 20th batch.
        if batch_idx % 20 == 0:
            print("[BATCH_IDX : ", batch_idx, "LOSS : ",loss.item(), "CE_LOSS : ",ce_loss.item(),"AUG_LOSS : ",aug_loss.item(),"]" )

Epoch : 0
[BATCH_IDX :  0 LOSS :  24.323280334472656 CE_LOSS :  1.7344690561294556 AUG_LOSS :  22.58881187438965 ]
[BATCH_IDX :  20 LOSS :  23.345783233642578 CE_LOSS :  1.2103490829467773 AUG_LOSS :  22.135433197021484 ]
[BATCH_IDX :  40 LOSS :  22.673900604248047 CE_LOSS :  1.2271826267242432 AUG_LOSS :  21.446718215942383 ]
[BATCH_IDX :  60 LOSS :  22.78711700439453 CE_LOSS :  1.3806514739990234 AUG_LOSS :  21.406465530395508 ]
[BATCH_IDX :  80 LOSS :  23.44489097595215 CE_LOSS :  1.2270513772964478 AUG_LOSS :  22.21784019470215 ]
[BATCH_IDX :  100 LOSS :  23.939218521118164 CE_LOSS :  1.5294115543365479 AUG_LOSS :  22.409807205200195 ]
[BATCH_IDX :  120 LOSS :  23.112979888916016 CE_LOSS :  0.8744834661483765 AUG_LOSS :  22.238496780395508 ]
[BATCH_IDX :  140 LOSS :  22.87371253967285 CE_LOSS :  0.43530821800231934 AUG_LOSS :  22.438404083251953 ]
[BATCH_IDX :  160 LOSS :  22.936431884765625 CE_LOSS :  1.0777267217636108 AUG_LOSS :  21.858705520629883 ]
[BATCH_IDX :  180 LOSS :  24

[BATCH_IDX :  620 LOSS :  22.875547409057617 CE_LOSS :  0.6843886375427246 AUG_LOSS :  22.191158294677734 ]
[BATCH_IDX :  640 LOSS :  23.554750442504883 CE_LOSS :  1.3607271909713745 AUG_LOSS :  22.19402313232422 ]
[BATCH_IDX :  660 LOSS :  23.591798782348633 CE_LOSS :  0.6625816226005554 AUG_LOSS :  22.929216384887695 ]
[BATCH_IDX :  680 LOSS :  23.172367095947266 CE_LOSS :  0.43870919942855835 AUG_LOSS :  22.733657836914062 ]
[BATCH_IDX :  700 LOSS :  24.36164093017578 CE_LOSS :  1.8599668741226196 AUG_LOSS :  22.50167465209961 ]
[BATCH_IDX :  720 LOSS :  23.350482940673828 CE_LOSS :  0.7557963132858276 AUG_LOSS :  22.59468650817871 ]
[BATCH_IDX :  740 LOSS :  22.44379997253418 CE_LOSS :  0.5365921854972839 AUG_LOSS :  21.907207489013672 ]
[BATCH_IDX :  760 LOSS :  23.590354919433594 CE_LOSS :  0.6751765012741089 AUG_LOSS :  22.915178298950195 ]
[BATCH_IDX :  780 LOSS :  22.909465789794922 CE_LOSS :  1.5998197793960571 AUG_LOSS :  21.309646606445312 ]
[BATCH_IDX :  800 LOSS :  23.761

[BATCH_IDX :  320 LOSS :  22.85939598083496 CE_LOSS :  0.8493660688400269 AUG_LOSS :  22.01003074645996 ]
[BATCH_IDX :  340 LOSS :  24.069774627685547 CE_LOSS :  1.4231420755386353 AUG_LOSS :  22.64663314819336 ]
[BATCH_IDX :  360 LOSS :  23.13298225402832 CE_LOSS :  1.107054352760315 AUG_LOSS :  22.025928497314453 ]
[BATCH_IDX :  380 LOSS :  23.09697151184082 CE_LOSS :  0.7886620759963989 AUG_LOSS :  22.30830955505371 ]
[BATCH_IDX :  400 LOSS :  23.548948287963867 CE_LOSS :  0.9827054738998413 AUG_LOSS :  22.566242218017578 ]
[BATCH_IDX :  420 LOSS :  22.246957778930664 CE_LOSS :  0.6523185968399048 AUG_LOSS :  21.59463882446289 ]
[BATCH_IDX :  440 LOSS :  22.965030670166016 CE_LOSS :  0.5896183252334595 AUG_LOSS :  22.375411987304688 ]
[BATCH_IDX :  460 LOSS :  24.501638412475586 CE_LOSS :  1.5679492950439453 AUG_LOSS :  22.93368911743164 ]
[BATCH_IDX :  480 LOSS :  23.815216064453125 CE_LOSS :  1.2826488018035889 AUG_LOSS :  22.532567977905273 ]
[BATCH_IDX :  500 LOSS :  24.07075119

[BATCH_IDX :  20 LOSS :  22.95826530456543 CE_LOSS :  0.9326141476631165 AUG_LOSS :  22.025651931762695 ]
[BATCH_IDX :  40 LOSS :  23.431737899780273 CE_LOSS :  1.1347019672393799 AUG_LOSS :  22.297035217285156 ]
[BATCH_IDX :  60 LOSS :  24.870365142822266 CE_LOSS :  1.9409332275390625 AUG_LOSS :  22.929431915283203 ]
[BATCH_IDX :  80 LOSS :  23.546855926513672 CE_LOSS :  0.7973687648773193 AUG_LOSS :  22.749486923217773 ]
[BATCH_IDX :  100 LOSS :  23.7030086517334 CE_LOSS :  0.9021127820014954 AUG_LOSS :  22.80089569091797 ]
[BATCH_IDX :  120 LOSS :  22.191682815551758 CE_LOSS :  0.4350524842739105 AUG_LOSS :  21.756629943847656 ]
[BATCH_IDX :  140 LOSS :  23.067394256591797 CE_LOSS :  0.8109312057495117 AUG_LOSS :  22.25646209716797 ]
[BATCH_IDX :  160 LOSS :  24.249454498291016 CE_LOSS :  1.2157105207443237 AUG_LOSS :  23.03374481201172 ]
[BATCH_IDX :  180 LOSS :  24.311044692993164 CE_LOSS :  1.4259573221206665 AUG_LOSS :  22.885087966918945 ]
[BATCH_IDX :  200 LOSS :  23.032085418

[BATCH_IDX :  640 LOSS :  23.470108032226562 CE_LOSS :  0.9905996322631836 AUG_LOSS :  22.479507446289062 ]
[BATCH_IDX :  660 LOSS :  23.928781509399414 CE_LOSS :  1.6184552907943726 AUG_LOSS :  22.310325622558594 ]
[BATCH_IDX :  680 LOSS :  23.28629493713379 CE_LOSS :  0.49718061089515686 AUG_LOSS :  22.789113998413086 ]
[BATCH_IDX :  700 LOSS :  23.765644073486328 CE_LOSS :  1.175018072128296 AUG_LOSS :  22.590625762939453 ]
[BATCH_IDX :  720 LOSS :  22.67774200439453 CE_LOSS :  0.7297993898391724 AUG_LOSS :  21.94794273376465 ]
[BATCH_IDX :  740 LOSS :  22.309890747070312 CE_LOSS :  0.473261296749115 AUG_LOSS :  21.83662986755371 ]
[BATCH_IDX :  760 LOSS :  23.057228088378906 CE_LOSS :  0.6073676347732544 AUG_LOSS :  22.449859619140625 ]
[BATCH_IDX :  780 LOSS :  23.105045318603516 CE_LOSS :  0.8107409477233887 AUG_LOSS :  22.29430389404297 ]
[BATCH_IDX :  800 LOSS :  23.761539459228516 CE_LOSS :  0.8338583111763 AUG_LOSS :  22.92768096923828 ]
[BATCH_IDX :  820 LOSS :  23.526494979

[BATCH_IDX :  340 LOSS :  22.37851333618164 CE_LOSS :  0.9992860555648804 AUG_LOSS :  21.379226684570312 ]
[BATCH_IDX :  360 LOSS :  23.530197143554688 CE_LOSS :  0.6911457777023315 AUG_LOSS :  22.839052200317383 ]
[BATCH_IDX :  380 LOSS :  22.051589965820312 CE_LOSS :  0.389198362827301 AUG_LOSS :  21.662391662597656 ]
[BATCH_IDX :  400 LOSS :  23.320589065551758 CE_LOSS :  1.0440330505371094 AUG_LOSS :  22.27655601501465 ]
[BATCH_IDX :  420 LOSS :  23.829410552978516 CE_LOSS :  1.1521724462509155 AUG_LOSS :  22.67723846435547 ]
[BATCH_IDX :  440 LOSS :  22.401018142700195 CE_LOSS :  0.38730281591415405 AUG_LOSS :  22.013715744018555 ]
[BATCH_IDX :  460 LOSS :  23.374502182006836 CE_LOSS :  1.1477634906768799 AUG_LOSS :  22.22673797607422 ]
[BATCH_IDX :  480 LOSS :  24.20378875732422 CE_LOSS :  1.303061604499817 AUG_LOSS :  22.900726318359375 ]
[BATCH_IDX :  500 LOSS :  24.503175735473633 CE_LOSS :  2.8085272312164307 AUG_LOSS :  21.69464874267578 ]
[BATCH_IDX :  520 LOSS :  22.280696

[BATCH_IDX :  40 LOSS :  24.049020767211914 CE_LOSS :  1.2200195789337158 AUG_LOSS :  22.82900047302246 ]
[BATCH_IDX :  60 LOSS :  22.937108993530273 CE_LOSS :  0.6210450530052185 AUG_LOSS :  22.316064834594727 ]
[BATCH_IDX :  80 LOSS :  22.105670928955078 CE_LOSS :  0.5953255891799927 AUG_LOSS :  21.510345458984375 ]
[BATCH_IDX :  100 LOSS :  23.673871994018555 CE_LOSS :  2.0955421924591064 AUG_LOSS :  21.57832908630371 ]
[BATCH_IDX :  120 LOSS :  24.054494857788086 CE_LOSS :  1.3420275449752808 AUG_LOSS :  22.712467193603516 ]
[BATCH_IDX :  140 LOSS :  23.46622085571289 CE_LOSS :  1.5180621147155762 AUG_LOSS :  21.948158264160156 ]
[BATCH_IDX :  160 LOSS :  23.217004776000977 CE_LOSS :  0.6680790185928345 AUG_LOSS :  22.548925399780273 ]
[BATCH_IDX :  180 LOSS :  22.93289566040039 CE_LOSS :  0.7963413000106812 AUG_LOSS :  22.136554718017578 ]
[BATCH_IDX :  200 LOSS :  22.97323989868164 CE_LOSS :  0.7411280870437622 AUG_LOSS :  22.23211097717285 ]
[BATCH_IDX :  220 LOSS :  23.31301879

In [44]:
# Save the whole model object (not a state_dict) as city.pt in the v4 results directory.
os.chdir('/home/haoying/res_zl12_effnet_v4')
torch.save(model, 'city.pt')

In [9]:
# nature
clusterset = GPSDataset('/home/haoying/res_zl12_effnet_v3/nightlights_labeled0.csv', '/home/haoying/data_zl12/', cluster_transform)
trainset = GPSDataset('/home/haoying/res_zl12_effnet_v3/nightlights_labeled0.csv', '/home/haoying/data_zl12/', train_transform1, train_transform2)

In [46]:
# Start the nature split from a freshly loaded pretrained checkpoint and strip the same two layers.
# torch.load unpickles a complete model object: only load checkpoints from a trusted source.
model = torch.load('/home/haoying/res_zl12_effnet_v4/res_pretrained.pt')
model._fc = nn.Identity()
model._swish = nn.Identity()
# model = nn.Sequential(*(list(model.children())[:-3])) # strips off last linear layer
model = model.cuda()

In [11]:
# Fit sklearn PCA (80% variance fraction) on `features` and show the reduced shape.
# NOTE(review): in notebook order this cell comes before the cell that recomputes `features`
# for the nature split, so on a top-to-bottom run `features` would still hold the previous
# split's features — confirm the intended execution order.
from sklearn.decomposition import PCA

X_ = features
pca = PCA(n_components = 0.80) 
pca.fit(X_)
reduced_X = pca.transform(X_)
reduced_X.shape

(10086, 31)

In [10]:
# Nature setup: extract features, cluster them into pseudo-labels, then create the linear head
# and the optimizers used by the training loop.
# clusterloader is not shuffled, so feature row i corresponds to dataset index i.
clusterloader = torch.utils.data.DataLoader(clusterset, batch_size=20, shuffle=False, num_workers=0)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=20, shuffle=True, num_workers=0, drop_last = True)
# 8 clusters for this split.
deepcluster = Kmeans(8)

# The last argument must equal clusterloader's batch_size (20): it is used to place each batch.
features = compute_features(clusterloader, model, len(clusterset), 20) 
# NOTE(review): pca=31 presumably comes from the sklearn PCA cell (80% variance) — confirm.
clustering_loss, p_label = deepcluster.cluster(features,pca=31)
# Kmeans.cluster already returns a CUDA tensor; this round trip through a list rebuilds it.
p_label = p_label.tolist()
p_label = torch.tensor(p_label).cuda()
# compute_features put the model in eval mode; switch back before training.
model.train()

# Linear head mapping the 1280-d features to the 8 pseudo-classes.
fc = nn.Linear(1280,8)
fc.weight.data.normal_(0, 0.01)
fc.bias.data.zero_()
fc.cuda()

# Separate plain-SGD optimizers for the backbone and for the head.
optimizer = torch.optim.SGD(model.parameters(),lr=0.001)
optimizer1 = torch.optim.SGD(fc.parameters(),lr=0.001)

k-means loss evolution: [5909.20996094 3241.57470703 3117.0402832  3055.37329102 3022.85375977
 2995.59643555 2966.43139648 2942.66821289 2926.94213867 2916.28930664
 2910.00683594 2906.57275391 2904.62109375 2903.6237793  2903.13427734
 2902.8137207  2902.61132812 2902.4128418  2902.14379883 2901.40576172]
tensor([1, 0, 7,  ..., 4, 4, 5], device='cuda:0')
k-means time: 1 s


In [2]:
# Nature training loop: trains the backbone and the linear head against the k-means pseudo-labels.
# Requires trainloader, p_label, model, fc, criterion, criterion2, optimizer and optimizer1 from
# earlier cells; the recorded output below shows a NameError because trainloader was not defined
# when this cell was run.
for epoch in range(0, 100):
    print("Epoch : %d"% (epoch))
    
    for batch_idx, (inputs1, inputs2, indexes) in enumerate(trainloader):
        inputs1, inputs2, indexes = inputs1.cuda(), inputs2.cuda(), indexes.cuda()           
        batch_size = inputs1.shape[0]
        # Pseudo-label of each sample, looked up by its dataset index.
        labels = p_label[indexes].cuda()
        # Both views go through the model in one forward pass; the first batch_size rows are view 1.
        inputs = torch.cat([inputs1, inputs2])
        outputs = model(inputs)
        outputs = outputs.reshape(-1,1280)
        outputs1 = outputs[:batch_size]
        outputs2 = outputs[batch_size:]
        # Only view 1 is classified.
        outputs3 = fc(outputs1)
        ce_loss = criterion(outputs3, labels)
        # NOTE(review): 50 is a hard-coded divisor (the loaders use batch_size=20) — confirm the
        # intended normalisation.
        aug_loss = criterion2(outputs1, outputs2) / 50
        # NOTE(review): `loss` is only used for logging. backward() below is called on ce_loss
        # alone, so aug_loss contributes no gradient — confirm this is intentional.
        loss = ce_loss + aug_loss
        optimizer.zero_grad()
        optimizer1.zero_grad()
        ce_loss.backward()
        optimizer.step()
        optimizer1.step()

        # Log every 20th batch.
        if batch_idx % 20 == 0:
            print("[BATCH_IDX : ", batch_idx, "LOSS : ",loss.item(), "CE_LOSS : ",ce_loss.item(),"AUG_LOSS : ",aug_loss.item(),"]" )

Epoch : 0


NameError: name 'trainloader' is not defined

In [13]:
# Save the whole model object (not a state_dict) as nature.pt in the v4 results directory.
# NOTE(review): the recorded run of the nature training loop above ended in a NameError, so
# verify that the model saved here was actually trained on this split.
os.chdir('/home/haoying/res_zl12_effnet_v4')
torch.save(model, 'nature.pt')