In [1]:
import torch
import torch.nn as nn
from torch.nn import init
from torch.autograd import Variable
import torchvision
import torchvision.transforms as T
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from torch.utils.data.sampler import SubsetRandomSampler, RandomSampler
import torchvision.models as models
import torch.backends.cudnn as cudnn
from torch.optim.lr_scheduler import MultiStepLR, ReduceLROnPlateau,StepLR
#torch.multiprocessing.set_start_method("spawn")
import vgg_fcn
import vgg
from sklearn.model_selection import KFold,StratifiedKFold
import copy

import os
import numpy as np
import pandas as pd
from utils import progress_bar
from skimage import transform as tf

import matplotlib
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec

%matplotlib inline
plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

%load_ext autoreload
%autoreload 2

In [2]:
# Directory (with trailing slash) that holds the JSON data files; paths below
# are built by plain string concatenation.
BASE_dir = 'data/processed/'

# Load the training set into a DataFrame.
# NOTE(review): a later cell defines a function that is also named `train`,
# which rebinds this name once that cell has been run.
train = pd.read_json(BASE_dir + 'train.json')
#test = pd.read_json(BASE_dir + 'test.json')

In [3]:
def iso(arr):
    """Keep only the pixels that stand out from the background of one band.

    ``arr`` is a flat sequence of 75*75 values. A pixel is kept when it is
    strictly greater than ``mean + 2 * std`` of the whole band; every other
    pixel is multiplied by ``False`` and therefore becomes zero.

    Returns a ``(75, 75)`` array.
    """
    pixels = np.array(arr)
    threshold = np.mean(pixels) + 2 * np.std(pixels)
    grid = np.reshape(pixels, [75, 75])
    keep = grid > threshold
    return keep * grid

def size(arr):
    """Return how many entries of ``arr`` are strictly less than -5.

    Used on the thresholded band images to measure, in pixels, how large the
    isolated object is.
    """
    below_cutoff = arr < -5
    return np.sum(below_cutoff)
# Feature engineering iso1 and iso2: thresholded 75x75 version of each band.
# Columns are selected by name rather than by position so the result does not
# depend on the column order that read_json happens to produce.
train['iso1'] = train['band_1'].apply(iso)
train['iso2'] = train['band_2'].apply(iso)

# Feature engineering s1 s2 and size: pixel counts of the isolated objects.
train['s1'] = train['iso1'].apply(size)
train['s2'] = train['iso2'].apply(size)
# Bracket access is required for 'size': `train.size` is the DataFrame's own
# element-count attribute, not this column.
train['size'] = train['s1'] + train['s2']

In [14]:
# Preview the engineered columns; every band/iso cell holds thousands of
# values, so only the first rows are displayed instead of the whole frame.
train.head(10)

Unnamed: 0,band_1,band_2,id,inc_angle,is_iceberg,iso1,iso2,s1,s2,size
0,"[-27.878360999999998, -27.15416, -28.668615, -...","[-27.154118, -29.537888, -31.0306, -32.190483,...",dfd5f913,43.9239,0,"[[-0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0...","[[-0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0...",64,57,121
1,"[-12.242375, -14.920304999999999, -14.920363, ...","[-31.506321, -27.984554, -26.645678, -23.76760...",e25388fd,38.1562,0,"[[-0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0...","[[-0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0...",17,92,109
2,"[-24.603676, -24.603714, -24.871029, -23.15277...","[-24.870956, -24.092632, -20.653963, -19.41104...",58b2aaa0,45.2859,1,"[[-0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0...","[[-0.0, -0.0, -0.0, -19.411043, -0.0, -0.0, -2...",91,60,151
3,"[-22.454607, -23.082819, -23.998013, -23.99805...","[-27.889421, -27.519794, -27.165262, -29.10350...",4cfc3a18,43.8306,0,"[[-0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0...","[[-0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0...",52,48,100
4,"[-26.006956, -23.164886, -23.164886, -26.89116...","[-27.206915, -30.259186, -30.259186, -23.16495...",271f93f4,35.6256,0,"[[-0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0...","[[-0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0...",68,44,112
5,"[-20.769371, -20.769434, -25.906025, -25.90602...","[-29.288746, -29.712593, -28.884804, -28.88480...",b51d18b5,36.9034,1,"[[-0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0...","[[-0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0...",120,153,273
6,"[-26.673811, -23.666162, -27.622442, -28.31768...","[-24.557735, -26.97868, -27.622442, -29.073456...",31da1a04,34.4751,1,"[[-0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0...","[[-0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -21.9427...",133,74,207
7,"[-24.989119, -27.755224, -25.817074, -24.98927...","[-27.755173, -26.732174, -28.124943, -31.83772...",56929c16,41.1769,0,"[[-0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0...","[[-0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0...",111,53,164
8,"[-17.146641, -17.146572, -17.994583, -19.44553...","[-25.733608, -24.472507, -24.710424, -22.77215...",525ab75c,35.7829,0,"[[-0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0...","[[-0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0...",8,116,124
9,"[-24.020853, -23.551275, -27.18819, -29.126434...","[-28.702518, -33.563324, -29.571918, -29.12643...",192f56eb,43.3007,0,"[[-0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0...","[[-0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0...",60,65,125


In [3]:
# Prepare data: device flag, tensor type used by the datasets, raw DataFrame.
# torch.cuda.is_available() already returns a bool.
use_cuda = torch.cuda.is_available()
#use_cuda =False
# Images are always materialised as CPU float tensors; the training loops
# move each batch to the GPU themselves when use_cuda is set.
#dtype = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor
dtype = torch.FloatTensor
data = pd.read_json(BASE_dir + 'train.json')

class iceberg_dataset(Dataset):
    """Image-only dataset of channel-first radar images with integer labels.

    Args:
        data: indexable collection of images; each item is a channel-first
            numpy array (C, H, W).
        label: numpy array of integer class labels, converted to a LongTensor.
        transform: only checked against ``None``. When it is not ``None`` the
            image is (optionally) augmented and converted to a tensor of the
            notebook-level ``dtype``; the transform itself is never called.
            When it is ``None`` the raw numpy image is returned unchanged.
        test: when True, random augmentation is skipped.
    """
    def __init__(self, data, label, transform=None, test=False): #data: 1604 * 3 *75* 75
        self.data = data
        self.label = torch.from_numpy(label).type(torch.LongTensor)
        self.transform = transform
        self.test = test

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(img, label)`` for sample ``idx``, augmented in training mode."""
        img, label = self.data[idx], self.label[idx]
        if self.transform is not None:
            if self.test is False:
                # Random flips along H and W. The .copy() calls remove the
                # negative strides that torch.from_numpy cannot handle:
                # https://discuss.pytorch.org/t/torch-from-numpy-not-support-negative-strides/3663
                if np.random.uniform() > 0.5:
                    img = np.flip(img, axis=1).copy()
                if np.random.uniform() > 0.5:
                    img = np.flip(img, axis=2).copy()
                # Random multiple of 90 degrees; drawn as a plain int rather
                # than a 1-element array so the truth test and `k` are scalars.
                rotate = int(np.random.randint(4))
                if rotate:
                    img = np.rot90(img, k=rotate, axes=(1, 2)).copy()

                # Random isotropic zoom in [1/1.1, 1.1] (log-uniform) and a
                # random shift of up to 5 pixels applied to both axes.
                scale1 = np.exp(np.random.uniform(np.log(1/1.1), np.log(1.1)))
                tran = np.random.uniform(-5, 5)
                aug = tf.AffineTransform(translation=(tran, tran), scale=(scale1, scale1))
                # skimage interprets a 3-D array as (rows, cols, channels), so
                # the channel-first image is moved to channel-last for the
                # warp and back afterwards; warping the (C, H, W) array
                # directly would resample across the channel axis.
                warped = tf.warp(img.transpose(1, 2, 0), inverse_map=aug)
                img = warped.transpose(2, 0, 1).copy()
#             temp = []
#             for i in img:
#                 temp.append(tf.rescale(i,224/75,mode='constant'))
#             img = np.stack(temp)
            img = torch.from_numpy(img).type(dtype)
#             img = self.transform(img)

        return img, label

class iceberg_angle_dataset(Dataset):
    """Dataset yielding ``(image, angle, label)`` triples.

    Args:
        data: indexable collection of channel-first numpy images.
        angle: 1-D numpy array of per-sample values; stored as a float tensor
            with a trailing unit dimension, i.e. shape (N, 1).
        label: numpy array of integer class labels, stored as a LongTensor.
        size: accepted for signature compatibility; currently unused.
        transform: only compared against ``None``; it is never called. Random
            flips are applied when it is not ``None`` and ``test`` is False.
        test: when True, no random flips are applied.
    """
    def __init__(self, data,angle,label,size=None, transform=None, test=False): #data: 1604 * 3 *75* 75
        self.data = data
        angle_tensor = torch.from_numpy(angle).type(torch.FloatTensor)
        self.angle = angle_tensor.unsqueeze(1)
        self.label = torch.from_numpy(label).type(torch.LongTensor)
        self.transform = transform
        self.test = test

    def __len__(self):
        return len(self.data)

    def __getitem__(self,idx):
        """Return ``(img, angle, label)``; ``img`` is a tensor of the notebook ``dtype``."""
        img = self.data[idx]
        label = self.label[idx]
        angle = self.angle[idx]

        augment = self.transform is not None and self.test is False
        if augment:
            # Random flips along H and W. Copies are needed because
            # torch.from_numpy rejects negative strides:
            # https://discuss.pytorch.org/t/torch-from-numpy-not-support-negative-strides/3663
            if np.random.uniform() > 0.5:
                img = np.flip(img, axis=1).copy()
            if np.random.uniform() > 0.5:
                img = np.flip(img, axis=2).copy()

        img = torch.from_numpy(img).type(dtype)
        return img, angle, label
    
    
def stack(row):
    """Combine the 'c1', 'c2' and 'c3' entries of ``row`` into one (3, 75, 75) array."""
    channels = [row[name] for name in ('c1', 'c2', 'c3')]
    return np.stack(channels).reshape(3, 75, 75)

def raw_to_numpy(data):
    """Convert the band columns of ``data`` into one image array.

    Side effect: adds columns 'c1' and 'c2' (the bands as numpy arrays) and
    'c3' (their element-wise average) to ``data`` in place.

    Returns an array with one ``stack(row)`` result per row, stacked along a
    new leading axis.
    """
    for channel, band in (('c1', 'band_1'), ('c2', 'band_2')):
        data[channel] = data[band].apply(np.array)
    data['c3'] = (data['c1'] + data['c2'])/2
    images = [stack(record) for _, record in data.iterrows()]
    return np.stack(images)

def transform_compute(img):
    """Return per-channel ``(mean, std)`` of an array reduced over axes 0, 2 and 3."""
    reduce_axes = (0, 2, 3)
    return img.mean(axis=reduce_axes), img.std(axis=reduce_axes)

def data_aug(X, y):
    """Enlarge a batch five-fold with flips and quarter-turn rotations.

    Args:
        X: channel-first image batch of shape (N, C, H, W) with H == W.
        y: labels, one per image.

    Returns:
        ``(ch_X, ch_y)`` where ``ch_X`` concatenates, in this order, the
        originals, the horizontally flipped images, the vertically flipped
        images, the images rotated by 90 degrees and the images rotated by
        270 degrees; ``ch_y`` repeats ``y`` five times to match.

    Raises:
        ValueError: if ``X`` is not 4-D or the images are not square (a
            quarter turn would change their shape and they could not be
            concatenated with the originals).
    """
    X = np.asarray(X)
    if X.ndim != 4 or X.shape[2] != X.shape[3]:
        raise ValueError('data_aug expects X of shape (N, C, H, W) with H == W, got %s'
                         % (X.shape,))
    X_h = np.flip(X, 3)
    X_v = np.flip(X, 2)
    # Rotate in the spatial (H, W) plane. skimage's rotate would read each
    # channel-first image as rows x cols x channels and turn the wrong plane;
    # np.rot90 is also exact (no interpolation) for multiples of 90 degrees.
    X_rot_90 = np.rot90(X, k=1, axes=(2, 3))
    X_rot_270 = np.rot90(X, k=3, axes=(2, 3))
    ch_y = np.concatenate((y, y, y, y, y))
    ch_X = np.concatenate((X, X_h, X_v, X_rot_90, X_rot_270))
    return ch_X, ch_y

# Build the image array and label vector used by every later cell.
# Note that raw_to_numpy also adds the c1/c2/c3 columns to `data`.
train_X = raw_to_numpy(data)#.transpose(0,2,3,1)
# Bare expression in the middle of a cell: evaluated but not displayed.
train_X.shape     #1604 * 3 *75* 75   N*c*H*W
train_y = data['is_iceberg'].values # if iceberg then 1

In [26]:
# Upscale every channel of every image by a factor of 224/75 with
# skimage's rescale and collect the results in a new array.
# NOTE(review): later cells rebind train_X_del to train_X again, which
# discards the rescaled array produced here.
train_X_del = train_X
train_y_del = train_y
result = []
for num,i in enumerate(train_X_del):
    # i is one image; j iterates over its first axis (the channels).
    temp = []
    for j in i:
        temp.append(tf.rescale(j,224/75,mode='constant'))
    img = np.stack(temp)
    result.append(img)
    # Progress message every 50 images.
    if num%50==0:
        print('We are %d'%num)
train_X_del = np.stack(result)

We are 0
We are 50
We are 100
We are 150
We are 200
We are 250
We are 300
We are 350
We are 400
We are 450
We are 500
We are 550
We are 600
We are 650
We are 700
We are 750
We are 800
We are 850
We are 900
We are 950
We are 1000
We are 1050
We are 1100
We are 1150
We are 1200
We are 1250
We are 1300
We are 1350
We are 1400
We are 1450
We are 1500
We are 1550
We are 1600


In [35]:
# train_index=list(range(1300))
# val_index= list(range(1300,1604))
# train_index=list(range(304,1604)) 
# val_index= list(range(304))
# # train_X[train_index].shape

# # data.inc_angle = data.inc_angle.map(lambda x: 0.0 if x == 'na' else x)
# # train_index = np.where(data.inc_angle > 0)[0]
# # val_index = np.where(data.inc_angle <= 0)[0]

# # seed= np.random.RandomState(123)
# # spliter = KFold(n_splits=5,shuffle =True,random_state = seed)
# # train_index, val_index = next(spliter.split(train_X))
# train_mean, train_std = transform_compute(train_X[train_index])
# train_transform = T.Compose([
#     T.Normalize(train_mean, train_std)
# ])

# train_dataset = iceberg_dataset(data= train_X[train_index], label=train_y[train_index], transform=train_transform)
# val_dataset = iceberg_dataset(data= train_X[val_index], label=train_y[val_index], transform=train_transform, test=True)

# train_loader = DataLoader(train_dataset, batch_size = 32, num_workers=3, 
#                           shuffle=True)
# val_loader = DataLoader(val_dataset, batch_size = 64, num_workers=3)

## add augmentation 
# seed= np.random.RandomState(123)
# spliter = KFold(n_splits=5,shuffle =True,random_state = seed)
# train_index, val_index = next(spliter.split(train_X))

# train_X_af,train_y_af = data_aug(train_X[train_index], train_y[train_index])
# train_mean, train_std = transform_compute(train_X_af)
# train_transform = T.Compose([
#     T.Normalize(train_mean, train_std)
# ])

# train_dataset = iceberg_dataset(data= train_X_af, label=train_y_af, transform=train_transform)
# val_dataset = iceberg_dataset(data= train_X[val_index], label=train_y[val_index], transform=train_transform, test=True)

# train_loader = DataLoader(train_dataset, batch_size = 32, num_workers=3, 
#                           shuffle=True)
# val_loader = DataLoader(val_dataset, batch_size = 64, num_workers=3)


# Build the image-only train/validation loaders from the first fold of a
# seeded 5-fold split.
# train_X_del = train_X[data.inc_angle!='na',:,:,:]
# train_y_del = train_y[data.inc_angle!='na']
train_X_del = train_X
train_y_del = train_y

# Fixed RandomState so the shuffled split is the same on every run.
seed= np.random.RandomState(123)
spliter = KFold(n_splits=5,shuffle =True,random_state = seed)
# Only the first of the five folds is used.
train_index, val_index = next(spliter.split(train_X_del))
# # train_index=list(range(284,1471)) 
# # val_index= list(range(284))

# Per-channel statistics are computed from the training fold only.
train_mean, train_std = transform_compute(train_X_del[train_index])
# NOTE(review): iceberg_dataset only checks whether `transform` is None; the
# call that would apply it is commented out there, so this normalisation is
# not actually applied to the images.
train_transform = T.Compose([
    T.Normalize(train_mean, train_std)
])
# af_train_X, af_train_y = data_aug(train_X_del[train_index], train_y_del[train_index])
#af_train_X, af_train_y = data_aug2(train_X_del[train_index], train_y_del[train_index])
af_train_X, af_train_y = train_X_del[train_index], train_y_del[train_index]

# Training set gets random augmentation (test defaults to False); the
# validation set is built with test=True so it is left untouched.
train_dataset = iceberg_dataset(data= af_train_X, label=af_train_y, transform=train_transform)
val_dataset = iceberg_dataset(data= train_X_del[val_index], label=train_y_del[val_index], transform=train_transform, test=True)

train_loader = DataLoader(train_dataset, batch_size = 16, num_workers=3, 
                          shuffle=True)
val_loader = DataLoader(val_dataset, batch_size = 64, num_workers=3)

In [28]:
# Reset the working arrays to the original images and recompute the
# normalisation statistics.
# NOTE: relies on `train_index` left in the kernel by a previously run cell.
train_X_del = train_X
train_y_del = train_y
train_mean, train_std = transform_compute(train_X_del[train_index])
train_transform = T.Compose([
    T.Normalize(train_mean, train_std)
])

In [4]:
## angle and size

# Missing incidence angles are stored as the string 'na'; replace them with 0
# so the column can be cast to float. A single .loc assignment writes into
# `data` itself; the chained form data['inc_angle'][mask] = 0 triggers
# SettingWithCopyWarning and is not guaranteed to modify the frame.
data.loc[data['inc_angle'] == 'na', 'inc_angle'] = 0
train_angle_del = data['inc_angle'].values
# np.float was only an alias of the builtin float and has been removed from
# NumPy; np.float64 is the same dtype.
train_angle = train_angle_del.astype(np.float64)
#train_size = train['size'].values

train_X_del = train_X
train_y_del = train_y

# Seeded 5-fold split; only the first fold is used.
seed= np.random.RandomState(123)
spliter = KFold(n_splits=5,shuffle =True,random_state = seed)
train_index, val_index = next(spliter.split(train_X_del))
# # train_index=list(range(284,1471)) 
# # val_index= list(range(284))

# Per-channel statistics from the training fold.
# NOTE(review): iceberg_angle_dataset only checks `transform` against None and
# never calls it, so this normalisation is not applied to the images.
train_mean, train_std = transform_compute(train_X_del[train_index])
train_transform = T.Compose([
    T.Normalize(train_mean, train_std)
])
#af_train_X,af_train_angle, af_train_y = data_aug(train_X_del[train_index], train_angle_del[train_index],train_y_del[train_index])
#af_train_X, af_train_y = data_aug2(train_X_del[train_index], train_y_del[train_index])


# NOTE(review): the training dataset is also created with test=True, which
# disables the random flips in iceberg_angle_dataset — confirm this is intended.
train_dataset = iceberg_angle_dataset(data= train_X[train_index], angle=train_angle[train_index],
                                    label=train_y[train_index],
                                    transform=train_transform, test=True)

val_dataset = iceberg_angle_dataset(data= train_X[val_index], angle=train_angle[val_index],
                                    label=train_y[val_index],
                                    transform=train_transform, test=True)

# train_dataset = iceberg_angle_dataset(data= train_X[train_index], angle=train_angle[train_index],size=train_size[train_index],
#                                     label=train_y[train_index],
#                                     transform=train_transform)

# val_dataset = iceberg_angle_dataset(data= train_X[val_index], angle=train_angle[val_index],size= train_size[val_index],
#                                     label=train_y[val_index],
#                                     transform=train_transform, test=True)



train_loader = DataLoader(train_dataset, batch_size = 16, num_workers=3, 
                          shuffle=True)
val_loader = DataLoader(val_dataset, batch_size = 64, num_workers=3)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [35]:
# Sanity check: print the first batch produced by the loader, then stop.
for i in train_loader:
    print(i)
    break

[
(0 ,0 ,.,.) = 
 -1.0975e+00 -9.8599e-01 -1.0975e+00  ...  -1.8955e+00 -8.8212e-01 -6.9086e-01
 -1.0408e+00 -9.3292e-01 -1.0975e+00  ...  -2.9090e-01 -8.8212e-01 -4.3962e-01
 -1.0408e+00 -1.6371e+00 -1.6371e+00  ...  -2.8118e-02 -9.0055e-02 -4.7897e-01
                 ...                   ⋱                   ...                
 -1.2174e+00 -8.3202e-01 -6.9066e-01  ...  -1.5604e+00 -1.6382e+00 -1.4150e+00
 -9.8645e-01 -1.0413e+00 -9.3338e-01  ...  -8.8258e-01 -1.6382e+00 -1.1573e+00
 -9.8645e-01 -1.2174e+00 -5.1905e-01  ...  -9.3402e-01 -1.1573e+00 -9.8712e-01

(0 ,1 ,.,.) = 
 -9.6188e-01 -1.2315e+00 -7.1806e-01  ...  -8.3805e-01 -1.0941e+00 -1.5338e+00
 -7.1804e-01 -8.3707e-01 -7.1806e-01  ...  -4.9642e-01 -1.5338e+00 -1.3787e+00
 -7.1804e-01 -4.9544e-01 -4.9545e-01  ...  -2.9163e-01 -2.9164e-01 -6.0535e-01
                 ...                   ⋱                   ...                
 -1.9474e-01 -1.3784e+00 -2.2696e+00  ...  -2.7370e+00 -1.3794e+00 -2.9234e-01
 -1.6986e+00 -1.378

In [30]:
# Scratch checks. Only the last expression of a cell is displayed, so the
# first three lines are evaluated and discarded; the cell shows `use_cuda`.
torch.cuda.is_available()
torch.from_numpy(train_X).type(torch.FloatTensor)[1].shape
train_X[1]
use_cuda
# for i in train_loader:
#     print(i.size())
#     break

(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 22

AttributeError: 'list' object has no attribute 'size'

(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 224, 224)
(3, 22

In [38]:
def train(epoch,early_stopping = None):
    """Train the global ``net`` on image-only batches for up to ``epoch`` epochs.

    Reads the notebook globals ``net``, ``optimizer``, ``scheduler``,
    ``criterion``, ``train_loader``, ``val_loader`` and ``use_cuda``, and
    rebinds the global ``train_data`` to a new dict of history lists
    ('loss_hist', 'val_loss_hist', 'train_acc_hist', 'val_acc_hist').
    Checkpoints are written to 'vgg_fcn_acc.pth' and 'vgg_fcn_loss.pth'.

    Args:
        epoch: maximum number of epochs to run.
        early_stopping: if not None, stop once ``e_s`` (the number of epochs
            since the counter was last reset) reaches this value.

    Returns:
        ``(best_val_loss, best_acc, i)`` where ``i`` is the zero-based index
        of the last epoch that ran.
    """
    global train_data#,out,y,predicted
    acc=0
    best_acc =0
    best_val_loss= 100
    loss_hist = []
    val_loss_hist = []
    train_acc_hist = []
    val_acc_hist = []
    # The dict shares the list objects above, so appending to either is
    # visible through both.
    train_data={}
    train_data['loss_hist'] = loss_hist
    train_data['val_loss_hist'] = val_loss_hist
    train_data['train_acc_hist'] = train_acc_hist
    train_data['val_acc_hist'] =  val_acc_hist
    # e_s: epochs since the early-stopping counter was last reset.
    e_s= 0
    last_lr = optimizer.param_groups[0]['lr']
    
    for i in range(epoch):
        print('\nThis is epoch:{}'.format(i+1))
        total= 0
        correct=0
        loss_avg= 0
#         scheduler.step()
        # The scheduler is stepped with the previous epoch's `acc`.
        # NOTE(review): passing a metric presumably targets a
        # ReduceLROnPlateau-style scheduler — confirm against the scheduler
        # that is actually configured.
        scheduler.step(acc)
        if optimizer.param_groups[0]['lr'] < last_lr:
            print('lr change from %f to %f\n' %(last_lr,optimizer.param_groups[0]['lr']))
            last_lr = optimizer.param_groups[0]['lr']

        net.train()
        for j,(batch_x, batch_y) in enumerate(train_loader):
            optimizer.zero_grad()
            if use_cuda:
                batch_x, batch_y = batch_x.cuda(), batch_y.cuda()
            x = Variable(batch_x)
            y = Variable(batch_y)
            out = net(x)
            loss = criterion(out, y)
            # Accumulate the batch loss weighted by batch size so that
            # loss_avg/total is a per-sample running mean.
            loss_avg += loss.cpu().data[0] *out.size()[0]
            loss.backward()
            optimizer.step()
            
            _, predicted = torch.max(out.data, 1)
            total += y.size(0)
            correct += predicted.eq(y.data).cpu().sum()
            progress_bar(j, len(train_loader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
                % (loss_avg/total, 100.*correct/total, correct, total))
            # Record the running training loss every fifth batch.
            if j % 5==0:
                loss_hist.append(loss_avg/total)
            
        train_acc_hist.append(100.*correct/total)
        e_s+=1
        # i % 1 is always 0, so validation runs after every epoch.
        if i %1 == 0:
            acc, val_loss = test(val_loader)
            val_acc_hist.append(acc)
            if acc >best_acc:
                best_acc= acc
                e_s = 0
                print('acc: Save it!')
                torch.save(net.state_dict(), 'vgg_fcn_acc.pth')
            # A new best validation loss only counts when the training loss
            # is not above it.
            if val_loss <best_val_loss and loss_avg/total <=val_loss :
                best_val_loss= val_loss
                e_s = 0
                # Overrides the value handed to scheduler.step(acc) at the
                # start of the next epoch with something above best_acc.
                acc= best_acc+ 0.01
                print('loss: Save it!')
                torch.save(net.state_dict(), 'vgg_fcn_loss.pth')
            # While the training loss is still above the validation loss the
            # early-stopping counter is kept at zero.
            if loss_avg/total > val_loss:
                e_s = 0
        if early_stopping is not None and e_s >= early_stopping:
            return best_val_loss,best_acc,i

    return best_val_loss,best_acc,i
#         if i%50==0 and save:
#             torch.save(net.state_dict(), 'resnet50.pth')
        
def test(val_load):
    """Evaluate the global ``net`` on image-only batches from ``val_load``.

    Puts ``net`` in eval mode, appends the per-sample mean loss to
    ``train_data['val_loss_hist']`` and returns ``(accuracy_percent, mean_loss)``.
    """
    net.eval()
    seen = 0
    hits = 0
    loss_sum = 0
    for step, (val_x, val_y) in enumerate(val_load):
        if use_cuda:
            val_x = val_x.cuda()
            val_y = val_y.cuda()

        inputs = Variable(val_x)
        targets = Variable(val_y)
        logits = net(inputs)
        # A 1-D output is promoted to a batch of one.
        if len(logits.size()) == 1:
            logits = logits.unsqueeze(0)
        batch_loss = criterion(logits, targets)
        batch_n = logits.size()[0]
        # Weight by batch size so loss_sum/seen is a per-sample mean.
        loss_sum += batch_loss.cpu().data[0] * batch_n
        _, predicted = torch.max(logits.data, 1)
        hits += predicted.eq(targets.data).cpu().sum()
        seen += batch_n
        progress_bar(step, len(val_load), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
                % (loss_sum/seen, 100.*hits/seen, hits, seen))
    mean_loss = loss_sum/seen
    # Keep track of the validation loss as well.
    train_data['val_loss_hist'].append(mean_loss)
    acc = (hits*100.0)/seen
    return acc, mean_loss

In [59]:
####train with angle and other


def train(epoch,early_stopping = None):
    """Train the global ``net`` on (image, angle) batches for up to ``epoch`` epochs.

    Same structure as the image-only loop, but each batch from
    ``train_loader`` is ``(image, angle, label)`` and the network is called
    with the tuple ``(x, angle)``. Reads the notebook globals ``net``,
    ``optimizer``, ``scheduler``, ``criterion``, ``train_loader``,
    ``val_loader`` and ``use_cuda``; rebinds the global ``train_data`` to a
    new dict of history lists. Checkpoints are written to 'cnn_ang_acc.pth'
    and 'cnn_ang_loss.pth'.

    Args:
        epoch: maximum number of epochs to run.
        early_stopping: if not None, stop once ``e_s`` (the number of epochs
            since the counter was last reset) reaches this value.

    Returns:
        ``(best_val_loss, best_acc, i)`` where ``i`` is the zero-based index
        of the last epoch that ran.
    """
    global train_data#,out,y,predicted
    acc=0
    best_acc =0
    best_val_loss= 100
    loss_hist = []
    val_loss_hist = []
    train_acc_hist = []
    val_acc_hist = []
    # The dict shares the list objects above.
    train_data={}
    train_data['loss_hist'] = loss_hist
    train_data['val_loss_hist'] = val_loss_hist
    train_data['train_acc_hist'] = train_acc_hist
    train_data['val_acc_hist'] =  val_acc_hist
    # e_s: epochs since the early-stopping counter was last reset.
    e_s= 0
    last_lr = optimizer.param_groups[0]['lr']
    
    for i in range(epoch):
        print('\nThis is epoch:{}'.format(i+1))
        total= 0
        correct=0
        loss_avg= 0
        # Epoch-based scheduler step (no metric is passed in this variant).
        scheduler.step()
#         scheduler.step(acc)
        if optimizer.param_groups[0]['lr'] < last_lr:
            print('lr change from %f to %f\n' %(last_lr,optimizer.param_groups[0]['lr']))
            last_lr = optimizer.param_groups[0]['lr']
        net.train()
        for j,(batch_x,batch_angle, batch_y) in enumerate(train_loader):
            optimizer.zero_grad()
            batch_angle=batch_angle.type(torch.FloatTensor)
            if use_cuda:
                batch_x,batch_angle, batch_y = batch_x.cuda(),batch_angle.cuda(),batch_y.cuda()
            x = Variable(batch_x)
            angle = Variable(batch_angle)
            y = Variable(batch_y)
            # The network receives image and angle together as one tuple.
            out = net((x, angle))
            loss = criterion(out, y)
            # Weighted by batch size so loss_avg/total is a per-sample mean.
            loss_avg += loss.cpu().data[0] *out.size()[0]
            loss.backward()
            optimizer.step()
            
            _, predicted = torch.max(out.data, 1)
            total += y.size(0)
            correct += predicted.eq(y.data).cpu().sum()
            progress_bar(j, len(train_loader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
                % (loss_avg/total, 100.*correct/total, correct, total))
            # Record the running training loss every fifth batch.
            if j % 5==0:
                loss_hist.append(loss_avg/total)
            
        train_acc_hist.append(100.*correct/total)
        e_s+=1
        # i % 1 is always 0, so validation runs after every epoch.
        if i %1 == 0:
            acc, val_loss = test(val_loader)
            val_acc_hist.append(acc)
            if acc >best_acc:
                best_acc= acc
                e_s = 0
                print('acc: Save it!')
                torch.save(net.state_dict(), 'cnn_ang_acc.pth')
            # A new best validation loss only counts when the training loss
            # is not above it.
            if val_loss <best_val_loss and loss_avg/total <=val_loss :
                best_val_loss= val_loss
                e_s = 0
                print('loss: Save it!')
                torch.save(net.state_dict(), 'cnn_ang_loss.pth')
            # While the training loss is still above the validation loss the
            # early-stopping counter is kept at zero.
            if loss_avg/total >val_loss:
                e_s=0

#             if best_val_loss >= val_loss:
#                 best_val_loss= val_loss
#                 torch.save(net.state_dict(), 'resnet34_loss%d.pth'%i)
        if early_stopping is not None and e_s >= early_stopping:
            return best_val_loss,best_acc,i

    return best_val_loss,best_acc,i
#         if i%50==0 and save:
#             torch.save(net.state_dict(), 'resnet50.pth')
        
def test(val_load):
    """Evaluate the global ``net`` on (image, angle, label) batches from ``val_load``.

    Puts ``net`` in eval mode, appends the per-sample mean loss to
    ``train_data['val_loss_hist']`` and returns ``(accuracy_percent, mean_loss)``.
    """
    net.eval()
    seen = 0
    hits = 0
    loss_sum = 0
    for step, (val_x, val_angle, val_y) in enumerate(val_load):
        val_angle = val_angle.type(torch.FloatTensor)
        if use_cuda:
            val_x = val_x.cuda()
            val_angle = val_angle.cuda()
            val_y = val_y.cuda()
        inputs = Variable(val_x)
        angles = Variable(val_angle)
        targets = Variable(val_y)
        # The network takes image and angle together as one tuple.
        logits = net((inputs, angles))
        # A 1-D output is promoted to a batch of one.
        if len(logits.size()) == 1:
            logits = logits.unsqueeze(0)
        batch_loss = criterion(logits, targets)
        batch_n = logits.size()[0]
        # Weight by batch size so loss_sum/seen is a per-sample mean.
        loss_sum += batch_loss.cpu().data[0] * batch_n
        _, predicted = torch.max(logits.data, 1)
        hits += predicted.eq(targets.data).cpu().sum()
        seen += batch_n
        progress_bar(step, len(val_load), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
                % (loss_sum/seen, 100.*hits/seen, hits, seen))
    mean_loss = loss_sum/seen
    # Keep track of the validation loss as well.
    train_data['val_loss_hist'].append(mean_loss)
    acc = (hits*100.0)/seen
    return acc, mean_loss

In [7]:
# Load an ImageNet-pretrained VGG16-BN and print its parameters.
# NOTE(review): `pre_vgg` is not imported in any visible cell — confirm which
# module it refers to before relying on Restart & Run All.
vgg16 = pre_vgg.vgg16_bn(pretrained=True)
for param in vgg16.parameters():
    # `print(param.)` was a syntax error (dangling attribute access); the
    # recorded output shows the parameters themselves being printed.
    # This prints every weight tensor of the network, so the output is large.
    print(param)

Downloading: "https://download.pytorch.org/models/vgg16_bn-6c64b313.pth" to /home/FDSM_lhn/.torch/models/vgg16_bn-6c64b313.pth
100.0%


Parameter containing:
(0 ,0 ,.,.) = 
  8.2833e-02  2.7968e-02  7.7096e-02
  4.9341e-02 -3.3441e-02  1.9572e-02
  8.0300e-02  7.7076e-02  8.3349e-02

(0 ,1 ,.,.) = 
 -4.4296e-02 -1.7748e-01 -4.8706e-02
 -1.1003e-01 -2.7530e-01 -1.3474e-01
 -5.9982e-03 -6.1375e-02  1.6822e-02

(0 ,2 ,.,.) = 
  2.7480e-02 -6.6769e-02  4.3955e-02
 -2.6662e-02 -1.4995e-01 -3.3615e-02
  5.2778e-02  1.7143e-02  8.6744e-02
     ⋮ 

(1 ,0 ,.,.) = 
 -1.2628e-02  3.0218e-02 -2.6930e-02
 -1.3764e-02  1.1993e-01 -6.6263e-03
 -2.6019e-02 -8.3535e-03 -3.9197e-02

(1 ,1 ,.,.) = 
 -4.0557e-02  1.3983e-02 -5.4278e-02
  1.5412e-02  1.8198e-01  1.7598e-02
 -1.7032e-02  1.1284e-02 -2.4226e-02

(1 ,2 ,.,.) = 
 -6.5683e-02  5.9252e-02 -5.3020e-02
  3.8278e-02  2.7292e-01  5.9491e-02
 -4.1218e-02  3.6159e-02 -3.0478e-02
     ⋮ 

(2 ,0 ,.,.) = 
  1.4962e-06 -1.1430e-06  1.2536e-06
 -1.0341e-06 -5.1964e-06 -1.1568e-06
  2.5825e-06  2.5617e-07  1.6146e-06

(2 ,1 ,.,.) = 
  3.0030e-06  4.2831e-07  2.3388e-06
  2.8718e-07 -3.6006e

In [50]:
# for  i in net.features:
#     print(i)
#     break
# for i in i.parameters():
#     print(i)
# Number of layers in the feature extractor of the current global `net`.
len(net.features)

43

In [60]:
#vgg16 = vgg_fcn.vgg16_bn(pretrained=True)
# Train the model from pretrained weights and collect the
# (best_val_loss, best_acc, last_epoch) tuple returned by train().
result=[]
# range(1): a single run; raise the count to repeat the experiment.
for i in range(1):
    # Despite the variable name, this is vgg_fcn.vgg16, not the _bn variant.
    vgg16_bn = vgg_fcn.vgg16(pretrained=True)#copy.deepcopy(vgg16)

    # Width of the two hidden layers of the replacement classifier.
    num = 256
    # Input width is 512+1 — presumably 512 pooled features plus the
    # incidence angle; confirm against vgg_fcn's forward().
    vgg16_bn.classifier = nn.Sequential(
                nn.Linear(512+1, num),
                nn.BatchNorm1d(num),
                nn.ReLU(True),
                nn.Dropout(p=0.3),
                nn.Linear(num, num),
                nn.BatchNorm1d(num),
                nn.ReLU(True),
                nn.Dropout(p=0.3),
                nn.Linear(num, 2)
            )

    # train()/test() read the model, loss, optimizer and scheduler from
    # these global names.
    net= vgg16_bn
    # net.load_state_dict(torch.load('vgg_fcn_loss.pth'))

    criterion = nn.CrossEntropyLoss()

    # #Adam does not perform so good here   
    # #(0.1, 0.0001) (50, 80, 110, 170) 52 epoch reaches the maximum.
    optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9, weight_decay=0.0003, nesterov= True)
    # optimizer = optim.Adam(net.classifier.parameters(), lr=0.00001, weight_decay=0.0003)
    # Learning rate is multiplied by 0.1 at each listed milestone.
    scheduler = MultiStepLR(optimizer, [5,11,16], gamma=0.1)
#     scheduler = MultiStepLR(optimizer, [10,18,26], gamma=0.1)
    # scheduler = StepLR(optimizer, step_size=5, gamma=0.1)
#     scheduler = ReduceLROnPlateau(optimizer, 'max', patience =3,min_lr= 0.00001)
    #5e-3 86
    if use_cuda:
        criterion.cuda()
        net.cuda()
    #     resnet101 = torch.nn.DataParallel(resnet101, device_ids=range(torch.cuda.device_count()))
    #     cudnn.benchmark = True   

    a = train(epoch=60,early_stopping =20)
    result.append(a)


This is epoch:1
acc: Save it!

This is epoch:2
loss: Save it!

This is epoch:3

This is epoch:4
acc: Save it!
loss: Save it!

This is epoch:5

This is epoch:6
lr change from 0.001000 to 0.000100

loss: Save it!

This is epoch:7

This is epoch:8
acc: Save it!
loss: Save it!

This is epoch:9

This is epoch:10
loss: Save it!

This is epoch:11
acc: Save it!
loss: Save it!

This is epoch:12
lr change from 0.000100 to 0.000010

loss: Save it!

This is epoch:13
loss: Save it!

This is epoch:14

This is epoch:15

This is epoch:16

This is epoch:17
lr change from 0.000010 to 0.000001


This is epoch:18

This is epoch:19

This is epoch:20

This is epoch:21

This is epoch:22

This is epoch:23

This is epoch:24

This is epoch:25

This is epoch:26

This is epoch:27

This is epoch:28

This is epoch:29

This is epoch:30
acc: Save it!

This is epoch:31

This is epoch:32

This is epoch:33

This is epoch:34

This is epoch:35

This is epoch:36

This is epoch:37

This is epoch:38

This is epoch:39

This 

Process Process-3043:
KeyboardInterrupt


KeyboardInterrupt: 

Process Process-3044:
Process Process-3045:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/usr/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/local/lib/python3.5/dist-packages/torch/utils/data/dataloader.py", line 36, in _worker_loop
    r = index_queue.get()
  File "/usr/local/lib/python3.5/dist-packages/torch/utils/data/dataloader.py", l

In [81]:
# One (best_val_loss, best_acc, last_epoch) tuple per training run.
result

[(0.21936874226246297, 91.27725856697819, 39),
 (0.21782404165773006, 91.58878504672897, 55),
 (0.22525541061924254, 90.96573208722741, 53)]

In [83]:
# One (best_val_loss, best_acc, last_epoch) tuple per training run.
result

[(0.24811193430535147, 90.03115264797508, 29),
 (0.21092650229314405, 91.58878504672897, 28),
 (0.22480700989007207, 90.65420560747664, 53)]

In [69]:
# Fine-tune only the fully connected head of the VGG16 model, starting from
# the weights stored in 'cnn_ang_loss.pth'.
#vgg16 = vgg_fcn.vgg16_bn(pretrained=True)
# NOTE: despite the variable name this is the plain (non-BN) vgg16 variant.
vgg16_bn = vgg_fcn.vgg16(pretrained=True)#copy.deepcopy(vgg16)

# The head consumes 512 features plus one extra scalar input
# (presumably the incidence angle appended inside vgg_fcn - confirm).
vgg16_bn.classifier = nn.Sequential(
            nn.Linear(512+1, 256),
#             nn.BatchNorm1d(512),
            nn.ReLU(True),
            nn.Dropout(p=0.3),
            nn.Linear(256, 256),
#             nn.BatchNorm1d(512),
            nn.ReLU(True),
            nn.Dropout(p=0.3),
            nn.Linear(256, 2)
        )

net = vgg16_bn
net.load_state_dict(torch.load('cnn_ang_loss.pth'))

# Freeze the convolutional trunk. `requires_grad` is a property of parameters;
# assigning it on a module only creates an unused attribute and freezes nothing,
# so the flag has to be set on every parameter of `features`.
for param in net.features.parameters():
    param.requires_grad = False


criterion = nn.CrossEntropyLoss()

# #Adam does not perform so good here
# #(0.1, 0.0001) (50, 80, 110, 170) 52 epoch reaches the maximum.
# Only the classifier parameters are handed to the optimizer.
optimizer = optim.SGD(net.classifier.parameters(), lr=0.0001, momentum=0.9, weight_decay=0.0003, nesterov= True)
# optimizer = optim.Adam(net.classifier.parameters(), lr=0.00001, weight_decay=0.0003)
scheduler = MultiStepLR(optimizer, [5,10,15], gamma=0.1)
# scheduler = MultiStepLR(optimizer, [8,18], gamma=0.1)
# scheduler = StepLR(optimizer, step_size=5, gamma=0.1)
# scheduler = ReduceLROnPlateau(optimizer, 'max', patience =10,min_lr= 0.0001)
#5e-3 86
if use_cuda:
    criterion.cuda()
    net.cuda()
#     resnet101 = torch.nn.DataParallel(resnet101, device_ids=range(torch.cuda.device_count()))
#     cudnn.benchmark = True

# train() reads net/criterion/optimizer/scheduler and the data loaders from the kernel globals.
train(epoch=250,early_stopping =20)


This is epoch:1
acc: Save it!
loss: Save it!

This is epoch:2

This is epoch:3
acc: Save it!

This is epoch:4

This is epoch:5

This is epoch:6
lr change from 0.000100 to 0.000010


This is epoch:7

This is epoch:8

This is epoch:9

This is epoch:10

This is epoch:11
lr change from 0.000010 to 0.000001


This is epoch:12

This is epoch:13

This is epoch:14

This is epoch:15

This is epoch:16
lr change from 0.000001 to 0.000000


This is epoch:17

This is epoch:18

This is epoch:19

This is epoch:20

This is epoch:21

Process Process-8923:
Process Process-8924:
Process Process-8925:
Traceback (most recent call last):
Traceback (most recent call last):
  File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/usr/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/local/lib/python3.5/dist-packages/torch/utils/data/dataloader.py", line 36, in _worker_loop
    r = index_queue.get()
Traceback (most recent call last):
  File "/usr/local/lib/python3.5/dist-packages/torch/utils/data/dataloader.py", line 36, in _worker_loop
    r = index_queue.get()
  File "/usr/lib/python3.5/multiprocessing/queues.py", line 342, in get
    with self._rlock:
  File "/usr/lib/python3.5/multiprocessing/proc

KeyboardInterrupt: 

In [75]:
# Read the test JSON and convert it with raw_to_numpy (defined elsewhere in the notebook).
# The recorded output below shows the resulting array shape, (8424, 3, 75, 75).
test_set = pd.read_json(BASE_dir + 'test.json')
test_X = raw_to_numpy(test_set)
test_X.shape

(8424, 3, 75, 75)

In [77]:
# Stack the entries of `result` along a new leading axis and average over that axis.
# NOTE: `k` is also used as a loop variable in other cells of this notebook.
k =np.stack(result).mean(axis=0)
# #sub.shape
# result[1].shape
# np.concatenate(prob).shape

In [80]:
# Build the submission frame: ids from the test set plus the concatenated
# per-batch predictions in `prob`, then write it without the index column.
sub = test_set[['id']].copy()
sub['is_iceberg'] = np.concatenate(prob)
sub.to_csv('submission2.csv', index=False)

In [41]:
# Compare the current submission with an earlier one by correlating their predictions.
temp= pd.read_csv('submission3.csv') #0.0001 wd one
sub['is_iceberg2'] = temp['is_iceberg']
# Correlate only the two prediction columns. `sub` also holds the string `id`
# column, and DataFrame.corr() on a frame with non-numeric columns raises on
# pandas >= 2.0 instead of silently dropping them.
sub[['is_iceberg', 'is_iceberg2']].corr()

Unnamed: 0,is_iceberg,is_iceberg2
is_iceberg,1.0,0.886197
is_iceberg2,0.886197,1.0


In [11]:
def train(epoch,early_stopping = None):
    """Train the global `net` with image + angle inputs, validating after every epoch.

    Reads `net`, `optimizer`, `scheduler`, `criterion`, `train_loader`,
    `val_loader` and `use_cuda` from the enclosing (kernel) namespace and
    rebinds the global `train_data` to a fresh dict of history lists.

    Checkpoints written with torch.save(net.state_dict(), ...):
      * 'vgg_ang_acc.pth'  - validation accuracy exceeds the best seen so far.
      * 'vgg_ang_loss.pth' - validation loss is the lowest so far AND the mean
                             training loss of the epoch is <= that validation loss.

    Args:
        epoch: maximum number of epochs to run.
        early_stopping: stop once the patience counter reaches this value;
            None disables early stopping.

    Returns:
        (best_val_loss, best_acc, i) where `i` is the zero-based index of the
        last epoch that ran.
    """
    global train_data
    history = {
        'loss_hist': [],
        'val_loss_hist': [],   # appended to by test()
        'train_acc_hist': [],
        'val_acc_hist': [],
    }
    train_data = history

    best_acc = 0
    best_val_loss = 100
    e_s = 0  # patience counter: epochs since the last reset
    last_lr = optimizer.param_groups[0]['lr']

    for i in range(epoch):
        print('\nThis is epoch:{}'.format(i+1))

        # The scheduler is stepped at the start of the epoch; report any decay.
        scheduler.step()
#         scheduler.step(acc)
        current_lr = optimizer.param_groups[0]['lr']
        if current_lr < last_lr:
            print('lr change from %f to %f\n' %(last_lr,current_lr))
            last_lr = current_lr

        total = 0
        correct = 0
        loss_avg = 0  # sum of per-sample losses; divided by `total` when reported
        net.train()
        for j, batch in enumerate(train_loader):
            batch_x, batch_angle, batch_y = batch
            optimizer.zero_grad()
            batch_angle = batch_angle.type(torch.FloatTensor)
            if use_cuda:
                batch_x = batch_x.cuda()
                batch_angle = batch_angle.cuda()
                batch_y = batch_y.cuda()
            y = Variable(batch_y)
            # The network takes a single (image, angle) tuple.
            out = net((Variable(batch_x), Variable(batch_angle)))
            loss = criterion(out, y)
            loss_avg += loss.cpu().data[0] * out.size(0)
            loss.backward()
            optimizer.step()

            predicted = torch.max(out.data, 1)[1]
            total += y.size(0)
            correct += predicted.eq(y.data).cpu().sum()
            progress_bar(j, len(train_loader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
                % (loss_avg/total, 100.*correct/total, correct, total))
            if j % 5 == 0:
                history['loss_hist'].append(loss_avg/total)

        history['train_acc_hist'].append(100.*correct/total)
        e_s += 1

        # Validate after every epoch.
        acc, val_loss = test(val_loader)
        history['val_acc_hist'].append(acc)
        train_loss = loss_avg/total

        if acc > best_acc:
            best_acc = acc
            e_s = 0
            print('acc: Save it!')
            torch.save(net.state_dict(), 'vgg_ang_acc.pth')
        if val_loss < best_val_loss and train_loss <= val_loss:
            best_val_loss = val_loss
            e_s = 0
            print('loss: Save it!')
            torch.save(net.state_dict(), 'vgg_ang_loss.pth')
        if train_loss > val_loss:
            # Training loss is still above validation loss: do not count this
            # epoch against the patience budget.
            e_s = 0

#             if best_val_loss >= val_loss:
#                 best_val_loss= val_loss
#                 torch.save(net.state_dict(), 'resnet34_loss%d.pth'%i)
        if early_stopping is not None and e_s >= early_stopping:
            break

    return best_val_loss, best_acc, i
#         if i%50==0 and save:
#             torch.save(net.state_dict(), 'resnet50.pth')
        
def test(val_load):
    """Evaluate the global `net` on every (image, angle, label) batch of `val_load`.

    Puts `net` into eval mode, appends the mean per-sample loss to the global
    `train_data['val_loss_hist']`, and returns (accuracy_percent, mean_loss).
    """
    net.eval()
    seen = 0
    hits = 0
    loss_sum = 0
    for k, (val_x, val_angle, val_y) in enumerate(val_load):
        val_angle = val_angle.type(torch.FloatTensor)
        if use_cuda:
            val_x = val_x.cuda()
            val_angle = val_angle.cuda()
            val_y = val_y.cuda()
        y = Variable(val_y)
        out = net((Variable(val_x), Variable(val_angle)))
        if out.dim() == 1:
            # A one-dimensional output gets a leading batch axis.
            out = out.unsqueeze(0)
        batch_size = out.size(0)
        loss = criterion(out, y)
        loss_sum += loss.cpu().data[0] * batch_size
        #print(out.size())
        predicted = torch.max(out.data, 1)[1]
        hits += predicted.eq(y.data).cpu().sum()
        seen += batch_size
        progress_bar(k, len(val_load), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
                % (loss_sum/seen, 100.*hits/seen, hits, seen))
    mean_loss = loss_sum/seen
    train_data['val_loss_hist'].append(mean_loss) #also keep track of loss of val set
    return (hits*100.0)/seen, mean_loss

#Try different transformation

import shutil  # cell-local: used for the checkpoint copies below

# Number of folds; also reported in the log line so the label cannot drift
# from the split that was actually used.
n_splits = 5

for rou in range(1):
    ran_num = 9220
    seed = np.random.RandomState(ran_num)
    spliter = KFold(n_splits=n_splits, shuffle=True, random_state=seed)
    # NOTE(review): folds are drawn from train_X_del, but the indices are applied
    # to train_X / train_angle / train_y - confirm these arrays share length and order.
    for k, (train_index, val_index) in enumerate(spliter.split(train_X_del)):

        # NOTE(review): both datasets are created with test=True and the
        # train_transform currently bound in the kernel - confirm that is intended.
        train_dataset = iceberg_angle_dataset(data=train_X[train_index], angle=train_angle[train_index],
                                              label=train_y[train_index],
                                              transform=train_transform, test=True)

        val_dataset = iceberg_angle_dataset(data=train_X[val_index], angle=train_angle[val_index],
                                            label=train_y[val_index],
                                            transform=train_transform, test=True)

        # train()/test() read these loaders from the kernel globals.
        train_loader = DataLoader(train_dataset, batch_size=16, num_workers=3,
                                  shuffle=True)
        val_loader = DataLoader(val_dataset, batch_size=64, num_workers=3)

        candidate = []
        for rep in range(1):
            # Fresh pretrained trunk with a new head (512 features + 1 extra input).
            vgg16_bn = vgg_fcn.vgg16(pretrained=True)#copy.deepcopy(vgg16)
            num = 256
            vgg16_bn.classifier = nn.Sequential(
                        nn.Linear(512+1, num),
                        nn.BatchNorm1d(num),
                        nn.ReLU(True),
                        nn.Dropout(p=0.3),
                        nn.Linear(num, num),
                        nn.BatchNorm1d(num),
                        nn.ReLU(True),
                        nn.Dropout(p=0.3),
                        nn.Linear(num, 2)
                    )
            net = vgg16_bn
            criterion = nn.CrossEntropyLoss()
            optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9, weight_decay=0.00001, nesterov= True)
            scheduler = MultiStepLR(optimizer, [5,11,16], gamma=0.1)
            #5e-3 86
            if use_cuda:
                criterion.cuda()
                net.cuda()
            # result == (best_val_loss, best_acc, last_epoch_index)
            result = train(epoch=60,early_stopping =20)
            with open("vgg_models/log.txt", "a") as myfile:
                # The fold count used to be hard-coded as "10folds" although the split uses 5.
                msg = '{}folds, Phase3, At fold {}, seed {},round {} we find one with acc: {}, loss: {}\n'.format(
                                                            n_splits, k,ran_num,rep+1, result[1], result[0])
                myfile.write(msg)
            # shutil.copyfile raises on failure; the previous os.system('cp ...') ignored it.
            # NOTE(review): if train() never wrote 'vgg_ang_loss.pth' in this run,
            # a checkpoint left over from an earlier run is copied - confirm acceptable.
            shutil.copyfile('vgg_ang_loss.pth', 'vgg_ang_loss{}.pth'.format(rep))
            del vgg16_bn

        # Archive the per-repetition checkpoint under a name encoding round, fold and repetition.
        for g in range(1):
            shutil.copyfile('vgg_ang_loss{}.pth'.format(g),
                            'vgg_models/r3_5vgg{}_{}{}.pth'.format(rou,k,g))
            


This is epoch:1
acc: Save it!
loss: Save it!

This is epoch:2
loss: Save it!

This is epoch:3
acc: Save it!
loss: Save it!

This is epoch:4
acc: Save it!
loss: Save it!

This is epoch:5
acc: Save it!

This is epoch:6
lr change from 0.001000 to 0.000100

acc: Save it!

This is epoch:7

This is epoch:8
loss: Save it!

This is epoch:9
acc: Save it!
loss: Save it!

This is epoch:10
loss: Save it!

This is epoch:11
loss: Save it!

This is epoch:12
lr change from 0.000100 to 0.000010

loss: Save it!

This is epoch:13
loss: Save it!

This is epoch:14
loss: Save it!

This is epoch:15

This is epoch:16

This is epoch:17
lr change from 0.000010 to 0.000001

acc: Save it!

This is epoch:18

This is epoch:19

This is epoch:20

This is epoch:21

This is epoch:22

This is epoch:23

This is epoch:24

This is epoch:25

This is epoch:26

This is epoch:27

This is epoch:28
loss: Save it!

This is epoch:29

This is epoch:30

This is epoch:31

This is epoch:32

This is epoch:33

This is epoch:34

This is

In [None]:
def train(epoch,early_stopping = None):
    """Train the global `net` on image-only batches, validating after every epoch.

    Reads `net`, `optimizer`, `scheduler`, `criterion`, `train_loader`,
    `val_loader` and `use_cuda` from the enclosing (kernel) namespace and
    rebinds the global `train_data` to a fresh dict of history lists.

    Checkpoints written with torch.save(net.state_dict(), ...):
      * 'vgg_acc.pth'  - validation accuracy exceeds the best seen so far.
      * 'vgg_loss.pth' - validation loss is the lowest so far AND the mean
                         training loss of the epoch is <= that validation loss.

    Args:
        epoch: maximum number of epochs to run.
        early_stopping: stop once the patience counter reaches this value;
            None disables early stopping.

    Returns:
        (best_val_loss, best_acc, i) where `i` is the zero-based index of the
        last epoch that ran.
    """
    global train_data
    history = {
        'loss_hist': [],
        'val_loss_hist': [],   # appended to by test()
        'train_acc_hist': [],
        'val_acc_hist': [],
    }
    train_data = history

    best_acc = 0
    best_val_loss = 100
    e_s = 0  # patience counter: epochs since the last reset
    last_lr = optimizer.param_groups[0]['lr']

    for i in range(epoch):
        print('\nThis is epoch:{}'.format(i+1))

        # The scheduler is stepped at the start of the epoch; report any decay.
        scheduler.step()
#         scheduler.step(acc)
        current_lr = optimizer.param_groups[0]['lr']
        if current_lr < last_lr:
            print('lr change from %f to %f\n' %(last_lr,current_lr))
            last_lr = current_lr

        total = 0
        correct = 0
        loss_avg = 0  # sum of per-sample losses; divided by `total` when reported
        net.train()
        for j, batch in enumerate(train_loader):
            batch_x, batch_y = batch
            optimizer.zero_grad()
            if use_cuda:
                batch_x = batch_x.cuda()
                batch_y = batch_y.cuda()
            y = Variable(batch_y)
            out = net(Variable(batch_x))
            loss = criterion(out, y)
            loss_avg += loss.cpu().data[0] * out.size(0)
            loss.backward()
            optimizer.step()

            predicted = torch.max(out.data, 1)[1]
            total += y.size(0)
            correct += predicted.eq(y.data).cpu().sum()
            progress_bar(j, len(train_loader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
                % (loss_avg/total, 100.*correct/total, correct, total))
            if j % 5 == 0:
                history['loss_hist'].append(loss_avg/total)

        history['train_acc_hist'].append(100.*correct/total)
        e_s += 1

        # Validate after every epoch.
        acc, val_loss = test(val_loader)
        history['val_acc_hist'].append(acc)
        train_loss = loss_avg/total

        if acc > best_acc:
            best_acc = acc
            e_s = 0
            print('acc: Save it!')
            torch.save(net.state_dict(), 'vgg_acc.pth')
        if val_loss < best_val_loss and train_loss <= val_loss:
            best_val_loss = val_loss
            e_s = 0
            # Only observable if the commented-out scheduler.step(acc) above is re-enabled.
            acc = best_acc + 0.01
            print('loss: Save it!')
            torch.save(net.state_dict(), 'vgg_loss.pth')
        if train_loss > val_loss:
            # Training loss is still above validation loss: do not count this
            # epoch against the patience budget.
            e_s = 0

        if early_stopping is not None and e_s >= early_stopping:
            break

    return best_val_loss, best_acc, i
#         if i%50==0 and save:
#             torch.save(net.state_dict(), 'resnet50.pth')
        
def test(val_load):
    """Evaluate the global `net` on every (image, label) batch of `val_load`.

    Puts `net` into eval mode, appends the mean per-sample loss to the global
    `train_data['val_loss_hist']`, and returns (accuracy_percent, mean_loss).
    """
    net.eval()
    seen = 0
    hits = 0
    loss_sum = 0
    for k, (val_x, val_y) in enumerate(val_load):
        if use_cuda:
            val_x = val_x.cuda()
            val_y = val_y.cuda()

        y = Variable(val_y)
        out = net(Variable(val_x))
        if out.dim() == 1: #in case it's one dimensional
            out = out.unsqueeze(0)
        batch_size = out.size(0)
        loss = criterion(out, y)
        loss_sum += loss.cpu().data[0] * batch_size
        #print(out.size())
        predicted = torch.max(out.data, 1)[1]
        hits += predicted.eq(y.data).cpu().sum()
        seen += batch_size
        progress_bar(k, len(val_load), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
                % (loss_sum/seen, 100.*hits/seen, hits, seen))
    mean_loss = loss_sum/seen
    train_data['val_loss_hist'].append(mean_loss) #also keep track of loss of val set
    return (hits*100.0)/seen, mean_loss
        

#Try different transformation

import shutil  # cell-local: used for the checkpoint copies below

for rou in range(1):
    ran_num = 9021
    seed = np.random.RandomState(ran_num)
    spliter = StratifiedKFold(n_splits=5, shuffle=True, random_state=seed)
    for k, (train_index, val_index) in enumerate(spliter.split(train_X_del, train_y_del)):

        # Normalisation statistics come from the training part of the fold only
        # and are applied to both the training and the validation dataset.
        train_mean, train_std = transform_compute(train_X_del[train_index])
        train_transform = T.Compose([
            T.Normalize(train_mean, train_std)
        ])
        #af_train_X, af_train_y = data_aug(train_X_del[train_index], train_y_del[train_index])
        #af_train_X, af_train_y = data_aug2(train_X_del[train_index], train_y_del[train_index])
        af_train_X, af_train_y = train_X_del[train_index], train_y_del[train_index]

        train_dataset = iceberg_dataset(data=af_train_X, label=af_train_y, transform=train_transform)
        val_dataset = iceberg_dataset(data=train_X_del[val_index], label=train_y_del[val_index],
                                      transform=train_transform, test=True)

        # train()/test() read these loaders from the kernel globals.
        train_loader = DataLoader(train_dataset, batch_size=16, num_workers=3,
                                  shuffle=True)
        val_loader = DataLoader(val_dataset, batch_size=64, num_workers=3)

        candidate = []
        for rep in range(2):
            vgg16_bn = vgg_fcn.vgg16_bn(pretrained=True)#copy.deepcopy(vgg16)
            # vgg16_bn.avg= nn.Conv2d(512, 512, kernel_size=2,
            #                                bias=False)
            # Convolutional head ending in a 2-channel map that is average-pooled.
            vgg16_bn.classifier = nn.Sequential(
                        nn.Dropout(p=0.4),
                        nn.Conv2d(512,512, kernel_size= 3,padding=1),
                        nn.BatchNorm2d(512),
                        nn.ReLU(True),
                        nn.Dropout(p=0.6),
                        nn.Conv2d(512, 2, kernel_size=3, padding=1,
                                           bias=False),
                        nn.AvgPool2d(3)
                    )

            net = vgg16_bn

            criterion = nn.CrossEntropyLoss()

            # #Adam does not perform so good here
            # #(0.1, 0.0001) (50, 80, 110, 170) 52 epoch reaches the maximum.
            optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9, weight_decay=0.0003, nesterov= True)
            # optimizer = optim.Adam(net.classifier.parameters(), lr=0.00001, weight_decay=0.0003)
            scheduler = MultiStepLR(optimizer, [5,11], gamma=0.1)
            # scheduler = StepLR(optimizer, step_size=7, gamma=0.5)
            # scheduler = ReduceLROnPlateau(optimizer, 'max', patience =10,min_lr= 0.0001)
            #5e-3 86
            if use_cuda:
                criterion.cuda()
                net.cuda()
            #     resnet101 = torch.nn.DataParallel(resnet101, device_ids=range(torch.cuda.device_count()))
            #     cudnn.benchmark = True

            # result == (best_val_loss, best_acc, last_epoch_index)
            result = train(epoch=100,early_stopping =20)
            with open("vgg_models/log.txt", "a") as myfile:
                msg = '5folds, Phase1, At fold {}, seed {},round {} we find one with acc: {}, loss: {}\n'.format(
                                                            k,ran_num,rep+1, result[1], result[0])
                myfile.write(msg)
            # shutil.copyfile raises on failure; the previous os.system('cp ...') ignored it.
            # NOTE(review): if train() never wrote 'vgg_loss.pth' in this run,
            # a checkpoint left over from an earlier run is copied - confirm acceptable.
            shutil.copyfile('vgg_loss.pth', 'vgg_loss{}.pth'.format(rep))

        # Archive both repetitions under a name encoding round, fold and repetition.
        for g in range(2):
            shutil.copyfile('vgg_loss{}.pth'.format(g),
                            'vgg_models/r1_5vgg{}_{}{}.pth'.format(rou,k,g))

In [28]:
# Score the test set with each selected FCN-head checkpoint; one prediction
# column per checkpoint is collected in temp11.
temp11 = pd.DataFrame()
# temp11= pd.read_csv('plain_cnn_15_models.csv')
test = pd.read_json(BASE_dir + 'test.json')
test_X = raw_to_numpy(test)
fake_label = np.zeros(len(test_X))  # placeholder labels; the loop below never uses them

# NOTE(review): uses whatever train_transform is currently bound in the kernel
# (the training loop rebinds it per fold) - confirm it matches these checkpoints.
test_dataset = iceberg_dataset(data= test_X, label=fake_label, transform=train_transform,test=True)

test_loader = DataLoader(test_dataset, batch_size = 64, num_workers=3)

# The right-hand side of this assignment was missing (a SyntaxError). The names
# are restored from the output this cell recorded via print(waiting_list).
waiting_list = ['r1_5vgg0_00.pth',
                'r1_5vgg0_11.pth',
                'r1_5vgg0_20.pth',
                'r1_5vgg0_31.pth',
                'r1_5vgg0_40.pth']

#waiting_list = [i for i in os.listdir('vgg_models/') if 'r1' in i]
waiting_list= [os.path.join('vgg_models', i) for i in waiting_list]

# The architecture must match the one the checkpoints were saved from.
vgg16_bn = vgg_fcn.vgg16_bn(pretrained=True)#copy.deepcopy(vgg16)
vgg16_bn.classifier = nn.Sequential(
            nn.Dropout(p=0.4),
            nn.Conv2d(512,512, kernel_size= 3,padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(True),
            nn.Dropout(p=0.6),
            nn.Conv2d(512, 2, kernel_size=3, padding=1,
                               bias=False),
            nn.AvgPool2d(3)
        )

net= vgg16_bn

print(waiting_list)
for i,pth in enumerate(waiting_list):
    net.load_state_dict(torch.load(pth))
    # Keep model and batches on the same device: both follow use_cuda.
    if use_cuda:
        net.cuda()
    prob = []
    net.eval()
    for k, (val_x, val_y) in enumerate(test_loader):
        if use_cuda:
            val_x = val_x.cuda()
        x = Variable(val_x)
        out = net(x)
        logits = out.cpu().data.numpy()
        # Softmax with the row maximum subtracted first to prevent overflow;
        # the value kept is the share of class index 1.
        temp = np.exp(logits - np.max(logits, axis=1)[:, np.newaxis])
        ans= temp[:,1]/(temp.sum(axis=1))
        prob.append(ans)
        #print(out.size())
        progress_bar(k, len(test_loader))
    msg = 'is_iceberg%d' % (i)
    temp11[msg]= np.concatenate(prob)

['vgg_models/r1_5vgg0_00.pth', 'vgg_models/r1_5vgg0_11.pth', 'vgg_models/r1_5vgg0_20.pth', 'vgg_models/r1_5vgg0_31.pth', 'vgg_models/r1_5vgg0_40.pth']


In [14]:
# Score the test set with every 'r3' checkpoint of the angle-aware model; one
# prediction column per checkpoint is collected in temp11.
temp11 = pd.DataFrame()
# temp11= pd.read_csv('plain_cnn_15_models.csv')
test = pd.read_json(BASE_dir + 'test.json')
test_X = raw_to_numpy(test)
fake_label = np.zeros(len(test_X))  # placeholder labels; the loop below never uses them

# `np.float` was only an alias of the builtin float and was removed in NumPy 1.24.
# NOTE(review): uses whatever train_transform is currently bound in the kernel - confirm.
test_dataset = iceberg_angle_dataset(data= test_X, label=fake_label,angle=test.inc_angle.values.astype(float), transform=train_transform,test=True)

test_loader = DataLoader(test_dataset, batch_size = 64, num_workers=3)


# The architecture must match the one the checkpoints were saved from.
vgg16_bn = vgg_fcn.vgg16(pretrained=True)#copy.deepcopy(vgg16)
num = 256
vgg16_bn.classifier = nn.Sequential(
            nn.Linear(512+1, num),
            nn.BatchNorm1d(num),
            nn.ReLU(True),
            nn.Dropout(p=0.3),
            nn.Linear(num, num),
            nn.BatchNorm1d(num),
            nn.ReLU(True),
            nn.Dropout(p=0.3),
            nn.Linear(num, 2)
        )


# waiting_list=  ['r2_10vgg0_01.pth',
# 'r2_10vgg0_10.pth',
# 'r2_10vgg0_21.pth',
# 'r2_10vgg0_31.pth',
# 'r2_10vgg0_41.pth',
# 'r2_10vgg0_51.pth',
# 'r2_10vgg0_60.pth',
# 'r2_10vgg0_70.pth',
# 'r2_10vgg0_81.pth',
# 'r2_10vgg0_90.pth'
# ]
# waiting_list = ['r2_5vgg0_01.pth',
#                 'r2_5vgg0_10.pth',
#                 'r2_5vgg0_21.pth',
#                 'r2_5vgg0_30.pth',
#                 'r2_5vgg0_40.pth']
# os.listdir returns names in arbitrary order; sort them so that the
# is_iceberg<i> column numbering is reproducible between runs.
waiting_list = sorted(i for i in os.listdir('vgg_models') if 'r3' in i)


#waiting_list = [i for i in os.listdir('vgg_models/') if 'r1' in i]
waiting_list= [os.path.join('vgg_models', i) for i in waiting_list]
net= vgg16_bn

for i,pth in enumerate(waiting_list):
    net.load_state_dict(torch.load(pth))
    # Keep model and batches on the same device: both follow use_cuda.
    if use_cuda:
        net.cuda()
    prob = []
    net.eval()
    for k, (val_x,val_angle, val_y) in enumerate(test_loader):
        val_angle=val_angle.type(torch.FloatTensor)
        if use_cuda:
            val_x, val_angle = val_x.cuda(), val_angle.cuda()
        x = Variable(val_x)
        angle=Variable(val_angle)
        # The network takes a single (image, angle) tuple.
        out = net((x,angle))
        logits = out.cpu().data.numpy()
        # Softmax with the row maximum subtracted first to prevent overflow;
        # the value kept is the share of class index 1.
        temp = np.exp(logits - np.max(logits, axis=1)[:, np.newaxis])
        ans= temp[:,1]/(temp.sum(axis=1))
        prob.append(ans)
        #print(out.size())
        progress_bar(k, len(test_loader))
    msg = 'is_iceberg%d' % (i)
    temp11[msg]= np.concatenate(prob)



In [15]:
# Pairwise correlation between the per-checkpoint prediction columns of temp11.
temp11.corr()
# [i for i in os.listdir('vgg_models') if 'r3' in i]

Unnamed: 0,is_iceberg0,is_iceberg1,is_iceberg2,is_iceberg3,is_iceberg4
is_iceberg0,1.0,0.715401,0.68443,0.793467,0.894659
is_iceberg1,0.715401,1.0,0.888102,0.682418,0.741345
is_iceberg2,0.68443,0.888102,1.0,0.685948,0.679946
is_iceberg3,0.793467,0.682418,0.685948,1.0,0.698735
is_iceberg4,0.894659,0.741345,0.679946,0.698735,1.0


In [7]:
#result_hist

# Score the test set with the five 'resnet34_acc<i>.pth' checkpoints; one
# prediction column per checkpoint is collected in temp11.
temp11 = pd.DataFrame()

# The test data, dataset and loader do not depend on the checkpoint, so they
# are built once instead of being re-read from disk on every iteration.
test = pd.read_json(BASE_dir + 'test.json')
test_X = raw_to_numpy(test)
fake_label = np.zeros(len(test_X))  # placeholder labels; the loop below never uses them

# NOTE(review): uses whatever train_transform is currently bound in the kernel - confirm.
test_dataset = iceberg_dataset(data= test_X, label=fake_label, transform=train_transform,test=True)

test_loader = DataLoader(test_dataset, batch_size = 64, num_workers=3)

for i in range(5):
    # NOTE(review): `resnet` must already be imported in the kernel.
    net = resnet.resnet34(num_classes=2)
    net.load_state_dict(torch.load('resnet34_acc%d.pth'%i))
    # Keep model and batches on the same device: both follow use_cuda.
    if use_cuda:
        net.cuda()

    prob = []
    net.eval()
    for k, (val_x, val_y) in enumerate(test_loader):
        if use_cuda:
            val_x = val_x.cuda()
        x = Variable(val_x)
        out = net(x)
        logits = out.cpu().data.numpy()
        # Softmax with the row maximum subtracted first to prevent overflow;
        # the value kept is the share of class index 1.
        temp = np.exp(logits - np.max(logits, axis=1)[:, np.newaxis])
        ans= temp[:,1]/(temp.sum(axis=1))
        prob.append(ans)
        #print(out.size())
        progress_bar(k, len(test_loader))
    msg = 'is_iceberg%d' %i
    temp11[msg]= np.concatenate(prob)



In [16]:
# Submission from the row-wise median of all columns in temp11, written without the index.
sub = test[['id']].copy()
sub['is_iceberg'] = temp11.median(axis=1)
sub.to_csv('submission23.csv', index=False)

In [25]:
# Row-wise summary statistics over the first six columns of temp11.
for stat_name in ('max', 'min', 'median'):
    temp11['is_iceberg_' + stat_name] = getattr(temp11.iloc[:, 0:6], stat_name)(axis=1)

# set up cutoff threshold for lower and upper bounds, easy to twist
# (as used below: cutoff_lo is the "all above" bound, cutoff_hi the "all below" bound)
cutoff_lo = 0.8
cutoff_hi = 0.2

temp11['is_iceberg_base'] = temp11['is_iceberg5']

# Rows where all six columns exceed cutoff_lo take the row maximum, rows where
# all six are below cutoff_hi take the row minimum, the rest keep the base column.
all_above = np.all(temp11.iloc[:,0:6] > cutoff_lo, axis=1)
all_below = np.all(temp11.iloc[:,0:6] < cutoff_hi, axis=1)
fallback = np.where(all_below, temp11['is_iceberg_min'], temp11['is_iceberg_base'])
temp11['is_iceberg'] = np.where(all_above, temp11['is_iceberg_max'], fallback)


In [28]:
# Submission taken directly from the 'is_iceberg5' column of temp11, written without the index.
sub = test[['id']].copy()
sub['is_iceberg'] = temp11['is_iceberg5']
sub.to_csv('submission5.csv', index=False)

In [8]:
# Score the test set with the single checkpoint 'save_resnet34_acc117.pth' and
# store the result as column 'is_iceberg5' of the existing temp11 frame.
# NOTE(review): `resnet` must already be imported in the kernel.
net = resnet.resnet34(num_classes=2)
net.load_state_dict(torch.load('save_resnet34_acc117.pth'))
# Keep model and batches on the same device: both follow use_cuda.
if use_cuda:
    net.cuda()

test = pd.read_json(BASE_dir + 'test.json')
test_X = raw_to_numpy(test)
fake_label = np.zeros(len(test_X))  # placeholder labels; the loop below never uses them

# NOTE(review): uses whatever train_transform is currently bound in the kernel - confirm.
test_dataset = iceberg_dataset(data= test_X, label=fake_label, transform=train_transform,test=True)

test_loader = DataLoader(test_dataset, batch_size = 64, num_workers=3)

prob = []
net.eval()
for k, (val_x, val_y) in enumerate(test_loader):
    if use_cuda:
        val_x = val_x.cuda()
    x = Variable(val_x)
    out = net(x)
    # Move the logits to the host once instead of twice per batch.
    logits = out.cpu().data.numpy()
    # Softmax with the row maximum subtracted first to prevent overflow;
    # the value kept is the share of class index 1.
    temp = np.exp(logits - np.max(logits, axis=1)[:, np.newaxis])
    ans= temp[:,1]/(temp.sum(axis=1))
    prob.append(ans)
    #print(out.size())
    progress_bar(k, len(test_loader))
msg = 'is_iceberg%d' %5
temp11[msg]= np.concatenate(prob)



In [None]:
# Show the first five columns of temp11.
temp11.iloc[:,0:5]

In [27]:
# Pairwise correlation between all columns of temp11 (the recorded output
# includes the derived max/min/median/base/final columns as well).
temp11.corr()

Unnamed: 0,is_iceberg0,is_iceberg1,is_iceberg2,is_iceberg3,is_iceberg4,is_iceberg5,is_iceberg_max,is_iceberg_min,is_iceberg_median,is_iceberg_base,is_iceberg
is_iceberg0,1.0,0.852644,0.822586,0.648968,0.883101,0.905277,0.682861,0.922862,0.942663,0.905277,0.9059
is_iceberg1,0.852644,1.0,0.905401,0.75471,0.833295,0.815734,0.821258,0.777728,0.95619,0.815734,0.81663
is_iceberg2,0.822586,0.905401,1.0,0.771766,0.774018,0.784324,0.847868,0.73863,0.918857,0.784324,0.785453
is_iceberg3,0.648968,0.75471,0.771766,1.0,0.685649,0.556919,0.940914,0.592617,0.749656,0.556919,0.559032
is_iceberg4,0.883101,0.833295,0.774018,0.685649,1.0,0.826391,0.685683,0.920097,0.909537,0.826391,0.827514
is_iceberg5,0.905277,0.815734,0.784324,0.556919,0.826391,1.0,0.653849,0.895245,0.89622,1.0,0.999683
is_iceberg_max,0.682861,0.821258,0.847868,0.940914,0.685683,0.653849,1.0,0.583326,0.792055,0.653849,0.655435
is_iceberg_min,0.922862,0.777728,0.73863,0.592617,0.920097,0.895245,0.583326,1.0,0.875356,0.895245,0.895989
is_iceberg_median,0.942663,0.95619,0.918857,0.749656,0.909537,0.89622,0.792055,0.875356,1.0,0.89622,0.897011
is_iceberg_base,0.905277,0.815734,0.784324,0.556919,0.826391,1.0,0.653849,0.895245,0.89622,1.0,0.999683


In [11]:
# Sanity check: print the first (train_indices, val_indices) pair that a
# shuffled 5-fold split seeded with RandomState(67) yields for 100 items.
seed = np.random.RandomState(67)
spliter = KFold(n_splits=5, shuffle=True, random_state=seed)
i = next(iter(spliter.split(list(range(100)))))
print(i)

(array([ 1,  2,  5,  6,  7,  8,  9, 10, 12, 13, 15, 16, 18, 19, 20, 21, 22,
       23, 25, 26, 27, 28, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 41, 42,
       44, 45, 46, 47, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 60, 61, 62,
       63, 65, 67, 68, 69, 71, 72, 73, 74, 75, 76, 77, 78, 80, 81, 84, 85,
       86, 87, 88, 89, 90, 91, 92, 94, 95, 96, 98, 99]), array([ 0,  3,  4, 11, 14, 17, 24, 29, 40, 43, 48, 59, 64, 66, 70, 79, 82,
       83, 93, 97]))


In [19]:
# Score the test set with every '.pth' checkpoint found in 'resnet34_save_model';
# one prediction column per checkpoint is collected in temp11.
temp11 = pd.DataFrame()

test = pd.read_json(BASE_dir + 'test.json')
test_X = raw_to_numpy(test)
fake_label = np.zeros(len(test_X))  # placeholder labels; the loop below never uses them

# NOTE(review): uses whatever train_transform is currently bound in the kernel - confirm.
test_dataset = iceberg_dataset(data= test_X, label=fake_label, transform=train_transform,test=True)

test_loader = DataLoader(test_dataset, batch_size = 64, num_workers=3)

# os.listdir returns names in arbitrary order. Sort them so that the
# is_iceberg<i> numbering (a later cell uses 'is_iceberg3' as its base column)
# refers to the same checkpoint on every run.
checkpoint_paths = [os.path.join('resnet34_save_model', name)
                    for name in sorted(os.listdir(path='resnet34_save_model'))
                    if '.pth' in name]

for i,pth in enumerate(checkpoint_paths):
    # NOTE(review): `resnet` must already be imported in the kernel.
    net = resnet.resnet34(num_classes=2)
    net.load_state_dict(torch.load(pth))
    # Keep model and batches on the same device: both follow use_cuda.
    if use_cuda:
        net.cuda()
    prob = []
    net.eval()
    for k, (val_x, val_y) in enumerate(test_loader):
        if use_cuda:
            val_x = val_x.cuda()
        x = Variable(val_x)
        out = net(x)
        logits = out.cpu().data.numpy()
        # Softmax with the row maximum subtracted first to prevent overflow;
        # the value kept is the share of class index 1.
        temp = np.exp(logits - np.max(logits, axis=1)[:, np.newaxis])
        ans= temp[:,1]/(temp.sum(axis=1))
        prob.append(ans)
        #print(out.size())
        progress_bar(k, len(test_loader))
    msg = 'is_iceberg%d' % i
    temp11[msg]= np.concatenate(prob)



In [26]:
# Submission taken from the combined 'is_iceberg' column of temp11, written without the index.
sub = test[['id']].copy()
sub['is_iceberg'] = temp11['is_iceberg']
sub.to_csv('submission2.csv', index=False)

In [24]:
# Row-wise mean over the columns of temp11.
# NOTE: this rebinds `result`, which the training cells also use for train()'s return value.
result = temp11.mean(1)
# Preview the first rows of the per-model prediction columns.
temp11.head()

Unnamed: 0,is_iceberg0,is_iceberg1,is_iceberg2,is_iceberg3
0,0.007027504,0.09244031,0.01784263,0.005578169
1,0.003931345,0.3659658,0.2564293,0.01571568
2,5.239599e-10,1.97075e-21,3.803356e-08,2.089403e-21
3,0.9993261,0.9456407,0.9853242,0.9989353
4,0.001448082,0.06435396,0.03096765,0.0002362306


In [25]:
# Cutoff ensembling over the four per-model prediction columns.
# Take the model columns once, before any derived column is added. The earlier
# `iloc[:, 0:6]` slice was copied from the six-model variant of this cell and
# here reached into the derived max/min columns instead of model predictions.
model_preds = temp11.iloc[:, :4].copy()

temp11['is_iceberg_max'] = model_preds.max(axis=1)
temp11['is_iceberg_min'] = model_preds.min(axis=1)
temp11['is_iceberg_median'] = model_preds.median(axis=1)

# set up cutoff threshold for lower and upper bounds, easy to twist
# (as used below: cutoff_lo is the "all above" bound, cutoff_hi the "all below" bound)
cutoff_lo = 0.8
cutoff_hi = 0.2

temp11['is_iceberg_base'] = temp11['is_iceberg3']

# Rows where every model exceeds cutoff_lo take the row maximum, rows where
# every model is below cutoff_hi take the row minimum, the rest keep the base model.
all_above = (model_preds > cutoff_lo).all(axis=1)
all_below = (model_preds < cutoff_hi).all(axis=1)
temp11['is_iceberg'] = np.where(all_above,
                                    temp11['is_iceberg_max'],
                                    np.where(all_below,
                                             temp11['is_iceberg_min'],
                                             temp11['is_iceberg_base']))

In [73]:
#! cp vgg_fcn.ipynb vgg_angle.ipynb
# Write every column currently in temp11 to CSV, without the index column.
temp11.to_csv('others/vgg_10fold.csv',index=False)