In [3]:
import os
import pandas as pd
from sklearn import model_selection

if __name__ == "__main__":
    # Assign every training row to one of 5 folds, grouping by `tfrecord` so
    # that all rows sharing a tfrecord id land in the same fold, then write
    # the result to train_folds.csv next to the input file.
    input_path = "D:\\Dataset\\melanoma-128-128\\"
    df = pd.read_csv(os.path.join(input_path, "train.csv"))
    # Rows with tfrecord == -1 are excluded before folds are assigned.
    df = df.loc[df.tfrecord != -1].reset_index(drop=True)
    df["kfold"] = -1
    # Seeded shuffle so the row order of the written CSV is reproducible.
    df = df.sample(frac=1, random_state=42).reset_index(drop=True)

    # Split the tfrecord ids that are actually present instead of assuming
    # they are exactly 0..14; KFold yields positions, which are mapped back
    # to ids before matching rows. With ids 0..14 this is the same
    # assignment as splitting np.arange(15).
    tfrecord_ids = df.tfrecord.drop_duplicates().sort_values().to_numpy()
    kf = model_selection.KFold(n_splits=5, shuffle=True, random_state=42)
    for fold, (_, valid_pos) in enumerate(kf.split(tfrecord_ids)):
        df.loc[df.tfrecord.isin(tfrecord_ids[valid_pos]), "kfold"] = fold
    print(df.target.value_counts())
    df.to_csv(os.path.join(input_path, "train_folds.csv"), index=False)

0    32111
1      581
Name: target, dtype: int64


In [17]:
# Class balance (counts per `target` value) among the rows whose tfrecord id is 3.
df[df.tfrecord==3].target.value_counts()

0    6399
1     116
Name: target, dtype: int64

In [None]:
# NOTE(review): unexecuted cell (`In [None]`). No callable named `Dataset` is
# defined by the cells above; a later cell runs `import Dataset`, which would
# bind a module, and calling a module raises TypeError. Confirm whether this
# cell can be deleted.
Dataset()

In [1]:
from PIL import Image
import torch
import numpy as np

class ClassificationDataset:
    """Map-style dataset yielding an image tensor, its target and one row of tabular features.

    Args:
        image_paths: sequence of image file paths; its length is the dataset length.
        targets: indexable collection of labels, one per image.
        tabular: object supporting ``.copy()`` and ``[row, :]`` indexing
            (presumably a 2-D NumPy array -- TODO confirm against callers).
            A copy is stored, so later changes by the caller are not seen.
        resize: ``None`` to keep the decoded size, otherwise a
            ``(height, width)`` pair.
        augmentations: optional callable invoked as
            ``augmentations(image=array)`` that returns a mapping with an
            ``"image"`` entry.
    """

    def __init__(self, image_paths, targets, tabular, resize, augmentations=None):
        self.image_paths = image_paths
        self.targets = targets
        self.resize = resize
        self.augmentations = augmentations
        self.tabular = tabular.copy()

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.image_paths)

    def __getitem__(self, item):
        """Load sample `item`.

        Returns:
            dict with ``"image"`` (float32 tensor, channels first),
            ``"targets"`` and ``"tabular"`` tensors.
        """
        # Open inside a context manager so the file handle is released as soon
        # as the pixels are decoded, and force three channels so the
        # HWC -> CHW transpose below also works for grayscale / palette /
        # RGBA files.
        with Image.open(self.image_paths[item]) as handle:
            image = handle.convert("RGB")
        targets = self.targets[item]
        tabular = self.tabular[item, :]
        if self.resize is not None:
            # PIL expects (width, height); `resize` is stored as (height, width).
            image = image.resize(
                (self.resize[1], self.resize[0]), resample=Image.BILINEAR
            )
        image = np.array(image)
        if self.augmentations is not None:
            augmented = self.augmentations(image=image)
            image = augmented["image"]
        # HWC -> CHW.
        image = np.transpose(image, (2, 0, 1)).astype(np.float32)
        return {
            "image": torch.tensor(image),
            "targets": torch.tensor(targets),
            "tabular": torch.tensor(tabular),
        }

In [50]:
# Show the installed PyTorch version.
torch.__version__

'1.5.1'

In [1]:
import os
import torch
import pandas as pd
from sklearn import model_selection
import albumentations 
import pretrainedmodels
import albumentations as A
import albumentations as aug
import numpy as np
import pandas as pd
import torch.nn as nn
from sklearn.model_selection import KFold
import Dataset

from apex import amp
from sklearn import metrics
from torch.nn import functional as F

from wtfml.data_loaders.image import ClassificationLoader
from wtfml.engine import Engine
from wtfml.utils import EarlyStopping
from albumentations import (
    HorizontalFlip, IAAPerspective, ShiftScaleRotate, CLAHE, RandomRotate90,
    Transpose, ShiftScaleRotate, Blur, OpticalDistortion, GridDistortion, HueSaturationValue,
    IAAAdditiveGaussianNoise, GaussNoise, MotionBlur, MedianBlur, IAAPiecewiseAffine,
    IAASharpen, IAAEmboss, RandomBrightnessContrast, Flip, OneOf, Compose
)

import efficientnet_pytorch

"""
class SEResNext50_32x4d(nn.Module):
    def __init__(self, pretrained="imagenet"):
        super(SEResNext50_32x4d, self).__init__()
        self.model = pretrainedmodels.__dict__[
            "se_resnext50_32x4d"
        ](pretrained=pretrained)
        #for param in self.model.parameters():
        #    param.requires_grad=False
        self.out = nn.Linear(2048, 1)
    
    def forward(self, image, targets):
        bs, _, _, _ = image.shape
        x = self.model.features(image)
        x = F.adaptive_avg_pool2d(x, 1)
        x = x.reshape(bs, -1)
        out = self.out(x)
        loss = nn.BCEWithLogitsLoss()(
            out, targets.reshape(-1, 1).type_as(out)
        )
        return out, loss
      
class EfficientNet(nn.Module):
    def __init__(self):
        super(EfficientNet, self).__init__()
        self.base_model = efficientnet_pytorch.EfficientNet.from_pretrained(
            'efficientnet-b5'
        )
        self.base_model._fc = nn.Linear(
            in_features=2048, 
            out_features=1000, 
            bias=True
        )
        
        self.=nn.Linear(self.num_ftrs,1000)
        self.csv = nn.Sequential(nn.Linear(9, 500),
                                 nn.BatchNorm1d(500),
                                 nn.ReLU(),
                                 nn.Dropout(p=0.3),
                                 
                                 nn.Linear(500, 500),
                                 nn.BatchNorm1d(500),
                                 nn.ReLU(),
                                 nn.Dropout(p=0.3))
        #print(self.base_model)
        
    def forward(self, image, targets):
        out = self.base_model(image)
        loss = nn.BCEWithLogitsLoss()(out, targets.view(-1, 1).type_as(out))
        return out, loss
"""

class EfficientNet(nn.Module):
    """Pretrained EfficientNet backbone with a single-logit classification head.

    Args:
        model_name: name handed to
            ``efficientnet_pytorch.EfficientNet.from_pretrained``. Defaults to
            ``'efficientnet-b5'``, the variant used by the original code.
    """

    def __init__(self, model_name="efficientnet-b5"):
        super().__init__()

        self.features = efficientnet_pytorch.EfficientNet.from_pretrained(model_name)
        # Size the head from the backbone's own classifier input width instead
        # of hard-coding 2048, which is only correct for b5.
        self.num_features = self.features._fc.in_features
        self.classification = nn.Linear(self.num_features, 1)

    def forward(self, image, targets=None):
        """Return ``(logits, loss)`` for a batch.

        Args:
            image: batch of images fed to the backbone's ``extract_features``.
            targets: optional labels; reshaped to ``(-1, 1)`` and cast to the
                logits' type for ``BCEWithLogitsLoss``. When omitted, the
                returned loss is ``None``.
        """
        feature_map = self.features.extract_features(image)

        # Global average pool over the spatial dims, then flatten to
        # (batch, channels).
        pooled = F.adaptive_avg_pool2d(feature_map, 1).flatten(1)

        out = self.classification(pooled)
        if targets is None:
            return out, None
        loss = nn.BCEWithLogitsLoss()(out, targets.view(-1, 1).type_as(out))
        return out, loss

def train(fold):
    """Train the EfficientNet classifier with `fold` held out for validation.

    Reads fold assignments from ``train_folds.csv``, trains on every row whose
    ``kfold`` differs from `fold` and validates on the rows whose ``kfold``
    equals `fold`. After each epoch the validation ROC AUC drives both the
    learning-rate scheduler and ``EarlyStopping``, which receives the path
    ``model{fold}.bin`` (presumably where it writes the best checkpoint --
    TODO confirm in wtfml).

    Args:
        fold: value of the ``kfold`` column to hold out; also used in the
            checkpoint file name.
    """
    training_data_path = "D:\\Dataset\\New folder\\train\\"
    model_path = "D:\\Dataset\\New folder\\"
    df = pd.read_csv("D:\\Dataset\\New folder\\train_folds.csv")
    device = "cuda"
    epochs = 50
    train_bs = 8
    valid_bs = 8
    mean = (0.485, 0.456, 0.406)
    std = (0.229, 0.224, 0.225)
    p = 0.5

    # Hold out the requested fold. This was previously hard-coded to fold 0,
    # so every call validated on the same rows regardless of `fold`.
    df_train = df.loc[df.kfold != fold].reset_index(drop=True)
    df_valid = df.loc[df.kfold == fold].reset_index(drop=True)

    train_aug = albumentations.Compose(
        [
            A.Cutout(p=p),
            A.RandomRotate90(p=p),
            A.Flip(p=p),
            A.OneOf([
                A.RandomBrightnessContrast(brightness_limit=0.2,
                                           contrast_limit=0.2,
                                           ),
                A.HueSaturationValue(
                    hue_shift_limit=20,
                    sat_shift_limit=50,
                    val_shift_limit=50)
            ], p=p),
            A.OneOf([
                A.IAAAdditiveGaussianNoise(),
                A.GaussNoise(),
            ], p=p),
            A.OneOf([
                A.MotionBlur(p=0.2),
                A.MedianBlur(blur_limit=3, p=0.1),
                A.Blur(blur_limit=3, p=0.1),
            ], p=p),
            A.ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.2, rotate_limit=45, p=p),
            A.OneOf([
                A.OpticalDistortion(p=0.3),
                A.GridDistortion(p=0.1),
                A.IAAPiecewiseAffine(p=0.3),
            ], p=p),
            A.Normalize(mean, std, max_pixel_value=255.0, always_apply=True),
        ]
    )

    # Validation images are only normalised.
    valid_aug = albumentations.Compose(
        [
            A.Normalize(mean, std, max_pixel_value=255.0, always_apply=True),
        ]
    )

    train_images = df_train.image_name.values.tolist()
    train_images = [os.path.join(training_data_path, i + ".jpg") for i in train_images]
    train_targets = df_train.target.values

    valid_images = df_valid.image_name.values.tolist()
    valid_images = [os.path.join(training_data_path, i + ".jpg") for i in valid_images]
    valid_targets = df_valid.target.values

    train_dataset = ClassificationLoader(
        image_paths=train_images,
        targets=train_targets,
        resize=(256, 256),
        augmentations=train_aug
    )

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=train_bs,
        shuffle=True,
        num_workers=4
    )

    valid_dataset = ClassificationLoader(
        image_paths=valid_images,
        targets=valid_targets,
        resize=(256, 256),
        augmentations=valid_aug
    )

    # No shuffling, so predictions stay aligned with `valid_targets`.
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=valid_bs,
        shuffle=False,
        num_workers=4)

    model = EfficientNet()
    model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    # mode="max" because the monitored metric (AUC) is better when larger.
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        patience=3,
        mode="max"
    )

    model, optimizer = amp.initialize(
        model,
        optimizer,
        opt_level="O1",
        verbosity=0
    )

    es = EarlyStopping(patience=5, mode="max")
    for epoch in range(epochs):
        training_loss = Engine.train(
            train_loader,
            model,
            optimizer,
            device,
            fp16=True
        )
        predictions, valid_loss = Engine.evaluate(
            valid_loader,
            model,
            device
        )
        # Stack the per-batch outputs and flatten to one score per row.
        predictions = np.vstack((predictions)).ravel()

        auc = metrics.roc_auc_score(valid_targets, predictions)
        scheduler.step(auc)
        print(f"epoch={epoch}, auc={auc}")
        es(auc, model, os.path.join(model_path, f"model{fold}.bin"))
        if es.early_stop:
            print("early stopping")
            break


def predict(fold):
    """Score the test set with the checkpoint saved for `fold`.

    Args:
        fold: fold number used to locate ``model{fold}.bin`` under the model
            directory.

    Returns:
        1-D NumPy array obtained by stacking and flattening the batches
        returned by ``Engine.predict``, in the row order of ``test.csv``.
    """
    test_data_path = "D:\\Dataset\\melanoma-384-384\\test\\"
    model_path = "D:\\Dataset\\melanoma-384-384\\"
    df_test = pd.read_csv("D:\\Dataset\\melanoma-384-384\\test.csv")
    # Placeholder labels: the loader requires a target for every image.
    df_test.loc[:, "target"] = 0
    p = 0.5
    device = "cuda"
    test_bs = 16
    mean = (0.485, 0.456, 0.406)
    std = (0.229, 0.224, 0.225)

    # NOTE(review): random augmentations are applied to the test images,
    # presumably as test-time augmentation across repeated calls -- confirm.
    # The unused normalise-only pipeline and unused `epochs` were dropped.
    train_aug = albumentations.Compose(
        [
            A.RandomRotate90(p=p),
            A.Flip(p=p),
            A.OneOf([
                A.RandomBrightnessContrast(brightness_limit=0.2,
                                           contrast_limit=0.2,
                                           ),
                A.HueSaturationValue(
                    hue_shift_limit=20,
                    sat_shift_limit=50,
                    val_shift_limit=50)
            ], p=p),
            A.OneOf([
                A.IAAAdditiveGaussianNoise(),
                A.GaussNoise(),
            ], p=p),
            aug.Normalize(mean, std, max_pixel_value=255.0, always_apply=True),
        ]
    )

    test_images = df_test.image_name.values.tolist()
    test_images = [os.path.join(test_data_path, i + ".jpg") for i in test_images]
    test_targets = df_test.target.values

    test_dataset = ClassificationLoader(
        image_paths=test_images,
        targets=test_targets,
        resize=(384, 384),
        augmentations=train_aug
    )
    # No shuffling, so predictions stay aligned with the rows of test.csv.
    test_loader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=test_bs,
        shuffle=False,
        num_workers=4
    )

    # Use the model class that train() actually builds. `SEResNext50_32x4d`
    # exists only inside a string literal in this file, so referencing it
    # raised NameError.
    model = EfficientNet()
    model.load_state_dict(torch.load(os.path.join(model_path, f"model{fold}.bin")))
    model.to(device)

    predictions = Engine.predict(
        test_loader,
        model,
        device
    )
    return np.vstack((predictions)).ravel()


# Entry point: run training with fold=0 only.
if __name__ == "__main__":
    train(fold=0)

Loaded pretrained weights for efficientnet-b5


  1%|██▎                                                                                                                                                          | 48/3270 [01:21<1:31:10,  1.70s/it, loss=0.386]


KeyboardInterrupt: 

In [8]:
# Wrap the per-fold prediction arrays in single-column DataFrames.
# NOTE(review): both active frames name their column 'fold0' although they
# hold fold 1 / fold 2 predictions; the averaging cell reads them back as
# dff1['fold0'] / dff2['fold0'], so any rename must be made in both places.
# `final_predicted_fold_*` is not defined in any visible cell.
#df_fold3=pd.DataFrame(final_predicted_fold_3, columns=['fold3'])
#df_fold4=pd.DataFrame(final_predicted_fold_4, columns=['fold4'])
#df_fold0=pd.DataFrame(final_predicted_fold_0, columns=['fold0'])
df_fold1=pd.DataFrame(final_predicted_fold_1, columns=['fold0'])
df_fold2=pd.DataFrame(final_predicted_fold_2, columns=['fold0'])

In [9]:
# Persist the fold-1 and fold-2 prediction frames as CSV without the index
# (the writes for folds 0, 3 and 4 are commented out).
#df_fold3.to_csv(r'D:/Dataset/melanoma_fold_3.csv', index=False)
#df_fold4.to_csv(r'D:/Dataset/melanoma_fold_4.csv', index=False)
#df_fold0.to_csv(r'D:/Dataset/melanoma_fold_0.csv', index=False)
df_fold1.to_csv(r'D:/Dataset/melanoma_fold_1.csv', index=False)
df_fold2.to_csv(r'D:/Dataset/melanoma_fold_2.csv', index=False)

In [12]:
# Reload the saved per-fold predictions.
# NOTE(review): the loads for dff0, dff3 and dff4 are commented out, yet the
# averaging cell uses all five frames, so it only works with leftover kernel
# state.
#dff3=pd.read_csv(r'D:/Dataset/melanoma_fold_3.csv')
#dff4=pd.read_csv(r'D:/Dataset/melanoma_fold_4.csv')
#dff0=pd.read_csv(r'D:/Dataset/melanoma_fold_0.csv')
dff1=pd.read_csv(r'D:/Dataset/melanoma_fold_1.csv')
dff2=pd.read_csv(r'D:/Dataset/melanoma_fold_2.csv')

In [35]:
# Unweighted mean of the five per-fold prediction columns.
# NOTE(review): dff0, dff3 and dff4 are not created by any active line in the
# visible cells; dff1/dff2 store their values under the column name 'fold0'.
final_pred=(dff3['fold3']+dff4['fold4']+dff0['fold0']+dff1['fold0']+dff2['fold0'])/5

In [37]:
# 50/50 blend of the fold average with melo128['target'].
# NOTE(review): `melo128` is loaded by a cell that appears after this one, and
# this cell is not idempotent: it overwrites final_pred from itself, so
# re-running it blends again.
final_pred=(final_pred+melo128['target'])/2

In [32]:
# Load the submission CSV whose `target` column is blended into final_pred.
melo128=pd.read_csv(r'D:\Dataset\melanoma-128-128\submission_128_128_stratified.csv')

In [25]:
# Unweighted mean of the five per-fold test predictions (presumably NumPy
# arrays of equal length -- `final_predicted_fold_*` is not defined in any
# visible cell).
final_pred=(final_predicted_fold_0+final_predicted_fold_1+final_predicted_fold_2+final_predicted_fold_3+final_predicted_fold_4)/5

In [34]:
# Inspect the `target` column of the loaded submission; the recorded output
# shows negative values, i.e. raw scores rather than probabilities.
melo128['target']

0       -7.080936
1       -7.145298
2       -7.702933
3       -8.029714
4       -5.291790
           ...   
10977   -5.032100
10978   -3.916981
10979   -3.197421
10980   -7.903467
10981   -4.164320
Name: target, Length: 10982, dtype: float64

In [38]:
# Fill the sample-submission template's `target` column with final_pred and
# save it as the final submission file.
# NOTE(review): other cells apply a sigmoid to final_pred, which suggests the
# values written here are raw logits -- confirm that is intended.
sample = pd.read_csv("D:\\Dataset\\melanoma-384-384\\sample_submission.csv")
sample.loc[:, "target"] = final_pred
sample.to_csv("D:\\Dataset\\melanoma-384-384\\submission_final_pred.csv", index=False)

In [13]:
# Load the 128x128 submission CSV used for blending.
pred_128 = pd.read_csv("D://Dataset//melanoma-128-128/submission_128_128.csv")

In [16]:
# 50/50 blend of the 128x128 submission's `target` column with final_pred.
compbo=(pred_128['target']+final_pred)/2

In [17]:
# Inspect the blended scores.
compbo

0       -6.951172
1       -8.158008
2       -9.777734
3       -9.273438
4       -4.504590
           ...   
10977   -6.233594
10978   -3.941406
10979   -2.295850
10980   -8.334375
10981   -3.490332
Name: target, Length: 10982, dtype: float64

In [16]:
# NOTE(review): `pred_df2` is not defined in any visible cell; this relies on
# leftover kernel state.
fin=(pred_df2['target'])

In [5]:
# Element-wise logistic sigmoid of final_pred, computed with NumPy.
1/(1+np.exp(-final_pred))

array([2.9295834e-04, 5.6412653e-05, 3.5608546e-06, ..., 2.3136349e-02,
       3.1408892e-04, 1.9708587e-02], dtype=float32)

In [5]:
# Wrap final_pred (a NumPy array here, as torch.from_numpy requires) in a tensor.
a=torch.from_numpy(final_pred)

In [6]:
# Apply the sigmoid through torch and convert the result back to a NumPy array.
out=torch.nn.Sigmoid()(a).numpy()

## out[out>0.3]

In [7]:
# Inspect the sigmoid-transformed predictions.
out

array([1.1584830e-04, 5.0462342e-05, 1.2344542e-05, ..., 1.0784917e-01,
       5.5341388e-04, 1.2191330e-02], dtype=float32)

In [28]:
# Scratch buffer: a 5x1 array of zeros. The previous `test = pd.DataFrame()`
# was overwritten on the very next line, so that dead assignment is removed.
test = np.zeros((5, 1))


In [78]:
# Toy data for experimenting with averaging: an empty accumulator list and a
# small array of fake predictions (`pediction` is the name the next cell uses).
test=[]
pred=[1.99,2,2,4,2,2]
pediction=np.array(pred)

In [79]:
# Append two shifted copies of the toy array (+0 and +1) to `test`.
# Re-running this cell appends again because `test` is mutated in place.
for i in range(0,2):
    test.append(pediction+i)

In [84]:
# Inspect the accumulated toy arrays.
test

[array([1.99, 2.  , 2.  , 4.  , 2.  , 2.  ]),
 array([2.99, 3.  , 3.  , 5.  , 3.  , 3.  ])]

In [85]:
# Element-wise mean across the arrays collected in `test` (axis 0 = across arrays).
v=np.mean(test,axis=0)

In [87]:
# Inspect the averaged toy array.
v 

array([2.49, 2.5 , 2.5 , 4.5 , 2.5 , 2.5 ])

In [58]:
# Scalar sanity check: the argument is the single number 1.99 + 2.99, so
# np.sum just returns it.
np.sum((1.99+2.99))

4.98

In [82]:
# Inspect the accumulated toy arrays again.
test

[array([1.99, 2.  , 2.  , 4.  , 2.  , 2.  ]),
 array([2.99, 3.  , 3.  , 5.  , 3.  , 3.  ])]

In [86]:
# Position of the largest entry along the last axis of `v`.
np.argmax(v, axis=-1)


3

In [70]:
# 6x1 column vector of toy values (scratch data).
a=np.array([[1.99],
        [2.  ],
        [6.  ],
        [3.  ],
        [4.  ],
        [5.  ]])

In [16]:
# Build the "se_resnext50_32x4d" model from `pretrainedmodels` with its
# 'imagenet' pretrained weights.
model = pretrainedmodels.__dict__[
            "se_resnext50_32x4d"
        ](pretrained='imagenet')

In [26]:
# Freeze every parameter of `model` so no gradients are computed for them.
# NOTE(review): the recorded output below this cell lists module names, which
# this loop does not print -- the output looks stale from an earlier version.
for param in model.parameters():
    param.requires_grad=False

layer0
layer1
layer2
layer3
layer4
avg_pool
last_linear


In [29]:
# Build an EfficientNet-B6 with pretrained weights; as the cell's last
# expression, its full module repr is displayed (very long output).
efficientnet_pytorch.EfficientNet.from_pretrained('efficientnet-b6')

Loaded pretrained weights for efficientnet-b6


EfficientNet(
  (_conv_stem): Conv2dStaticSamePadding(
    3, 56, kernel_size=(3, 3), stride=(2, 2), bias=False
    (static_padding): ZeroPad2d(padding=(0, 1, 0, 1), value=0.0)
  )
  (_bn0): BatchNorm2d(56, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)
  (_blocks): ModuleList(
    (0): MBConvBlock(
      (_depthwise_conv): Conv2dStaticSamePadding(
        56, 56, kernel_size=(3, 3), stride=[1, 1], groups=56, bias=False
        (static_padding): ZeroPad2d(padding=(1, 1, 1, 1), value=0.0)
      )
      (_bn1): BatchNorm2d(56, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)
      (_se_reduce): Conv2dStaticSamePadding(
        56, 14, kernel_size=(1, 1), stride=(1, 1)
        (static_padding): Identity()
      )
      (_se_expand): Conv2dStaticSamePadding(
        14, 56, kernel_size=(1, 1), stride=(1, 1)
        (static_padding): Identity()
      )
      (_project_conv): Conv2dStaticSamePadding(
        56, 32, kernel_siz

In [None]:
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_digits
import numpy as np
from sklearn.svm import SVC
from sklearn import metrics
from sklearn.ensemble import RandomForestClassifier
from sklearn import preprocessing
import pandas as pd
from sklearn.ensemble import GradientBoostingClassifier
import xgboost as xgb
from sklearn.neighbors import KNeighborsClassifier as knn

# Load scikit-learn's digits dataset, standardise the features, and split it.
# NOTE(review): the scaler is fitted on the full dataset before the split, so
# the held-out rows influence the scaling statistics.
data = load_digits()
data_D = preprocessing.StandardScaler().fit_transform(data.data)
data_L = data.target
# test_size=0.7: 70% of the rows are held out for testing, 30% used for training.
data_train, data_test, label_train, label_test = train_test_split(data_D,data_L,random_state=1,test_size=0.7)

def SelectModel(modelname):
    """Return a fresh, unfitted classifier for a short model name.

    Args:
        modelname: one of ``"SVM"``, ``"GBDT"``, ``"RF"``, ``"XGBOOST"`` or
            ``"KNN"``.

    Returns:
        A newly constructed estimator.

    Raises:
        ValueError: if `modelname` is not one of the supported names
            (previously this fell through to ``return model`` and raised
            UnboundLocalError).
    """
    # Factories are lambdas so only the requested estimator is constructed.
    factories = {
        "SVM": lambda: SVC(kernel='rbf', C=16, gamma=0.125, probability=True),
        "GBDT": lambda: GradientBoostingClassifier(),
        "RF": lambda: RandomForestClassifier(),
        # `xgb` is the xgboost module itself; calling it raised TypeError.
        # The scikit-learn style estimator is xgb.XGBClassifier.
        "XGBOOST": lambda: xgb.XGBClassifier(),
        "KNN": lambda: knn(),
    }
    try:
        factory = factories[modelname]
    except KeyError:
        raise ValueError(
            "Unknown model name %r; expected one of %s"
            % (modelname, sorted(factories))
        ) from None
    return factory()


def Stacker_(clf, n_folds, X_train, y_train, X_test):
    """Produce out-of-fold class probabilities for stacking.

    For each of `n_folds` splits of `X_train`, `clf` is fitted on the training
    part, its ``predict_proba`` output for the held-out part is stored in the
    matching rows of the out-of-fold matrix, and its ``predict_proba`` output
    for `X_test` is accumulated.

    Args:
        clf: estimator exposing ``fit`` and ``predict_proba``; it is refitted
            in place on every fold.
        n_folds: number of K-fold splits.
        X_train: training features, indexable by integer index arrays.
        y_train: training labels, indexable by integer index arrays.
        X_test: test features.

    Returns:
        ``(oof_train, oof_test)``: arrays of shape ``(n_train, n_classes)``
        and ``(n_test, n_classes)``, where ``n_classes`` is the number of
        distinct values in `y_train`. ``oof_test`` is the mean of the per-fold
        test probabilities.
    """
    ntrain = X_train.shape[0]
    ntest = X_test.shape[0]
    classnum = len(np.unique(y_train))

    # random_state only takes effect together with shuffle=True; current
    # scikit-learn rejects a random_state passed without it.
    kf = KFold(n_splits=n_folds, shuffle=True, random_state=1)

    oof_train = np.zeros((ntrain, classnum))
    oof_test = np.zeros((ntest, classnum))

    for train_index, valid_index in kf.split(X_train):
        clf.fit(X_train[train_index], y_train[train_index])
        oof_train[valid_index] = clf.predict_proba(X_train[valid_index])
        oof_test += clf.predict_proba(X_test)

    # Average once, after all folds. Dividing inside the loop shrank the
    # earlier folds' contributions repeatedly and gave a wrong mean.
    oof_test /= float(n_folds)

    return oof_train, oof_test



# Baseline: a single random forest trained directly on the standardised
# features, evaluated by accuracy (in percent) on the held-out split.
clf_second = RandomForestClassifier()

clf_second.fit(data_train, label_train)

pred = clf_second.predict(data_test)

accuracy = metrics.accuracy_score(label_test, pred)*100

print(accuracy)


#First Level Model

# First-level models: for each named model, collect its out-of-fold
# probabilities on the training split and its probabilities for the test
# split (10 folds each). 'XGBOOST' is not included in the list.
modelist = ['SVM','GBDT','RF','KNN']

newfeature_list = []

newtestdata_list = []

for modelname in modelist:

    clf_first = SelectModel(modelname)

    oof_train_ ,oof_test_= Stacker_(clf=clf_first,n_folds=10,X_train=data_train,y_train=label_train,X_test=data_test)

    newfeature_list.append(oof_train_)

    newtestdata_list.append(oof_test_)

    
 # Feature combination

# Join each model's probability matrix side by side (along axis 1) to form the
# second-level feature matrices. np.concatenate accepts the whole list at
# once; the previous `reduce(...)` call raised NameError because `reduce` is
# never imported in this notebook.
newfeature = np.concatenate(newfeature_list, axis=1)

newtestdata = np.concatenate(newtestdata_list, axis=1)


#Second level, use the output from the previous level as the training set

# Second-level model: a random forest trained on the stacked first-level
# probabilities, evaluated by accuracy (in percent) on the stacked test
# features. Note that `pred` and `accuracy` overwrite the baseline's values.
clf_second1 = RandomForestClassifier()

clf_second1.fit(newfeature, label_train)

pred = clf_second1.predict(newtestdata)

accuracy = metrics.accuracy_score(label_test, pred)*100

print(accuracy)