In [28]:
import os
import pandas as pd
import cv2
import torch 
from PIL import Image
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity, euclidean_distances

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

class Config:
    DATA_DIR = '/kaggle/input/phuket-location/images/images'
    CSV_PATH = os.path.join('/kaggle/input/phuket-location', 'train.csv')
    CSV_PATH_TEST = os.path.join('/kaggle/input/phuket-location', 'test.csv')
    train_batch_size = 10
    val_batch_size = 10
    num_workers = 1
    image_size = 224
    output_dim = 512
    hidden_dim = 1024
    input_dim = 3
    epochs = 20
    lr = 1e-4
    num_of_classes = 15
    pretrained = True
    model_name = 'resnet101'
    seed = 42
    
Config = Config()

# DataSet

In [29]:
from torchvision import transforms
from torchvision.transforms import ToTensor

train_transform = transforms.Compose([
        transforms.ToPILImage(),
        # transforms.RandomRotation(10),         # rotate +/- 10 degrees
        # transforms.RandomHorizontalFlip(),     # reverse 50% of images
        # transforms.Resize(Config.image_size, Config.image_size),   ##### resize shortest side to 224 pixels
        # transforms.CenterCrop(224),            ##### crop longest side to 224 pixels at center
    
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225])
    ])

val_transform = transforms.Compose([
        transforms.ToPILImage(),
        # transforms.RandomHorizontalFlip(),     # reverse 50% of images
        # transforms.Resize(Config.image_size, Config.image_size),   ##### resize shortest side to 224 pixels
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225])
    ])


# Train-Val Split

In [30]:
# # Split train into train and val
df_train = pd.read_csv('/kaggle/input/phuket-location/train.csv')

df_trainn, df_val = train_test_split(df_train, test_size=0.01, random_state=123)

In [31]:
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
import os
import pandas as pd
from sklearn import preprocessing
from torch.utils.data import Dataset


def img_path_from_id(id):
    img_path = os.path.join(Config.DATA_DIR, 'train', f'{id}')
    return img_path

def img_path_from_idd(id):
    img_path = os.path.join(Config.DATA_DIR, 'test', f'{id}')
    return img_path


class ImageDataset(Dataset):
    def __init__(self):
        self.df = df_trainn
        
        self.landmark_id_encoder = preprocessing.LabelEncoder()
        #self.df['label'] = self.landmark_id_encoder.fit_transform(self.df['label'])
        self.df['path'] = self.df['id'].apply(img_path_from_id)
        self.paths = self.df['path'].values
        self.ids = self.df['id'].values
        self.landmark_ids = self.df['label'].values
        self.transform = train_transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        path, id, landmark_id = self.paths[idx], self.ids[idx], self.landmark_ids[idx]
        
        img = cv2.imread(path)
        img = cv2.resize(img, (Config.image_size , Config.image_size))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        
#         img = Image.open(path)
#         img = Image.fromarray(img)
        # img = self.transform(img)
        
        if self.transform:
            img = self.transform(img)
        
        #labels = torch.tensor(self.landmark_ids[idx])  
        
        return img.to(device), landmark_id, id 

In [32]:
class ArcFace(nn.Module):
    def __init__(self, in_features, out_features, scale_factor=64.0, 
                 margin=0.50, 
                 criterion=None):
        super(ArcFace, self).__init__()
        self.in_features = in_features
        self.out_features = out_features

        if criterion:
            self.criterion = criterion
        else:
            self.criterion = nn.CrossEntropyLoss()

        self.margin = margin
        self.scale_factor = scale_factor

        self.weight = nn.Parameter(
            torch.FloatTensor(out_features, in_features))
        nn.init.xavier_uniform_(self.weight)

        self.cos_m = math.cos(margin)
        self.sin_m = math.sin(margin)
        self.th = math.cos(math.pi - margin)
        self.mm = math.sin(math.pi - margin) * margin

    def forward(self, input, label):
        # input is not l2 normalized
        # cosine = F.linear(F.normalize(input), F.normalize(self.weight))
        cosine = F.linear(F.normalize(input.to(device)), F.normalize(self.weight.to(device)))
        
        # F.linear(F.normalize(input), F.normalize(self.weight))
        sine = torch.sqrt(1.0 - torch.pow(cosine, 2))

        phi = cosine * self.cos_m - sine * self.sin_m
        phi = phi.type(cosine.type())
        phi = torch.where(cosine > self.th, phi, cosine - self.mm)

        one_hot = torch.zeros(cosine.size(), device= input.device).to(device)
        one_hot.scatter_(1, label.view(-1, 1).long(), 1)

        logit = (one_hot * phi) + ((1.0 - one_hot) * cosine)
        logit *= self.scale_factor

        #loss = self.criterion(logit, label) 
        
        loss = self.criterion(logit.to(device), label.to(device))

        return loss, logit 

# GeM

In [33]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class GeM(nn.Module):
    def __init__(self, p=3, eps=1e-6, requires_grad=False):
        super(GeM, self).__init__()
        self.p = nn.Parameter(torch.ones(1)*p, requires_grad=requires_grad).to(device)
        self.eps = eps

    def forward(self, x):
        return self.gem(x.to(device), p=self.p, eps=self.eps)
    

    def gem(self, x, p=3, eps=1e-6):
        return F.avg_pool2d(x.clamp(min=eps).pow(p), (x.size(-2), x.size(-1))).pow(1./p).to(device)

    def __repr__(self):
        return self.__class__.__name__ + '(' + 'p=' + '{:.4f}'.format(self.p.data.tolist()[0]) + ', ' + 'eps=' + str(self.eps) + ')'

# MultiAtrous

In [34]:
import timm
import torch.optim as optim
from torch.utils.data import DataLoader
from pytorch_lightning import LightningModule
import torchmetrics

class MultiAtrous(nn.Module):
    def __init__(self, in_channel, out_channel, size, dilation_rates=[3, 6, 9]):
        super().__init__()
        self.dilated_convs = [
            nn.Conv2d(in_channel, int(out_channel/4),
                      kernel_size=3, dilation=rate, padding=rate)
            for rate in dilation_rates
        ]
        self.gap_branch = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),
            nn.Conv2d(in_channel, int(out_channel/4), kernel_size=1),
            nn.ReLU(),
            nn.Upsample(size=(size, size), mode='bilinear')
        )
        self.dilated_convs.append(self.gap_branch)
        self.dilated_convs = nn.ModuleList(self.dilated_convs)

    def forward(self, x):
        local_feat = []
        for dilated_conv in self.dilated_convs:
            local_feat.append(dilated_conv(x))
        local_feat = torch.cat(local_feat, dim=1)
        return local_feat


class DolgLocalBranch(nn.Module):
    def __init__(self, in_channel, out_channel, hidden_channel=2048):
        super().__init__()
        self.multi_atrous = MultiAtrous(in_channel, hidden_channel, size=int(Config.image_size/8))
        self.conv1x1_1 = nn.Conv2d(hidden_channel, out_channel, kernel_size=1)
        self.conv1x1_2 = nn.Conv2d(
            out_channel, out_channel, kernel_size=1, bias=False)
        self.conv1x1_3 = nn.Conv2d(out_channel, out_channel, kernel_size=1)

        self.relu = nn.ReLU()
        self.bn = nn.BatchNorm2d(out_channel)
        self.softplus = nn.Softplus()
        
        
    def forward(self, x):
        local_feat = self.multi_atrous(x)

        local_feat = self.conv1x1_1(local_feat)
        local_feat = self.relu(local_feat)
        local_feat = self.conv1x1_2(local_feat)
        local_feat = self.bn(local_feat)

        attention_map = self.relu(local_feat)
        attention_map = self.conv1x1_3(attention_map)
        attention_map = self.softplus(attention_map)

        local_feat = F.normalize(local_feat, p=2, dim=1)
        local_feat = local_feat * attention_map

        return local_feat


class OrthogonalFusion(nn.Module):
    def __init__(self):
        super().__init__()

    def forward(self, local_feat, global_feat):
        global_feat_norm = torch.norm(global_feat, p=2, dim=1)
        projection = torch.bmm(global_feat.unsqueeze(1), torch.flatten(
            local_feat, start_dim=2))
        projection = torch.bmm(global_feat.unsqueeze(
            2), projection).view(local_feat.size())
        projection = projection / \
            (global_feat_norm * global_feat_norm).view(-1, 1, 1, 1)
        orthogonal_comp = local_feat - projection
        global_feat = global_feat.unsqueeze(-1).unsqueeze(-1).to(device)
        
        return torch.cat([global_feat.expand(orthogonal_comp.size()), orthogonal_comp], dim=1)


class DolgNet(LightningModule):
    def __init__(self, input_dim, hidden_dim, output_dim, num_of_classes):
        super().__init__()
        self.cnn = timm.create_model('tv_resnet101',
                    pretrained=True,
                    features_only=True,
                    in_chans=input_dim,
                    out_indices=(2, 3))
        
        
        self.orthogonal_fusion = OrthogonalFusion()
        
        self.local_branch = DolgLocalBranch(512, hidden_dim)
        self.gap = nn.AdaptiveAvgPool2d(1)
        self.fc_1 = nn.Linear(1024, hidden_dim)
        self.fc_2 = nn.Linear(int(2*hidden_dim), output_dim)
        
        self.criterion = ArcFace( in_features  = output_dim,
                                  out_features = num_of_classes,
                                  scale_factor=30,
                                  margin=0.15,
                                  criterion=nn.CrossEntropyLoss())
        
        self.lr = Config.lr
        
        self.accuracy = torchmetrics.Accuracy(task= "multiclass", 
                                 num_classes= Config.num_of_classes)

        
    def forward(self, x):
        output = self.cnn(x)

        local_feat = self.local_branch(output[0])   # ,hidden_channel,16,16
        global_feat = self.fc_1(GeM()(output[1]).squeeze())  # ,1024

        feat = self.orthogonal_fusion(local_feat, global_feat)
        feat = self.gap(feat).squeeze()
        feat = self.fc_2(feat)
        
        return feat #out

    def training_step(self, batch, batch_idx):
        img, label, _ = batch
        embd = self(img).to(device)
        
        loss, logits = self.criterion(embd, label)

        
        return loss.to(device)

    def configure_optimizers(self):
        optimizer = optim.SGD(self.parameters(), lr=self.lr,
                              momentum=0.9, weight_decay=1e-5)
        scheduler = scheduler = optim.lr_scheduler.CosineAnnealingLR(
                                      optimizer, T_max=1000)
        
        return [optimizer], [scheduler]

    def train_dataloader(self):
        dataset = ImageDataset()
        
        return DataLoader(dataset, 
                          batch_size= Config.train_batch_size, shuffle=True)

In [35]:
from pytorch_lightning.utilities.seed import seed_everything
from pytorch_lightning import Trainer
import multiprocessing as mp

seed_everything(Config.seed)

model = DolgNet(
        input_dim  = Config.input_dim,
        hidden_dim = Config.hidden_dim,
        output_dim = Config.output_dim,
        num_of_classes = Config.num_of_classes)

model = model.to(device)

trainer = Trainer(gpus=1, max_epochs = Config.epochs)

trainer.fit(model)

  "`pytorch_lightning.utilities.seed.seed_everything` has been deprecated in v1.8.0 and will be"
  f"Setting `Trainer(gpus={gpus!r})` is deprecated in v1.7 and will be removed"


Training: 0it [00:00, ?it/s]

In [37]:
# Specify a path
PATH = "/kaggle/working/DOLG_20__1024_sub.pt"

# # # Save
torch.save(model, PATH)

In [38]:
# Load
modell = torch.load(PATH)

# Extract Embedded Vector

In [39]:
# Validation
import tqdm
from tqdm import tqdm

class ImageValDataset(Dataset):
    def __init__(self):
        self.df = df_val
        
        #self.landmark_id_encoder = preprocessing.LabelEncoder()
        
        self.df['path'] = self.df['id'].apply(img_path_from_id)
        
        self.paths = self.df['path'].values
        self.ids = self.df['id'].values
        self.landmark_ids = self.df['label'].values
        self.transform = train_transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        path, id, landmark_id = self.paths[idx], self.ids[idx], self.landmark_ids[idx]
        
        img = cv2.imread(path)
        img = cv2.resize(img, (Config.image_size , Config.image_size))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        
        
        if self.transform:
            img = self.transform(img)
        
        #labels = torch.tensor(self.landmark_ids[idx])  
        
        return img.to(device), landmark_id, id #str #torch.permute(img, (2, 0, 1))

In [40]:
train_set = ImageDataset()
train_loader = DataLoader(train_set , batch_size= Config.train_batch_size)

In [41]:
Train = []
modell = modell.to(device)

for batch in tqdm(train_loader):
    inputs, y_train, _ = batch
    Emv_v = modell(inputs).to(device)
    Emv_v = Emv_v.cpu().detach().numpy()
    Emv_v = Emv_v.astype(np.float32)
    
    Train.append((Emv_v, y_train.numpy()))

100%|██████████| 270/270 [01:02<00:00,  4.32it/s]


In [42]:
val_set = ImageValDataset()
val_loader = DataLoader(val_set , batch_size = Config.train_batch_size)

In [43]:
Val = []
modell = modell.to(device)

for batch in tqdm(val_loader):
    inputs, y_val, _ = batch
    Emv_v = modell(inputs).to(device)
    Emv_v = Emv_v.cpu().detach().numpy()
    Emv_v = Emv_v.astype(np.float32)
    
    Val.append((Emv_v, y_val.numpy()))

100%|██████████| 3/3 [00:00<00:00,  5.05it/s]


In [45]:
Train_Data  = []
Train_Lable = []

Val_Data = []
Val_Lable = []

for pair in Train:
    for batch in pair[0]:
        Train_Data.append(batch)
    #print(pair[1])
    for batch in pair[1]:
        Train_Lable.append(batch)

for pair in Val:
    for batch in pair[0]:
        Val_Data.append(batch)
    for batch in pair[1]:
        Val_Lable.append(batch)

In [46]:
# Evaluating 
import math

def Evaluating(Train_Data, Train_Lable, Val_Data, Val_Lable):
    Prd_Val = []
    for num1, vec_val in tqdm(enumerate(Val_Data)):
        Check = []
        
        for num2, vec_train in enumerate(Train_Data):
            dist = np.linalg.norm(vec_val - vec_train)
            Check.append(dist)

        Prd_Val.append(Train_Lable[np.argmin(Check)])
        
        Check = []
        
        if num1  == 20: print(Prd_Val) 
        
        ACC = np.sum(np.array(Prd_Val) == np.array(Val_Lable))/len(Val_Data)
        
    return print(ACC), Prd_Val
                
ACC, Prd_Val = Evaluating(Train_Data, Train_Lable, Val_Data, Val_Lable)

28it [00:00, 51.03it/s]

[2, 13, 7, 0, 9, 10, 12, 2, 12, 3, 0, 0, 4, 6, 8, 5, 5, 2, 1, 14, 4]
1.0





In [44]:
# 94.67 epoch 7
# 94.31 epoch 5
# 93.76 epoch 2
# 94.86 epoch 12

# 93.21 epoch 2 #448
# 93.02 epoch 2 #440 

In [47]:
class ImageTestDataset(Dataset):
    def __init__(self):
        self.df = pd.read_csv(Config.CSV_PATH_TEST)
        
        self.df['path'] = self.df['id'].apply(img_path_from_idd)
        self.paths = self.df['path'].values
        self.ids = self.df['id'].values
        self.transform = train_transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        path, id = self.paths[idx], self.ids[idx]
        
        img = cv2.imread(path)
        img = cv2.resize(img, (Config.image_size , Config.image_size))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        
        if self.transform:
            img = self.transform(img)
        
        return img.to(device), id
    
test_set = ImageTestDataset()
test_loader = DataLoader(test_set , batch_size=10)

In [48]:
Test_Data  = []

modell = modell.to(device)

for batch in tqdm(test_loader):
    inputs, _ = batch
    Emv_v = modell(inputs).to(device)
    Emv_v = Emv_v.cpu().detach().numpy()
    Emv_v = Emv_v.astype(np.float32)
    
    for vec in Emv_v:   
        Test_Data.append(vec)

100%|██████████| 75/75 [00:18<00:00,  3.98it/s]


In [49]:
Prd_Test = []

for num1, vec_tst in tqdm(enumerate(Test_Data)):
    Check = []
    for num2, vec_train in enumerate(Train_Data):
        dist = np.linalg.norm(vec_tst - vec_train)
        Check.append(dist)
        
    Prd_Test.append(Train_Lable[np.argmin(Check)])
    Check = []

750it [00:15, 49.90it/s]


# ML models

In [48]:
import statistics as stat
from sklearn import svm
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn import preprocessing
from sklearn.linear_model import RidgeClassifier
from sklearn.cluster import KMeans
import lightgbm as lgb


# Normalize data
input_train = np.array(Train_Data)
normalized_train = preprocessing.normalize(input_train, norm='l2')

input_val = np.array(Val_Data)
normalized_val = preprocessing.normalize(input_val, norm='l2')

input_test = np.array(Test_Data)
normalized_test = preprocessing.normalize(input_test, norm='l2')

# SVM

In [49]:
# Training
SVM = svm.SVC()
SVM.fit(normalized_train, Train_Lable)

# Evaluating
np.sum(SVM.predict(normalized_val) == Val_Lable)/len(Val_Lable)

0.9394495412844037

# KNN

In [50]:
# Training
KNN = KNeighborsClassifier(n_neighbors = 15)
KNN.fit(normalized_train, Train_Lable)

# Evaluating
np.sum(KNN.predict(normalized_val) == Val_Lable)/len(Val_Lable)

0.9357798165137615

# Ridge Classifier

In [51]:
# Training
RC = RidgeClassifier().fit(normalized_train, Train_Lable)

# Evaluating
np.sum(RC.predict(normalized_val) == Val_Lable)/len(Val_Lable)

0.9357798165137615

# LightGBM

In [52]:
# It'll take a while to run this cell

# # Training
# Lgbm = lgb.LGBMClassifier()
# Lgbm.fit(normalized_train, Train_Lable)

# # Evaluating
# np.sum(Lgbm.predict(normalized_val) == Val_Lable)/len(Val_Lable)

# Kmeans

In [53]:
kmeans = KMeans(n_clusters=15, random_state=0).fit(np.array(list(normalized_train) + 
                                                            list(normalized_val) ))
PRD_TRN = kmeans.predict(normalized_train)
PRD_VAL = kmeans.predict(normalized_val)

Cluster = [[] for i in range(15)]
for idx, clss in enumerate(Train_Lable) :
    Cluster[clss].append(PRD_TRN[idx])
    
Clss = [stat.mode(clss) for clss in Cluster] 
vall = []
for prd_val in PRD_VAL:
    vall.append(Clss.index(prd_val))
    
# # Evaluating
np.sum(np.array(vall) == Val_Lable)/len(Val_Lable)

0.9302752293577982

# Ensemble Method

In [54]:
END = []
SVM_PRD = SVM.predict(normalized_val)
RC_PRD  = RC.predict(normalized_val)
KNN_PRD = KNN.predict(normalized_val)

for idx in range(len(Val_Lable)):
    try:
        END.append(stat.mode([Prd_Val[idx], SVM_PRD[idx],  
                              RC_PRD[idx], KNN_PRD[idx] ]))
        
    except: 
        END.append(Prd_Val[idx])
        
# Evaluating
np.sum(np.array(END) == Val_Lable)/len(Val_Lable)

0.9431192660550459

# Episodic Predicting

In [55]:
TEST_QUERY = [[] for i in range(15)]

for num, vec_train in enumerate(Train_Data):
    TEST_QUERY[Train_Lable[num]] = TEST_QUERY[Train_Lable[num]] + [vec_train]
    
Len_Query = [len(Q) for Q in TEST_QUERY]

In [56]:
def EpisodeGen():
    Episode_index = [np.random.randint(len(TEST_QUERY[idx]), size = min(Len_Query))
                     for idx in range(15)]

    Episode = [[] for idx in range(min(Len_Query))]

    for i in range(min(Len_Query)):
        for j in range(15):
            Episode[i] = Episode[i] + [TEST_QUERY[j][Episode_index[j][i]]]
            
    return Episode

In [57]:
EPD_VAL = []
Checkk = []
BOX = []

for vec_val in tqdm(Val_Data):
    Episode = EpisodeGen() 
    for epd in Episode :
        for q_eq in epd:
            dist = np.linalg.norm(vec_val - q_eq)
            Checkk.append(dist)
        BOX.append(np.argmin(Checkk))
        Checkk = []
        
    EPD_VAL.append(stat.mode(BOX))   
    BOX = []

# Evaluate Episodic Predicting
np.sum(np.array(EPD_VAL) == Val_Lable)/len(Val_Lable)

100%|██████████| 545/545 [00:07<00:00, 75.64it/s]


0.9321100917431193

# Submission

In [50]:
ID = list(pd.read_csv('/kaggle/input/phuket-location/submit.csv')['id'])

# Create a DataFrame from the dictionary
df_submission = pd.DataFrame(data = {'id': ID, 'predict': Prd_Test})
df_submission.head()

Unnamed: 0,id,predict
0,6a270f855c45a53c9cd29704e4ec1811.jpg,3
1,2c76f54dd40ef6747cc2e179c091173c.jpg,1
2,8a2ab1e2da3aa24c838d2644b8beca77.jpg,7
3,5b9c7c23ec76a6becf9c9beb4c7bd5f5.jpg,13
4,6cea6f1ff4873fbcaa4e2ec4e082e361.jpg,3


In [51]:
df_submission.to_csv('/kaggle/working/submission_Dolg3.csv',index=False)

In [60]:
os.listdir('/kaggle/working/')

['.virtual_documents',
 'submission_Dolg1.csv',
 'DOLG_10_512.pt',
 '__notebook_source__.ipynb',
 'lightning_logs']