In [1]:
import numpy as np
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader,TensorDataset
import torch
from pathlib import Path
import pandas as pd

In [2]:
PATH = Path("/home/shaun/data/kaggle/iwild/")

In [3]:
[print(i) for i in PATH.iterdir()]

/home/shaun/data/kaggle/iwild/sample_submission.csv
/home/shaun/data/kaggle/iwild/train.csv
/home/shaun/data/kaggle/iwild/animal_preds_train_res50-1.npy
/home/shaun/data/kaggle/iwild/test-animal-preds-res50.npy
/home/shaun/data/kaggle/iwild/is_animal_train-res50.npy
/home/shaun/data/kaggle/iwild/test-is-animal-res50.npy


[None, None, None, None, None, None]

## Metrics

In [4]:
def accuracy(out, labels):
    preds = torch.argmax(out, dim=1)
    return (preds == labels).float().mean()

## Data prep

In [5]:
is_animal = np.load(PATH/"is_animal_train-res50.npy")

In [6]:
is_animal.shape

(196299, 2)

In [7]:
animal = np.load(PATH/"animal_preds_train_res50-1.npy")

In [8]:
animal.shape

(196299, 13)

In [9]:
joined = np.concatenate((is_animal, animal), axis=1)

In [10]:
df = pd.read_csv(PATH/"train.csv")
classes = df["category_id"].unique()
classes_map = {classes[i]:i for i in range(len(classes))}
new_labels = np.array([ classes_map[i] for i in df["category_id"]])

In [11]:
classes_map

{0: 1,
 1: 6,
 3: 2,
 4: 4,
 8: 3,
 10: 12,
 11: 7,
 13: 5,
 14: 10,
 16: 8,
 17: 9,
 18: 11,
 19: 0,
 22: 13}

In [12]:
len(new_labels)

196299

In [15]:
x = joined
y = new_labels

In [16]:
random_idxs = np.random.permutation(len(x))

In [17]:
split=200
train_idxs = random_idxs[split:]
valid_idx = random_idxs[:split]

In [18]:

x_train, y_train, x_valid, y_valid = map(torch.tensor, (x[split:], y[split:], x[:split], y[:split])) 

In [19]:
train_dataset = TensorDataset(x_train, y_train)
valid_dataset = TensorDataset(x_valid, y_valid)

In [20]:
bs = 1000
train_data_loader = DataLoader(train_dataset, batch_size=bs)
valid_data_lodaer = DataLoader(valid_dataset, batch_size=bs)

## Model definition

In [21]:
in_features = 15
num_classes = 14

model = nn.Sequential(
    nn.Linear(in_features, 30),
    nn.ReLU(),
    nn.Linear(30, num_classes)
)

In [22]:
model

Sequential(
  (0): Linear(in_features=15, out_features=30, bias=True)
  (1): ReLU()
  (2): Linear(in_features=30, out_features=14, bias=True)
)

## Train Definition

In [23]:
lr = 1e-3

ce_loss = nn.CrossEntropyLoss()

optim = torch.optim.Adam(model.parameters(), lr=lr)

In [24]:
def step(batch, train):
    data, labels = batch #get batch
    out = model(data.float()) # get predictions
    loss = ce_loss(out, labels) # calcualte loss
    acc = accuracy(out, labels)
    if train:
        optim.zero_grad() # zero graidents
        loss.backward() # calcualte gradients
        optim.step() #update parameters

    return loss.item(), acc


In [25]:
def train_epoch(dataloader, train=True):
    loss_hist = []
    acc_hist = []
    for batch in dataloader:
        loss_t, acc_t = step(batch, train=train)
        loss_hist.append(loss_t)
        acc_hist.append(acc_t)
        
    return np.mean(loss_hist), np.mean(acc_hist)

In [26]:
epochs = 10
loss_train = np.empty(epochs)
acc_train = np.empty(epochs)
loss_eval = np.empty(epochs)
acc_eval = np.empty(epochs)

for ep  in range(epochs):
    print(f'Epoch: {ep}')
    loss_t, acc_t = train_epoch(train_data_loader)
    loss_v, acc_v = train_epoch(valid_data_lodaer, train=False)
    loss_train[ep], acc_train[ep], loss_eval[ep], acc_eval[ep] =  loss_t, acc_t, loss_v, acc_v
    print(f' Train Loss: {np.round(loss_t,3)}, Valid Loss:{np.round(loss_v,3)}')
    print(f' Train acc: {np.round(acc_t,3)}, Valid acc:{np.round(acc_v,3)}')


Epoch: 0
 Train Loss: 1.553, Valid Loss:0.731
 Train acc: 0.6370000243186951, Valid acc:0.800000011920929
Epoch: 1
 Train Loss: 0.5, Valid Loss:0.337
 Train acc: 0.8970000147819519, Valid acc:0.9399999976158142
Epoch: 2
 Train Loss: 0.275, Valid Loss:0.209
 Train acc: 0.9369999766349792, Valid acc:0.949999988079071
Epoch: 3
 Train Loss: 0.21, Valid Loss:0.173
 Train acc: 0.9470000267028809, Valid acc:0.949999988079071
Epoch: 4
 Train Loss: 0.19, Valid Loss:0.161
 Train acc: 0.9470000267028809, Valid acc:0.949999988079071
Epoch: 5
 Train Loss: 0.183, Valid Loss:0.157
 Train acc: 0.9470000267028809, Valid acc:0.949999988079071
Epoch: 6
 Train Loss: 0.18, Valid Loss:0.155
 Train acc: 0.9470000267028809, Valid acc:0.949999988079071
Epoch: 7
 Train Loss: 0.178, Valid Loss:0.154
 Train acc: 0.9470000267028809, Valid acc:0.949999988079071
Epoch: 8
 Train Loss: 0.177, Valid Loss:0.153
 Train acc: 0.9480000138282776, Valid acc:0.949999988079071
Epoch: 9
 Train Loss: 0.177, Valid Loss:0.153
 Tra

## Submission

In [27]:
model_e = model.eval()

In [31]:
is_animal_t = np.load(PATH/"test-is-animal-res50.npy")

In [30]:
animal_t = np.load(PATH/"test-animal-preds-res50.npy")

In [47]:
joined = np.concatenate((is_animal_t, animal_t), axis=1)

In [45]:
joined.shape

(153730, 15)

In [54]:
joined_t = torch.from_numpy(joined)

In [55]:
joined_ds = TensorDataset(joined_t)


In [56]:
joinded_dl = DataLoader(joined_ds, batch_size=bs)

In [72]:
final_preds = []
for b in joinded_dl:
    pred =  model_e(b[0])
    final_preds.extend(np.argmax(pred.detach().numpy(), axis=1))

In [75]:
len(final_preds) == len(joined)

True

In [78]:
classes_map_reversed = {v:k for k,v in classes_map.items()}

In [79]:
final_final_preds = [classes_map_reversed[i] for i in final_preds ]

In [80]:
final_final_preds[:5]

[0, 0, 0, 0, 0]

In [82]:
df_test = pd.read_csv(PATH/"test.csv")

In [83]:
df_test["Predicted"] =  final_final_preds


In [85]:
df_subm = df_test[["id","Predicted" ]]


In [86]:
df_subm.head()

Unnamed: 0,id,Predicted
0,bce932f6-2bf6-11e9-bcad-06f10d5896c4,0
1,bce932f7-2bf6-11e9-bcad-06f10d5896c4,0
2,bce932f8-2bf6-11e9-bcad-06f10d5896c4,0
3,bce932f9-2bf6-11e9-bcad-06f10d5896c4,0
4,bce932fa-2bf6-11e9-bcad-06f10d5896c4,0


In [88]:
df_subm.to_csv("subm_ens.csv", index=False)

In [90]:
! kaggle competitions submit -c iwildcam-2019-fgvc6 -f subm_ens.csv -m "Ensemble submission 1"

100%|███████████████████████████████████████| 5.76M/5.76M [00:12<00:00, 489kB/s]
Successfully submitted to iWildCam 2019 - FGVC6