In [1]:
import numpy as np
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader,TensorDataset
import torch
from pathlib import Path
import pandas as pd

In [2]:
PATH = Path("/home/ubuntu/data/")

In [3]:
[print(i) for i in PATH.iterdir()]

/home/ubuntu/data/train
/home/ubuntu/data/test.csv
/home/ubuntu/data/train.csv
/home/ubuntu/data/test
/home/ubuntu/data/models
/home/ubuntu/data/animal_preds_train_res34.npy
/home/ubuntu/data/test-animal-preds-res50.npy
/home/ubuntu/data/is_animal_train-res50.npy
/home/ubuntu/data/test-is-animal-res50.npy


[None, None, None, None, None, None, None, None, None]

## Metrics

In [4]:
def accuracy(out, labels):
    preds = torch.argmax(out, dim=1)
    return (preds == labels).float().mean()

## Data prep

In [5]:
is_animal = np.load(PATH/"is_animal_train-res50.npy")

In [6]:
is_animal.shape

(196299, 2)

In [7]:
animal = np.load(PATH/"animal_preds_train_res34.npy")

In [8]:
animal.shape

(196299, 13)

## Simple Approach

In [None]:
final_preds = []
for i,pred in zip(is_animal_f,animal_preds_f):
    if i:
        final_preds.append(class_map_reversed[pred])
    else:
        final_preds.append(0)

In [9]:
joined = np.concatenate((is_animal, animal), axis=1)

In [10]:
df = pd.read_csv(PATH/"train.csv")
classes = df["category_id"].unique()
classes_map = {classes[i]:i for i in range(len(classes))}
new_labels = np.array([ classes_map[i] for i in df["category_id"]])

In [11]:
classes_map

{19: 0,
 0: 1,
 3: 2,
 8: 3,
 4: 4,
 13: 5,
 1: 6,
 11: 7,
 16: 8,
 17: 9,
 14: 10,
 18: 11,
 10: 12,
 22: 13}

In [12]:
len(new_labels)

196299

In [13]:
x = joined
y = new_labels

In [14]:
random_idxs = np.random.permutation(len(x))

In [15]:
split=200
train_idxs = random_idxs[split:]
valid_idx = random_idxs[:split]

In [16]:

x_train, y_train, x_valid, y_valid = map(torch.tensor, (x[split:], y[split:], x[:split], y[:split])) 

In [17]:
train_dataset = TensorDataset(x_train, y_train)
valid_dataset = TensorDataset(x_valid, y_valid)

In [18]:
bs = 1000
train_data_loader = DataLoader(train_dataset, batch_size=bs)
valid_data_lodaer = DataLoader(valid_dataset, batch_size=bs)

## Model definition

In [46]:
in_features = 15
num_classes = 14

model = nn.Sequential(
    nn.Linear(in_features, 50),
    nn.ReLU(),
    nn.Linear(50, num_classes)
)

In [47]:
model

Sequential(
  (0): Linear(in_features=15, out_features=50, bias=True)
  (1): ReLU()
  (2): Linear(in_features=50, out_features=14, bias=True)
)

## Train Definition

In [48]:
lr = 1e-3
ce_loss = nn.CrossEntropyLoss()
optim = torch.optim.Adam(model.parameters(), lr=lr)

In [49]:
def step(batch, train):
    data, labels = batch #get batch
    out = model(data.float()) # get predictions
    loss = ce_loss(out, labels) # calcualte loss
    acc = accuracy(out, labels)
    if train:
        optim.zero_grad() # zero graidents
        loss.backward() # calcualte gradients
        optim.step() #update parameters

    return loss.item(), acc


In [50]:
def train_epoch(dataloader, train=True):
    loss_hist = []
    acc_hist = []
    for batch in dataloader:
        loss_t, acc_t = step(batch, train=train)
        loss_hist.append(loss_t)
        acc_hist.append(acc_t)
        
    return np.mean(loss_hist), np.mean(acc_hist)

In [51]:
epochs = 10
loss_train = np.empty(epochs)
acc_train = np.empty(epochs)
loss_eval = np.empty(epochs)
acc_eval = np.empty(epochs)

for ep  in range(epochs):
    print(f'Epoch: {ep}')
    loss_t, acc_t = train_epoch(train_data_loader)
    loss_v, acc_v = train_epoch(valid_data_lodaer, train=False)
    loss_train[ep], acc_train[ep], loss_eval[ep], acc_eval[ep] =  loss_t, acc_t, loss_v, acc_v
    print(f' Train Loss: {np.round(loss_t,3)}, Valid Loss:{np.round(loss_v,3)}')
    print(f' Train acc: {np.round(acc_t,3)}, Valid acc:{np.round(acc_v,3)}')


Epoch: 0
 Train Loss: 1.348, Valid Loss:0.576
 Train acc: 0.6990000009536743, Valid acc:0.8899999856948853
Epoch: 1
 Train Loss: 0.331, Valid Loss:0.148
 Train acc: 0.9419999718666077, Valid acc:0.9850000143051147
Epoch: 2
 Train Loss: 0.147, Valid Loss:0.077
 Train acc: 0.9660000205039978, Valid acc:0.9850000143051147
Epoch: 3
 Train Loss: 0.113, Valid Loss:0.063
 Train acc: 0.9679999947547913, Valid acc:0.9850000143051147
Epoch: 4
 Train Loss: 0.104, Valid Loss:0.058
 Train acc: 0.968999981880188, Valid acc:0.9850000143051147
Epoch: 5
 Train Loss: 0.101, Valid Loss:0.056
 Train acc: 0.968999981880188, Valid acc:0.9850000143051147
Epoch: 6
 Train Loss: 0.1, Valid Loss:0.055
 Train acc: 0.968999981880188, Valid acc:0.9850000143051147
Epoch: 7
 Train Loss: 0.099, Valid Loss:0.055
 Train acc: 0.968999981880188, Valid acc:0.9850000143051147
Epoch: 8
 Train Loss: 0.099, Valid Loss:0.054
 Train acc: 0.968999981880188, Valid acc:0.9850000143051147
Epoch: 9
 Train Loss: 0.098, Valid Loss:0.05

## Submission

In [52]:
model_e = model.eval()

In [53]:
is_animal_t = np.load(PATH/"test-is-animal-res50.npy")

In [54]:
animal_t = np.load(PATH/"test-animal-preds-res50.npy")

In [55]:
joined = np.concatenate((is_animal_t, animal_t), axis=1)

In [56]:
joined.shape

(153730, 15)

In [57]:
joined_t = torch.from_numpy(joined)

In [58]:
joined_ds = TensorDataset(joined_t)


In [59]:
joinded_dl = DataLoader(joined_ds, batch_size=bs)

In [60]:
final_preds = []
for b in joinded_dl:
    pred =  model_e(b[0])
    final_preds.extend(np.argmax(pred.detach().numpy(), axis=1))

In [61]:
len(final_preds) == len(joined)

True

In [62]:
classes_map_reversed = {v:k for k,v in classes_map.items()}

In [63]:
final_final_preds = [classes_map_reversed[i] for i in final_preds ]

In [64]:
final_final_preds[:5]

[0, 0, 0, 0, 0]

In [65]:
df_test = pd.read_csv(PATH/"test.csv")

In [66]:
df_test["Predicted"] =  final_final_preds


In [67]:
df_subm = df_test[["id","Predicted" ]]


In [68]:
df_subm.head()

Unnamed: 0,id,Predicted
0,bce932f6-2bf6-11e9-bcad-06f10d5896c4,0
1,bce932f7-2bf6-11e9-bcad-06f10d5896c4,0
2,bce932f8-2bf6-11e9-bcad-06f10d5896c4,0
3,bce932f9-2bf6-11e9-bcad-06f10d5896c4,0
4,bce932fa-2bf6-11e9-bcad-06f10d5896c4,0


In [69]:
df_subm.to_csv("subm_ens3.csv", index=False)

In [70]:
! kaggle competitions submit -c iwildcam-2019-fgvc6 -f subm_ens3.csv -m "Ensemble submission 3"

100%|██████████████████████████████████████| 5.76M/5.76M [00:03<00:00, 1.72MB/s]
Successfully submitted to iWildCam 2019 - FGVC6