In [1]:
import os

import numpy as np
import pandas as pd
from fastai.vision import *

In [2]:
# Build the lists of test-image paths for the two species and report their sizes.
#
# NOTE(review): file names are listed from gabon_extra_data/<species>/ but are
# prefixed with /data/Gabon_trainingData/ -- confirm both locations hold the
# same files (e.g. via a mount or symlink).
IMAGE_PREFIX = "/data/Gabon_trainingData/"

# os.listdir returns entries in arbitrary order; sorting keeps the row order of
# the prediction tables reproducible between runs and machines.
chimps = [IMAGE_PREFIX + file for file in sorted(os.listdir("gabon_extra_data/chimpanzee/"))]
gorillas = [IMAGE_PREFIX + file for file in sorted(os.listdir("gabon_extra_data/gorilla/"))]
print(len(chimps))
print(len(gorillas))

987
232


# OLD MODEL

# Gorillas

In [11]:
# Load the exported old-model learner from the current directory, attaching the
# gorilla image paths as its test set.
model_dir = "./"
model_file = "stage4a-intermediate_bestmodel.pkl"
learn = load_learner(model_dir, model_file, test=gorillas)

In [12]:
# Run the learner over its attached test set. Only `preds` is read by the
# following cells; `y` is unpacked but not used in the visible notebook.
test_output = learn.get_preds(ds_type=DatasetType.Test)
preds, y = test_output

In [13]:
# Turn the prediction rows into a table with one column per model class.
# NOTE: this rebinds `preds`, so re-run the get_preds cell before re-running
# this one.
score_rows = np.stack(preds)
class_columns = learn.data.classes
preds = pd.DataFrame(score_rows, columns=class_columns)

In [16]:
# Build the per-image top-3 table: every class-score column of `preds`, plus
# pred_1..pred_3 (class names) and score_1..score_3 (the matching scores).
#
# Class names come from the column order of `preds` itself, so a column
# position can never be mapped to the wrong label. A stable argsort yields
# exactly one class per rank per row even when scores tie; dense ranks could
# yield several or none, which shifted the labels of every following row.
# Requires at least three class columns.
df_preds = preds.copy()
class_names = preds.columns.values
scores = preds.values

# Negate so the sort is descending; ties are broken by column order.
top3 = np.argsort(-scores, axis=1, kind="stable")[:, :3]
row_ids = np.arange(len(preds))

for rank in range(3):
    df_preds[f"pred_{rank + 1}"] = class_names[top3[:, rank]]
for rank in range(3):
    df_preds[f"score_{rank + 1}"] = scores[row_ids, top3[:, rank]]

In [19]:
# Show only the three predicted class names for each test image.
label_columns = ["pred_1", "pred_2", "pred_3"]
df_preds[label_columns]

Unnamed: 0,pred_1,pred_2,pred_3
0,Mandrillus,Human,Gorilla
1,Chimpanzee,Elephant_African,Gorilla
2,Chimpanzee,Gorilla,Elephant_African
3,Chimpanzee,Gorilla,Elephant_African
4,Monkey,Gorilla,Chimpanzee
...,...,...,...
227,Gorilla,Human,Chimpanzee
228,Chimpanzee,Mandrillus,Gorilla
229,Chimpanzee,Gorilla,Monkey
230,Chimpanzee,Elephant_African,Gorilla


In [22]:
# Count rows where "Gorilla" (the class this test folder is assumed to contain)
# is the first prediction, and rows where it is any of the first three.
target_class = "Gorilla"
top3_labels = df_preds[["pred_1", "pred_2", "pred_3"]]
correct_top1 = top3_labels["pred_1"].eq(target_class).sum()
correct_top3 = top3_labels.eq(target_class).any(axis=1).sum()
all_count = df_preds.shape[0]

In [23]:
# Report, as a percentage of all rows in df_preds, how often "Gorilla" was the
# first prediction (top 1) and how often it was among the first three (top 3).
print(f"Top 1 accuracy on Gorillas is {correct_top1 / all_count * 100}%")
print(f"Top 3 accuracy on Gorillas is {correct_top3 / all_count * 100}%")

Top 1 accuracy on Gorillas is 15.517241379310345%
Top 3 accuracy on Gorillas is 80.17241379310344%


# Chimps

In [24]:
# Load the exported old-model learner from the current directory, attaching the
# chimpanzee image paths as its test set.
model_dir = "./"
model_file = "stage4a-intermediate_bestmodel.pkl"
learn = load_learner(model_dir, model_file, test=chimps)

In [25]:
# Run the learner over its attached test set. Only `preds` is read by the
# following cells; `y` is unpacked but not used in the visible notebook.
test_output = learn.get_preds(ds_type=DatasetType.Test)
preds, y = test_output

In [26]:
# Turn the prediction rows into a table with one column per model class.
# NOTE: this rebinds `preds`, so re-run the get_preds cell before re-running
# this one.
score_rows = np.stack(preds)
class_columns = learn.data.classes
preds = pd.DataFrame(score_rows, columns=class_columns)

In [27]:
# Build the per-image top-3 table: every class-score column of `preds`, plus
# pred_1..pred_3 (class names) and score_1..score_3 (the matching scores).
#
# Class names come from the column order of `preds` itself, so a column
# position can never be mapped to the wrong label. A stable argsort yields
# exactly one class per rank per row even when scores tie; dense ranks could
# yield several or none, which shifted the labels of every following row.
# Requires at least three class columns.
df_preds = preds.copy()
class_names = preds.columns.values
scores = preds.values

# Negate so the sort is descending; ties are broken by column order.
top3 = np.argsort(-scores, axis=1, kind="stable")[:, :3]
row_ids = np.arange(len(preds))

for rank in range(3):
    df_preds[f"pred_{rank + 1}"] = class_names[top3[:, rank]]
for rank in range(3):
    df_preds[f"score_{rank + 1}"] = scores[row_ids, top3[:, rank]]

In [28]:
# Show only the three predicted class names for each test image.
label_columns = ["pred_1", "pred_2", "pred_3"]
df_preds[label_columns]

Unnamed: 0,pred_1,pred_2,pred_3
0,Chimpanzee,Gorilla,Elephant_African
1,Chimpanzee,Elephant_African,Gorilla
2,Chimpanzee,Hog_Red_River,Gorilla
3,Chimpanzee,Gorilla,Elephant_African
4,Chimpanzee,Gorilla,Hog_Red_River
...,...,...,...
982,Chimpanzee,Gorilla,Mandrillus
983,Chimpanzee,Gorilla,Mandrillus
984,Chimpanzee,Gorilla,Elephant_African
985,Chimpanzee,Elephant_African,Gorilla


In [29]:
# Count rows where "Chimpanzee" (the class this test folder is assumed to
# contain) is the first prediction, and rows where it is any of the first three.
target_class = "Chimpanzee"
top3_labels = df_preds[["pred_1", "pred_2", "pred_3"]]
correct_top1 = top3_labels["pred_1"].eq(target_class).sum()
correct_top3 = top3_labels.eq(target_class).any(axis=1).sum()
all_count = df_preds.shape[0]

In [30]:
# Report, as a percentage of all rows in df_preds, how often "Chimpanzee" was
# the first prediction (top 1) and how often it was among the first three (top 3).
print(f"Top 1 accuracy on Chimpanzees is {correct_top1 / all_count * 100}%")
print(f"Top 3 accuracy on Chimpanzees is {correct_top3 / all_count * 100}%")

Top 1 accuracy on Chimpanzees is 92.19858156028369%
Top 3 accuracy on Chimpanzees is 97.66970618034448%


# NEW MODEL

# Gorillas

In [5]:
# Load the exported retrained learner with the gorilla image paths attached as
# its test set, then convert it with to_fp16().
# NOTE(review): the old-model cells do not call to_fp16() -- confirm the
# comparison is meant to use different precision settings.
model_dir = "gabon_wildlife_wwf_retrain/saved_models/"
model_file = "stage4f-5epochs-576_768-rescaled.pkl"
learn = load_learner(model_dir, model_file, test=gorillas)
learn = learn.to_fp16()

In [6]:
# Run the learner over its attached test set. Only `preds` is read by the
# following cells; `y` is unpacked but not used in the visible notebook.
test_output = learn.get_preds(ds_type=DatasetType.Test)
preds, y = test_output

In [7]:
# Turn the prediction rows into a table with one column per model class.
# NOTE: this rebinds `preds`, so re-run the get_preds cell before re-running
# this one.
score_rows = np.stack(preds)
class_columns = learn.data.classes
preds = pd.DataFrame(score_rows, columns=class_columns)

In [8]:
# Build the per-image top-3 table: every class-score column of `preds`, plus
# pred_1..pred_3 (class names) and score_1..score_3 (the matching scores).
#
# Class names come from the column order of `preds` itself, so a column
# position can never be mapped to the wrong label. A stable argsort yields
# exactly one class per rank per row even when scores tie; dense ranks could
# yield several or none, which shifted the labels of every following row.
# Requires at least three class columns.
df_preds = preds.copy()
class_names = preds.columns.values
scores = preds.values

# Negate so the sort is descending; ties are broken by column order.
top3 = np.argsort(-scores, axis=1, kind="stable")[:, :3]
row_ids = np.arange(len(preds))

for rank in range(3):
    df_preds[f"pred_{rank + 1}"] = class_names[top3[:, rank]]
for rank in range(3):
    df_preds[f"score_{rank + 1}"] = scores[row_ids, top3[:, rank]]

In [9]:
# Show only the three predicted class names for each test image.
label_columns = ["pred_1", "pred_2", "pred_3"]
df_preds[label_columns]

Unnamed: 0,pred_1,pred_2,pred_3
0,Gorilla,Chimpanzee,Mandrillus
1,Gorilla,Chimpanzee,Elephant_African
2,Gorilla,Chimpanzee,Mandrillus
3,Gorilla,Chimpanzee,Mandrillus
4,Gorilla,Chimpanzee,Mandrillus
...,...,...,...
227,Gorilla,Chimpanzee,Human
228,Gorilla,Chimpanzee,Mandrillus
229,Gorilla,Chimpanzee,Mandrillus
230,Gorilla,Chimpanzee,Mandrillus


In [10]:
# Count rows where "Gorilla" (the class this test folder is assumed to contain)
# is the first prediction, and rows where it is any of the first three.
target_class = "Gorilla"
top3_labels = df_preds[["pred_1", "pred_2", "pred_3"]]
correct_top1 = top3_labels["pred_1"].eq(target_class).sum()
correct_top3 = top3_labels.eq(target_class).any(axis=1).sum()
all_count = df_preds.shape[0]

In [11]:
# Report, as a percentage of all rows in df_preds, how often "Gorilla" was the
# first prediction (top 1) and how often it was among the first three (top 3).
print(f"Top 1 accuracy on Gorillas is {correct_top1 / all_count * 100}%")
print(f"Top 3 accuracy on Gorillas is {correct_top3 / all_count * 100}%")

Top 1 accuracy on Gorillas is 93.53448275862068%
Top 3 accuracy on Gorillas is 100.0%


# Chimps

In [12]:
# Load the exported retrained learner with the chimpanzee image paths attached
# as its test set, then convert it with to_fp16().
# NOTE(review): the old-model cells do not call to_fp16() -- confirm the
# comparison is meant to use different precision settings.
model_dir = "gabon_wildlife_wwf_retrain/saved_models/"
model_file = "stage4f-5epochs-576_768-rescaled.pkl"
learn = load_learner(model_dir, model_file, test=chimps)
learn = learn.to_fp16()

In [13]:
# Run the learner over its attached test set. Only `preds` is read by the
# following cells; `y` is unpacked but not used in the visible notebook.
test_output = learn.get_preds(ds_type=DatasetType.Test)
preds, y = test_output

In [14]:
# Turn the prediction rows into a table with one column per model class.
# NOTE: this rebinds `preds`, so re-run the get_preds cell before re-running
# this one.
score_rows = np.stack(preds)
class_columns = learn.data.classes
preds = pd.DataFrame(score_rows, columns=class_columns)

In [15]:
# Build the per-image top-3 table: every class-score column of `preds`, plus
# pred_1..pred_3 (class names) and score_1..score_3 (the matching scores).
#
# Class names come from the column order of `preds` itself, so a column
# position can never be mapped to the wrong label. A stable argsort yields
# exactly one class per rank per row even when scores tie; dense ranks could
# yield several or none, which shifted the labels of every following row.
# Requires at least three class columns.
df_preds = preds.copy()
class_names = preds.columns.values
scores = preds.values

# Negate so the sort is descending; ties are broken by column order.
top3 = np.argsort(-scores, axis=1, kind="stable")[:, :3]
row_ids = np.arange(len(preds))

for rank in range(3):
    df_preds[f"pred_{rank + 1}"] = class_names[top3[:, rank]]
for rank in range(3):
    df_preds[f"score_{rank + 1}"] = scores[row_ids, top3[:, rank]]

In [16]:
# Show only the three predicted class names for each test image.
label_columns = ["pred_1", "pred_2", "pred_3"]
df_preds[label_columns]

Unnamed: 0,pred_1,pred_2,pred_3
0,Chimpanzee,Gorilla,Mongoose
1,Chimpanzee,Gorilla,Elephant_African
2,Chimpanzee,Gorilla,Elephant_African
3,Chimpanzee,Gorilla,Human
4,Chimpanzee,Gorilla,Mongoose
...,...,...,...
982,Chimpanzee,Gorilla,Duiker_Yellow_Backed
983,Chimpanzee,Gorilla,Mandrillus
984,Chimpanzee,Gorilla,Duiker_Red
985,Chimpanzee,Gorilla,Mongoose


In [17]:
# Count rows where "Chimpanzee" (the class this test folder is assumed to
# contain) is the first prediction, and rows where it is any of the first three.
target_class = "Chimpanzee"
top3_labels = df_preds[["pred_1", "pred_2", "pred_3"]]
correct_top1 = top3_labels["pred_1"].eq(target_class).sum()
correct_top3 = top3_labels.eq(target_class).any(axis=1).sum()
all_count = df_preds.shape[0]

In [18]:
# Report, as a percentage of all rows in df_preds, how often "Chimpanzee" was
# the first prediction (top 1) and how often it was among the first three (top 3).
print(f"Top 1 accuracy on Chimpanzees is {correct_top1 / all_count * 100}%")
print(f"Top 3 accuracy on Chimpanzees is {correct_top3 / all_count * 100}%")

Top 1 accuracy on Chimpanzees is 99.3920972644377%
Top 3 accuracy on Chimpanzees is 100.0%
