In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm
from pathlib import Path
from sklearn.preprocessing import MinMaxScaler

In [2]:
root = Path.cwd().parent
data = root / 'data'

In [25]:
df = pd.read_csv(data / 'train_val.csv', parse_dates=['date'])

In [27]:
class_priors = np.zeros(len(df['class_id'].unique()))
for species in df['class_id'].unique():
    class_priors[species] = len(df[df['class_id'] == species])

class_priors = class_priors/sum(class_priors)

month_distributions = {}

for _, observation in tqdm(df.iterrows(), total=len(df)):
    month = str(observation['date'].month)
    class_id = observation.class_id
    if month not in month_distributions:        
        month_distributions[month] = np.zeros(len(df['class_id'].unique()))
    else:
        month_distributions[month][class_id] += 1

for key, value in month_distributions.items():
    month_distributions[key] = value / sum(value)

100%|██████████| 194743/194743 [00:51<00:00, 3810.82it/s]


In [18]:
df['norm_date'] = MinMaxScaler().fit_transform(df[['days']])
df.drop('date_c', axis=1).to_csv(data / 'train_val.csv', index=False)

In [32]:
import pickle

pickle.dump(month_distributions, open(root / 'month_distributions.pkl', 'wb'))
pickle.dump(class_priors, open(root / 'class_priors.pkl', 'wb'))

In [28]:
df[df['dataset'] == 2018]

Unnamed: 0,name,family,phylum,order,genus,class,file_name,height,width,location_uncertainty,...,date_c,longitude,set,specific_epithet,image_dir_name,dataset,common_name,file_path,gcs_path,class_id
0,Herpothallon rubrocinctum,Arthoniaceae,Ascomycota,Arthoniales,Herpothallon,Arthoniomycetes,cbb41cb8edf3cfe364e4e3b083ab7a24.jpg,600,800,0.0,...,0.5852,-81.437020,train,rubrocinctum,Fungi_Ascomycota_Arthoniomycetes_Arthoniales_A...,2018,,Mushroom-Classifier/data/train/Fungi_Ascomycot...,gs://mush-img-repo/train/ Fungi_Ascomycota_Art...,185
1,Herpothallon rubrocinctum,Arthoniaceae,Ascomycota,Arthoniales,Herpothallon,Arthoniomycetes,755adc87e72ccb246b5b177efb0b8715.jpg,600,800,29.0,...,0.8132,-81.436785,train,rubrocinctum,Fungi_Ascomycota_Arthoniomycetes_Arthoniales_A...,2018,,Mushroom-Classifier/data/train/Fungi_Ascomycot...,gs://mush-img-repo/train/ Fungi_Ascomycota_Art...,185
2,Herpothallon rubrocinctum,Arthoniaceae,Ascomycota,Arthoniales,Herpothallon,Arthoniomycetes,0be8000dcc267e8538152788dd4a05fe.jpg,600,800,0.0,...,0.9753,-81.453300,train,rubrocinctum,Fungi_Ascomycota_Arthoniomycetes_Arthoniales_A...,2018,,Mushroom-Classifier/data/train/Fungi_Ascomycot...,gs://mush-img-repo/train/ Fungi_Ascomycota_Art...,185
3,Herpothallon rubrocinctum,Arthoniaceae,Ascomycota,Arthoniales,Herpothallon,Arthoniomycetes,e3199adcb85cdf4b66e0066a6957788a.jpg,600,800,0.0,...,0.9753,-81.453300,train,rubrocinctum,Fungi_Ascomycota_Arthoniomycetes_Arthoniales_A...,2018,,Mushroom-Classifier/data/train/Fungi_Ascomycot...,gs://mush-img-repo/train/ Fungi_Ascomycota_Art...,185
4,Herpothallon rubrocinctum,Arthoniaceae,Ascomycota,Arthoniales,Herpothallon,Arthoniomycetes,7fc01fe5971cc9d8a5e0ccd49fad91af.jpg,600,800,0.0,...,0.5495,-81.210864,train,rubrocinctum,Fungi_Ascomycota_Arthoniomycetes_Arthoniales_A...,2018,,Mushroom-Classifier/data/train/Fungi_Ascomycot...,gs://mush-img-repo/train/ Fungi_Ascomycota_Art...,185
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7822,Tremella mesenterica,Tremellaceae,Basidiomycota,Tremellales,Tremella,Tremellomycetes,e1321c2693dbcdcd8b56d1526d243711.jpg,800,600,3080.0,...,0.2280,-71.529176,validation,mesenterica,Fungi_Basidiomycota_Tremellomycetes_Tremellale...,2018,,Mushroom-Classifier/data/train/Fungi_Basidiomy...,gs://mush-img-repo/train/ Fungi_Basidiomycota_...,435
7823,Tremella mesenterica,Tremellaceae,Basidiomycota,Tremellales,Tremella,Tremellomycetes,4ca620046002ebc2c6da005a178bde05.jpg,800,696,2500.0,...,0.5852,-83.513951,validation,mesenterica,Fungi_Basidiomycota_Tremellomycetes_Tremellale...,2018,,Mushroom-Classifier/data/train/Fungi_Basidiomy...,gs://mush-img-repo/train/ Fungi_Basidiomycota_...,435
7824,Entomophthora muscae,Entomophthoraceae,Zygomycota,Entomophthorales,Entomophthora,Zygomycetes,6a16cf13d427f97e75553a858fb3343e.jpg,600,800,0.0,...,0.5907,-122.265342,validation,muscae,Fungi_Zygomycota_Zygomycetes_Entomophthorales_...,2018,,Mushroom-Classifier/data/train/Fungi_Zygomycot...,gs://mush-img-repo/train/ Fungi_Zygomycota_Zyg...,133
7825,Entomophthora muscae,Entomophthoraceae,Zygomycota,Entomophthorales,Entomophthora,Zygomycetes,ae3ad120bee653f6642a2c8d8468c975.jpg,800,657,0.0,...,0.6319,-122.265495,validation,muscae,Fungi_Zygomycota_Zygomycetes_Entomophthorales_...,2018,,Mushroom-Classifier/data/train/Fungi_Zygomycot...,gs://mush-img-repo/train/ Fungi_Zygomycota_Zyg...,133


In [42]:
df2 = pd.read_csv(data / '2018_train_val.csv')

In [43]:
df2[['genus', 'specific_epithet']] = pd.DataFrame(df2['name'].str.split().tolist(), index=df2.index)
df2

Unnamed: 0,name,family,phylum,category_id,order,genus,class,file_name,height,width,loc_uncert,date,lat,date_c,lon,set,specific_epithet
0,Herpothallon rubrocinctum,Arthoniaceae,Ascomycota,4900,Arthoniales,Herpothallon,Arthoniomycetes,train_val2018/Fungi/4900/cbb41cb8edf3cfe364e4e...,600,800,0,2016-08-02,28.367144,0.5852,-81.437020,train,rubrocinctum
1,Herpothallon rubrocinctum,Arthoniaceae,Ascomycota,4900,Arthoniales,Herpothallon,Arthoniomycetes,train_val2018/Fungi/4900/755adc87e72ccb246b5b1...,600,800,29,2015-10-24,28.367783,0.8132,-81.436785,train,rubrocinctum
2,Herpothallon rubrocinctum,Arthoniaceae,Ascomycota,4900,Arthoniales,Herpothallon,Arthoniomycetes,train_val2018/Fungi/4900/0be8000dcc267e8538152...,600,800,0,2016-12-22,28.300703,0.9753,-81.453300,train,rubrocinctum
3,Herpothallon rubrocinctum,Arthoniaceae,Ascomycota,4900,Arthoniales,Herpothallon,Arthoniomycetes,train_val2018/Fungi/4900/e3199adcb85cdf4b66e00...,600,800,0,2016-12-22,28.300703,0.9753,-81.453300,train,rubrocinctum
4,Herpothallon rubrocinctum,Arthoniaceae,Ascomycota,4900,Arthoniales,Herpothallon,Arthoniomycetes,train_val2018/Fungi/4900/7fc01fe5971cc9d8a5e0c...,600,800,0,2016-07-20,28.360108,0.5495,-81.210864,train,rubrocinctum
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7822,Tremella mesenterica,Tremellaceae,Basidiomycota,5219,Tremellales,Tremella,Tremellomycetes,train_val2018/Fungi/5219/e1321c2693dbcdcd8b56d...,800,600,3080,2012-03-25,42.472635,0.2280,-71.529176,validation,mesenterica
7823,Tremella mesenterica,Tremellaceae,Basidiomycota,5219,Tremellales,Tremella,Tremellomycetes,train_val2018/Fungi/5219/4ca620046002ebc2c6da0...,800,696,2500,2010-08-02,35.676102,0.5852,-83.513951,validation,mesenterica
7824,Entomophthora muscae,Entomophthoraceae,Zygomycota,5220,Entomophthorales,Entomophthora,Zygomycetes,train_val2018/Fungi/5220/6a16cf13d427f97e75553...,600,800,0,2016-08-04,37.803975,0.5907,-122.265342,validation,muscae
7825,Entomophthora muscae,Entomophthoraceae,Zygomycota,5220,Entomophthorales,Entomophthora,Zygomycetes,train_val2018/Fungi/5220/ae3ad120bee653f6642a2...,800,657,0,2016-08-19,37.803967,0.6319,-122.265495,validation,muscae
