In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm
from pathlib import Path
from sklearn.preprocessing import MinMaxScaler

In [2]:
root = Path.cwd().parent
data = root / 'data'

In [3]:
df = pd.read_csv(data / 'train_val.csv', parse_dates=['date'])

In [27]:
class_priors = np.zeros(len(df['class_id'].unique()))
for species in df['class_id'].unique():
    class_priors[species] = len(df[df['class_id'] == species])

class_priors = class_priors/sum(class_priors)

month_distributions = {}

for _, observation in tqdm(df.iterrows(), total=len(df)):
    month = str(observation['date'].month)
    class_id = observation.class_id
    if month not in month_distributions:        
        month_distributions[month] = np.zeros(len(df['class_id'].unique()))
    else:
        month_distributions[month][class_id] += 1

for key, value in month_distributions.items():
    month_distributions[key] = value / sum(value)

100%|██████████| 194743/194743 [00:51<00:00, 3810.82it/s]


In [18]:
df['norm_date'] = MinMaxScaler().fit_transform(df[['days']])
df.drop('date_c', axis=1).to_csv(data / 'train_val.csv', index=False)

In [32]:
import pickle

pickle.dump(month_distributions, open(root / 'month_distributions.pkl', 'wb'))
pickle.dump(class_priors, open(root / 'class_priors.pkl', 'wb'))

In [4]:
df

Unnamed: 0,name,family,phylum,order,genus,class,file_name,height,width,location_uncertainty,...,image_dir_name,dataset,common_name,file_path,gcs_path,month,class_id,year,days,norm_date
0,Herpothallon rubrocinctum,Arthoniaceae,Ascomycota,Arthoniales,Herpothallon,Arthoniomycetes,cbb41cb8edf3cfe364e4e3b083ab7a24.jpg,600,800,0.0,...,Fungi_Ascomycota_Arthoniomycetes_Arthoniales_A...,2018,,Mushroom-Classifier/data/train/Fungi_Ascomycot...,gs://mush-img-repo/train/ Fungi_Ascomycota_Art...,8,185,2016,215,0.586301
1,Herpothallon rubrocinctum,Arthoniaceae,Ascomycota,Arthoniales,Herpothallon,Arthoniomycetes,755adc87e72ccb246b5b177efb0b8715.jpg,600,800,29.0,...,Fungi_Ascomycota_Arthoniomycetes_Arthoniales_A...,2018,,Mushroom-Classifier/data/train/Fungi_Ascomycot...,gs://mush-img-repo/train/ Fungi_Ascomycota_Art...,10,185,2015,297,0.810959
2,Herpothallon rubrocinctum,Arthoniaceae,Ascomycota,Arthoniales,Herpothallon,Arthoniomycetes,0be8000dcc267e8538152788dd4a05fe.jpg,600,800,0.0,...,Fungi_Ascomycota_Arthoniomycetes_Arthoniales_A...,2018,,Mushroom-Classifier/data/train/Fungi_Ascomycot...,gs://mush-img-repo/train/ Fungi_Ascomycota_Art...,12,185,2016,357,0.975342
3,Herpothallon rubrocinctum,Arthoniaceae,Ascomycota,Arthoniales,Herpothallon,Arthoniomycetes,e3199adcb85cdf4b66e0066a6957788a.jpg,600,800,0.0,...,Fungi_Ascomycota_Arthoniomycetes_Arthoniales_A...,2018,,Mushroom-Classifier/data/train/Fungi_Ascomycot...,gs://mush-img-repo/train/ Fungi_Ascomycota_Art...,12,185,2016,357,0.975342
4,Herpothallon rubrocinctum,Arthoniaceae,Ascomycota,Arthoniales,Herpothallon,Arthoniomycetes,7fc01fe5971cc9d8a5e0ccd49fad91af.jpg,600,800,0.0,...,Fungi_Ascomycota_Arthoniomycetes_Arthoniales_A...,2018,,Mushroom-Classifier/data/train/Fungi_Ascomycot...,gs://mush-img-repo/train/ Fungi_Ascomycota_Art...,7,185,2016,202,0.550685
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
194738,Tremella mesenterica,Tremellaceae,Basidiomycota,Tremellales,Tremella,Tremellomycetes,51f13b0b-f4f7-46ed-9e53-4d0c63ddad6b.jpg,375,500,13.0,...,Fungi_Basidiomycota_Tremellomycetes_Tremellale...,2021,witch's butter,Mushroom-Classifier/data/train/Fungi_Basidiomy...,gs://mush-img-repo/train/ Fungi_Basidiomycota_...,5,435,2019,131,0.356164
194739,Tremella mesenterica,Tremellaceae,Basidiomycota,Tremellales,Tremella,Tremellomycetes,294cf8dc-bfaa-45b7-a7a8-205d1a22e33d.jpg,375,500,44.0,...,Fungi_Basidiomycota_Tremellomycetes_Tremellale...,2021,witch's butter,Mushroom-Classifier/data/train/Fungi_Basidiomy...,gs://mush-img-repo/train/ Fungi_Basidiomycota_...,6,435,2019,160,0.435616
194740,Tremella mesenterica,Tremellaceae,Basidiomycota,Tremellales,Tremella,Tremellomycetes,b3b8cb90-d5d6-4118-826f-d69c6803a11a.jpg,341,500,197.0,...,Fungi_Basidiomycota_Tremellomycetes_Tremellale...,2021,witch's butter,Mushroom-Classifier/data/train/Fungi_Basidiomy...,gs://mush-img-repo/train/ Fungi_Basidiomycota_...,6,435,2019,167,0.454795
194741,Tremella mesenterica,Tremellaceae,Basidiomycota,Tremellales,Tremella,Tremellomycetes,f38f1edd-34f2-4e8d-aaeb-c95ed22dd1ca.jpg,500,410,15.0,...,Fungi_Basidiomycota_Tremellomycetes_Tremellale...,2021,witch's butter,Mushroom-Classifier/data/train/Fungi_Basidiomy...,gs://mush-img-repo/train/ Fungi_Basidiomycota_...,10,435,2018,282,0.769863
