# Multiclass version of our mushroom classifier

In [1]:
# Both archives share the same URL and local-path layout, keyed by version.
_DATASET_URL_TEMPLATE = 'https://s3.eu-central-1.amazonaws.com/deep-shrooms/{}.zip'
_DATASET_PATH_TEMPLATE = 'data/{}/'

# "mushroom_world" dataset snapshot.
MW_DATASET_VERSION = 'mushroom_world_2017_10_30'
MW_DATASET_URL = _DATASET_URL_TEMPLATE.format(MW_DATASET_VERSION)
MW_DATASET_PATH = _DATASET_PATH_TEMPLATE.format(MW_DATASET_VERSION)

# "luontoportti" dataset snapshot.
LP_DATASET_VERSION = 'luontoportti_2017_10_30'
LP_DATASET_URL = _DATASET_URL_TEMPLATE.format(LP_DATASET_VERSION)
LP_DATASET_PATH = _DATASET_PATH_TEMPLATE.format(LP_DATASET_VERSION)

# Presumably model/training hyperparameters; their use is not visible in
# this part of the notebook -- TODO confirm against the model cells.
NUM_CLASSES = 10
INPUT_SHAPE = (256, 256, 3)  # presumably (height, width, channels) -- confirm
BATCH_SIZE = 128
EPOCHS = 12

In [2]:
from io import BytesIO
from urllib.request import urlopen
from zipfile import ZipFile

def import_data_set(url, target_dir='./data'):
    """Download a zip archive and unpack it into ``target_dir``.

    The whole archive is buffered in memory before extraction, and its size
    in bytes is printed.

    Args:
        url: Location of the zip archive; anything ``urlopen`` accepts.
        target_dir: Directory the archive members are extracted into.
            Defaults to ``'./data'``.

    Raises:
        urllib.error.URLError: If the URL cannot be opened.
        zipfile.BadZipFile: If the downloaded payload is not a zip archive.
    """
    with urlopen(url) as zip_response:
        # Content-Length is optional, so the header may be absent; calling
        # int() on the missing value would raise TypeError.
        declared_size = zip_response.headers.get('content-length')
        if declared_size is not None:
            # Report the size before the (potentially slow) download.
            print("File {} is {} bytes large".format(url, int(declared_size)))
        payload = zip_response.read()

    if declared_size is None:
        # No header was sent: report the size of what was actually received.
        print("File {} is {} bytes large".format(url, len(payload)))

    with ZipFile(BytesIO(payload)) as zfile:
        zfile.extractall(target_dir)


In [3]:
# Fetch and unpack both dataset archives, one after the other.
for dataset_url in (MW_DATASET_URL, LP_DATASET_URL):
    import_data_set(dataset_url)

File https://s3.eu-central-1.amazonaws.com/deep-shrooms/mushroom_world_2017_10_30.zip is 7200944 bytes large
File https://s3.eu-central-1.amazonaws.com/deep-shrooms/luontoportti_2017_10_30.zip is 11244003 bytes large


In [4]:
import pandas as pd
import numpy as np

# First dataset: classes and images are read with lines=True, i.e. one JSON
# object per line.
mw_mushroom_classes = pd.read_json(
    MW_DATASET_PATH + 'mushroom_classes.json',
    lines=True,
)
mw_mushroom_imgs = pd.read_json(
    MW_DATASET_PATH + 'mushroom_imgs.json',
    lines=True,
)

# Attach class columns to each image row via the shared "name_latin" key.
# how="right" keeps every row of mw_mushroom_classes, including classes that
# have no matching image row.
mw_mushroom_info = pd.merge(
    mw_mushroom_imgs,
    mw_mushroom_classes,
    how="right",
    on="name_latin",
)

# Second dataset: tab-separated files for classes and images.
lp_mushroom_classes = pd.read_csv(
    LP_DATASET_PATH + 'lp_mushroom_classes.csv',
    sep='\t',
)
lp_mushroom_imgs = pd.read_csv(
    LP_DATASET_PATH + 'lp_mushroom_imgs.csv',
    sep='\t',
)

In [5]:
# Print how many rows carry each edibility label, first for the merged
# mushroom_world frame, then for the luontoportti image table.
for edibility_column in (mw_mushroom_info['edibility'], lp_mushroom_imgs['edibility']):
    print(edibility_column.value_counts())

inedible                202
edible                  149
poisonous                77
edible and good          50
edible and excellent     27
lethally poisonous       16
edible when cooked       15
Name: edibility, dtype: int64
syötävyys: *** – erinomainen ruokasieni                                                                                  137
syötävyys: * – syötävä ruokasieni                                                                                        134
syötävyys: *** – erinomainen ruokasieni, eviran suosittelema                                                             124
syötävyys: ** – hyvä ruokasieni                                                                                          101
syötävyys: ** – hyvä ruokasieni, eviran suosittelema                                                                      72
syötävyys: o* – keittämisen jälkeen syötävä ruokasieni                                                                    51
syötävyys: x – käyttökel