# Set working directory

First we need to change the directory that we are working in, if necessary.<br>
By running the first cell we´ll see what directory we currently are in, the second cell goes to the parent of the current directory. <br>The third cell will confirm what directory we work in.

In [2]:
import os
working_dir = os.getcwd()
print(f"You are now working in {working_dir}")
print("If you need to change to the parent directory, run the cell below")

You are now working in c:\Users\fredd\Desktop\Studier\Project5\mushrooms\Mushroom_edible_gallery\jupyter_notebooks
If you need to change to the parent directory, run the cell below


In [3]:
os.chdir(os.path.dirname(working_dir))

In [4]:
working_dir = os.getcwd()
print(f"You have now changed your working directory to {working_dir}")

You have now changed your working directory to c:\Users\fredd\Desktop\Studier\Project5\mushrooms\Mushroom_edible_gallery


## Set output directory

In [None]:
version = 'v1'
file_path = f'outputs/{version}'

if 'outputs' in os.listdir(working_dir) and version in os.listdir(working_dir + '/outputs'):
    print("This version already exists, create a new version if you are working on a new version")
    pass
else:
    os.makedirs(name=file_path)

This version already exists, create a new version


<hr>


# Data preparation

Check through all files if there are files that isn´t an image

In [None]:
import os

"""
This function will search through the raw dataset
for files that doesn´t have the extensions
that we typed in at 'image_extension'.

When going through all the files the arrays for
image_files and non_image_files are filled with
result that in the end will be printed out.    
"""


def search_non_image_files(working_dir):
    
    image_extension = ('.png', '.jpg', '.jpeg') # file extensions to search for
    
    non_image_files = [] # array for files without the extension searched for
    image_files = [] # array for image files with the extension searched for
    
    folders = os.listdir(working_dir)
    for folder in folders:
        files = os.listdir(os.path.join(working_dir, folder))
        
        for file in files: 
            file_location = os.path.join(working_dir, folder, file)
            if not file.lower().endswith(image_extension):
                non_image_files.append(file_location)
            else:
                image_files.append(file_location)
    

    print("Total amount of folders searched:", len(folders))
    print("Total image files found:", len(image_files))
    print("Total non image files found:", len(non_image_files))
    
    return image_files, non_image_files

Now lets run this function to see if there are non image files in our folders.

In [6]:
search_non_image_files(working_dir='input/dataset/raw/MO_94')

Total amount of folders searched: 94
Total image files found: 27436
Total non image files found: 0


(['input/dataset/raw/MO_94\\Agaricus augustus\\Agaricus augustus_1.jpg',
  'input/dataset/raw/MO_94\\Agaricus augustus\\Agaricus augustus_10.jpg',
  'input/dataset/raw/MO_94\\Agaricus augustus\\Agaricus augustus_100.jpg',
  'input/dataset/raw/MO_94\\Agaricus augustus\\Agaricus augustus_101.jpg',
  'input/dataset/raw/MO_94\\Agaricus augustus\\Agaricus augustus_102.jpg',
  'input/dataset/raw/MO_94\\Agaricus augustus\\Agaricus augustus_103.jpg',
  'input/dataset/raw/MO_94\\Agaricus augustus\\Agaricus augustus_104.jpg',
  'input/dataset/raw/MO_94\\Agaricus augustus\\Agaricus augustus_105.jpg',
  'input/dataset/raw/MO_94\\Agaricus augustus\\Agaricus augustus_106.jpg',
  'input/dataset/raw/MO_94\\Agaricus augustus\\Agaricus augustus_107.jpg',
  'input/dataset/raw/MO_94\\Agaricus augustus\\Agaricus augustus_108.jpg',
  'input/dataset/raw/MO_94\\Agaricus augustus\\Agaricus augustus_109.jpg',
  'input/dataset/raw/MO_94\\Agaricus augustus\\Agaricus augustus_11.jpg',
  'input/dataset/raw/MO_94\\A

In [41]:
import joblib

def name_of_the_species():
    
    folders = 'input/dataset/raw/MO_94'
    
    
    species = []
    for directory in os.listdir(folders):
        directory_path = os.path.join(folders, directory)
        if os.path.isdir(directory_path):
            species.append(directory)
            
    
    print("These are the species we are handling:")
    for specie in species:
        print(specie)
    
    return species

species = name_of_the_species()
print("Amount of species:", len(species))  

# Save species as a .pkl-fil
file_path = 'outputs/v1'  
joblib.dump(value=species, filename=f"{file_path}/species.pkl")



These are the species we are handling:
Agaricus augustus
Agaricus xanthodermus
Amanita amerirubescens
Amanita augusta
Amanita brunnescens
Amanita calyptroderma
Amanita flavoconia
Amanita muscaria
Amanita persicina
Amanita phalloides
Amanita velosa
Armillaria mellea
Armillaria tabescens
Artomyces pyxidatus
Bolbitius titubans
Boletus pallidus
Boletus rex-veris
Cantharellus californicus
Cantharellus cinnabarinus
Cerioporus squamosus
Chlorophyllum brunneum
Chlorophyllum molybdites
Clitocybe nuda
Coprinellus micaceus
Coprinopsis lagopus
Coprinus comatus
Crucibulum laeve
Cryptoporus volvatus
Daedaleopsis confragosa
Entoloma abortivum
Flammulina velutipes
Fomitopsis mounceae
Galerina marginata
Ganoderma applanatum
Ganoderma curtisii
Ganoderma oregonense
Ganoderma tsugae
Gliophorus psittacinus
Gloeophyllum sepiarium
Grifola frondosa
Gymnopilus luteofolius
Hericium coralloides
Hericium erinaceus
Hygrophoropsis aurantiaca
Hypholoma fasciculare
Hypholoma lateritium
Hypomyces lactifluorum
Ischnode

['outputs/v1/species.pkl']

In [43]:
# Load the .pkl file to control if the correct info is saved
loaded_species = joblib.load(f"{file_path}/species.pkl")
print("Total amount of species:", len(loaded_species))
for species in loaded_species:
    print(species)

Total amount of species: 94
Agaricus augustus
Agaricus xanthodermus
Amanita amerirubescens
Amanita augusta
Amanita brunnescens
Amanita calyptroderma
Amanita flavoconia
Amanita muscaria
Amanita persicina
Amanita phalloides
Amanita velosa
Armillaria mellea
Armillaria tabescens
Artomyces pyxidatus
Bolbitius titubans
Boletus pallidus
Boletus rex-veris
Cantharellus californicus
Cantharellus cinnabarinus
Cerioporus squamosus
Chlorophyllum brunneum
Chlorophyllum molybdites
Clitocybe nuda
Coprinellus micaceus
Coprinopsis lagopus
Coprinus comatus
Crucibulum laeve
Cryptoporus volvatus
Daedaleopsis confragosa
Entoloma abortivum
Flammulina velutipes
Fomitopsis mounceae
Galerina marginata
Ganoderma applanatum
Ganoderma curtisii
Ganoderma oregonense
Ganoderma tsugae
Gliophorus psittacinus
Gloeophyllum sepiarium
Grifola frondosa
Gymnopilus luteofolius
Hericium coralloides
Hericium erinaceus
Hygrophoropsis aurantiaca
Hypholoma fasciculare
Hypholoma lateritium
Hypomyces lactifluorum
Ischnoderma resinos