### Fish classifier

Author: Łukasz Szarecki 

Dataset: https://www.kaggle.com/aungpyaeap/fish-market

1. Loading the data into a pandas DataFrame

In [15]:
import pandas as pd
import numpy as np
# Load the fish-market CSV and preview its first five rows.
fish_df = pd.read_csv("Fish.csv")
print(fish_df.head(5))

  Species  Weight  Length1  Length2  Length3   Height   Width
0   Bream   242.0     23.2     25.4     30.0  11.5200  4.0200
1   Bream   290.0     24.0     26.3     31.2  12.4800  4.3056
2   Bream   340.0     23.9     26.5     31.1  12.3778  4.6961
3   Bream   363.0     26.3     29.0     33.5  12.7300  4.4555
4   Bream   430.0     26.5     29.0     34.0  12.4440  5.1340


2. We have to shuffle the data because the rows are sorted.
3. Preparing training and testing data.

In [16]:
# Shuffle the rows (the file comes sorted). The fixed seed keeps the row order,
# and everything derived from it, reproducible across "Restart & Run All".
fish_df = fish_df.sample(frac=1, random_state=42)
# Replace the shuffled original row labels with a fresh 0..n-1 index.
fish_df = fish_df.reset_index(drop=True)
print(fish_df.head())
print(f'There are {len(fish_df)} samples')

  Species  Weight  Length1  Length2  Length3  Height   Width
0   Perch    40.0     13.8     15.0     16.0  3.8240  2.4320
1   Roach    40.0     12.9     14.1     16.2  4.1472  2.2680
2    Pike   456.0     40.0     42.5     45.5  7.2800  4.3225
3   Smelt     7.5     10.0     10.5     11.6  1.9720  1.1600
4   Perch   120.0     20.0     22.0     23.5  5.6400  3.5250
There are 159 samples


3. Input and output data

In [25]:
# Features: every column after the first one, converted to a NumPy array.
X = np.array(fish_df.iloc[:, 1:])
print(X.shape)
# Target: the first column (the species name of each sample), kept as a Series.
Y = fish_df.iloc[:, 0]
print(Y.shape)

(159, 6)
(159,)


### Encoder class

3. Each fish species will get its own integer identifier
* fit() - builds the mapping between identifiers and fish names
* transform() - fish name (str) to id (int)
* inverse_transform() - id (int) to fish name (str)

In [46]:
class Encoder:
    """Label encoder that maps class names (e.g. fish species) to integer ids.

    Ids are assigned in order of first appearance during ``fit``: the first
    distinct name seen gets id 0, the next distinct name id 1, and so on.
    """

    def __init__(self):
        self.names = []  # distinct class names; list position == class id
        self.num_classes = 0  # number of distinct classes registered so far

    def fit(self, y):
        """Register every not-yet-known name found in ``y``.

        Args:
            y: iterable of hashable class names.

        Calling ``fit`` again keeps the existing ids and appends new names.
        """
        known = set(self.names)  # O(1) membership instead of scanning the list
        for sample in y:
            if sample not in known:
                known.add(sample)
                self.names.append(sample)
        self.num_classes = len(self.names)

    def transform(self, y):
        """Convert class names to their integer ids.

        Args:
            y: iterable of class names previously seen by ``fit``.

        Returns:
            1-D integer NumPy array with one id per element of ``y``.

        Raises:
            ValueError: if a name was never registered with ``fit``.
        """
        samples = list(y)  # accept any iterable, not only sized sequences
        # Name -> id table built from ``names`` on each call, so it can never
        # go stale; ``setdefault`` keeps the first position, like list.index.
        lookup = {}
        for idx, name in enumerate(self.names):
            lookup.setdefault(name, idx)
        # Integer dtype: ids are class indices, not measurements.
        encoded_samples = np.zeros(len(samples), dtype=int)
        for index, sample in enumerate(samples):
            if sample not in lookup:
                raise ValueError(
                    f"unknown label {sample!r}; it was not seen by fit()"
                )
            encoded_samples[index] = lookup[sample]
        return encoded_samples

    def inverse_transform(self, encoded_y):
        """Convert integer ids back to class names.

        Args:
            encoded_y: iterable of ids (ints, or floats holding whole numbers).

        Returns:
            List of class names, one per id.

        Raises:
            IndexError: if an id is negative or not smaller than the number
                of registered classes.
        """
        samples = []
        for sample in encoded_y:
            idx = int(sample)
            # A negative index would silently wrap around to a class from the
            # end of the list, so reject it explicitly.
            if idx < 0:
                raise IndexError(f"class id must be non-negative, got {idx}")
            samples.append(self.names[idx])
        return samples

In [48]:
# Fit the encoder on the dataset labels and report how many classes it found.
encoder = Encoder()
encoder.fit(Y)
print(encoder.num_classes)

# Round trip: names -> ids -> names should print the original list again.
test_list = [
    'Pike', 'Smelt', 'Perch', 'Pike',
    'Pike', 'Bream', 'Bream', 'Roach',
]
encoded_num_test = encoder.transform(test_list)
print(encoder.inverse_transform(encoded_num_test))

7
['Pike', 'Smelt', 'Perch', 'Pike', 'Pike', 'Bream', 'Bream', 'Roach']
