In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Recursively list every file under the Kaggle input directory, one full path per line.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(root, name))

# Any results you write to the current directory are saved as output.

> Implementation of ELM Algorithm on Yeast Dataset

The dataset is taken from the UCI Machine Learning Repository:
[https://archive.ics.uci.edu/ml/datasets/Yeast](https://archive.ics.uci.edu/ml/datasets/Yeast)

Libraries used for implementing ELM:
1. Pandas for data processing and class label visualization
2. Matplotlib for plotting bar and line plots
3. Seaborn for efficient facet visualization

Column names (headers) can be assigned to the dataset through the `names` parameter of `pd.read_csv`, which makes the data easier to visualize.

The dataset has following attributes:
1. Class Label: 10 Class labels based on other attributes - CYT(0), ERL(1), EXC(2), ME1(3), ME2(4), ME3(5), MIT(6), NUC(7), POX(8) and VAC(9)
2. mcg: McGeoch's method for signal sequence recognition.
3. gvh: von Heijne's method for signal sequence recognition.
4. alm: Score of the ALOM membrane spanning region prediction program.
5. mit: Score of discriminant analysis of the amino acid content of the N-terminal region (20 residues long) of mitochondrial and non-mitochondrial proteins.
6. erl: Presence of "HDEL" substring (thought to act as a signal for retention in the endoplasmic reticulum lumen). Binary attribute.
7. pox: Peroxisomal targeting signal in the C-terminus.
8. vac: Score of discriminant analysis of the amino acid content of vacuolar and extracellular proteins.
9. nuc: Score of discriminant analysis of nuclear localization signals of nuclear and non-nuclear proteins.


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn import preprocessing

# Read the yeast CSV; passing `names` assigns the column labels explicitly,
# so every row in the file is treated as data.
yeast = pd.read_csv(
    "../input/yeastwithoutclass/yeast_classrow.csv",
    names=['classLabel', 'MCG', 'GVH', 'ALM', 'MIT', 'ERL', 'POX', 'VAC', 'NUC'],
)
# Quick sanity summary: dimensions, per-column statistics, first rows.
print("Database shape rows: %s columns:%s \n" % yeast.shape)
print(yeast.describe())
yeast.head()

After fetching the data, it's time to one-hot encode the classes of the dataset, i.e. transform each class label into a binary column so that the targets form a convenient array. In the same cell, a bar chart of the class labels is plotted to give a rough idea of how many samples each class has, followed by a line plot of the attributes.

In [None]:
def create_one_hot_encoding(classes, shape):
    """Build a one-hot matrix from numeric class labels.

    Parameters
    ----------
    classes : sequence of numbers
        Class index for each row. Values are truncated to ``int`` before
        being used as column indices. Entries beyond ``shape[0]`` are ignored.
    shape : tuple of int
        Shape of the returned matrix, ``(n_rows, n_classes)``.

    Returns
    -------
    numpy.ndarray
        Array of zeros (float) with a 1 in row ``i`` at column ``classes[i]``.

    Raises
    ------
    IndexError
        If ``classes`` has fewer entries than ``shape[0]`` or a label falls
        outside the column range.
    """
    one_hot_encoding = np.zeros(shape)
    n_rows = len(one_hot_encoding)
    labels = np.asarray(classes)[:n_rows].astype(int)
    if len(labels) < n_rows:
        # Guard explicitly: a length-1 label array would otherwise broadcast silently.
        raise IndexError(
            "classes has %d entries but %d rows were requested" % (len(labels), n_rows)
        )
    # Vectorized assignment: one (row, label) pair per sample instead of a Python loop.
    one_hot_encoding[np.arange(n_rows), labels] = 1
    return one_hot_encoding


# Matplotlib bar chart: number of samples per class label.
data = yeast['classLabel'].value_counts()  # occurrences of each class
classLabel, frequency = data.index, data.values

fig, ax = plt.subplots()
ax.bar(classLabel, frequency)
ax.set_title('Yeast Data labels')
ax.set_xlabel('classLabel')
ax.set_ylabel('Frequency')

# Line plot of every attribute column (class label excluded) against the row index.
yeast.drop(columns=['classLabel']).plot.line(title='Yeast Dataset Attributes')

Now we create a training function that takes the random input weights $W$, the inputs $X$ and the targets $Y$. It first computes the hidden-layer activations $H = \max(XW, 0)$ (element-wise ReLU), and then returns the output weights $\beta = H^{+} Y$, where $H^{+}$ is the Moore-Penrose pseudo-inverse of $H$. The resulting matrix is printed in the final output (output given at the end).

In [None]:
def training(weights, x, y):
    """Compute the ELM output weights in closed form.

    The hidden activations are ``H = max(x @ weights, 0)`` (ReLU) and the
    returned matrix is ``pinv(H) @ y``, i.e. the least-squares solution that
    maps hidden activations to the targets.

    Parameters
    ----------
    weights : numpy.ndarray
        Input-to-hidden weight matrix; multiplied on the right of ``x``.
    x : numpy.ndarray
        Training inputs, one sample per row.
    y : numpy.ndarray
        Training targets, one row per sample.

    Returns
    -------
    numpy.ndarray
        Hidden-to-output weight matrix ``pinv(H) @ y``.
    """
    hidden = np.maximum(x.dot(weights), 0)  # ReLU
    # The pseudo-inverse is the expensive step, so compute it once and reuse
    # the result for both the printout and the return value.
    beta = np.linalg.pinv(hidden).dot(y)
    # NOTE: the value printed is pinv(H) @ y (the output weights), not pinv(H) itself.
    print("The Moore Penrose (Pseudo-Inverse) of the weight matrix is as follows: ")
    print(beta)
    return beta


The softmax function is an activation function that turns a vector of numbers (logits) into probabilities that sum to one.
Its output is a vector representing a probability distribution over the list of potential outcomes.

> ***Uncomment the commented lines in the softmax matrix function to see the arrays formed in the final output***

In [None]:
def soft_max(layer):
    """Return the softmax of a 1-D vector of scores.

    The maximum score is subtracted before exponentiating, which keeps
    ``np.exp`` from overflowing without changing the result.

    Parameters
    ----------
    layer : sequence of numbers
        1-D vector of scores (logits).

    Returns
    -------
    numpy.ndarray
        Float array of the same length whose entries sum to 1; an empty array
        when ``layer`` is empty.
    """
    if len(layer) == 0:
        return np.zeros(0)
    scores = np.asarray(layer, dtype=float)
    # Compute the shifted exponentials and their sum once, instead of
    # rebuilding the denominator for every output element.
    exps = np.exp(scores - np.max(scores))
    return exps / exps.sum()

def matrix_soft_max(matrix_):
    """Apply ``soft_max`` to each row of ``matrix_``.

    Returns a Python list with one softmax vector per input row, in order.
    """
    soft_max_matrix = [soft_max(row) for row in matrix_]
    #print(soft_max_matrix)  # uncomment to see the per-row probability arrays
    return soft_max_matrix

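We measure the network's performance by counting correct predictions: the counter starts at zero and is incremented whenever the most probable predicted class (argmax of the network output) matches the true class (argmax of the one-hot label). The function returns that count.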

In [None]:
def network_power_check(o, o_real):
    """Count the rows where prediction and target agree on the argmax.

    ``o`` holds the predicted score vectors and ``o_real`` the reference
    vectors; row ``i`` is a hit when both have their largest entry at the
    same index. Returns the number of hits as an int.
    """
    return sum(
        1
        for idx in range(len(o))
        if np.argmax(o[idx]) == np.argmax(o_real[idx])
    )

Using the Seaborn library, we draw a FacetGrid with one kernel-density plot per class label of the Yeast dataset, first for the 'VAC' column and then for the 'NUC' column.

In [None]:
import seaborn as sns

# One panel per class label, each showing the kernel-density estimate of 'VAC'.
g = sns.FacetGrid(yeast, col='classLabel').map(sns.kdeplot, 'VAC')

In [None]:
import seaborn as sns

# One panel per class label, each showing the kernel-density estimate of 'NUC'.
g1 = sns.FacetGrid(yeast, col='classLabel').map(sns.kdeplot, 'NUC')

Create the testing function, which takes the same parameters as training plus `beta`. Here `beta` is the variable holding the value returned by `training(weights, x, y)`.

> ***Uncomment the commented line to view the soft max matrix/array in the final output (Before the accuracy line)***

In [None]:
def testing(weights, beta, x, y):
    """Return the fraction of rows of ``x`` that the network classifies correctly.

    The hidden activations ``max(x @ weights, 0)`` are projected through
    ``beta``, turned into per-row probabilities with ``matrix_soft_max``, and
    compared against ``y`` by ``network_power_check``; the hit count is
    divided by ``len(y)``.
    """
    hidden = np.maximum(x.dot(weights), 0)  # ReLU function
    o = matrix_soft_max(hidden.dot(beta))
    #print(o)
    hits = network_power_check(o, y)
    return hits / len(y)

In the end, we shuffle the dataset and randomly split it into training and testing data with a fixed test size, and apply the preprocessing and normalization operations. All the functions are called at once in the following code, and the final output along with the accuracy is printed.

***The final Accuracy on the Yeast dataset ranges between 50- 65% on each run***

In [None]:
# Driver cell: prepare the data, fit the ELM and report its test accuracy.
class_column = 0
test_size = 0.1

# `np.float` was removed in NumPy 1.24; the builtin `float` is the same dtype (float64).
db = yeast.iloc[:, :].values.astype(float)
np.random.shuffle(db)

# Shift the labels so the smallest class id is 0, then one-hot encode them.
y = db[:, class_column]
y -= np.min(y)
output_layer_perceptron_count = len(np.unique(y))
y = create_one_hot_encoding(y, (len(y), output_layer_perceptron_count))

# Normalize the features BEFORE splitting. Previously this ran after
# train_test_split, so x_train / x_test were never normalized.
# `preprocessing.normalize` rescales each sample (row) independently, so doing
# it ahead of the split leaks nothing from the test set.
x = np.delete(db, [class_column], axis=1)
x = preprocessing.normalize(x)
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=test_size)

# NOTE(review): the hidden-layer width is tied to the number of test samples;
# this looks like an arbitrary choice - confirm it is intended.
hidden_layer_perceptron_count = len(y_test)
weights = np.random.random((len(x[0]), hidden_layer_perceptron_count))
beta = training(weights, x_train, y_train)
print("The Accuracy of ELM Algorithm on Yeast Dataset is = %s." % testing(weights, beta, x_test, y_test))