In [113]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_diabetes
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor

In [114]:
class ELMRegressor():
    def __init__(self, n_hidden_units):
        self.n_hidden_units = n_hidden_units

    def fit(self, X, labels):
        X = np.column_stack([X, np.ones([X.shape[0], 1])])
        self.random_weights = np.random.randn(X.shape[1], self.n_hidden_units)
        G = np.tanh(X.dot(self.random_weights))
        self.w_elm = np.linalg.pinv(G).dot(labels)

    def predict(self, X):
        X = np.column_stack([X, np.ones([X.shape[0], 1])])
        G = np.tanh(X.dot(self.random_weights))
        return G.dot(self.w_elm)

In [115]:
diabetes = load_diabetes()

In [116]:
diabetes

{'data': array([[ 0.03807591,  0.05068012,  0.06169621, ..., -0.00259226,
          0.01990749, -0.01764613],
        [-0.00188202, -0.04464164, -0.05147406, ..., -0.03949338,
         -0.06833155, -0.09220405],
        [ 0.08529891,  0.05068012,  0.04445121, ..., -0.00259226,
          0.00286131, -0.02593034],
        ...,
        [ 0.04170844,  0.05068012, -0.01590626, ..., -0.01107952,
         -0.04688253,  0.01549073],
        [-0.04547248, -0.04464164,  0.03906215, ...,  0.02655962,
          0.04452873, -0.02593034],
        [-0.04547248, -0.04464164, -0.0730303 , ..., -0.03949338,
         -0.00422151,  0.00306441]], shape=(442, 10)),
 'target': array([151.,  75., 141., 206., 135.,  97., 138.,  63., 110., 310., 101.,
         69., 179., 185., 118., 171., 166., 144.,  97., 168.,  68.,  49.,
         68., 245., 184., 202., 137.,  85., 131., 283., 129.,  59., 341.,
         87.,  65., 102., 265., 276., 252.,  90., 100.,  55.,  61.,  92.,
        259.,  53., 190., 142.,  75., 142.

In [117]:
x,y = diabetes['data'],diabetes['target']


In [118]:
# train 80% , test 20%
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.2,random_state=42)

In [119]:
elm = ELMRegressor(200)

In [120]:
elm.fit(x_train,y_train)

In [121]:
# Random Weight
print(elm.random_weights)

[[ 0.82483033 -0.26609636  1.70855481 ...  0.53830208  2.00923427
  -0.53893919]
 [-1.18705968 -0.31584066  0.2564418  ...  1.51262069  0.07991519
  -0.77512242]
 [-0.65769941  0.42470868  0.201327   ... -0.4227244  -0.86106501
   0.21940137]
 ...
 [ 0.24955926  1.5649138   1.2688578  ...  1.24776933 -0.28094716
   1.33837964]
 [-0.39655056  0.27463895 -1.03825845 ...  0.77063532  1.32209663
   1.28974733]
 [-0.29028749 -0.77430207  1.42128303 ... -0.19997033  1.29737826
   0.56871656]]


In [122]:
# Weight Hidden Layer Calculated
print(elm.w_elm)

[-2.99520337e+04  1.83478854e+04 -6.92829862e+04 -9.74384004e+03
  7.17932164e+03 -8.87893881e+03 -1.64779399e+03 -7.18198837e+03
 -5.11394764e+03 -6.58176633e+03  1.98632621e+04 -3.27525357e+02
  1.19121787e+03 -3.73902044e+03 -1.80829603e+04 -6.00285498e+03
  5.59726337e+02  3.21686436e+05 -1.31632101e+04 -8.03918688e+04
  9.00554225e+04 -5.45824896e+04  2.77939439e+04 -3.49781427e+04
 -2.66893286e+03  2.62531412e+04 -7.48304474e+03 -5.84000815e+04
  2.42899254e+04  9.75102080e+04  2.37240894e+04 -1.24366600e+03
  7.42002782e+03 -2.12063698e+04  3.95456842e+04  7.62215013e+03
  1.43976680e+04 -6.67412807e+03  5.47635533e+04  7.44496472e+04
 -5.57373107e+04 -8.62696060e+03 -3.87442169e+04 -2.25127609e+03
  1.48748077e+04  1.19581370e+04 -1.49968564e+04  1.95907721e+04
 -5.53243180e+04  4.22001609e+03  1.92850663e+04  3.18374521e+04
 -1.96670427e+04 -1.69596412e+04  6.12997096e+04 -1.03362345e+04
  1.41793647e+03 -4.56015658e+03 -4.66850466e+04  2.00514626e+04
  2.08858972e+05 -3.31481

In [123]:
print(x_test[1,:])
print(y_test[1])
x=x_test[1,:].reshape(1,-1)
print(x)
y_pred = elm.predict(x)
print(y_pred)

[ 0.09256398 -0.04464164  0.03690653  0.02187239 -0.02496016 -0.01665815
  0.00077881 -0.03949338 -0.02251653 -0.02178823]
70.0
[[ 0.09256398 -0.04464164  0.03690653  0.02187239 -0.02496016 -0.01665815
   0.00077881 -0.03949338 -0.02251653 -0.02178823]]
[89.86652317]


In [124]:
#Evaluation
std_data = StandardScaler()
x_train = std_data.fit_transform(x_train)
x_test = std_data.transform(x_test)
max_y_train = max(abs(y_train))
y_train = y_train / max_y_train
y_test = y_test / max_y_train
elm = ELMRegressor(100)
elm.fit(x_train,y_train)
y_pred = elm.predict(x_test)
from sklearn.metrics import mean_absolute_error,mean_squared_error
print('MAE',mean_absolute_error(y_test,y_pred))
print("MSE",mean_squared_error(y_test,y_pred))

# ClassWork

### 1. ELM1
- InClass Test 5 data
- hidden units = 100, 200, 250
- columns = "People", "Y", "100units", "200units", "250units"
                            , data, error, data, error, data, error
- mae

In [141]:
data = pd.DataFrame(columns=["Y", "100units_pred", "100units_error", "200units_pred", "200units_error", "250units_pred", "250units_error"])
units = [100, 200, 250]
diabetes = load_diabetes()
x,y = diabetes['data'],diabetes['target']
# train 80% , test 20%
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.2,random_state=42)
work_x_test = x_test[:5]
work_y_test = y_test[:5]
for unit in units:
  elm = ELMRegressor(unit)
  elm.fit(x_train, y_train)
  y_pred = elm.predict(work_x_test)
  data['Y'] = work_y_test
  data[f'{unit}units_pred'] = y_pred
  data[f'{unit}units_error'] = abs(data['Y']-data[f'{unit}units_pred']) # Absolute deviation

# Row Total MAD ERROR
# Append "Y" last row for "Total ERROR" text
data.loc['Total ERROR'] = data[["100units_error", "200units_error", "250units_error"]].sum()
# MAD
data.loc['MAD', '100units_error'] = data.loc['Total ERROR', '100units_error'] / len(work_y_test) # Mean Absolute deviation
data.loc['MAD', '200units_error'] = data.loc['Total ERROR', '200units_error'] / len(work_y_test) # Mean Absolute deviation
data.loc['MAD', '250units_error'] = data.loc['Total ERROR', '250units_error'] / len(work_y_test) # Mean Absolute deviation

# Convert all columns to numeric
data = data.apply(pd.to_numeric, errors='coerce')

# find min mad and print amount units should be used
min_mad = data.loc['MAD'].min()
min_mad_unit = data.loc['MAD'].idxmin()
print("No Norm")
print("Shound be used", min_mad_unit.replace('units_error', ''), "units with MAD", min_mad)

# Set NaN values to empty string
data = data.fillna('')

data

No Norm
Shound be used 250 units with MAD 27.725784895339892


Unnamed: 0,Y,100units_pred,100units_error,200units_pred,200units_error,250units_pred,250units_error
0,219.0,128.876592,90.123408,124.529053,94.470947,266.012738,47.012738
1,70.0,148.499966,78.499966,111.73304,41.73304,40.806294,29.193706
2,202.0,155.155965,46.844035,99.678053,102.321947,207.427584,5.427584
3,230.0,237.590495,7.590495,-10.810924,240.810924,269.958889,39.958889
4,111.0,127.151438,16.151438,115.760828,4.760828,128.036008,17.036008
Total ERROR,,,239.209342,,484.097686,,138.628924
MAD,,,47.841868,,96.819537,,27.725785
