In [156]:
import numpy as np
import pandas as pd
from sklearn.model_selection import PredefinedSplit
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.svm import SVR

from evolutionary_search import EvolutionaryAlgorithmSearchCV
from sklearn.metrics import mean_squared_error

import warnings

In [16]:
# Load the labelled iBeacon RSSI readings and preview the first rows.
path='BLE_RSSI_dataset/iBeacon_RSSI_Labeled.csv'
# read_csv must be reached through the pandas namespace (`pd`): the bare name
# is never imported, so calling it directly raises NameError on a fresh kernel.
df = pd.read_csv(path, index_col=None)
df.head()

Unnamed: 0,location,date,b3001,b3002,b3003,b3004,b3005,b3006,b3007,b3008,b3009,b3010,b3011,b3012,b3013
0,O02,10-18-2016 11:15:21,-200,-200,-200,-200,-200,-78,-200,-200,-200,-200,-200,-200,-200
1,P01,10-18-2016 11:15:19,-200,-200,-200,-200,-200,-78,-200,-200,-200,-200,-200,-200,-200
2,P01,10-18-2016 11:15:17,-200,-200,-200,-200,-200,-77,-200,-200,-200,-200,-200,-200,-200
3,P01,10-18-2016 11:15:15,-200,-200,-200,-200,-200,-77,-200,-200,-200,-200,-200,-200,-200
4,P01,10-18-2016 11:15:13,-200,-200,-200,-200,-200,-77,-200,-200,-200,-200,-200,-200,-200


### Coordinate Segmentation (converting the location letters into numbers)

In [22]:
def pos_segmentation(x_cord):
    """Convert a single location letter into an integer grid index.

    The letter is upper-cased and its code point is subtracted from that of
    'W' (87), so 'W' maps to 0, 'V' to 1, ..., 'O' to 8, and so on.

    Parameters
    ----------
    x_cord : str
        One-character string holding the location letter (any case).

    Returns
    -------
    int
        ``ord('W') - ord(x_cord.upper())``.
    """
    letter_code = ord(x_cord.upper())
    return ord('W') - letter_code

In [30]:
# Reload the raw CSV and derive numeric coordinates from the `location` label:
# its first character becomes `x` (through pos_segmentation) and the remaining
# characters become the integer `y`. The label column itself is then dropped.
path = 'BLE_RSSI_dataset/iBeacon_RSSI_Labeled.csv'
df = (
    pd.read_csv(path, index_col=None)
    .assign(
        x=lambda frame: frame['location'].str[0].apply(pos_segmentation),
        y=lambda frame: frame['location'].str[1:].astype(int),
    )
    .drop(columns=["location"])
)

## Final Dataset

In [173]:
# Preview the first rows of the processed frame.
df.head()

Unnamed: 0,date,b3001,b3002,b3003,b3004,b3005,b3006,b3007,b3008,b3009,b3010,b3011,b3012,b3013,x,y
0,10-18-2016 11:15:21,-200,-200,-200,-200,-200,-78,-200,-200,-200,-200,-200,-200,-200,8,2
1,10-18-2016 11:15:19,-200,-200,-200,-200,-200,-78,-200,-200,-200,-200,-200,-200,-200,7,1
2,10-18-2016 11:15:17,-200,-200,-200,-200,-200,-77,-200,-200,-200,-200,-200,-200,-200,7,1
3,10-18-2016 11:15:15,-200,-200,-200,-200,-200,-77,-200,-200,-200,-200,-200,-200,-200,7,1
4,10-18-2016 11:15:13,-200,-200,-200,-200,-200,-77,-200,-200,-200,-200,-200,-200,-200,7,1


In [184]:
# Positional split of the frame into model inputs and targets.
x = df.iloc[:, 1:-2] #RSSI data: every column except the first one and the last two
y = df.iloc[:, -2:] #Coord data: the last two columns

# Train / Validation / Test Split

In [185]:
def split_data(X_train, X_val, y_train, y_val):
    """Stack the train and validation sets and build a fixed one-fold CV split.

    Parameters
    ----------
    X_train, X_val : array-like
        Feature rows for training and validation.
    y_train, y_val : array-like
        Targets matching ``X_train`` / ``X_val``.

    Returns
    -------
    X : numpy.ndarray
        ``X_train`` followed by ``X_val`` along axis 0.
    y : numpy.ndarray
        ``y_train`` followed by ``y_val`` along axis 0.
    pds : PredefinedSplit
        Splitter built with ``test_fold`` equal to -1 for every training row
        and 0 for every validation row.
    """
    n_train, n_val = len(X_train), len(X_val)
    # One fold id per stacked row: -1 for the training rows, 0 for the
    # validation rows, in the same order as the concatenation below.
    fold_ids = [-1 for _ in range(n_train)] + [0 for _ in range(n_val)]
    stacked_X = np.concatenate((X_train, X_val), axis=0)
    stacked_y = np.concatenate((y_train, y_val), axis=0)
    return stacked_X, stacked_y, PredefinedSplit(test_fold=fold_ids)

In [186]:
# 70 % of the rows go to training; the remaining 30 % is halved into a test
# set and a validation set. Both splits are seeded so that a fresh kernel
# reproduces the same partitions (and therefore comparable search results).
X_train, X_holdout, y_train, y_holdout = train_test_split(x, y, test_size=.3, random_state=0)
X_test, X_val, y_test, y_val = train_test_split(X_holdout, y_holdout, test_size=.5, random_state=0)
# Stack train + validation rows together with a predefined split for the search.
X_cv, y_cv, pds = split_data(X_train, X_val, y_train, y_val)

## Regressor Parameters

In [187]:
# Hyper-parameter grid for the random forest: 14 * 8 * 7 * 2 = 1568 combinations.
rf_params = dict(
    n_estimators=[1, 2, 5, 10, 15, 20, 23, 24, 25, 30, 35, 40, 45, 50],
    max_depth=[2, 5, 10, 15, 20, 25, 30, 50],
    min_samples_leaf=[2 ** exponent for exponent in range(7)],  # 1, 2, 4, ..., 64
    bootstrap=[True, False],
)


In [188]:
# Registry of regressors: display name -> [search space, unfitted estimator].
reg_dict={'Random Forest': [rf_params, RandomForestRegressor(random_state=0)]}

In [189]:
def get_data(estimator):
    """Look up the search space and estimator registered under a name.

    Parameters
    ----------
    estimator : str
        Key into the module-level ``reg_dict``.

    Returns
    -------
    tuple
        ``(search_space, clf)``: the first and second items of the
        registry entry.

    Raises
    ------
    KeyError
        If ``estimator`` is not a key of ``reg_dict``.
    """
    entry = reg_dict[estimator]
    return entry[0], entry[1]

## Genetic Algorithm

In [190]:
def GeneticSearch(X, y, X_test, y_test, pds, estimator, search_space_dict, estimator_type,
                  population_size=20, gene_mutation_prob=0.2, gene_crossover_prob=0.5,
                  tournament_size=3, generations_number=50, n_jobs=1, verbose=1):
    """Tune ``estimator`` with an evolutionary search and report held-out MSE.

    Parameters
    ----------
    X, y : array-like
        Data the search is fitted on; ``pds`` decides which rows are used
        for validation.
    X_test, y_test : array-like
        Held-out data used only for the final mean-squared-error report.
    pds : cross-validation splitter
        Passed as ``cv`` to ``EvolutionaryAlgorithmSearchCV``.
    estimator : estimator object
        Model whose hyper-parameters are searched.
    search_space_dict : dict
        Maps each hyper-parameter name to its list of candidate values.
    estimator_type : str
        Label printed above the results.
    population_size, gene_mutation_prob, gene_crossover_prob, tournament_size, generations_number, n_jobs, verbose
        Forwarded unchanged to ``EvolutionaryAlgorithmSearchCV``. The
        defaults are the values that used to be hard-coded here.

    Returns
    -------
    list
        ``[best_params, best_score, test_mse, test_predictions]``.

    Notes
    -----
    ``scoring=None`` is passed to the search, so ``best_score`` is in
    whatever metric the search falls back to -- presumably the estimator's
    own ``score`` method; confirm against the library. It is not the MSE
    reported for the test set.
    """
    search = EvolutionaryAlgorithmSearchCV(estimator=estimator, params=search_space_dict,
                                           scoring=None, cv=pds,
                                           verbose=verbose,
                                           population_size=population_size,
                                           gene_mutation_prob=gene_mutation_prob,
                                           gene_crossover_prob=gene_crossover_prob,
                                           tournament_size=tournament_size,
                                           generations_number=generations_number,
                                           n_jobs=n_jobs)
    search.fit(X, y)
    yp = search.best_estimator_.predict(X_test)
    # This is an error measure (lower is better), not an accuracy.
    test_mse = mean_squared_error(y_test, yp)
    print(f'\n{estimator_type}')
    print(f'Genetic Search Best param: {search.best_params_}')
    print(f'Genetic Search Best Score: {search.best_score_}')
    print(f'Genetic Search Test MSE: {test_mse}')

    return [search.best_params_, search.best_score_, test_mse, yp]


In [191]:
# Suppress all warnings process-wide from this point on.
warnings.filterwarnings("ignore")
# Run the genetic search once per listed estimator name. The list returned by
# GeneticSearch is not stored; only its printed report is kept.
for estimator in ['Random Forest']:
    search_space,clf = get_data(estimator)
    GeneticSearch(X_cv,y_cv, X_test, y_test, pds, clf, search_space, estimator)

Types [1, 1, 1, 1] and maxint [13, 7, 6, 1] detected
--- Evolve in 1568 possible combinations ---
gen	nevals	avg     	min     	max     	std     
0  	20    	0.703488	0.407744	0.899498	0.183972
1  	11    	0.875287	0.704326	0.899498	0.0444714
2  	14    	0.891214	0.881743	0.899498	0.00699632
3  	12    	0.896555	0.885443	0.899498	0.00548431
4  	15    	0.899502	0.899498	0.89957 	1.55691e-05
5  	9     	0.877956	0.465803	0.902071	0.0945561  
6  	5     	0.898586	0.885871	0.902071	0.00419568 
7  	11    	0.882209	0.704326	0.902071	0.0566478  
8  	11    	0.887444	0.625928	0.902071	0.0600729  
9  	12    	0.893621	0.749475	0.902071	0.0332092  
10 	10    	0.878706	0.465803	0.902071	0.0948404  
11 	12    	0.899357	0.852553	0.902071	0.0107874  
12 	12    	0.902071	0.902071	0.902071	0          
13 	10    	0.901263	0.888169	0.902071	0.00304395 
14 	16    	0.901247	0.888169	0.902071	0.00305214 
15 	20    	0.894267	0.749475	0.902071	0.0332262  
16 	14    	0.887521	0.643911	0.902071	0.0561032  
17 	16    	0