In [27]:
#!/usr/bin/env python
# coding: utf-8
'''
construct model for points cloud input based on XG-Boost
'''

import sys
root_dir = '../../'
sys.path.append(root_dir)
sys.path.append(root_dir + 'Data/corner/scripts')

import os
import psutil
import matplotlib
# matplotlib.use('agg') # so that plt works in command line
import numpy as np
import matplotlib.pyplot as plt
import sklearn.ensemble as sken
import sklearn.feature_selection as skfs
import Data_Feeder as feeder
import Metric_Recorder as recorder
import Model_Constructer as constructer

from optparse import OptionParser

In [75]:
data_path = root_dir + 'Data/corner/dw_19991231_162610_0.000000_0.000000/'
class_num = 2
line_re = '\t (?!3).*' # only corner radar
select_cols = [2,3,4,5,6,7,8,9] # all valid cols

dataset = feeder.Corner_Radar_Points_Gen_Feeder(data_path, 
                                                class_num=class_num, 
                                                use_onehot=False, 
                                                line_re=line_re, 
                                                select_cols=select_cols)
points, labels = dataset.get_all_data(allowed=True)
points = np.vstack(points)
labels = np.concatenate(labels)

print(dataset.feature_names)

['x', 'y', 'angle', 'distance', 'speed', 'rcs', 'v_x', 'v_y']


In [76]:
std_points = points.copy()
std_points -= std_points.mean(axis=0)
std_points /= std_points.std(axis=0)
std_points[np.where(std_points != std_points)] = 0

minmax_point = std_points.copy()
minmax_point = (minmax_point - minmax_point.min(axis=0)) / (minmax_point.max(axis=0) - minmax_point.min(axis=0))
minmax_point[np.where(minmax_point != minmax_point)] = 0

In [101]:
var_mat = np.zeros((points.shape[1],points.shape[1]))
for i in range(points.shape[1]):
    for j in range(i,points.shape[1]):
        var_mat[i,j] = ((points[:,i] - points[:,j])**2).sum() / points.shape[0]

In [102]:
print('%-8s' % ' ', end='')
for i in range(points.shape[1]):
    print('%-8s' % dataset.feature_names[i], end=' ')
print()
for i in range(points.shape[1]):
    print('%-8s' % dataset.feature_names[i], end=' ')
    for n in var_mat[i]:
        if n > 0:
            print('%-8.3f' % n, end=' ')
        else:
            print('%-8d' % 0, end=' ')
    print()

        x        y        angle    distance speed    rcs      v_x      v_y      
x        0        733.465  497.505  1473.130 921.689  638.998  1281.099 621.421  
y        0        0        225.865  1190.044 663.771  357.691  562.010  562.482  
angle    0        0        0        787.520  428.737  126.242  305.594  140.406  
distance 0        0        0        0        1844.512 1124.648 1073.781 868.834  
speed    0        0        0        0        0        441.044  1024.825 552.629  
rcs      0        0        0        0        0        0        394.272  246.435  
v_x      0        0        0        0        0        0        0        401.622  
v_y      0        0        0        0        0        0        0        0        


## uni-variate selection

In [25]:
transformer = skfs.GenericUnivariateSelect(skfs.chi2, 'k_best')
transformer.fit(minmax_point, labels)
for n, s, p in zip(dataset.feature_names, transformer.scores_, transformer.pvalues_):
    print("%-10s %-10.5f %-10.5f" % (n,s,p))

group_id   25.34446   0.00000   
target_id  2158.44373 0.00000   
x          9.98067    0.00158   
y          8.70843    0.00317   
angle      705.98211  0.00000   
distance   2523.04772 0.00000   
speed      1041.65435 0.00000   
rcs        2271.05261 0.00000   
v_x        46.23807   0.00000   
v_y        0.23812    0.62556   
lat_v      nan        nan       


## recursive feature elimination

In [44]:
estimator = sken.RandomForestClassifier(10)
rfe = skfs.RFE(estimator=estimator, verbose=1)
rfe.fit(points, labels)

for n, r in zip(dataset.feature_names, rfe.ranking_):
    print('%-10s %d' % (n, r))
print('=>', [dataset.feature_names[i] for i in np.argsort(rfe.ranking_)])

group_id   10
target_id  8
x          6
y          1
angle      5
distance   2
speed      7
rcs        3
v_x        4
v_y        9
lat_v      11
=> ['y', 'distance', 'rcs', 'v_x', 'angle', 'x', 'speed', 'target_id', 'v_y', 'group_id', 'lat_v']


In [58]:
estimator = sken.RandomForestClassifier(10)
cv_fold = 4
rfecv = skfs.RFECV(estimator=estimator, cv=cv_fold, verbose=1, n_jobs=cv_fold, min_features_to_select=1)
rfecv.fit(points, labels)

Fitting estimator with 11 features.
Fitting estimator with 10 features.


RFECV(cv=4,
   estimator=RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=None,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False),
   min_features_to_select=1, n_jobs=4, scoring=None, step=1, verbose=1)

In [59]:
print('%-10s %-5s %s' % ('name', 'score', 'cv_score'))
for n, r, s in zip(dataset.feature_names, rfecv.ranking_, rfecv.grid_scores_):
    print('%-10s %-5d %-.3f' % (n, r, s))
print('=>', [dataset.feature_names[i] for i in np.argsort(rfecv.ranking_)])

name       score cv_score
group_id   2     0.961
target_id  1     0.977
x          1     0.980
y          1     0.982
angle      1     0.984
distance   1     0.984
speed      1     0.985
rcs        1     0.985
v_x        1     0.986
v_y        1     0.986
lat_v      3     0.986
=> ['target_id', 'x', 'y', 'angle', 'distance', 'speed', 'rcs', 'v_x', 'v_y', 'group_id', 'lat_v']
